1 | /* |
2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System |
33 | * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University |
34 | * All Rights Reserved. |
35 | * |
36 | * Permission to use, copy, modify and distribute this software and its |
37 | * documentation is hereby granted, provided that both the copyright |
38 | * notice and this permission notice appear in all copies of the |
39 | * software, derivative works or modified versions, and any portions |
40 | * thereof, and that both notices appear in supporting documentation. |
41 | * |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
45 | * |
46 | * Carnegie Mellon requests users of this software to return to |
47 | * |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
49 | * School of Computer Science |
50 | * Carnegie Mellon University |
51 | * Pittsburgh PA 15213-3890 |
52 | * |
53 | * any improvements or extensions that they make and grant Carnegie Mellon |
54 | * the rights to redistribute these changes. |
55 | */ |
56 | /* |
57 | */ |
58 | /* |
59 | * File: vm/vm_pageout.c |
60 | * Author: Avadis Tevanian, Jr., Michael Wayne Young |
61 | * Date: 1985 |
62 | * |
63 | * The proverbial page-out daemon. |
64 | */ |
65 | |
66 | #include <stdint.h> |
67 | #include <ptrauth.h> |
68 | |
69 | #include <debug.h> |
70 | |
71 | #include <mach/mach_types.h> |
72 | #include <mach/memory_object.h> |
73 | #include <mach/mach_host_server.h> |
74 | #include <mach/upl.h> |
75 | #include <mach/vm_map.h> |
76 | #include <mach/vm_param.h> |
77 | #include <mach/vm_statistics.h> |
78 | #include <mach/sdt.h> |
79 | |
80 | #include <kern/kern_types.h> |
81 | #include <kern/counter.h> |
82 | #include <kern/host_statistics.h> |
83 | #include <kern/machine.h> |
84 | #include <kern/misc_protos.h> |
85 | #include <kern/sched.h> |
86 | #include <kern/thread.h> |
87 | #include <kern/kalloc.h> |
88 | #include <kern/zalloc_internal.h> |
89 | #include <kern/policy_internal.h> |
90 | #include <kern/thread_group.h> |
91 | |
92 | #include <os/log.h> |
93 | |
94 | #include <sys/kdebug_triage.h> |
95 | |
96 | #include <machine/vm_tuning.h> |
97 | #include <machine/commpage.h> |
98 | |
99 | #include <vm/pmap.h> |
100 | #include <vm/vm_compressor_pager.h> |
101 | #include <vm/vm_fault.h> |
102 | #include <vm/vm_map_internal.h> |
103 | #include <vm/vm_object.h> |
104 | #include <vm/vm_page.h> |
105 | #include <vm/vm_pageout.h> |
106 | #include <vm/vm_protos.h> /* must be last */ |
107 | #include <vm/memory_object.h> |
108 | #include <vm/vm_purgeable_internal.h> |
109 | #include <vm/vm_shared_region.h> |
110 | #include <vm/vm_compressor.h> |
111 | |
112 | #include <san/kasan.h> |
113 | |
114 | #if CONFIG_PHANTOM_CACHE |
115 | #include <vm/vm_phantom_cache.h> |
116 | #endif |
117 | |
118 | #if UPL_DEBUG |
119 | #include <libkern/OSDebug.h> |
120 | #endif |
121 | |
122 | extern int cs_debug; |
123 | |
124 | #if CONFIG_MBUF_MCACHE |
125 | extern void mbuf_drain(boolean_t); |
126 | #endif /* CONFIG_MBUF_MCACHE */ |
127 | |
128 | #if VM_PRESSURE_EVENTS |
129 | #if CONFIG_JETSAM |
130 | extern unsigned int memorystatus_available_pages; |
131 | extern unsigned int memorystatus_available_pages_pressure; |
132 | extern unsigned int memorystatus_available_pages_critical; |
133 | #else /* CONFIG_JETSAM */ |
134 | extern uint64_t memorystatus_available_pages; |
135 | extern uint64_t memorystatus_available_pages_pressure; |
136 | extern uint64_t memorystatus_available_pages_critical; |
137 | #endif /* CONFIG_JETSAM */ |
138 | |
139 | extern unsigned int memorystatus_frozen_count; |
140 | extern unsigned int memorystatus_suspended_count; |
141 | extern vm_pressure_level_t memorystatus_vm_pressure_level; |
142 | |
143 | extern lck_mtx_t memorystatus_jetsam_fg_band_lock; |
144 | extern uint32_t memorystatus_jetsam_fg_band_waiters; |
145 | |
146 | void vm_pressure_response(void); |
147 | extern void consider_vm_pressure_events(void); |
148 | |
149 | #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 |
150 | #endif /* VM_PRESSURE_EVENTS */ |
151 | |
152 | SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_scan_thread; |
153 | SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_gc_thread; |
154 | #if CONFIG_VPS_DYNAMIC_PRIO |
TUNABLE(bool, vps_dynamic_priority_enabled, "vps_dynamic_priority_enabled", false);
156 | #else |
157 | const bool vps_dynamic_priority_enabled = false; |
158 | #endif |
159 | boolean_t vps_yield_for_pgqlockwaiters = TRUE; |
160 | |
161 | #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */ |
162 | #if !XNU_TARGET_OS_OSX |
163 | #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024 |
164 | #else /* !XNU_TARGET_OS_OSX */ |
165 | #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 |
166 | #endif /* !XNU_TARGET_OS_OSX */ |
167 | #endif |
168 | |
169 | #ifndef VM_PAGEOUT_DEADLOCK_RELIEF |
170 | #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */ |
171 | #endif |
172 | |
173 | #ifndef VM_PAGE_LAUNDRY_MAX |
174 | #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */ |
#endif /* VM_PAGE_LAUNDRY_MAX */
176 | |
177 | #ifndef VM_PAGEOUT_BURST_WAIT |
178 | #define VM_PAGEOUT_BURST_WAIT 1 /* milliseconds */ |
179 | #endif /* VM_PAGEOUT_BURST_WAIT */ |
180 | |
181 | #ifndef VM_PAGEOUT_EMPTY_WAIT |
182 | #define VM_PAGEOUT_EMPTY_WAIT 50 /* milliseconds */ |
183 | #endif /* VM_PAGEOUT_EMPTY_WAIT */ |
184 | |
185 | #ifndef VM_PAGEOUT_DEADLOCK_WAIT |
186 | #define VM_PAGEOUT_DEADLOCK_WAIT 100 /* milliseconds */ |
187 | #endif /* VM_PAGEOUT_DEADLOCK_WAIT */ |
188 | |
189 | #ifndef VM_PAGEOUT_IDLE_WAIT |
190 | #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */ |
191 | #endif /* VM_PAGEOUT_IDLE_WAIT */ |
192 | |
193 | #ifndef VM_PAGEOUT_SWAP_WAIT |
194 | #define VM_PAGEOUT_SWAP_WAIT 10 /* milliseconds */ |
195 | #endif /* VM_PAGEOUT_SWAP_WAIT */ |
196 | |
197 | |
198 | #ifndef VM_PAGE_SPECULATIVE_TARGET |
199 | #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage)) |
200 | #endif /* VM_PAGE_SPECULATIVE_TARGET */ |
201 | |
202 | |
203 | /* |
204 | * To obtain a reasonable LRU approximation, the inactive queue |
205 | * needs to be large enough to give pages on it a chance to be |
206 | * referenced a second time. This macro defines the fraction |
207 | * of active+inactive pages that should be inactive. |
208 | * The pageout daemon uses it to update vm_page_inactive_target. |
209 | * |
210 | * If vm_page_free_count falls below vm_page_free_target and |
211 | * vm_page_inactive_count is below vm_page_inactive_target, |
212 | * then the pageout daemon starts running. |
213 | */ |
214 | |
215 | #ifndef VM_PAGE_INACTIVE_TARGET |
216 | #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2) |
217 | #endif /* VM_PAGE_INACTIVE_TARGET */ |
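
#if 0
/*
 * Illustrative sketch only (never compiled): with a hypothetical pool of
 * 200000 active+inactive pageable pages, the pageout daemon aims to keep
 * half of them (100000) on the inactive queue. The page count is made up
 * for illustration.
 */
static unsigned int
vm_page_inactive_target_example(void)
{
    unsigned int avail = 200000;            /* hypothetical page count */

    return VM_PAGE_INACTIVE_TARGET(avail);  /* 200000 * 1 / 2 == 100000 */
}
#endif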
218 | |
219 | /* |
220 | * Once the pageout daemon starts running, it keeps going |
221 | * until vm_page_free_count meets or exceeds vm_page_free_target. |
222 | */ |
223 | |
224 | #ifndef VM_PAGE_FREE_TARGET |
225 | #if !XNU_TARGET_OS_OSX |
226 | #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100) |
227 | #else /* !XNU_TARGET_OS_OSX */ |
228 | #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80) |
229 | #endif /* !XNU_TARGET_OS_OSX */ |
230 | #endif /* VM_PAGE_FREE_TARGET */ |
231 | |
232 | |
233 | /* |
234 | * The pageout daemon always starts running once vm_page_free_count |
235 | * falls below vm_page_free_min. |
236 | */ |
237 | |
238 | #ifndef VM_PAGE_FREE_MIN |
239 | #if !XNU_TARGET_OS_OSX |
240 | #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200) |
241 | #else /* !XNU_TARGET_OS_OSX */ |
242 | #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) |
243 | #endif /* !XNU_TARGET_OS_OSX */ |
244 | #endif /* VM_PAGE_FREE_MIN */ |
245 | |
246 | #if !XNU_TARGET_OS_OSX |
247 | #define VM_PAGE_FREE_RESERVED_LIMIT 100 |
248 | #define VM_PAGE_FREE_MIN_LIMIT 1500 |
249 | #define VM_PAGE_FREE_TARGET_LIMIT 2000 |
250 | #else /* !XNU_TARGET_OS_OSX */ |
251 | #define VM_PAGE_FREE_RESERVED_LIMIT 1700 |
252 | #define VM_PAGE_FREE_MIN_LIMIT 3500 |
253 | #define VM_PAGE_FREE_TARGET_LIMIT 4000 |
254 | #endif /* !XNU_TARGET_OS_OSX */ |
255 | |
256 | /* |
257 | * When vm_page_free_count falls below vm_page_free_reserved, |
258 | * only vm-privileged threads can allocate pages. vm-privilege |
259 | * allows the pageout daemon and default pager (and any other |
260 | * associated threads needed for default pageout) to continue |
261 | * operation by dipping into the reserved pool of pages. |
262 | */ |
263 | |
264 | #ifndef VM_PAGE_FREE_RESERVED |
265 | #define VM_PAGE_FREE_RESERVED(n) \ |
266 | ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n)) |
267 | #endif /* VM_PAGE_FREE_RESERVED */ |
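
#if 0
/*
 * Illustrative sketch only (never compiled): how the free-page thresholds
 * relate for a hypothetical pool of 100000 pages on an XNU_TARGET_OS_OSX
 * build. The VM_PAGE_FREE_*_LIMIT caps above are applied elsewhere during
 * VM startup and are not exercised here; the page count is made up.
 */
static void
vm_page_free_threshold_example(void)
{
    unsigned int pages = 100000;                    /* hypothetical page count */
    unsigned int free_target, free_min, free_reserved;

    free_target = VM_PAGE_FREE_TARGET(pages);       /* 15 + 100000 / 80  == 1265 */
    free_min = VM_PAGE_FREE_MIN(pages);             /* 10 + 100000 / 100 == 1010 */
    free_reserved = VM_PAGE_FREE_RESERVED(0);       /* 6 * VM_PAGE_LAUNDRY_MAX == 768 */

    /* reserved < min < target: privileged-only allocs, daemon must run, daemon may stop */
    assert(free_reserved < free_min && free_min < free_target);
}
#endif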
268 | |
269 | /* |
270 | * When we dequeue pages from the inactive list, they are |
271 | * reactivated (ie, put back on the active queue) if referenced. |
272 | * However, it is possible to starve the free list if other |
273 | * processors are referencing pages faster than we can turn off |
274 | * the referenced bit. So we limit the number of reactivations |
275 | * we will make per call of vm_pageout_scan(). |
276 | */ |
277 | #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000 |
278 | |
279 | #ifndef VM_PAGE_REACTIVATE_LIMIT |
280 | #if !XNU_TARGET_OS_OSX |
281 | #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2) |
282 | #else /* !XNU_TARGET_OS_OSX */ |
283 | #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX)) |
284 | #endif /* !XNU_TARGET_OS_OSX */ |
285 | #endif /* VM_PAGE_REACTIVATE_LIMIT */ |
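
#if 0
/*
 * Illustrative sketch only (never compiled): the cap on reactivations per
 * vm_pageout_scan() pass. With a hypothetical 200000 pageable pages, the
 * XNU_TARGET_OS_OSX flavor of the macro evaluates to
 * MAX(200000 / 20, VM_PAGE_REACTIVATE_LIMIT_MAX) == 20000.
 */
static unsigned int
vm_page_reactivate_limit_example(void)
{
    unsigned int avail = 200000;            /* hypothetical page count */

    return VM_PAGE_REACTIVATE_LIMIT(avail);
}
#endif
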
286 | #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000 |
287 | |
288 | int vm_pageout_protect_realtime = true; |
289 | |
290 | extern boolean_t hibernate_cleaning_in_progress; |
291 | |
292 | struct pgo_iothread_state pgo_iothread_internal_state[MAX_COMPRESSOR_THREAD_COUNT]; |
293 | struct pgo_iothread_state pgo_iothread_external_state; |
294 | |
295 | #if VM_PRESSURE_EVENTS |
296 | void vm_pressure_thread(void); |
297 | |
298 | boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void); |
299 | boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void); |
300 | |
301 | boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void); |
302 | boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void); |
303 | #endif |
304 | |
305 | static void vm_pageout_iothread_external(struct pgo_iothread_state *, wait_result_t); |
306 | static void vm_pageout_iothread_internal(struct pgo_iothread_state *, wait_result_t); |
307 | static void vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *, boolean_t); |
308 | |
309 | extern void vm_pageout_continue(void); |
310 | extern void vm_pageout_scan(void); |
311 | |
312 | boolean_t vm_pageout_running = FALSE; |
313 | |
314 | uint32_t vm_page_upl_tainted = 0; |
315 | uint32_t vm_page_iopl_tainted = 0; |
316 | |
317 | #if XNU_TARGET_OS_OSX |
318 | static boolean_t vm_pageout_waiter = FALSE; |
319 | #endif /* XNU_TARGET_OS_OSX */ |
320 | |
321 | |
322 | #if DEVELOPMENT || DEBUG |
323 | struct vm_pageout_debug vm_pageout_debug; |
324 | #endif |
325 | struct vm_pageout_vminfo vm_pageout_vminfo; |
326 | struct vm_pageout_state vm_pageout_state; |
327 | struct vm_config vm_config; |
328 | |
329 | struct vm_pageout_queue vm_pageout_queue_internal VM_PAGE_PACKED_ALIGNED; |
330 | struct vm_pageout_queue vm_pageout_queue_external VM_PAGE_PACKED_ALIGNED; |
331 | #if DEVELOPMENT || DEBUG |
332 | struct vm_pageout_queue vm_pageout_queue_benchmark VM_PAGE_PACKED_ALIGNED; |
333 | #endif /* DEVELOPMENT || DEBUG */ |
334 | |
335 | int vm_upl_wait_for_pages = 0; |
336 | vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; |
337 | |
338 | boolean_t(*volatile consider_buffer_cache_collect)(int) = NULL; |
339 | |
340 | int vm_debug_events = 0; |
341 | |
LCK_GRP_DECLARE(vm_pageout_lck_grp, "vm_pageout");
343 | |
344 | #if CONFIG_MEMORYSTATUS |
345 | extern void memorystatus_kill_on_vps_starvation(void); |
346 | |
347 | uint32_t vm_pageout_memorystatus_fb_factor_nr = 5; |
348 | uint32_t vm_pageout_memorystatus_fb_factor_dr = 2; |
349 | |
350 | #endif |
351 | |
352 | #if __AMP__ |
353 | |
354 | |
355 | /* |
356 | * Bind compressor threads to e-cores unless there are multiple non-e clusters |
357 | */ |
358 | #if (MAX_CPU_CLUSTERS > 2) |
359 | #define VM_COMPRESSOR_EBOUND_DEFAULT false |
360 | #else |
361 | #define VM_COMPRESSOR_EBOUND_DEFAULT true |
362 | #endif |
363 | |
TUNABLE(bool, vm_compressor_ebound, "vmcomp_ecluster", VM_COMPRESSOR_EBOUND_DEFAULT);
365 | int vm_pgo_pbound = 0; |
366 | extern void thread_bind_cluster_type(thread_t, char, bool); |
367 | |
368 | #endif /* __AMP__ */ |
369 | |
370 | |
371 | /* |
372 | * Routine: vm_pageout_object_terminate |
373 | * Purpose: |
374 | * Destroy the pageout_object, and perform all of the |
375 | * required cleanup actions. |
376 | * |
377 | * In/Out conditions: |
378 | * The object must be locked, and will be returned locked. |
379 | */ |
380 | void |
381 | vm_pageout_object_terminate( |
382 | vm_object_t object) |
383 | { |
384 | vm_object_t shadow_object; |
385 | |
386 | /* |
387 | * Deal with the deallocation (last reference) of a pageout object |
388 | * (used for cleaning-in-place) by dropping the paging references/ |
389 | * freeing pages in the original object. |
390 | */ |
391 | |
392 | assert(object->pageout); |
393 | shadow_object = object->shadow; |
394 | vm_object_lock(shadow_object); |
395 | |
396 | while (!vm_page_queue_empty(&object->memq)) { |
397 | vm_page_t p, m; |
398 | vm_object_offset_t offset; |
399 | |
400 | p = (vm_page_t) vm_page_queue_first(&object->memq); |
401 | |
402 | assert(p->vmp_private); |
403 | assert(p->vmp_free_when_done); |
404 | p->vmp_free_when_done = FALSE; |
405 | assert(!p->vmp_cleaning); |
406 | assert(!p->vmp_laundry); |
407 | |
408 | offset = p->vmp_offset; |
409 | VM_PAGE_FREE(p); |
410 | p = VM_PAGE_NULL; |
411 | |
m = vm_page_lookup(shadow_object,
offset + object->vo_shadow_offset);
414 | |
415 | if (m == VM_PAGE_NULL) { |
416 | continue; |
417 | } |
418 | |
419 | assert((m->vmp_dirty) || (m->vmp_precious) || |
420 | (m->vmp_busy && m->vmp_cleaning)); |
421 | |
422 | /* |
423 | * Handle the trusted pager throttle. |
424 | * Also decrement the burst throttle (if external). |
425 | */ |
426 | vm_page_lock_queues(); |
427 | if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) { |
vm_pageout_throttle_up(m);
429 | } |
430 | |
431 | /* |
432 | * Handle the "target" page(s). These pages are to be freed if |
433 | * successfully cleaned. Target pages are always busy, and are |
434 | * wired exactly once. The initial target pages are not mapped, |
435 | * (so cannot be referenced or modified) but converted target |
436 | * pages may have been modified between the selection as an |
437 | * adjacent page and conversion to a target. |
438 | */ |
439 | if (m->vmp_free_when_done) { |
440 | assert(m->vmp_busy); |
441 | assert(m->vmp_q_state == VM_PAGE_IS_WIRED); |
442 | assert(m->vmp_wire_count == 1); |
443 | m->vmp_cleaning = FALSE; |
444 | m->vmp_free_when_done = FALSE; |
445 | /* |
446 | * Revoke all access to the page. Since the object is |
447 | * locked, and the page is busy, this prevents the page |
448 | * from being dirtied after the pmap_disconnect() call |
449 | * returns. |
450 | * |
* Since the page is left "dirty" but "not modified", we
452 | * can detect whether the page was redirtied during |
453 | * pageout by checking the modify state. |
454 | */ |
if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
456 | SET_PAGE_DIRTY(m, FALSE); |
457 | } else { |
458 | m->vmp_dirty = FALSE; |
459 | } |
460 | |
461 | if (m->vmp_dirty) { |
vm_page_unwire(m, TRUE); /* reactivates */
463 | counter_inc(&vm_statistics_reactivations); |
464 | PAGE_WAKEUP_DONE(m); |
465 | } else { |
vm_page_free(m); /* clears busy, etc. */
467 | } |
468 | vm_page_unlock_queues(); |
469 | continue; |
470 | } |
471 | /* |
472 | * Handle the "adjacent" pages. These pages were cleaned in |
473 | * place, and should be left alone. |
474 | * If prep_pin_count is nonzero, then someone is using the |
475 | * page, so make it active. |
476 | */ |
477 | if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) { |
478 | if (m->vmp_reference) { |
vm_page_activate(m);
} else {
vm_page_deactivate(m);
482 | } |
483 | } |
484 | if (m->vmp_overwriting) { |
485 | /* |
486 | * the (COPY_OUT_FROM == FALSE) request_page_list case |
487 | */ |
488 | if (m->vmp_busy) { |
489 | /* |
490 | * We do not re-set m->vmp_dirty ! |
491 | * The page was busy so no extraneous activity |
492 | * could have occurred. COPY_INTO is a read into the |
493 | * new pages. CLEAN_IN_PLACE does actually write |
494 | * out the pages but handling outside of this code |
495 | * will take care of resetting dirty. We clear the |
496 | * modify however for the Programmed I/O case. |
497 | */ |
pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
499 | |
500 | m->vmp_busy = FALSE; |
501 | m->vmp_absent = FALSE; |
502 | } else { |
503 | /* |
504 | * alternate (COPY_OUT_FROM == FALSE) request_page_list case |
505 | * Occurs when the original page was wired |
506 | * at the time of the list request |
507 | */ |
508 | assert(VM_PAGE_WIRED(m)); |
vm_page_unwire(m, TRUE); /* reactivates */
510 | } |
511 | m->vmp_overwriting = FALSE; |
512 | } else { |
513 | m->vmp_dirty = FALSE; |
514 | } |
515 | m->vmp_cleaning = FALSE; |
516 | |
517 | /* |
518 | * Wakeup any thread waiting for the page to be un-cleaning. |
519 | */ |
520 | PAGE_WAKEUP(m); |
521 | vm_page_unlock_queues(); |
522 | } |
523 | /* |
524 | * Account for the paging reference taken in vm_paging_object_allocate. |
525 | */ |
526 | vm_object_activity_end(shadow_object); |
527 | vm_object_unlock(shadow_object); |
528 | |
529 | assert(object->ref_count == 0); |
530 | assert(object->paging_in_progress == 0); |
531 | assert(object->activity_in_progress == 0); |
532 | assert(object->resident_page_count == 0); |
533 | return; |
534 | } |
535 | |
536 | /* |
537 | * Routine: vm_pageclean_setup |
538 | * |
* Purpose: set up a page to be cleaned (made non-dirty), but not
540 | * necessarily flushed from the VM page cache. |
541 | * This is accomplished by cleaning in place. |
542 | * |
543 | * The page must not be busy, and new_object |
544 | * must be locked. |
545 | * |
546 | */ |
547 | static void |
548 | vm_pageclean_setup( |
549 | vm_page_t m, |
550 | vm_page_t new_m, |
551 | vm_object_t new_object, |
552 | vm_object_offset_t new_offset) |
553 | { |
554 | assert(!m->vmp_busy); |
555 | #if 0 |
556 | assert(!m->vmp_cleaning); |
557 | #endif |
558 | |
pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
560 | |
561 | /* |
562 | * Mark original page as cleaning in place. |
563 | */ |
564 | m->vmp_cleaning = TRUE; |
565 | SET_PAGE_DIRTY(m, FALSE); |
566 | m->vmp_precious = FALSE; |
567 | |
568 | /* |
569 | * Convert the fictitious page to a private shadow of |
570 | * the real page. |
571 | */ |
572 | assert(new_m->vmp_fictitious); |
573 | assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr); |
574 | new_m->vmp_fictitious = FALSE; |
575 | new_m->vmp_private = TRUE; |
576 | new_m->vmp_free_when_done = TRUE; |
577 | VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m)); |
578 | |
579 | vm_page_lockspin_queues(); |
vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
581 | vm_page_unlock_queues(); |
582 | |
vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
584 | assert(!new_m->vmp_wanted); |
585 | new_m->vmp_busy = FALSE; |
586 | } |
587 | |
588 | /* |
589 | * Routine: vm_pageout_initialize_page |
590 | * Purpose: |
591 | * Causes the specified page to be initialized in |
592 | * the appropriate memory object. This routine is used to push |
593 | * pages into a copy-object when they are modified in the |
594 | * permanent object. |
595 | * |
596 | * The page is moved to a temporary object and paged out. |
597 | * |
598 | * In/out conditions: |
599 | * The page in question must not be on any pageout queues. |
600 | * The object to which it belongs must be locked. |
601 | * The page must be busy, but not hold a paging reference. |
602 | * |
603 | * Implementation: |
604 | * Move this page to a completely new object. |
605 | */ |
606 | void |
607 | vm_pageout_initialize_page( |
608 | vm_page_t m) |
609 | { |
610 | vm_object_t object; |
611 | vm_object_offset_t paging_offset; |
memory_object_t pager;
613 | |
614 | assert(VM_CONFIG_COMPRESSOR_IS_PRESENT); |
615 | |
616 | object = VM_PAGE_OBJECT(m); |
617 | |
618 | assert(m->vmp_busy); |
619 | assert(object->internal); |
620 | |
621 | /* |
622 | * Verify that we really want to clean this page |
623 | */ |
624 | assert(!m->vmp_absent); |
625 | assert(m->vmp_dirty); |
626 | |
627 | /* |
628 | * Create a paging reference to let us play with the object. |
629 | */ |
630 | paging_offset = m->vmp_offset + object->paging_offset; |
631 | |
632 | if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) { |
633 | panic("reservation without pageout?" ); /* alan */ |
634 | |
635 | VM_PAGE_FREE(m); |
636 | vm_object_unlock(object); |
637 | |
638 | return; |
639 | } |
640 | |
641 | /* |
642 | * If there's no pager, then we can't clean the page. This should |
643 | * never happen since this should be a copy object and therefore not |
644 | * an external object, so the pager should always be there. |
645 | */ |
646 | |
647 | pager = object->pager; |
648 | |
649 | if (pager == MEMORY_OBJECT_NULL) { |
650 | panic("missing pager for copy object" ); |
651 | |
652 | VM_PAGE_FREE(m); |
653 | return; |
654 | } |
655 | |
656 | /* |
657 | * set the page for future call to vm_fault_list_request |
658 | */ |
pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
660 | SET_PAGE_DIRTY(m, FALSE); |
661 | |
662 | /* |
663 | * keep the object from collapsing or terminating |
664 | */ |
665 | vm_object_paging_begin(object); |
666 | vm_object_unlock(object); |
667 | |
668 | /* |
669 | * Write the data to its pager. |
670 | * Note that the data is passed by naming the new object, |
671 | * not a virtual address; the pager interface has been |
672 | * manipulated to use the "internal memory" data type. |
673 | * [The object reference from its allocation is donated |
674 | * to the eventual recipient.] |
675 | */ |
memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
677 | |
678 | vm_object_lock(object); |
679 | vm_object_paging_end(object); |
680 | } |
681 | |
682 | |
683 | /* |
684 | * vm_pageout_cluster: |
685 | * |
686 | * Given a page, queue it to the appropriate I/O thread, |
687 | * which will page it out and attempt to clean adjacent pages |
688 | * in the same operation. |
689 | * |
690 | * The object and queues must be locked. We will take a |
691 | * paging reference to prevent deallocation or collapse when we |
692 | * release the object lock back at the call site. The I/O thread |
* is responsible for consuming this reference.
694 | * |
695 | * The page must not be on any pageout queue. |
696 | */ |
697 | #if DEVELOPMENT || DEBUG |
698 | vmct_stats_t vmct_stats; |
699 | |
700 | int32_t vmct_active = 0; |
701 | uint64_t vm_compressor_epoch_start = 0; |
702 | uint64_t vm_compressor_epoch_stop = 0; |
703 | |
704 | typedef enum vmct_state_t { |
705 | VMCT_IDLE, |
706 | VMCT_AWAKENED, |
707 | VMCT_ACTIVE, |
708 | } vmct_state_t; |
709 | vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT]; |
710 | #endif |
711 | |
712 | |
713 | |
714 | static void |
715 | vm_pageout_cluster_to_queue(vm_page_t m, struct vm_pageout_queue *q) |
716 | { |
717 | vm_object_t object = VM_PAGE_OBJECT(m); |
718 | |
719 | VM_PAGE_CHECK(m); |
720 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
721 | vm_object_lock_assert_exclusive(object); |
722 | |
723 | /* |
724 | * Make sure it's OK to page this out. |
725 | */ |
726 | assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m))); |
727 | assert(!m->vmp_cleaning && !m->vmp_laundry); |
728 | assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q); |
729 | |
730 | /* |
731 | * protect the object from collapse or termination |
732 | */ |
733 | vm_object_activity_begin(object); |
734 | |
735 | |
736 | /* |
737 | * pgo_laundry count is tied to the laundry bit |
738 | */ |
739 | m->vmp_laundry = TRUE; |
740 | q->pgo_laundry++; |
741 | |
742 | m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q; |
743 | vm_page_queue_enter(&q->pgo_pending, m, vmp_pageq); |
744 | |
745 | // the benchmark queue will be woken up independently by the benchmark itself |
746 | if ( |
747 | object->internal == TRUE |
748 | #if DEVELOPMENT || DEBUG |
749 | && q != &vm_pageout_queue_benchmark |
750 | #endif |
751 | ) { |
752 | assert(VM_CONFIG_COMPRESSOR_IS_PRESENT); |
753 | m->vmp_busy = TRUE; |
754 | // Wake up the first compressor thread. It will wake subsequent threads if necessary. |
sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup, pgo_iothread_internal_state[0].pgo_iothread);
} else {
sched_cond_signal(&pgo_iothread_external_state.pgo_wakeup, pgo_iothread_external_state.pgo_iothread);
758 | } |
759 | VM_PAGE_CHECK(m); |
760 | } |
761 | |
762 | void |
763 | vm_pageout_cluster(vm_page_t m) |
764 | { |
765 | struct vm_pageout_queue *q; |
766 | vm_object_t object = VM_PAGE_OBJECT(m); |
767 | if (object->internal) { |
768 | q = &vm_pageout_queue_internal; |
769 | } else { |
770 | q = &vm_pageout_queue_external; |
771 | } |
772 | vm_pageout_cluster_to_queue(m, q); |
773 | } |
774 | |
775 | |
776 | /* |
777 | * A page is back from laundry or we are stealing it back from |
778 | * the laundering state. See if there are some pages waiting to |
779 | * go to laundry and if we can let some of them go now. |
780 | * |
781 | * Object and page queues must be locked. |
782 | */ |
783 | void |
784 | vm_pageout_throttle_up( |
785 | vm_page_t m) |
786 | { |
787 | struct vm_pageout_queue *q; |
788 | vm_object_t m_object; |
789 | |
790 | m_object = VM_PAGE_OBJECT(m); |
791 | |
792 | assert(m_object != VM_OBJECT_NULL); |
793 | assert(!is_kernel_object(m_object)); |
794 | |
795 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
796 | vm_object_lock_assert_exclusive(m_object); |
797 | |
798 | if (m_object->internal == TRUE) { |
799 | q = &vm_pageout_queue_internal; |
800 | } else { |
801 | q = &vm_pageout_queue_external; |
802 | } |
803 | |
804 | if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) { |
805 | vm_page_queue_remove(&q->pgo_pending, m, vmp_pageq); |
806 | m->vmp_q_state = VM_PAGE_NOT_ON_Q; |
807 | |
808 | VM_PAGE_ZERO_PAGEQ_ENTRY(m); |
809 | |
810 | vm_object_activity_end(m_object); |
811 | |
812 | VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1); |
813 | } |
814 | if (m->vmp_laundry == TRUE) { |
815 | m->vmp_laundry = FALSE; |
816 | q->pgo_laundry--; |
817 | |
818 | if (q->pgo_throttled == TRUE) { |
819 | q->pgo_throttled = FALSE; |
820 | thread_wakeup((event_t) &q->pgo_laundry); |
821 | } |
822 | if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { |
823 | q->pgo_draining = FALSE; |
824 | thread_wakeup((event_t) (&q->pgo_laundry + 1)); |
825 | } |
826 | VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1); |
827 | } |
828 | } |
829 | |
830 | |
831 | static void |
832 | vm_pageout_throttle_up_batch( |
833 | struct vm_pageout_queue *q, |
834 | int batch_cnt) |
835 | { |
836 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
837 | |
838 | VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt); |
839 | |
840 | q->pgo_laundry -= batch_cnt; |
841 | |
842 | if (q->pgo_throttled == TRUE) { |
843 | q->pgo_throttled = FALSE; |
844 | thread_wakeup((event_t) &q->pgo_laundry); |
845 | } |
846 | if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { |
847 | q->pgo_draining = FALSE; |
848 | thread_wakeup((event_t) (&q->pgo_laundry + 1)); |
849 | } |
850 | } |
851 | |
852 | |
853 | |
854 | /* |
855 | * VM memory pressure monitoring. |
856 | * |
857 | * vm_pageout_scan() keeps track of the number of pages it considers and |
858 | * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now]. |
859 | * |
860 | * compute_memory_pressure() is called every second from compute_averages() |
861 | * and moves "vm_pageout_stat_now" forward, to start accumulating the number |
* of reclaimed pages in a new vm_pageout_stat[] bucket.
863 | * |
864 | * mach_vm_pressure_monitor() collects past statistics about memory pressure. |
865 | * The caller provides the number of seconds ("nsecs") worth of statistics |
866 | * it wants, up to 30 seconds. |
867 | * It computes the number of pages reclaimed in the past "nsecs" seconds and |
868 | * also returns the number of pages the system still needs to reclaim at this |
869 | * moment in time. |
870 | */ |
871 | #if DEVELOPMENT || DEBUG |
872 | #define VM_PAGEOUT_STAT_SIZE (30 * 8) + 1 |
873 | #else |
874 | #define VM_PAGEOUT_STAT_SIZE (1 * 8) + 1 |
875 | #endif |
876 | struct vm_pageout_stat { |
877 | unsigned long vm_page_active_count; |
878 | unsigned long vm_page_speculative_count; |
879 | unsigned long vm_page_inactive_count; |
880 | unsigned long vm_page_anonymous_count; |
881 | |
882 | unsigned long vm_page_free_count; |
883 | unsigned long vm_page_wire_count; |
884 | unsigned long vm_page_compressor_count; |
885 | |
886 | unsigned long vm_page_pages_compressed; |
887 | unsigned long vm_page_pageable_internal_count; |
888 | unsigned long vm_page_pageable_external_count; |
889 | unsigned long vm_page_xpmapped_external_count; |
890 | |
891 | unsigned int pages_grabbed; |
892 | unsigned int pages_freed; |
893 | |
894 | unsigned int pages_compressed; |
895 | unsigned int pages_grabbed_by_compressor; |
896 | unsigned int failed_compressions; |
897 | |
898 | unsigned int pages_evicted; |
899 | unsigned int pages_purged; |
900 | |
901 | unsigned int considered; |
902 | unsigned int considered_bq_internal; |
903 | unsigned int considered_bq_external; |
904 | |
905 | unsigned int skipped_external; |
906 | unsigned int skipped_internal; |
907 | unsigned int filecache_min_reactivations; |
908 | |
909 | unsigned int freed_speculative; |
910 | unsigned int freed_cleaned; |
911 | unsigned int freed_internal; |
912 | unsigned int freed_external; |
913 | |
914 | unsigned int cleaned_dirty_external; |
915 | unsigned int cleaned_dirty_internal; |
916 | |
917 | unsigned int inactive_referenced; |
918 | unsigned int inactive_nolock; |
919 | unsigned int reactivation_limit_exceeded; |
920 | unsigned int forced_inactive_reclaim; |
921 | |
922 | unsigned int throttled_internal_q; |
923 | unsigned int throttled_external_q; |
924 | |
925 | unsigned int phantom_ghosts_found; |
926 | unsigned int phantom_ghosts_added; |
927 | |
928 | unsigned int vm_page_realtime_count; |
929 | unsigned int forcereclaimed_sharedcache; |
930 | unsigned int forcereclaimed_realtime; |
931 | unsigned int protected_sharedcache; |
932 | unsigned int protected_realtime; |
933 | } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE]; |
934 | |
935 | unsigned int vm_pageout_stat_now = 0; |
936 | |
937 | #define VM_PAGEOUT_STAT_BEFORE(i) \ |
938 | (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1) |
939 | #define VM_PAGEOUT_STAT_AFTER(i) \ |
940 | (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1) |
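
#if 0
/*
 * Illustrative sketch only (never compiled): vm_pageout_stats[] is a ring
 * buffer indexed by vm_pageout_stat_now, so BEFORE/AFTER simply wrap at the
 * ends of the array. record_memory_pressure() below sums the bucket behind
 * "now" and then advances "now" with VM_PAGEOUT_STAT_AFTER();
 * mach_vm_pressure_monitor() walks backwards with VM_PAGEOUT_STAT_BEFORE().
 */
static void
vm_pageout_stat_ring_example(void)
{
    assert(VM_PAGEOUT_STAT_BEFORE(0) == VM_PAGEOUT_STAT_SIZE - 1); /* wraps to the last bucket */
    assert(VM_PAGEOUT_STAT_AFTER(VM_PAGEOUT_STAT_SIZE - 1) == 0);  /* wraps back to the first */
    assert(VM_PAGEOUT_STAT_AFTER(0) == 1);
}
#endif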
941 | |
942 | #if VM_PAGE_BUCKETS_CHECK |
943 | int vm_page_buckets_check_interval = 80; /* in eighths of a second */ |
944 | #endif /* VM_PAGE_BUCKETS_CHECK */ |
945 | |
946 | |
947 | void |
948 | record_memory_pressure(void); |
949 | void |
950 | record_memory_pressure(void) |
951 | { |
952 | unsigned int vm_pageout_next; |
953 | |
954 | #if VM_PAGE_BUCKETS_CHECK |
955 | /* check the consistency of VM page buckets at regular interval */ |
956 | static int counter = 0; |
957 | if ((++counter % vm_page_buckets_check_interval) == 0) { |
958 | vm_page_buckets_check(); |
959 | } |
960 | #endif /* VM_PAGE_BUCKETS_CHECK */ |
961 | |
962 | vm_pageout_state.vm_memory_pressure = |
963 | vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative + |
964 | vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned + |
965 | vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal + |
966 | vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external; |
967 | |
commpage_set_memory_pressure((unsigned int)vm_pageout_state.vm_memory_pressure);
969 | |
970 | /* move "now" forward */ |
971 | vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now); |
972 | |
bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
974 | |
975 | vm_pageout_stat_now = vm_pageout_next; |
976 | } |
977 | |
978 | |
979 | /* |
980 | * IMPORTANT |
981 | * mach_vm_ctl_page_free_wanted() is called indirectly, via |
982 | * mach_vm_pressure_monitor(), when taking a stackshot. Therefore, |
983 | * it must be safe in the restricted stackshot context. Locks and/or |
984 | * blocking are not allowable. |
985 | */ |
986 | unsigned int |
987 | mach_vm_ctl_page_free_wanted(void) |
988 | { |
989 | unsigned int page_free_target, page_free_count, page_free_wanted; |
990 | |
991 | page_free_target = vm_page_free_target; |
992 | page_free_count = vm_page_free_count; |
993 | if (page_free_target > page_free_count) { |
994 | page_free_wanted = page_free_target - page_free_count; |
995 | } else { |
996 | page_free_wanted = 0; |
997 | } |
998 | |
999 | return page_free_wanted; |
1000 | } |
1001 | |
1002 | |
1003 | /* |
1004 | * IMPORTANT: |
1005 | * mach_vm_pressure_monitor() is called when taking a stackshot, with |
1006 | * wait_for_pressure FALSE, so that code path must remain safe in the |
* restricted stackshot context. No blocking or locks are allowable
* on that code path.
1009 | */ |
1010 | |
1011 | kern_return_t |
1012 | mach_vm_pressure_monitor( |
1013 | boolean_t wait_for_pressure, |
1014 | unsigned int nsecs_monitored, |
1015 | unsigned int *pages_reclaimed_p, |
1016 | unsigned int *pages_wanted_p) |
1017 | { |
1018 | wait_result_t wr; |
1019 | unsigned int vm_pageout_then, vm_pageout_now; |
1020 | unsigned int pages_reclaimed; |
1021 | unsigned int units_of_monitor; |
1022 | |
1023 | units_of_monitor = 8 * nsecs_monitored; |
1024 | /* |
1025 | * We don't take the vm_page_queue_lock here because we don't want |
1026 | * vm_pressure_monitor() to get in the way of the vm_pageout_scan() |
1027 | * thread when it's trying to reclaim memory. We don't need fully |
1028 | * accurate monitoring anyway... |
1029 | */ |
1030 | |
1031 | if (wait_for_pressure) { |
1032 | /* wait until there's memory pressure */ |
1033 | while (vm_page_free_count >= vm_page_free_target) { |
wr = assert_wait((event_t) &vm_page_free_wanted,
1035 | THREAD_INTERRUPTIBLE); |
1036 | if (wr == THREAD_WAITING) { |
1037 | wr = thread_block(THREAD_CONTINUE_NULL); |
1038 | } |
1039 | if (wr == THREAD_INTERRUPTED) { |
1040 | return KERN_ABORTED; |
1041 | } |
1042 | if (wr == THREAD_AWAKENED) { |
1043 | /* |
1044 | * The memory pressure might have already |
1045 | * been relieved but let's not block again |
1046 | * and let's report that there was memory |
1047 | * pressure at some point. |
1048 | */ |
1049 | break; |
1050 | } |
1051 | } |
1052 | } |
1053 | |
1054 | /* provide the number of pages the system wants to reclaim */ |
1055 | if (pages_wanted_p != NULL) { |
1056 | *pages_wanted_p = mach_vm_ctl_page_free_wanted(); |
1057 | } |
1058 | |
1059 | if (pages_reclaimed_p == NULL) { |
1060 | return KERN_SUCCESS; |
1061 | } |
1062 | |
1063 | /* provide number of pages reclaimed in the last "nsecs_monitored" */ |
1064 | vm_pageout_now = vm_pageout_stat_now; |
1065 | pages_reclaimed = 0; |
1066 | for (vm_pageout_then = |
1067 | VM_PAGEOUT_STAT_BEFORE(vm_pageout_now); |
1068 | vm_pageout_then != vm_pageout_now && |
1069 | units_of_monitor-- != 0; |
1070 | vm_pageout_then = |
1071 | VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) { |
1072 | pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative; |
1073 | pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned; |
1074 | pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal; |
1075 | pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external; |
1076 | } |
1077 | *pages_reclaimed_p = pages_reclaimed; |
1078 | |
1079 | return KERN_SUCCESS; |
1080 | } |
1081 | |
1082 | |
1083 | |
1084 | #if DEVELOPMENT || DEBUG |
1085 | |
1086 | static void |
1087 | vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int); |
1088 | |
1089 | /* |
1090 | * condition variable used to make sure there is |
1091 | * only a single sweep going on at a time |
1092 | */ |
1093 | bool vm_pageout_disconnect_all_pages_active = false; |
1094 | |
1095 | void |
1096 | vm_pageout_disconnect_all_pages() |
1097 | { |
1098 | vm_page_lock_queues(); |
1099 | |
1100 | if (vm_pageout_disconnect_all_pages_active) { |
1101 | vm_page_unlock_queues(); |
1102 | return; |
1103 | } |
1104 | vm_pageout_disconnect_all_pages_active = true; |
1105 | |
1106 | vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, |
1107 | vm_page_throttled_count); |
1108 | vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, |
1109 | vm_page_anonymous_count); |
1110 | vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_inactive, |
1111 | (vm_page_inactive_count - vm_page_anonymous_count)); |
1112 | vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, |
1113 | vm_page_active_count); |
#if CONFIG_SECLUDED_MEMORY
1115 | vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_secluded, |
1116 | vm_page_secluded_count); |
1117 | #endif /* CONFIG_SECLUDED_MEMORY */ |
1118 | vm_page_unlock_queues(); |
1119 | |
1120 | vm_pageout_disconnect_all_pages_active = false; |
1121 | } |
1122 | |
1123 | /* NB: assumes the page_queues lock is held on entry, returns with page queue lock held */ |
1124 | void |
1125 | vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount) |
1126 | { |
1127 | vm_page_t m; |
1128 | vm_object_t t_object = NULL; |
1129 | vm_object_t l_object = NULL; |
1130 | vm_object_t m_object = NULL; |
1131 | int delayed_unlock = 0; |
1132 | int try_failed_count = 0; |
1133 | int disconnected_count = 0; |
1134 | int paused_count = 0; |
1135 | int object_locked_count = 0; |
1136 | |
1137 | KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) | |
1138 | DBG_FUNC_START), |
1139 | q, qcount); |
1140 | |
1141 | while (qcount && !vm_page_queue_empty(q)) { |
1142 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
1143 | |
1144 | m = (vm_page_t) vm_page_queue_first(q); |
1145 | m_object = VM_PAGE_OBJECT(m); |
1146 | |
1147 | /* |
1148 | * check to see if we currently are working |
1149 | * with the same object... if so, we've |
1150 | * already got the lock |
1151 | */ |
1152 | if (m_object != l_object) { |
1153 | /* |
1154 | * the object associated with candidate page is |
1155 | * different from the one we were just working |
1156 | * with... dump the lock if we still own it |
1157 | */ |
1158 | if (l_object != NULL) { |
1159 | vm_object_unlock(l_object); |
1160 | l_object = NULL; |
1161 | } |
1162 | if (m_object != t_object) { |
1163 | try_failed_count = 0; |
1164 | } |
1165 | |
1166 | /* |
* Try to lock object; since we've already got the
* page queues lock, we can only 'try' for this one.
* If the 'try' fails, we need to do a mutex_pause
1170 | * to allow the owner of the object lock a chance to |
1171 | * run... |
1172 | */ |
1173 | if (!vm_object_lock_try_scan(m_object)) { |
1174 | if (try_failed_count > 20) { |
1175 | goto reenter_pg_on_q; |
1176 | } |
1177 | vm_page_unlock_queues(); |
1178 | mutex_pause(try_failed_count++); |
1179 | vm_page_lock_queues(); |
1180 | delayed_unlock = 0; |
1181 | |
1182 | paused_count++; |
1183 | |
1184 | t_object = m_object; |
1185 | continue; |
1186 | } |
1187 | object_locked_count++; |
1188 | |
1189 | l_object = m_object; |
1190 | } |
1191 | if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || |
1192 | m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) || |
1193 | m->vmp_free_when_done) { |
1194 | /* |
1195 | * put it back on the head of its queue |
1196 | */ |
1197 | goto reenter_pg_on_q; |
1198 | } |
1199 | if (m->vmp_pmapped == TRUE) { |
1200 | pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)); |
1201 | |
1202 | disconnected_count++; |
1203 | } |
1204 | reenter_pg_on_q: |
1205 | vm_page_queue_remove(q, m, vmp_pageq); |
1206 | vm_page_queue_enter(q, m, vmp_pageq); |
1207 | |
1208 | qcount--; |
1209 | try_failed_count = 0; |
1210 | |
1211 | if (delayed_unlock++ > 128) { |
1212 | if (l_object != NULL) { |
1213 | vm_object_unlock(l_object); |
1214 | l_object = NULL; |
1215 | } |
1216 | lck_mtx_yield(&vm_page_queue_lock); |
1217 | delayed_unlock = 0; |
1218 | } |
1219 | } |
1220 | if (l_object != NULL) { |
1221 | vm_object_unlock(l_object); |
1222 | l_object = NULL; |
1223 | } |
1224 | |
1225 | KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) | |
1226 | DBG_FUNC_END), |
1227 | q, disconnected_count, object_locked_count, paused_count); |
1228 | } |
1229 | |
1230 | extern char* proc_best_name(struct proc* proc); |
1231 | |
1232 | int |
1233 | vm_toggle_task_selfdonate_pages(task_t task) |
1234 | { |
1235 | int state = 0; |
1236 | if (vm_page_donate_mode == VM_PAGE_DONATE_DISABLED) { |
1237 | printf("VM Donation mode is OFF on the system\n" ); |
1238 | return state; |
1239 | } |
1240 | if (task != kernel_task) { |
1241 | task_lock(task); |
1242 | if (!task->donates_own_pages) { |
1243 | printf("SELF DONATE for %s ON\n" , proc_best_name(get_bsdtask_info(task))); |
1244 | task->donates_own_pages = true; |
1245 | state = 1; |
1246 | } else if (task->donates_own_pages) { |
1247 | printf("SELF DONATE for %s OFF\n" , proc_best_name(get_bsdtask_info(task))); |
1248 | task->donates_own_pages = false; |
1249 | state = 0; |
1250 | } |
1251 | task_unlock(task); |
1252 | } |
1253 | return state; |
1254 | } |
1255 | #endif /* DEVELOPMENT || DEBUG */ |
1256 | |
1257 | void |
1258 | vm_task_set_selfdonate_pages(task_t task, bool donate) |
1259 | { |
1260 | assert(vm_page_donate_mode != VM_PAGE_DONATE_DISABLED); |
1261 | assert(task != kernel_task); |
1262 | |
1263 | task_lock(task); |
1264 | task->donates_own_pages = donate; |
1265 | task_unlock(task); |
1266 | } |
1267 | |
1268 | |
1269 | |
1270 | static size_t |
1271 | vm_pageout_page_queue(vm_page_queue_head_t *, size_t, bool); |
1272 | |
1273 | /* |
1274 | * condition variable used to make sure there is |
1275 | * only a single sweep going on at a time |
1276 | */ |
1277 | boolean_t vm_pageout_anonymous_pages_active = FALSE; |
1278 | |
1279 | |
1280 | void |
1281 | vm_pageout_anonymous_pages() |
1282 | { |
1283 | if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { |
1284 | vm_page_lock_queues(); |
1285 | |
1286 | if (vm_pageout_anonymous_pages_active == TRUE) { |
1287 | vm_page_unlock_queues(); |
1288 | return; |
1289 | } |
1290 | vm_pageout_anonymous_pages_active = TRUE; |
1291 | vm_page_unlock_queues(); |
1292 | |
1293 | vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count, false); |
1294 | vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count, false); |
1295 | vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count, false); |
1296 | |
1297 | if (VM_CONFIG_SWAP_IS_PRESENT) { |
1298 | vm_consider_swapping(); |
1299 | } |
1300 | |
1301 | vm_page_lock_queues(); |
1302 | vm_pageout_anonymous_pages_active = FALSE; |
1303 | vm_page_unlock_queues(); |
1304 | } |
1305 | } |
1306 | |
1307 | |
1308 | size_t |
1309 | vm_pageout_page_queue(vm_page_queue_head_t *q, size_t qcount, bool perf_test) |
1310 | { |
1311 | vm_page_t m; |
1312 | vm_object_t t_object = NULL; |
1313 | vm_object_t l_object = NULL; |
1314 | vm_object_t m_object = NULL; |
1315 | int delayed_unlock = 0; |
1316 | int try_failed_count = 0; |
1317 | int refmod_state; |
1318 | int pmap_options; |
1319 | struct vm_pageout_queue *iq; |
1320 | ppnum_t phys_page; |
1321 | size_t pages_moved = 0; |
1322 | |
1323 | |
1324 | iq = &vm_pageout_queue_internal; |
1325 | |
1326 | vm_page_lock_queues(); |
1327 | |
1328 | #if DEVELOPMENT || DEBUG |
1329 | if (perf_test) { |
1330 | iq = &vm_pageout_queue_benchmark; |
1331 | // ensure the benchmark queue isn't throttled |
1332 | iq->pgo_maxlaundry = (unsigned int) qcount; |
1333 | } |
#endif /* DEVELOPMENT || DEBUG */
1335 | |
1336 | while (qcount && !vm_page_queue_empty(q)) { |
1337 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
1338 | |
1339 | if (VM_PAGE_Q_THROTTLED(iq)) { |
1340 | if (l_object != NULL) { |
1341 | vm_object_unlock(l_object); |
1342 | l_object = NULL; |
1343 | } |
1344 | iq->pgo_draining = TRUE; |
1345 | |
assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1347 | vm_page_unlock_queues(); |
1348 | |
1349 | thread_block(THREAD_CONTINUE_NULL); |
1350 | |
1351 | vm_page_lock_queues(); |
1352 | delayed_unlock = 0; |
1353 | continue; |
1354 | } |
1355 | m = (vm_page_t) vm_page_queue_first(q); |
1356 | m_object = VM_PAGE_OBJECT(m); |
1357 | |
1358 | /* |
1359 | * check to see if we currently are working |
1360 | * with the same object... if so, we've |
1361 | * already got the lock |
1362 | */ |
1363 | if (m_object != l_object) { |
1364 | if (!m_object->internal) { |
1365 | goto reenter_pg_on_q; |
1366 | } |
1367 | |
1368 | /* |
1369 | * the object associated with candidate page is |
1370 | * different from the one we were just working |
1371 | * with... dump the lock if we still own it |
1372 | */ |
1373 | if (l_object != NULL) { |
1374 | vm_object_unlock(l_object); |
1375 | l_object = NULL; |
1376 | } |
1377 | if (m_object != t_object) { |
1378 | try_failed_count = 0; |
1379 | } |
1380 | |
1381 | /* |
* Try to lock object; since we've already got the
* page queues lock, we can only 'try' for this one.
* If the 'try' fails, we need to do a mutex_pause
1385 | * to allow the owner of the object lock a chance to |
1386 | * run... |
1387 | */ |
1388 | if (!vm_object_lock_try_scan(m_object)) { |
1389 | if (try_failed_count > 20) { |
1390 | goto reenter_pg_on_q; |
1391 | } |
1392 | vm_page_unlock_queues(); |
1393 | mutex_pause(try_failed_count++); |
1394 | vm_page_lock_queues(); |
1395 | delayed_unlock = 0; |
1396 | |
1397 | t_object = m_object; |
1398 | continue; |
1399 | } |
1400 | l_object = m_object; |
1401 | } |
1402 | if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_free_when_done) { |
1403 | /* |
1404 | * page is not to be cleaned |
1405 | * put it back on the head of its queue |
1406 | */ |
1407 | goto reenter_pg_on_q; |
1408 | } |
1409 | phys_page = VM_PAGE_GET_PHYS_PAGE(m); |
1410 | |
1411 | if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) { |
refmod_state = pmap_get_refmod(phys_page);
1413 | |
1414 | if (refmod_state & VM_MEM_REFERENCED) { |
1415 | m->vmp_reference = TRUE; |
1416 | } |
1417 | if (refmod_state & VM_MEM_MODIFIED) { |
1418 | SET_PAGE_DIRTY(m, FALSE); |
1419 | } |
1420 | } |
1421 | if (m->vmp_reference == TRUE) { |
1422 | m->vmp_reference = FALSE; |
pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1424 | goto reenter_pg_on_q; |
1425 | } |
1426 | if (m->vmp_pmapped == TRUE) { |
1427 | if (m->vmp_dirty || m->vmp_precious) { |
1428 | pmap_options = PMAP_OPTIONS_COMPRESSOR; |
1429 | } else { |
1430 | pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED; |
1431 | } |
refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1433 | if (refmod_state & VM_MEM_MODIFIED) { |
1434 | SET_PAGE_DIRTY(m, FALSE); |
1435 | } |
1436 | } |
1437 | |
1438 | if (!m->vmp_dirty && !m->vmp_precious) { |
1439 | vm_page_unlock_queues(); |
1440 | VM_PAGE_FREE(m); |
1441 | vm_page_lock_queues(); |
1442 | delayed_unlock = 0; |
1443 | |
1444 | goto next_pg; |
1445 | } |
1446 | if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) { |
1447 | if (!m_object->pager_initialized) { |
1448 | vm_page_unlock_queues(); |
1449 | |
vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1451 | |
1452 | if (!m_object->pager_initialized) { |
vm_object_compressor_pager_create(m_object);
1454 | } |
1455 | |
1456 | vm_page_lock_queues(); |
1457 | delayed_unlock = 0; |
1458 | } |
1459 | if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) { |
1460 | goto reenter_pg_on_q; |
1461 | } |
1462 | /* |
1463 | * vm_object_compressor_pager_create will drop the object lock |
1464 | * which means 'm' may no longer be valid to use |
1465 | */ |
1466 | continue; |
1467 | } |
1468 | |
1469 | if (!perf_test) { |
1470 | /* |
1471 | * we've already factored out pages in the laundry which |
1472 | * means this page can't be on the pageout queue so it's |
1473 | * safe to do the vm_page_queues_remove |
1474 | */ |
1475 | bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE); |
vm_page_queues_remove(m, TRUE);
1477 | if (donate) { |
1478 | /* |
1479 | * The compressor needs to see this bit to know |
1480 | * where this page needs to land. Also if stolen, |
1481 | * this bit helps put the page back in the right |
1482 | * special queue where it belongs. |
1483 | */ |
1484 | m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE; |
1485 | } |
1486 | } else { |
1487 | vm_page_queue_remove(q, m, vmp_pageq); |
1488 | } |
1489 | |
1490 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
1491 | |
vm_pageout_cluster_to_queue(m, iq);
1493 | |
1494 | pages_moved++; |
1495 | goto next_pg; |
1496 | |
1497 | reenter_pg_on_q: |
1498 | vm_page_queue_remove(q, m, vmp_pageq); |
1499 | vm_page_queue_enter(q, m, vmp_pageq); |
1500 | next_pg: |
1501 | qcount--; |
1502 | try_failed_count = 0; |
1503 | |
1504 | if (delayed_unlock++ > 128) { |
1505 | if (l_object != NULL) { |
1506 | vm_object_unlock(l_object); |
1507 | l_object = NULL; |
1508 | } |
lck_mtx_yield(&vm_page_queue_lock);
1510 | delayed_unlock = 0; |
1511 | } |
1512 | } |
1513 | if (l_object != NULL) { |
1514 | vm_object_unlock(l_object); |
1515 | l_object = NULL; |
1516 | } |
1517 | vm_page_unlock_queues(); |
1518 | return pages_moved; |
1519 | } |
1520 | |
1521 | |
1522 | |
1523 | /* |
1524 | * function in BSD to apply I/O throttle to the pageout thread |
1525 | */ |
1526 | extern void vm_pageout_io_throttle(void); |
1527 | |
1528 | #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \ |
1529 | MACRO_BEGIN \ |
1530 | /* \ |
1531 | * If a "reusable" page somehow made it back into \ |
1532 | * the active queue, it's been re-used and is not \ |
1533 | * quite re-usable. \ |
1534 | * If the VM object was "all_reusable", consider it \ |
1535 | * as "all re-used" instead of converting it to \ |
1536 | * "partially re-used", which could be expensive. \ |
1537 | */ \ |
1538 | assert(VM_PAGE_OBJECT((m)) == (obj)); \ |
1539 | if ((m)->vmp_reusable || \ |
1540 | (obj)->all_reusable) { \ |
1541 | vm_object_reuse_pages((obj), \ |
1542 | (m)->vmp_offset, \ |
1543 | (m)->vmp_offset + PAGE_SIZE_64, \ |
1544 | FALSE); \ |
1545 | } \ |
1546 | MACRO_END |
1547 | |
1548 | |
1549 | #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64 |
1550 | #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024 |
1551 | |
1552 | #define FCS_IDLE 0 |
1553 | #define FCS_DELAYED 1 |
1554 | #define FCS_DEADLOCK_DETECTED 2 |
1555 | |
1556 | struct flow_control { |
1557 | int state; |
1558 | mach_timespec_t ts; |
1559 | }; |
1560 | |
1561 | |
1562 | uint64_t vm_pageout_rejected_bq_internal = 0; |
1563 | uint64_t vm_pageout_rejected_bq_external = 0; |
1564 | uint64_t vm_pageout_skipped_bq_internal = 0; |
1565 | uint64_t vm_pageout_skipped_bq_external = 0; |
1566 | |
1567 | #define ANONS_GRABBED_LIMIT 2 |
1568 | |
1569 | |
1570 | #if 0 |
1571 | static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *); |
1572 | #endif |
1573 | static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int); |
1574 | |
1575 | #define VM_PAGEOUT_PB_NO_ACTION 0 |
1576 | #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1 |
1577 | #define VM_PAGEOUT_PB_THREAD_YIELD 2 |
1578 | |
1579 | |
1580 | #if 0 |
1581 | static void |
1582 | vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq) |
1583 | { |
1584 | if (*local_freeq) { |
1585 | vm_page_unlock_queues(); |
1586 | |
1587 | VM_DEBUG_CONSTANT_EVENT( |
1588 | vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, |
1589 | vm_page_free_count, 0, 0, 1); |
1590 | |
1591 | vm_page_free_list(*local_freeq, TRUE); |
1592 | |
1593 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, |
1594 | vm_page_free_count, *local_freed, 0, 1); |
1595 | |
1596 | *local_freeq = NULL; |
1597 | *local_freed = 0; |
1598 | |
1599 | vm_page_lock_queues(); |
1600 | } else { |
1601 | lck_mtx_yield(&vm_page_queue_lock); |
1602 | } |
1603 | *delayed_unlock = 1; |
1604 | } |
1605 | #endif |
1606 | |
1607 | |
1608 | static void |
1609 | vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock, |
1610 | vm_page_t *local_freeq, int *local_freed, int action) |
1611 | { |
1612 | vm_page_unlock_queues(); |
1613 | |
1614 | if (*object != NULL) { |
1615 | vm_object_unlock(*object); |
1616 | *object = NULL; |
1617 | } |
1618 | if (*local_freeq) { |
vm_page_free_list(*local_freeq, TRUE);
1620 | |
1621 | *local_freeq = NULL; |
1622 | *local_freed = 0; |
1623 | } |
1624 | *delayed_unlock = 1; |
1625 | |
1626 | switch (action) { |
1627 | case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER: |
1628 | vm_consider_waking_compactor_swapper(); |
1629 | break; |
1630 | case VM_PAGEOUT_PB_THREAD_YIELD: |
thread_yield_internal(1);
1632 | break; |
1633 | case VM_PAGEOUT_PB_NO_ACTION: |
1634 | default: |
1635 | break; |
1636 | } |
1637 | vm_page_lock_queues(); |
1638 | } |
1639 | |
1640 | |
1641 | static struct vm_pageout_vminfo last; |
1642 | |
1643 | uint64_t last_vm_page_pages_grabbed = 0; |
1644 | |
1645 | extern uint32_t c_segment_pages_compressed; |
1646 | |
1647 | extern uint64_t ; |
1648 | extern struct memory_object_pager_ops ; |
1649 | |
1650 | void |
1651 | update_vm_info(void) |
1652 | { |
1653 | unsigned long tmp; |
1654 | uint64_t tmp64; |
1655 | |
1656 | vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count; |
1657 | vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count; |
1658 | vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count; |
1659 | vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count; |
1660 | |
1661 | vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count; |
1662 | vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count; |
1663 | vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT; |
1664 | |
1665 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed; |
1666 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count; |
1667 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count; |
1668 | vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count; |
1669 | vm_pageout_stats[vm_pageout_stat_now].vm_page_realtime_count = vm_page_realtime_count; |
1670 | |
1671 | tmp = vm_pageout_vminfo.vm_pageout_considered_page; |
1672 | vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page); |
1673 | last.vm_pageout_considered_page = tmp; |
1674 | |
1675 | tmp64 = vm_pageout_vminfo.vm_pageout_compressions; |
1676 | vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp64 - last.vm_pageout_compressions); |
1677 | last.vm_pageout_compressions = tmp64; |
1678 | |
1679 | tmp = vm_pageout_vminfo.vm_compressor_failed; |
1680 | vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed); |
1681 | last.vm_compressor_failed = tmp; |
1682 | |
1683 | tmp64 = vm_pageout_vminfo.vm_compressor_pages_grabbed; |
1684 | vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp64 - last.vm_compressor_pages_grabbed); |
1685 | last.vm_compressor_pages_grabbed = tmp64; |
1686 | |
1687 | tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost; |
1688 | vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost); |
1689 | last.vm_phantom_cache_found_ghost = tmp; |
1690 | |
1691 | tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost; |
1692 | vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost); |
1693 | last.vm_phantom_cache_added_ghost = tmp; |
1694 | |
1695 | tmp64 = counter_load(&vm_page_grab_count); |
1696 | vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed); |
1697 | last_vm_page_pages_grabbed = tmp64; |
1698 | |
1699 | tmp = vm_pageout_vminfo.vm_page_pages_freed; |
1700 | vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed); |
1701 | last.vm_page_pages_freed = tmp; |
1702 | |
1703 | if (vm_pageout_stats[vm_pageout_stat_now].considered) { |
1704 | tmp = vm_pageout_vminfo.vm_pageout_pages_evicted; |
1705 | vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted); |
1706 | last.vm_pageout_pages_evicted = tmp; |
1707 | |
1708 | tmp = vm_pageout_vminfo.vm_pageout_pages_purged; |
1709 | vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged); |
1710 | last.vm_pageout_pages_purged = tmp; |
1711 | |
1712 | tmp = vm_pageout_vminfo.vm_pageout_freed_speculative; |
1713 | vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative); |
1714 | last.vm_pageout_freed_speculative = tmp; |
1715 | |
1716 | tmp = vm_pageout_vminfo.vm_pageout_freed_external; |
1717 | vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external); |
1718 | last.vm_pageout_freed_external = tmp; |
1719 | |
1720 | tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced; |
1721 | vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced); |
1722 | last.vm_pageout_inactive_referenced = tmp; |
1723 | |
1724 | tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external; |
1725 | vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external); |
1726 | last.vm_pageout_scan_inactive_throttled_external = tmp; |
1727 | |
1728 | tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external; |
1729 | vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external); |
1730 | last.vm_pageout_inactive_dirty_external = tmp; |
1731 | |
1732 | tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned; |
1733 | vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned); |
1734 | last.vm_pageout_freed_cleaned = tmp; |
1735 | |
1736 | tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock; |
1737 | vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock); |
1738 | last.vm_pageout_inactive_nolock = tmp; |
1739 | |
1740 | tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal; |
1741 | vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal); |
1742 | last.vm_pageout_scan_inactive_throttled_internal = tmp; |
1743 | |
1744 | tmp = vm_pageout_vminfo.vm_pageout_skipped_external; |
1745 | vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external); |
1746 | last.vm_pageout_skipped_external = tmp; |
1747 | |
1748 | tmp = vm_pageout_vminfo.vm_pageout_skipped_internal; |
1749 | vm_pageout_stats[vm_pageout_stat_now].skipped_internal = (unsigned int)(tmp - last.vm_pageout_skipped_internal); |
1750 | last.vm_pageout_skipped_internal = tmp; |
1751 | |
1752 | tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded; |
1753 | vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded); |
1754 | last.vm_pageout_reactivation_limit_exceeded = tmp; |
1755 | |
1756 | tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim; |
1757 | vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim); |
1758 | last.vm_pageout_inactive_force_reclaim = tmp; |
1759 | |
1760 | tmp = vm_pageout_vminfo.vm_pageout_freed_internal; |
1761 | vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal); |
1762 | last.vm_pageout_freed_internal = tmp; |
1763 | |
1764 | tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal; |
1765 | vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal); |
1766 | last.vm_pageout_considered_bq_internal = tmp; |
1767 | |
1768 | tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external; |
1769 | vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external); |
1770 | last.vm_pageout_considered_bq_external = tmp; |
1771 | |
1772 | tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated; |
1773 | vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated); |
1774 | last.vm_pageout_filecache_min_reactivated = tmp; |
1775 | |
1776 | tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal; |
1777 | vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal); |
1778 | last.vm_pageout_inactive_dirty_internal = tmp; |
1779 | |
1780 | tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache; |
1781 | vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_sharedcache); |
1782 | last.vm_pageout_forcereclaimed_sharedcache = tmp; |
1783 | |
1784 | tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime; |
1785 | vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_realtime); |
1786 | last.vm_pageout_forcereclaimed_realtime = tmp; |
1787 | |
1788 | tmp = vm_pageout_vminfo.vm_pageout_protected_sharedcache; |
1789 | vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache = (unsigned int)(tmp - last.vm_pageout_protected_sharedcache); |
1790 | last.vm_pageout_protected_sharedcache = tmp; |
1791 | |
1792 | tmp = vm_pageout_vminfo.vm_pageout_protected_realtime; |
1793 | vm_pageout_stats[vm_pageout_stat_now].protected_realtime = (unsigned int)(tmp - last.vm_pageout_protected_realtime); |
1794 | last.vm_pageout_protected_realtime = tmp; |
1795 | } |
1796 | |
1797 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE, |
1798 | vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count, |
1799 | vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count, |
1800 | vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count, |
1801 | vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count, |
1802 | 0); |
1803 | |
1804 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE, |
1805 | vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count, |
1806 | vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count, |
1807 | vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count, |
1808 | 0, |
1809 | 0); |
1810 | |
1811 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE, |
1812 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed, |
1813 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count, |
1814 | vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count, |
1815 | vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count, |
1816 | 0); |
1817 | |
1818 | if (vm_pageout_stats[vm_pageout_stat_now].considered || |
1819 | vm_pageout_stats[vm_pageout_stat_now].pages_compressed || |
1820 | vm_pageout_stats[vm_pageout_stat_now].failed_compressions) { |
1821 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE, |
1822 | vm_pageout_stats[vm_pageout_stat_now].considered, |
1823 | vm_pageout_stats[vm_pageout_stat_now].freed_speculative, |
1824 | vm_pageout_stats[vm_pageout_stat_now].freed_external, |
1825 | vm_pageout_stats[vm_pageout_stat_now].inactive_referenced, |
1826 | 0); |
1827 | |
1828 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE, |
1829 | vm_pageout_stats[vm_pageout_stat_now].throttled_external_q, |
1830 | vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external, |
1831 | vm_pageout_stats[vm_pageout_stat_now].freed_cleaned, |
1832 | vm_pageout_stats[vm_pageout_stat_now].inactive_nolock, |
1833 | 0); |
1834 | |
1835 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE, |
1836 | vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q, |
1837 | vm_pageout_stats[vm_pageout_stat_now].pages_compressed, |
1838 | vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor, |
1839 | vm_pageout_stats[vm_pageout_stat_now].skipped_external, |
1840 | 0); |
1841 | |
1842 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE, |
1843 | vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded, |
1844 | vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim, |
1845 | vm_pageout_stats[vm_pageout_stat_now].failed_compressions, |
1846 | vm_pageout_stats[vm_pageout_stat_now].freed_internal, |
1847 | 0); |
1848 | |
1849 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE, |
1850 | vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal, |
1851 | vm_pageout_stats[vm_pageout_stat_now].considered_bq_external, |
1852 | vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations, |
1853 | vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal, |
1854 | 0); |
1855 | |
1856 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO10)) | DBG_FUNC_NONE, |
1857 | vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache, |
1858 | vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime, |
1859 | vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache, |
1860 | vm_pageout_stats[vm_pageout_stat_now].protected_realtime, |
1861 | 0); |
1862 | } |
1863 | KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE, |
1864 | vm_pageout_stats[vm_pageout_stat_now].pages_grabbed, |
1865 | vm_pageout_stats[vm_pageout_stat_now].pages_freed, |
1866 | vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found, |
1867 | vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added, |
1868 | 0); |
1869 | |
1870 | record_memory_pressure(); |
1871 | } |
1872 | |
1873 | extern boolean_t hibernation_vmqueues_inspection; |
1874 | |
1875 | /* |
1876 | * Return values for functions called by vm_pageout_scan |
1877 | * that control its flow. |
1878 | * |
1879 | * PROCEED -- vm_pageout_scan will keep making forward progress. |
1880 | * DONE_RETURN -- page demand satisfied, work is done -> vm_pageout_scan returns. |
 * NEXT_ITERATION -- restart the 'for' loop in vm_pageout_scan, i.e. continue.
1882 | */ |
1883 | |
1884 | #define VM_PAGEOUT_SCAN_PROCEED (0) |
1885 | #define VM_PAGEOUT_SCAN_DONE_RETURN (1) |
1886 | #define VM_PAGEOUT_SCAN_NEXT_ITERATION (2) |
1887 | |
1888 | /* |
1889 | * This function is called only from vm_pageout_scan and |
 * it moves overflow secluded pages (one at a time) to the
1891 | * batched 'local' free Q or active Q. |
1892 | */ |
1893 | static void |
1894 | vps_deal_with_secluded_page_overflow(vm_page_t *local_freeq, int *local_freed) |
1895 | { |
1896 | #if CONFIG_SECLUDED_MEMORY |
1897 | /* |
1898 | * Deal with secluded_q overflow. |
1899 | */ |
1900 | if (vm_page_secluded_count > vm_page_secluded_target) { |
1901 | vm_page_t secluded_page; |
1902 | |
1903 | /* |
1904 | * SECLUDED_AGING_BEFORE_ACTIVE: |
1905 | * Excess secluded pages go to the active queue and |
1906 | * will later go to the inactive queue. |
1907 | */ |
1908 | assert((vm_page_secluded_count_free + |
1909 | vm_page_secluded_count_inuse) == |
1910 | vm_page_secluded_count); |
1911 | secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded); |
1912 | assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q); |
1913 | |
1914 | vm_page_queues_remove(secluded_page, FALSE); |
1915 | assert(!secluded_page->vmp_fictitious); |
1916 | assert(!VM_PAGE_WIRED(secluded_page)); |
1917 | |
1918 | if (secluded_page->vmp_object == 0) { |
1919 | /* transfer to free queue */ |
1920 | assert(secluded_page->vmp_busy); |
1921 | secluded_page->vmp_snext = *local_freeq; |
1922 | *local_freeq = secluded_page; |
1923 | *local_freed += 1; |
1924 | } else { |
1925 | /* transfer to head of active queue */ |
1926 | vm_page_enqueue_active(secluded_page, FALSE); |
1927 | secluded_page = VM_PAGE_NULL; |
1928 | } |
1929 | } |
1930 | #else /* CONFIG_SECLUDED_MEMORY */ |
1931 | |
1932 | #pragma unused(local_freeq) |
1933 | #pragma unused(local_freed) |
1934 | |
1935 | return; |
1936 | |
1937 | #endif /* CONFIG_SECLUDED_MEMORY */ |
1938 | } |
1939 | |
1940 | /* |
1941 | * This function is called only from vm_pageout_scan and |
1942 | * it initializes the loop targets for vm_pageout_scan(). |
1943 | */ |
1944 | static void |
1945 | vps_init_page_targets(void) |
1946 | { |
1947 | /* |
1948 | * LD TODO: Other page targets should be calculated here too. |
1949 | */ |
1950 | vm_page_anonymous_min = vm_page_inactive_target / 20; |
1951 | |
1952 | if (vm_pageout_state.vm_page_speculative_percentage > 50) { |
1953 | vm_pageout_state.vm_page_speculative_percentage = 50; |
1954 | } else if (vm_pageout_state.vm_page_speculative_percentage <= 0) { |
1955 | vm_pageout_state.vm_page_speculative_percentage = 1; |
1956 | } |
1957 | |
1958 | vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count + |
1959 | vm_page_inactive_count); |
1960 | } |
1961 | |
1962 | /* |
1963 | * This function is called only from vm_pageout_scan and |
 * it purges a single VM object at a time and will either
 * make vm_pageout_scan() restart the loop or keep moving forward.
1966 | */ |
1967 | static int |
vps_purge_object(void)
1969 | { |
1970 | int force_purge; |
1971 | |
1972 | assert(available_for_purge >= 0); |
1973 | force_purge = 0; /* no force-purging */ |
1974 | |
1975 | #if VM_PRESSURE_EVENTS |
1976 | vm_pressure_level_t pressure_level; |
1977 | |
1978 | pressure_level = memorystatus_vm_pressure_level; |
1979 | |
1980 | if (pressure_level > kVMPressureNormal) { |
1981 | if (pressure_level >= kVMPressureCritical) { |
1982 | force_purge = vm_pageout_state.memorystatus_purge_on_critical; |
1983 | } else if (pressure_level >= kVMPressureUrgent) { |
1984 | force_purge = vm_pageout_state.memorystatus_purge_on_urgent; |
1985 | } else if (pressure_level >= kVMPressureWarning) { |
1986 | force_purge = vm_pageout_state.memorystatus_purge_on_warning; |
1987 | } |
1988 | } |
1989 | #endif /* VM_PRESSURE_EVENTS */ |
1990 | |
1991 | if (available_for_purge || force_purge) { |
1992 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); |
1993 | |
1994 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); |
		if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
1996 | VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1); |
1997 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); |
1998 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); |
1999 | |
2000 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2001 | } |
2002 | VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); |
2003 | memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); |
2004 | } |
2005 | |
2006 | return VM_PAGEOUT_SCAN_PROCEED; |
2007 | } |
2008 | |
2009 | /* |
2010 | * This function is called only from vm_pageout_scan and |
2011 | * it will try to age the next speculative Q if the oldest |
2012 | * one is empty. |
2013 | */ |
2014 | static int |
2015 | vps_age_speculative_queue(boolean_t force_speculative_aging) |
2016 | { |
2017 | #define DELAY_SPECULATIVE_AGE 1000 |
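	/*
	 * Once the fully-aged timestamp check below fails, re-checking it is
	 * skipped for DELAY_SPECULATIVE_AGE passes through the scan loop
	 * rather than recomputing the deadline on every call.
	 */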
2018 | |
2019 | /* |
2020 | * try to pull pages from the aging bins... |
2021 | * see vm_page.h for an explanation of how |
2022 | * this mechanism works |
2023 | */ |
2024 | boolean_t can_steal = FALSE; |
2025 | int num_scanned_queues; |
	static int delay_speculative_age = 0; /* depends on the # of times we go through the main pageout_scan loop. */
2027 | mach_timespec_t ts; |
2028 | struct vm_speculative_age_q *aq; |
2029 | struct vm_speculative_age_q *sq; |
2030 | |
2031 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
2032 | |
2033 | aq = &vm_page_queue_speculative[speculative_steal_index]; |
2034 | |
2035 | num_scanned_queues = 0; |
2036 | while (vm_page_queue_empty(&aq->age_q) && |
2037 | num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) { |
2038 | speculative_steal_index++; |
2039 | |
2040 | if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) { |
2041 | speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q; |
2042 | } |
2043 | |
2044 | aq = &vm_page_queue_speculative[speculative_steal_index]; |
2045 | } |
2046 | |
2047 | if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) { |
2048 | /* |
2049 | * XXX We've scanned all the speculative |
2050 | * queues but still haven't found one |
2051 | * that is not empty, even though |
2052 | * vm_page_speculative_count is not 0. |
2053 | */ |
2054 | if (!vm_page_queue_empty(&sq->age_q)) { |
2055 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2056 | } |
2057 | #if DEVELOPMENT || DEBUG |
	panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2059 | #endif |
2060 | /* readjust... */ |
2061 | vm_page_speculative_count = 0; |
2062 | /* ... and continue */ |
2063 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2064 | } |
2065 | |
2066 | if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) { |
2067 | can_steal = TRUE; |
2068 | } else { |
2069 | if (!delay_speculative_age) { |
2070 | mach_timespec_t ts_fully_aged; |
2071 | |
2072 | ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000; |
2073 | ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000) |
2074 | * 1000 * NSEC_PER_USEC; |
2075 | |
2076 | ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); |
2077 | |
2078 | clock_sec_t sec; |
2079 | clock_nsec_t nsec; |
			clock_get_system_nanotime(&sec, &nsec);
2081 | ts.tv_sec = (unsigned int) sec; |
2082 | ts.tv_nsec = nsec; |
2083 | |
2084 | if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) { |
2085 | can_steal = TRUE; |
2086 | } else { |
2087 | delay_speculative_age++; |
2088 | } |
2089 | } else { |
2090 | delay_speculative_age++; |
2091 | if (delay_speculative_age == DELAY_SPECULATIVE_AGE) { |
2092 | delay_speculative_age = 0; |
2093 | } |
2094 | } |
2095 | } |
2096 | if (can_steal == TRUE) { |
2097 | vm_page_speculate_ageit(aq); |
2098 | } |
2099 | |
2100 | return VM_PAGEOUT_SCAN_PROCEED; |
2101 | } |
2102 | |
2103 | /* |
2104 | * This function is called only from vm_pageout_scan and |
2105 | * it evicts a single VM object from the cache. |
2106 | */ |
static inline int
2108 | vps_object_cache_evict(vm_object_t *object_to_unlock) |
2109 | { |
2110 | static int cache_evict_throttle = 0; |
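	/* after an eviction attempt frees nothing, count down 1000 calls before retrying */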
2111 | struct vm_speculative_age_q *sq; |
2112 | |
2113 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
2114 | |
2115 | if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) { |
2116 | int pages_evicted; |
2117 | |
2118 | if (*object_to_unlock != NULL) { |
2119 | vm_object_unlock(*object_to_unlock); |
2120 | *object_to_unlock = NULL; |
2121 | } |
2122 | KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0); |
2123 | |
2124 | pages_evicted = vm_object_cache_evict(100, 10); |
2125 | |
2126 | KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0); |
2127 | |
2128 | if (pages_evicted) { |
2129 | vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted; |
2130 | |
2131 | VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE, |
2132 | vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0); |
2133 | memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE); |
2134 | |
2135 | /* |
2136 | * we just freed up to 100 pages, |
2137 | * so go back to the top of the main loop |
			 * and re-evaluate the memory situation
2139 | */ |
2140 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2141 | } else { |
2142 | cache_evict_throttle = 1000; |
2143 | } |
2144 | } |
2145 | if (cache_evict_throttle) { |
2146 | cache_evict_throttle--; |
2147 | } |
2148 | |
2149 | return VM_PAGEOUT_SCAN_PROCEED; |
2150 | } |
2151 | |
2152 | |
2153 | /* |
2154 | * This function is called only from vm_pageout_scan and |
 * it calculates the filecache minimum that needs to be maintained
2156 | * as we start to steal pages. |
2157 | */ |
2158 | static void |
2159 | vps_calculate_filecache_min(void) |
2160 | { |
2161 | int divisor = vm_pageout_state.vm_page_filecache_min_divisor; |
2162 | |
2163 | #if CONFIG_JETSAM |
2164 | /* |
2165 | * don't let the filecache_min fall below 15% of available memory |
2166 | * on systems with an active compressor that isn't nearing its |
2167 | * limits w/r to accepting new data |
2168 | * |
2169 | * on systems w/o the compressor/swapper, the filecache is always |
2170 | * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY |
2171 | * since most (if not all) of the anonymous pages are in the |
2172 | * throttled queue (which isn't counted as available) which |
2173 | * effectively disables this filter |
2174 | */ |
2175 | if (vm_compressor_low_on_space() || divisor == 0) { |
2176 | vm_pageout_state.vm_page_filecache_min = 0; |
2177 | } else { |
2178 | vm_pageout_state.vm_page_filecache_min = |
2179 | ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor; |
2180 | } |
2181 | #else |
2182 | if (vm_compressor_out_of_space() || divisor == 0) { |
2183 | vm_pageout_state.vm_page_filecache_min = 0; |
2184 | } else { |
2185 | /* |
2186 | * don't let the filecache_min fall below the specified critical level |
2187 | */ |
2188 | vm_pageout_state.vm_page_filecache_min = |
2189 | ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor; |
2190 | } |
2191 | #endif |
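	/*
	 * If free pages have fallen below a quarter of the reserved pool,
	 * drop the filecache floor entirely so the scan is free to steal
	 * file-backed pages as well.
	 */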
2192 | if (vm_page_free_count < (vm_page_free_reserved / 4)) { |
2193 | vm_pageout_state.vm_page_filecache_min = 0; |
2194 | } |
2195 | } |
2196 | |
2197 | /* |
2198 | * This function is called only from vm_pageout_scan and |
 * it updates the flow control time used to detect whether VM pageout scan
2200 | * isn't making progress. |
2201 | */ |
2202 | static void |
2203 | vps_flow_control_reset_deadlock_timer(struct flow_control *flow_control) |
2204 | { |
2205 | mach_timespec_t ts; |
2206 | clock_sec_t sec; |
2207 | clock_nsec_t nsec; |
2208 | |
2209 | ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000; |
2210 | ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; |
	clock_get_system_nanotime(&sec, &nsec);
2212 | flow_control->ts.tv_sec = (unsigned int) sec; |
2213 | flow_control->ts.tv_nsec = nsec; |
2214 | ADD_MACH_TIMESPEC(&flow_control->ts, &ts); |
2215 | |
2216 | flow_control->state = FCS_DELAYED; |
2217 | |
2218 | vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++; |
2219 | } |
2220 | |
2221 | /* |
2222 | * This function is called only from vm_pageout_scan and |
2223 | * it is the flow control logic of VM pageout scan which |
 * controls whether it should block and for how long.
2225 | * Any blocking of vm_pageout_scan happens ONLY in this function. |
2226 | */ |
2227 | static int |
2228 | vps_flow_control(struct flow_control *flow_control, int *anons_grabbed, vm_object_t *object, int *delayed_unlock, |
2229 | vm_page_t *local_freeq, int *local_freed, int *vm_pageout_deadlock_target, unsigned int inactive_burst_count) |
2230 | { |
2231 | boolean_t exceeded_burst_throttle = FALSE; |
2232 | unsigned int msecs = 0; |
2233 | uint32_t inactive_external_count; |
2234 | mach_timespec_t ts; |
2235 | struct vm_pageout_queue *iq; |
2236 | struct vm_pageout_queue *eq; |
2237 | struct vm_speculative_age_q *sq; |
2238 | |
2239 | iq = &vm_pageout_queue_internal; |
2240 | eq = &vm_pageout_queue_external; |
2241 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
2242 | |
2243 | /* |
2244 | * Sometimes we have to pause: |
2245 | * 1) No inactive pages - nothing to do. |
2246 | * 2) Loop control - no acceptable pages found on the inactive queue |
2247 | * within the last vm_pageout_burst_inactive_throttle iterations |
2248 | * 3) Flow control - default pageout queue is full |
2249 | */ |
2250 | if (vm_page_queue_empty(&vm_page_queue_inactive) && |
2251 | vm_page_queue_empty(&vm_page_queue_anonymous) && |
2252 | vm_page_queue_empty(&vm_page_queue_cleaned) && |
2253 | vm_page_queue_empty(&sq->age_q)) { |
2254 | VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1); |
2255 | msecs = vm_pageout_state.vm_pageout_empty_wait; |
2256 | } else if (inactive_burst_count >= |
2257 | MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle, |
2258 | (vm_page_inactive_count + |
2259 | vm_page_speculative_count))) { |
2260 | VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1); |
2261 | msecs = vm_pageout_state.vm_pageout_burst_wait; |
2262 | |
2263 | exceeded_burst_throttle = TRUE; |
2264 | } else if (VM_PAGE_Q_THROTTLED(iq) && |
2265 | VM_DYNAMIC_PAGING_ENABLED()) { |
2266 | clock_sec_t sec; |
2267 | clock_nsec_t nsec; |
2268 | |
2269 | switch (flow_control->state) { |
2270 | case FCS_IDLE: |
2271 | if ((vm_page_free_count + *local_freed) < vm_page_free_target && |
2272 | vm_pageout_state.vm_restricted_to_single_processor == FALSE) { |
2273 | /* |
2274 | * since the compressor is running independently of vm_pageout_scan |
2275 | * let's not wait for it just yet... as long as we have a healthy supply |
2276 | * of filecache pages to work with, let's keep stealing those. |
2277 | */ |
2278 | inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count; |
2279 | |
2280 | if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min && |
2281 | (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) { |
2282 | *anons_grabbed = ANONS_GRABBED_LIMIT; |
2283 | VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1); |
2284 | return VM_PAGEOUT_SCAN_PROCEED; |
2285 | } |
2286 | } |
2287 | |
2288 | vps_flow_control_reset_deadlock_timer(flow_control); |
2289 | msecs = vm_pageout_state.vm_pageout_deadlock_wait; |
2290 | |
2291 | break; |
2292 | |
2293 | case FCS_DELAYED: |
			clock_get_system_nanotime(&sec, &nsec);
2295 | ts.tv_sec = (unsigned int) sec; |
2296 | ts.tv_nsec = nsec; |
2297 | |
2298 | if (CMP_MACH_TIMESPEC(&ts, &flow_control->ts) >= 0) { |
2299 | /* |
2300 | * the pageout thread for the default pager is potentially |
2301 | * deadlocked since the |
2302 | * default pager queue has been throttled for more than the |
2303 | * allowable time... we need to move some clean pages or dirty |
2304 | * pages belonging to the external pagers if they aren't throttled |
2305 | * vm_page_free_wanted represents the number of threads currently |
2306 | * blocked waiting for pages... we'll move one page for each of |
2307 | * these plus a fixed amount to break the logjam... once we're done |
				 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2309 | * with a new timeout target since we have no way of knowing |
2310 | * whether we've broken the deadlock except through observation |
2311 | * of the queue associated with the default pager... we need to |
2312 | * stop moving pages and allow the system to run to see what |
2313 | * state it settles into. |
2314 | */ |
2315 | |
2316 | *vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief + |
2317 | vm_page_free_wanted + vm_page_free_wanted_privileged; |
2318 | VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1); |
2319 | flow_control->state = FCS_DEADLOCK_DETECTED; |
2320 | thread_wakeup(VM_PAGEOUT_GC_EVENT); |
2321 | return VM_PAGEOUT_SCAN_PROCEED; |
2322 | } |
2323 | /* |
2324 | * just resniff instead of trying |
2325 | * to compute a new delay time... we're going to be |
2326 | * awakened immediately upon a laundry completion, |
2327 | * so we won't wait any longer than necessary |
2328 | */ |
2329 | msecs = vm_pageout_state.vm_pageout_idle_wait; |
2330 | break; |
2331 | |
2332 | case FCS_DEADLOCK_DETECTED: |
2333 | if (*vm_pageout_deadlock_target) { |
2334 | return VM_PAGEOUT_SCAN_PROCEED; |
2335 | } |
2336 | |
2337 | vps_flow_control_reset_deadlock_timer(flow_control); |
2338 | msecs = vm_pageout_state.vm_pageout_deadlock_wait; |
2339 | |
2340 | break; |
2341 | } |
2342 | } else { |
2343 | /* |
2344 | * No need to pause... |
2345 | */ |
2346 | return VM_PAGEOUT_SCAN_PROCEED; |
2347 | } |
2348 | |
2349 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; |
2350 | |
2351 | vm_pageout_prepare_to_block(object, delayed_unlock, local_freeq, local_freed, |
2352 | VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER); |
2353 | |
2354 | if (vm_page_free_count >= vm_page_free_target) { |
2355 | /* |
2356 | * we're here because |
2357 | * 1) someone else freed up some pages while we had |
2358 | * the queues unlocked above |
2359 | * and we've hit one of the 3 conditions that |
2360 | * cause us to pause the pageout scan thread |
2361 | * |
2362 | * since we already have enough free pages, |
2363 | * let's avoid stalling and return normally |
2364 | * |
2365 | * before we return, make sure the pageout I/O threads |
2366 | * are running throttled in case there are still requests |
2367 | * in the laundry... since we have enough free pages |
2368 | * we don't need the laundry to be cleaned in a timely |
2369 | * fashion... so let's avoid interfering with foreground |
2370 | * activity |
2371 | * |
2372 | * we don't want to hold vm_page_queue_free_lock when |
2373 | * calling vm_pageout_adjust_eq_iothrottle (since it |
		 * may cause other locks to be taken), we do the initial
2375 | * check outside of the lock. Once we take the lock, |
2376 | * we recheck the condition since it may have changed. |
2377 | * if it has, no problem, we will make the threads |
2378 | * non-throttled before actually blocking |
2379 | */ |
2380 | vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE); |
2381 | } |
2382 | vm_free_page_lock(); |
2383 | |
2384 | if (vm_page_free_count >= vm_page_free_target && |
2385 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { |
2386 | return VM_PAGEOUT_SCAN_DONE_RETURN; |
2387 | } |
2388 | vm_free_page_unlock(); |
2389 | |
2390 | if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) { |
2391 | /* |
2392 | * we're most likely about to block due to one of |
2393 | * the 3 conditions that cause vm_pageout_scan to |
2394 | * not be able to make forward progress w/r |
2395 | * to providing new pages to the free queue, |
2396 | * so unthrottle the I/O threads in case we |
2397 | * have laundry to be cleaned... it needs |
2398 | * to be completed ASAP. |
2399 | * |
2400 | * even if we don't block, we want the io threads |
2401 | * running unthrottled since the sum of free + |
2402 | * clean pages is still under our free target |
2403 | */ |
2404 | vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE); |
2405 | } |
2406 | if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { |
2407 | /* |
2408 | * if we get here we're below our free target and |
2409 | * we're stalling due to a full laundry queue or |
		 * we don't have any inactive pages other than
2411 | * those in the clean queue... |
2412 | * however, we have pages on the clean queue that |
2413 | * can be moved to the free queue, so let's not |
2414 | * stall the pageout scan |
2415 | */ |
2416 | flow_control->state = FCS_IDLE; |
2417 | return VM_PAGEOUT_SCAN_PROCEED; |
2418 | } |
2419 | if (flow_control->state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) { |
2420 | flow_control->state = FCS_IDLE; |
2421 | return VM_PAGEOUT_SCAN_PROCEED; |
2422 | } |
2423 | |
2424 | VM_CHECK_MEMORYSTATUS; |
2425 | |
2426 | if (flow_control->state != FCS_IDLE) { |
2427 | VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1); |
2428 | } |
2429 | |
2430 | iq->pgo_throttled = TRUE; |
	assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
2432 | |
2433 | vm_page_unlock_queues(); |
2434 | |
2435 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); |
2436 | |
2437 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, |
2438 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); |
2439 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); |
2440 | |
2441 | thread_block(THREAD_CONTINUE_NULL); |
2442 | |
2443 | VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, |
2444 | iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); |
2445 | memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); |
2446 | |
2447 | vm_page_lock_queues(); |
2448 | |
2449 | iq->pgo_throttled = FALSE; |
2450 | |
2451 | vps_init_page_targets(); |
2452 | |
2453 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2454 | } |
2455 | |
2456 | extern boolean_t vm_darkwake_mode; |
2457 | /* |
2458 | * This function is called only from vm_pageout_scan and |
2459 | * it will find and return the most appropriate page to be |
2460 | * reclaimed. |
2461 | */ |
2462 | static int |
2463 | vps_choose_victim_page(vm_page_t *victim_page, int *anons_grabbed, boolean_t *grab_anonymous, boolean_t force_anonymous, |
2464 | boolean_t *is_page_from_bg_q, unsigned int *reactivated_this_call) |
2465 | { |
2466 | vm_page_t m = NULL; |
2467 | vm_object_t m_object = VM_OBJECT_NULL; |
2468 | uint32_t inactive_external_count; |
2469 | struct vm_speculative_age_q *sq; |
2470 | struct vm_pageout_queue *iq; |
2471 | int retval = VM_PAGEOUT_SCAN_PROCEED; |
2472 | |
2473 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
2474 | iq = &vm_pageout_queue_internal; |
2475 | |
2476 | *is_page_from_bg_q = FALSE; |
2477 | |
2478 | m = NULL; |
2479 | m_object = VM_OBJECT_NULL; |
2480 | |
2481 | if (VM_DYNAMIC_PAGING_ENABLED()) { |
2482 | assert(vm_page_throttled_count == 0); |
2483 | assert(vm_page_queue_empty(&vm_page_queue_throttled)); |
2484 | } |
2485 | |
2486 | /* |
2487 | * Try for a clean-queue inactive page. |
2488 | * These are pages that vm_pageout_scan tried to steal earlier, but |
2489 | * were dirty and had to be cleaned. Pick them up now that they are clean. |
2490 | */ |
2491 | if (!vm_page_queue_empty(&vm_page_queue_cleaned)) { |
2492 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned); |
2493 | |
2494 | assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q); |
2495 | |
2496 | goto found_page; |
2497 | } |
2498 | |
2499 | /* |
2500 | * The next most eligible pages are ones we paged in speculatively, |
2501 | * but which have not yet been touched and have been aged out. |
2502 | */ |
2503 | if (!vm_page_queue_empty(&sq->age_q)) { |
2504 | m = (vm_page_t) vm_page_queue_first(&sq->age_q); |
2505 | |
2506 | assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q); |
2507 | |
2508 | if (!m->vmp_dirty || force_anonymous == FALSE) { |
2509 | goto found_page; |
2510 | } else { |
2511 | m = NULL; |
2512 | } |
2513 | } |
2514 | |
2515 | #if !CONFIG_JETSAM |
2516 | if (vm_page_donate_mode != VM_PAGE_DONATE_DISABLED) { |
2517 | if (vm_page_donate_queue_ripe && !vm_page_queue_empty(&vm_page_queue_donate)) { |
2518 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_donate); |
2519 | assert(m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE); |
2520 | goto found_page; |
2521 | } |
2522 | } |
2523 | #endif /* !CONFIG_JETSAM */ |
2524 | |
2525 | if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) { |
2526 | vm_object_t bg_m_object = NULL; |
2527 | |
2528 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background); |
2529 | |
2530 | bg_m_object = VM_PAGE_OBJECT(m); |
2531 | |
2532 | if (!VM_PAGE_PAGEABLE(m) || (vm_darkwake_mode && m->vmp_busy)) { |
2533 | /* |
2534 | * This page is on the background queue |
2535 | * but not on a pageable queue OR is busy during |
2536 | * darkwake mode when the target is artificially lowered. |
2537 | * If it is busy during darkwake mode, and we don't skip it, |
2538 | * we will just swing back around and try again with the same |
2539 | * queue and might hit the same page or its neighbor in a |
2540 | * similar state. Both of these are transient states and will |
2541 | * get resolved, but, at this point let's ignore this page. |
2542 | */ |
2543 | if (vm_darkwake_mode && m->vmp_busy) { |
2544 | if (bg_m_object->internal) { |
2545 | vm_pageout_skipped_bq_internal++; |
2546 | } else { |
2547 | vm_pageout_skipped_bq_external++; |
2548 | } |
2549 | } |
2550 | } else if (force_anonymous == FALSE || bg_m_object->internal) { |
2551 | if (bg_m_object->internal && |
2552 | (VM_PAGE_Q_THROTTLED(iq) || |
2553 | vm_compressor_out_of_space() == TRUE || |
2554 | vm_page_free_count < (vm_page_free_reserved / 4))) { |
2555 | vm_pageout_skipped_bq_internal++; |
2556 | } else { |
2557 | *is_page_from_bg_q = TRUE; |
2558 | |
2559 | if (bg_m_object->internal) { |
2560 | vm_pageout_vminfo.vm_pageout_considered_bq_internal++; |
2561 | } else { |
2562 | vm_pageout_vminfo.vm_pageout_considered_bq_external++; |
2563 | } |
2564 | goto found_page; |
2565 | } |
2566 | } |
2567 | } |
2568 | |
2569 | inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count; |
2570 | |
2571 | if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) || |
2572 | (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) { |
2573 | *grab_anonymous = TRUE; |
2574 | *anons_grabbed = 0; |
2575 | |
2576 | if (VM_CONFIG_SWAP_IS_ACTIVE) { |
2577 | vm_pageout_vminfo.vm_pageout_skipped_external++; |
2578 | } else { |
2579 | if (vm_page_free_count < (COMPRESSOR_FREE_RESERVED_LIMIT * 2)) { |
2580 | /* |
2581 | * No swap and we are in dangerously low levels of free memory. |
2582 | * If we keep going ahead with anonymous pages, we are going to run into a situation |
2583 | * where the compressor will be stuck waiting for free pages (if it isn't already). |
2584 | * |
2585 | * So, pick a file backed page... |
2586 | */ |
2587 | *grab_anonymous = FALSE; |
2588 | *anons_grabbed = ANONS_GRABBED_LIMIT; |
2589 | vm_pageout_vminfo.vm_pageout_skipped_internal++; |
2590 | } |
2591 | } |
2592 | goto want_anonymous; |
2593 | } |
2594 | *grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); |
2595 | |
2596 | #if CONFIG_JETSAM |
2597 | /* If the file-backed pool has accumulated |
2598 | * significantly more pages than the jetsam |
2599 | * threshold, prefer to reclaim those |
2600 | * inline to minimise compute overhead of reclaiming |
2601 | * anonymous pages. |
2602 | * This calculation does not account for the CPU local |
2603 | * external page queues, as those are expected to be |
2604 | * much smaller relative to the global pools. |
2605 | */ |
2606 | |
2607 | struct vm_pageout_queue *eq = &vm_pageout_queue_external; |
2608 | |
2609 | if (*grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) { |
2610 | if (vm_page_pageable_external_count > |
2611 | vm_pageout_state.vm_page_filecache_min) { |
2612 | if ((vm_page_pageable_external_count * |
2613 | vm_pageout_memorystatus_fb_factor_dr) > |
2614 | (memorystatus_available_pages_critical * |
2615 | vm_pageout_memorystatus_fb_factor_nr)) { |
2616 | *grab_anonymous = FALSE; |
2617 | |
2618 | VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1); |
2619 | } |
2620 | } |
2621 | if (*grab_anonymous) { |
2622 | VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1); |
2623 | } |
2624 | } |
2625 | #endif /* CONFIG_JETSAM */ |
2626 | |
2627 | want_anonymous: |
2628 | if (*grab_anonymous == FALSE || *anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) { |
2629 | if (!vm_page_queue_empty(&vm_page_queue_inactive)) { |
2630 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive); |
2631 | |
2632 | assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q); |
2633 | *anons_grabbed = 0; |
2634 | |
2635 | if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) { |
2636 | if (!vm_page_queue_empty(&vm_page_queue_anonymous)) { |
2637 | if ((++(*reactivated_this_call) % 100)) { |
2638 | vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++; |
2639 | |
						vm_page_activate(m);
2641 | counter_inc(&vm_statistics_reactivations); |
2642 | #if DEVELOPMENT || DEBUG |
2643 | if (*is_page_from_bg_q == TRUE) { |
2644 | if (m_object->internal) { |
2645 | vm_pageout_rejected_bq_internal++; |
2646 | } else { |
2647 | vm_pageout_rejected_bq_external++; |
2648 | } |
2649 | } |
2650 | #endif /* DEVELOPMENT || DEBUG */ |
2651 | vm_pageout_state.vm_pageout_inactive_used++; |
2652 | |
2653 | m = NULL; |
2654 | retval = VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2655 | |
2656 | goto found_page; |
2657 | } |
2658 | |
2659 | /* |
2660 | * steal 1 of the file backed pages even if |
2661 | * we are under the limit that has been set |
2662 | * for a healthy filecache |
2663 | */ |
2664 | } |
2665 | } |
2666 | goto found_page; |
2667 | } |
2668 | } |
2669 | if (!vm_page_queue_empty(&vm_page_queue_anonymous)) { |
2670 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous); |
2671 | |
2672 | assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q); |
2673 | *anons_grabbed += 1; |
2674 | |
2675 | goto found_page; |
2676 | } |
2677 | |
2678 | m = NULL; |
2679 | |
2680 | found_page: |
2681 | *victim_page = m; |
2682 | |
2683 | return retval; |
2684 | } |
2685 | |
2686 | /* |
2687 | * This function is called only from vm_pageout_scan and |
2688 | * it will put a page back on the active/inactive queue |
2689 | * if we can't reclaim it for some reason. |
2690 | */ |
2691 | static void |
2692 | vps_requeue_page(vm_page_t m, int page_prev_q_state, __unused boolean_t page_from_bg_q) |
2693 | { |
2694 | if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) { |
		vm_page_enqueue_inactive(m, FALSE);
	} else {
		vm_page_activate(m);
2698 | } |
2699 | |
2700 | #if DEVELOPMENT || DEBUG |
2701 | vm_object_t m_object = VM_PAGE_OBJECT(m); |
2702 | |
2703 | if (page_from_bg_q == TRUE) { |
2704 | if (m_object->internal) { |
2705 | vm_pageout_rejected_bq_internal++; |
2706 | } else { |
2707 | vm_pageout_rejected_bq_external++; |
2708 | } |
2709 | } |
2710 | #endif /* DEVELOPMENT || DEBUG */ |
2711 | } |
2712 | |
2713 | /* |
2714 | * This function is called only from vm_pageout_scan and |
2715 | * it will try to grab the victim page's VM object (m_object) |
2716 | * which differs from the previous victim page's object (object). |
2717 | */ |
2718 | static int |
2719 | vps_switch_object(vm_page_t m, vm_object_t m_object, vm_object_t *object, int page_prev_q_state, boolean_t avoid_anon_pages, boolean_t page_from_bg_q) |
2720 | { |
2721 | struct vm_speculative_age_q *sq; |
2722 | |
2723 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
2724 | |
2725 | /* |
2726 | * the object associated with candidate page is |
2727 | * different from the one we were just working |
2728 | * with... dump the lock if we still own it |
2729 | */ |
2730 | if (*object != NULL) { |
2731 | vm_object_unlock(*object); |
2732 | *object = NULL; |
2733 | } |
2734 | /* |
	 * Try to lock object; since we've already got the
2736 | * page queues lock, we can only 'try' for this one. |
2737 | * if the 'try' fails, we need to do a mutex_pause |
2738 | * to allow the owner of the object lock a chance to |
2739 | * run... otherwise, we're likely to trip over this |
2740 | * object in the same state as we work our way through |
2741 | * the queue... clumps of pages associated with the same |
2742 | * object are fairly typical on the inactive and active queues |
2743 | */ |
2744 | if (!vm_object_lock_try_scan(m_object)) { |
2745 | vm_page_t m_want = NULL; |
2746 | |
2747 | vm_pageout_vminfo.vm_pageout_inactive_nolock++; |
2748 | |
2749 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
2750 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1); |
2751 | } |
2752 | |
		pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2754 | |
2755 | m->vmp_reference = FALSE; |
2756 | |
2757 | if (!m_object->object_is_shared_cache) { |
2758 | /* |
2759 | * don't apply this optimization if this is the shared cache |
2760 | * object, it's too easy to get rid of very hot and important |
2761 | * pages... |
2762 | * m->vmp_object must be stable since we hold the page queues lock... |
2763 | * we can update the scan_collisions field sans the object lock |
2764 | * since it is a separate field and this is the only spot that does |
2765 | * a read-modify-write operation and it is never executed concurrently... |
2766 | * we can asynchronously set this field to 0 when creating a UPL, so it |
			 * is possible for the value to be a bit non-deterministic, but that's ok
2768 | * since it's only used as a hint |
2769 | */ |
2770 | m_object->scan_collisions = 1; |
2771 | } |
2772 | if (page_from_bg_q) { |
2773 | m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_background); |
2774 | } else if (!vm_page_queue_empty(&vm_page_queue_cleaned)) { |
2775 | m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned); |
2776 | } else if (!vm_page_queue_empty(&sq->age_q)) { |
2777 | m_want = (vm_page_t) vm_page_queue_first(&sq->age_q); |
2778 | } else if ((avoid_anon_pages || vm_page_queue_empty(&vm_page_queue_anonymous)) && |
2779 | !vm_page_queue_empty(&vm_page_queue_inactive)) { |
2780 | m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive); |
2781 | } else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) { |
2782 | m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous); |
2783 | } |
2784 | |
2785 | /* |
2786 | * this is the next object we're going to be interested in |
2787 | * try to make sure its available after the mutex_pause |
2788 | * returns control |
2789 | */ |
2790 | if (m_want) { |
2791 | vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want); |
2792 | } |
2793 | |
2794 | vps_requeue_page(m, page_prev_q_state, page_from_bg_q); |
2795 | |
2796 | return VM_PAGEOUT_SCAN_NEXT_ITERATION; |
2797 | } else { |
2798 | *object = m_object; |
2799 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; |
2800 | } |
2801 | |
2802 | return VM_PAGEOUT_SCAN_PROCEED; |
2803 | } |
2804 | |
2805 | /* |
2806 | * This function is called only from vm_pageout_scan and |
2807 | * it notices that pageout scan may be rendered ineffective |
2808 | * due to a FS deadlock and will jetsam a process if possible. |
2809 | * If jetsam isn't supported, it'll move the page to the active |
2810 | * queue to try and get some different pages pushed onwards so |
2811 | * we can try to get out of this scenario. |
2812 | */ |
2813 | static void |
2814 | vps_deal_with_throttled_queues(vm_page_t m, vm_object_t *object, uint32_t *vm_pageout_inactive_external_forced_reactivate_limit, |
2815 | boolean_t *force_anonymous, __unused boolean_t is_page_from_bg_q) |
2816 | { |
2817 | struct vm_pageout_queue *eq; |
2818 | vm_object_t cur_object = VM_OBJECT_NULL; |
2819 | |
2820 | cur_object = *object; |
2821 | |
2822 | eq = &vm_pageout_queue_external; |
2823 | |
2824 | if (cur_object->internal == FALSE) { |
2825 | /* |
2826 | * we need to break up the following potential deadlock case... |
2827 | * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. |
2828 | * b) The thread doing the writing is waiting for pages while holding the truncate lock |
2829 | * c) Most of the pages in the inactive queue belong to this file. |
2830 | * |
2831 | * we are potentially in this deadlock because... |
2832 | * a) the external pageout queue is throttled |
2833 | * b) we're done with the active queue and moved on to the inactive queue |
2834 | * c) we've got a dirty external page |
2835 | * |
2836 | * since we don't know the reason for the external pageout queue being throttled we |
2837 | * must suspect that we are deadlocked, so move the current page onto the active queue |
2838 | * in an effort to cause a page from the active queue to 'age' to the inactive queue |
2839 | * |
2840 | * if we don't have jetsam configured (i.e. we have a dynamic pager), set |
2841 | * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous |
2842 | * pool the next time we select a victim page... if we can make enough new free pages, |
2843 | * the deadlock will break, the external pageout queue will empty and it will no longer |
2844 | * be throttled |
2845 | * |
2846 | * if we have jetsam configured, keep a count of the pages reactivated this way so |
2847 | * that we can try to find clean pages in the active/inactive queues before |
2848 | * deciding to jetsam a process |
2849 | */ |
2850 | vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++; |
2851 | |
		vm_page_check_pageable_safe(m);
2853 | assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q); |
2854 | vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq); |
2855 | m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q; |
2856 | vm_page_active_count++; |
2857 | vm_page_pageable_external_count++; |
2858 | |
2859 | vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE); |
2860 | |
2861 | #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM |
2862 | |
2863 | #pragma unused(force_anonymous) |
2864 | |
2865 | *vm_pageout_inactive_external_forced_reactivate_limit -= 1; |
2866 | |
2867 | if (*vm_pageout_inactive_external_forced_reactivate_limit <= 0) { |
2868 | *vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; |
2869 | /* |
2870 | * Possible deadlock scenario so request jetsam action |
2871 | */ |
2872 | memorystatus_kill_on_vps_starvation(); |
2873 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_NONE, |
2874 | vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); |
2875 | } |
2876 | #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ |
2877 | |
2878 | #pragma unused(vm_pageout_inactive_external_forced_reactivate_limit) |
2879 | |
2880 | *force_anonymous = TRUE; |
2881 | #endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ |
2882 | } else { |
		vm_page_activate(m);
2884 | counter_inc(&vm_statistics_reactivations); |
2885 | |
2886 | #if DEVELOPMENT || DEBUG |
2887 | if (is_page_from_bg_q == TRUE) { |
2888 | if (cur_object->internal) { |
2889 | vm_pageout_rejected_bq_internal++; |
2890 | } else { |
2891 | vm_pageout_rejected_bq_external++; |
2892 | } |
2893 | } |
2894 | #endif /* DEVELOPMENT || DEBUG */ |
2895 | |
2896 | vm_pageout_state.vm_pageout_inactive_used++; |
2897 | } |
2898 | } |
2899 | |
2900 | |
2901 | void |
2902 | vm_page_balance_inactive(int max_to_move) |
2903 | { |
2904 | vm_page_t m; |
2905 | |
2906 | LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); |
2907 | |
2908 | if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) { |
2909 | /* |
2910 | * It is likely that the hibernation code path is |
2911 | * dealing with these very queues as we are about |
2912 | * to move pages around in/from them and completely |
2913 | * change the linkage of the pages. |
2914 | * |
2915 | * And so we skip the rebalancing of these queues. |
2916 | */ |
2917 | return; |
2918 | } |
2919 | vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + |
2920 | vm_page_inactive_count + |
2921 | vm_page_speculative_count); |
2922 | |
2923 | while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) { |
2924 | VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1); |
2925 | |
2926 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active); |
2927 | |
2928 | assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q); |
2929 | assert(!m->vmp_laundry); |
2930 | assert(!is_kernel_object(VM_PAGE_OBJECT(m))); |
2931 | assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr); |
2932 | |
2933 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); |
2934 | |
2935 | /* |
2936 | * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... |
2937 | * |
2938 | * a TLB flush isn't really needed here since at worst we'll miss the reference bit being |
2939 | * updated in the PTE if a remote processor still has this mapping cached in its TLB when the |
		 * new reference happens. If no further references happen on the page after that remote TLB flushes
2941 | * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue |
2942 | * by pageout_scan, which is just fine since the last reference would have happened quite far |
2943 | * in the past (TLB caches don't hang around for very long), and of course could just as easily |
2944 | * have happened before we moved the page |
2945 | */ |
2946 | if (m->vmp_pmapped == TRUE) { |
2947 | /* |
2948 | * We might be holding the page queue lock as a |
2949 | * spin lock and clearing the "referenced" bit could |
2950 | * take a while if there are lots of mappings of |
2951 | * that page, so make sure we acquire the lock as |
			 * a mutex to avoid a spinlock timeout.
2953 | */ |
2954 | vm_page_lockconvert_queues(); |
			pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2956 | } |
2957 | |
2958 | /* |
2959 | * The page might be absent or busy, |
2960 | * but vm_page_deactivate can handle that. |
2961 | * FALSE indicates that we don't want a H/W clear reference |
2962 | */ |
		vm_page_deactivate_internal(m, FALSE);
2964 | } |
2965 | } |
2966 | |
2967 | /* |
2968 | * vm_pageout_scan does the dirty work for the pageout daemon. |
2969 | * It returns with both vm_page_queue_free_lock and vm_page_queue_lock |
2970 | * held and vm_page_free_wanted == 0. |
2971 | */ |
2972 | void |
2973 | vm_pageout_scan(void) |
2974 | { |
2975 | unsigned int loop_count = 0; |
2976 | unsigned int inactive_burst_count = 0; |
2977 | unsigned int reactivated_this_call; |
2978 | unsigned int reactivate_limit; |
2979 | vm_page_t local_freeq = NULL; |
2980 | int local_freed = 0; |
2981 | int delayed_unlock; |
2982 | int delayed_unlock_limit = 0; |
2983 | int refmod_state = 0; |
2984 | int vm_pageout_deadlock_target = 0; |
2985 | struct vm_pageout_queue *iq; |
2986 | struct vm_pageout_queue *eq; |
2987 | struct vm_speculative_age_q *sq; |
2988 | struct flow_control flow_control = { .state = 0, .ts = { .tv_sec = 0, .tv_nsec = 0 } }; |
2989 | boolean_t inactive_throttled = FALSE; |
2990 | vm_object_t object = NULL; |
2991 | uint32_t inactive_reclaim_run; |
2992 | boolean_t grab_anonymous = FALSE; |
2993 | boolean_t force_anonymous = FALSE; |
2994 | boolean_t force_speculative_aging = FALSE; |
2995 | int anons_grabbed = 0; |
2996 | int page_prev_q_state = 0; |
2997 | boolean_t page_from_bg_q = FALSE; |
2998 | uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; |
2999 | vm_object_t m_object = VM_OBJECT_NULL; |
3000 | int retval = 0; |
3001 | boolean_t lock_yield_check = FALSE; |
3002 | |
3003 | |
3004 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, |
3005 | vm_pageout_vminfo.vm_pageout_freed_speculative, |
3006 | vm_pageout_state.vm_pageout_inactive_clean, |
3007 | vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, |
3008 | vm_pageout_vminfo.vm_pageout_inactive_dirty_external); |
3009 | |
3010 | flow_control.state = FCS_IDLE; |
3011 | iq = &vm_pageout_queue_internal; |
3012 | eq = &vm_pageout_queue_external; |
3013 | sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; |
3014 | |
3015 | /* Ask the pmap layer to return any pages it no longer needs. */ |
3016 | pmap_release_pages_fast(); |
3017 | |
3018 | vm_page_lock_queues(); |
3019 | |
3020 | delayed_unlock = 1; |
3021 | |
3022 | /* |
3023 | * Calculate the max number of referenced pages on the inactive |
3024 | * queue that we will reactivate. |
3025 | */ |
3026 | reactivated_this_call = 0; |
3027 | reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count + |
3028 | vm_page_inactive_count); |
3029 | inactive_reclaim_run = 0; |
3030 | |
3031 | vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; |
3032 | |
3033 | /* |
3034 | * We must limit the rate at which we send pages to the pagers |
3035 | * so that we don't tie up too many pages in the I/O queues. |
3036 | * We implement a throttling mechanism using the laundry count |
3037 | * to limit the number of pages outstanding to the default |
3038 | * and external pagers. We can bypass the throttles and look |
3039 | * for clean pages if the pageout queues don't drain in a timely |
3040 | * fashion since this may indicate that the pageout paths are |
3041 | * stalled waiting for memory, which only we can provide. |
3042 | */ |
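/*
 * Conceptual sketch of the laundry throttle described above.  The real
 * test is the VM_PAGE_Q_THROTTLED() macro; the comparison below is an
 * assumption about its general shape, not its exact definition.
 *
 *	struct vm_pageout_queue *q = object->internal ? iq : eq;
 *	boolean_t throttled = (q->pgo_laundry >= q->pgo_maxlaundry);
 *
 * While 'throttled' is true, dirty pages bound for that pager are requeued
 * rather than sent to the laundry, unless the queue appears stalled.
 */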
3043 | |
3044 | vps_init_page_targets(); |
3045 | assert(object == NULL); |
3046 | assert(delayed_unlock != 0); |
3047 | |
3048 | for (;;) { |
3049 | vm_page_t m; |
3050 | |
3051 | DTRACE_VM2(rev, int, 1, (uint64_t *), NULL); |
3052 | |
3053 | if (lock_yield_check) { |
3054 | lock_yield_check = FALSE; |
3055 | |
3056 | if (delayed_unlock++ > delayed_unlock_limit) { |
vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3058 | VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER); |
3059 | } else if (vm_pageout_scan_wants_object) { |
3060 | vm_page_unlock_queues(); |
3061 | mutex_pause(0); |
3062 | vm_page_lock_queues(); |
} else if (vps_yield_for_pgqlockwaiters && lck_mtx_yield(&vm_page_queue_lock)) {
3064 | VM_PAGEOUT_DEBUG(vm_pageout_yield_for_free_pages, 1); |
3065 | } |
3066 | } |
3067 | |
3068 | if (vm_upl_wait_for_pages < 0) { |
3069 | vm_upl_wait_for_pages = 0; |
3070 | } |
3071 | |
3072 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages; |
3073 | |
3074 | if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) { |
3075 | delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX; |
3076 | } |
3077 | |
vps_deal_with_secluded_page_overflow(&local_freeq, &local_freed);
3079 | |
3080 | assert(delayed_unlock); |
3081 | |
3082 | /* |
3083 | * maintain our balance |
3084 | */ |
vm_page_balance_inactive(1);
3086 | |
3087 | |
3088 | /********************************************************************** |
3089 | * above this point we're playing with the active and secluded queues |
3090 | * below this point we're playing with the throttling mechanisms |
3091 | * and the inactive queue |
3092 | **********************************************************************/ |
3093 | |
3094 | if (vm_page_free_count + local_freed >= vm_page_free_target) { |
3095 | vm_pageout_scan_wants_object = VM_OBJECT_NULL; |
3096 | |
vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3098 | VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER); |
3099 | /* |
3100 | * make sure the pageout I/O threads are running |
3101 | * throttled in case there are still requests |
3102 | * in the laundry... since we have met our targets |
3103 | * we don't need the laundry to be cleaned in a timely |
3104 | * fashion... so let's avoid interfering with foreground |
3105 | * activity |
3106 | */ |
3107 | vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE); |
3108 | |
3109 | vm_free_page_lock(); |
3110 | |
3111 | if ((vm_page_free_count >= vm_page_free_target) && |
3112 | (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { |
3113 | /* |
3114 | * done - we have met our target *and* |
3115 | * there is no one waiting for a page. |
3116 | */ |
3117 | return_from_scan: |
3118 | assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); |
3119 | |
3120 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE, |
3121 | vm_pageout_state.vm_pageout_inactive, |
3122 | vm_pageout_state.vm_pageout_inactive_used, 0, 0); |
3123 | VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END, |
3124 | vm_pageout_vminfo.vm_pageout_freed_speculative, |
3125 | vm_pageout_state.vm_pageout_inactive_clean, |
3126 | vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, |
3127 | vm_pageout_vminfo.vm_pageout_inactive_dirty_external); |
3128 | |
3129 | return; |
3130 | } |
3131 | vm_free_page_unlock(); |
3132 | } |
3133 | |
3134 | /* |
3135 | * Before anything, we check if we have any ripe volatile |
3136 | * objects around. If so, try to purge the first object. |
3137 | * If the purge fails, fall through to reclaim a page instead. |
* If the purge succeeds, go back to the top and reevaluate
3139 | * the new memory situation. |
3140 | */ |
3141 | retval = vps_purge_object(); |
3142 | |
3143 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3144 | /* |
3145 | * Success |
3146 | */ |
3147 | if (object != NULL) { |
3148 | vm_object_unlock(object); |
3149 | object = NULL; |
3150 | } |
3151 | |
3152 | lock_yield_check = FALSE; |
3153 | continue; |
3154 | } |
3155 | |
3156 | /* |
3157 | * If our 'aged' queue is empty and we have some speculative pages |
3158 | * in the other queues, let's go through and see if we need to age |
3159 | * them. |
3160 | * |
3161 | * If we succeeded in aging a speculative Q or just that everything |
3162 | * looks normal w.r.t queue age and queue counts, we keep going onward. |
3163 | * |
3164 | * If, for some reason, we seem to have a mismatch between the spec. |
3165 | * page count and the page queues, we reset those variables and |
3166 | * restart the loop (LD TODO: Track this better?). |
3167 | */ |
3168 | if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) { |
3169 | retval = vps_age_speculative_queue(force_speculative_aging); |
3170 | |
3171 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3172 | lock_yield_check = FALSE; |
3173 | continue; |
3174 | } |
3175 | } |
3176 | force_speculative_aging = FALSE; |
3177 | |
3178 | /* |
3179 | * Check to see if we need to evict objects from the cache. |
3180 | * |
3181 | * Note: 'object' here doesn't have anything to do with |
3182 | * the eviction part. We just need to make sure we have dropped |
3183 | * any object lock we might be holding if we need to go down |
3184 | * into the eviction logic. |
3185 | */ |
retval = vps_object_cache_evict(&object);
3187 | |
3188 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3189 | lock_yield_check = FALSE; |
3190 | continue; |
3191 | } |
3192 | |
3193 | |
3194 | /* |
3195 | * Calculate our filecache_min that will affect the loop |
3196 | * going forward. |
3197 | */ |
3198 | vps_calculate_filecache_min(); |
3199 | |
3200 | /* |
3201 | * LD TODO: Use a structure to hold all state variables for a single |
3202 | * vm_pageout_scan iteration and pass that structure to this function instead. |
3203 | */ |
retval = vps_flow_control(&flow_control, &anons_grabbed, &object,
    &delayed_unlock, &local_freeq, &local_freed,
    &vm_pageout_deadlock_target, inactive_burst_count);
3207 | |
3208 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3209 | if (loop_count >= vm_page_inactive_count) { |
3210 | loop_count = 0; |
3211 | } |
3212 | |
3213 | inactive_burst_count = 0; |
3214 | |
3215 | assert(object == NULL); |
3216 | assert(delayed_unlock != 0); |
3217 | |
3218 | lock_yield_check = FALSE; |
3219 | continue; |
3220 | } else if (retval == VM_PAGEOUT_SCAN_DONE_RETURN) { |
3221 | goto return_from_scan; |
3222 | } |
3223 | |
3224 | flow_control.state = FCS_IDLE; |
3225 | |
3226 | vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), |
3227 | vm_pageout_inactive_external_forced_reactivate_limit); |
3228 | loop_count++; |
3229 | inactive_burst_count++; |
3230 | vm_pageout_state.vm_pageout_inactive++; |
3231 | |
3232 | /* |
3233 | * Choose a victim. |
3234 | */ |
3235 | |
3236 | m = NULL; |
retval = vps_choose_victim_page(&m, &anons_grabbed, &grab_anonymous, force_anonymous, &page_from_bg_q, &reactivated_this_call);
3238 | |
3239 | if (m == NULL) { |
3240 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3241 | inactive_burst_count = 0; |
3242 | |
3243 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3244 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1); |
3245 | } |
3246 | |
3247 | lock_yield_check = TRUE; |
3248 | continue; |
3249 | } |
3250 | |
3251 | /* |
3252 | * if we've gotten here, we have no victim page. |
3253 | * check to see if we've not finished balancing the queues |
3254 | * or we have a page on the aged speculative queue that we |
* skipped due to force_anonymous == TRUE... or we have
* speculative pages that we can prematurely age... if we're in
* one of these cases we'll keep going, else panic
3258 | */ |
3259 | force_anonymous = FALSE; |
3260 | VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1); |
3261 | |
3262 | if (!vm_page_queue_empty(&sq->age_q)) { |
3263 | lock_yield_check = TRUE; |
3264 | continue; |
3265 | } |
3266 | |
3267 | if (vm_page_speculative_count) { |
3268 | force_speculative_aging = TRUE; |
3269 | lock_yield_check = TRUE; |
3270 | continue; |
3271 | } |
panic("vm_pageout: no victim");
3273 | |
3274 | /* NOTREACHED */ |
3275 | } |
3276 | |
3277 | assert(VM_PAGE_PAGEABLE(m)); |
3278 | m_object = VM_PAGE_OBJECT(m); |
3279 | force_anonymous = FALSE; |
3280 | |
3281 | page_prev_q_state = m->vmp_q_state; |
3282 | /* |
3283 | * we just found this page on one of our queues... |
3284 | * it can't also be on the pageout queue, so safe |
3285 | * to call vm_page_queues_remove |
3286 | */ |
3287 | bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE); |
vm_page_queues_remove(m, TRUE);
3289 | if (donate) { |
3290 | /* |
3291 | * The compressor needs to see this bit to know |
3292 | * where this page needs to land. Also if stolen, |
3293 | * this bit helps put the page back in the right |
3294 | * special queue where it belongs. |
3295 | */ |
3296 | m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE; |
3297 | } |
3298 | |
3299 | assert(!m->vmp_laundry); |
3300 | assert(!m->vmp_private); |
3301 | assert(!m->vmp_fictitious); |
3302 | assert(!is_kernel_object(m_object)); |
3303 | assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr); |
3304 | |
3305 | vm_pageout_vminfo.vm_pageout_considered_page++; |
3306 | |
3307 | DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); |
3308 | |
3309 | /* |
3310 | * check to see if we currently are working |
3311 | * with the same object... if so, we've |
3312 | * already got the lock |
3313 | */ |
3314 | if (m_object != object) { |
3315 | boolean_t avoid_anon_pages = (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT); |
3316 | |
3317 | /* |
3318 | * vps_switch_object() will always drop the 'object' lock first |
3319 | * and then try to acquire the 'm_object' lock. So 'object' has to point to |
3320 | * either 'm_object' or NULL. |
3321 | */ |
retval = vps_switch_object(m, m_object, &object, page_prev_q_state, avoid_anon_pages, page_from_bg_q);
3323 | |
3324 | if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) { |
3325 | lock_yield_check = TRUE; |
3326 | continue; |
3327 | } |
3328 | } |
3329 | assert(m_object == object); |
3330 | assert(VM_PAGE_OBJECT(m) == m_object); |
3331 | |
3332 | if (m->vmp_busy) { |
3333 | /* |
3334 | * Somebody is already playing with this page. |
3335 | * Put it back on the appropriate queue |
3336 | * |
3337 | */ |
3338 | VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1); |
3339 | |
3340 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3341 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1); |
3342 | } |
3343 | |
3344 | vps_requeue_page(m, page_prev_q_state, page_from_bg_q); |
3345 | |
3346 | lock_yield_check = TRUE; |
3347 | continue; |
3348 | } |
3349 | |
3350 | /* |
3351 | * if (m->vmp_cleaning && !m->vmp_free_when_done) |
3352 | * If already cleaning this page in place |
* just leave it off the paging queues.
3354 | * We can leave the page mapped, and upl_commit_range |
3355 | * will put it on the clean queue. |
3356 | * |
3357 | * if (m->vmp_free_when_done && !m->vmp_cleaning) |
3358 | * an msync INVALIDATE is in progress... |
3359 | * this page has been marked for destruction |
3360 | * after it has been cleaned, |
3361 | * but not yet gathered into a UPL |
3362 | * where 'cleaning' will be set... |
3363 | * just leave it off the paging queues |
3364 | * |
* if (m->vmp_free_when_done && m->vmp_cleaning)
3366 | * an msync INVALIDATE is in progress |
3367 | * and the UPL has already gathered this page... |
3368 | * just leave it off the paging queues |
3369 | */ |
3370 | if (m->vmp_free_when_done || m->vmp_cleaning) { |
3371 | lock_yield_check = TRUE; |
3372 | continue; |
3373 | } |
3374 | |
3375 | |
3376 | /* |
3377 | * If it's absent, in error or the object is no longer alive, |
3378 | * we can reclaim the page... in the no longer alive case, |
3379 | * there are 2 states the page can be in that preclude us |
3380 | * from reclaiming it - busy or cleaning - that we've already |
3381 | * dealt with |
3382 | */ |
3383 | if (m->vmp_absent || VMP_ERROR_GET(m) || !object->alive || |
3384 | (!object->internal && object->pager == MEMORY_OBJECT_NULL)) { |
3385 | if (m->vmp_absent) { |
3386 | VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1); |
3387 | } else if (!object->alive || |
3388 | (!object->internal && |
3389 | object->pager == MEMORY_OBJECT_NULL)) { |
3390 | VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1); |
3391 | } else { |
3392 | VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1); |
3393 | } |
3394 | reclaim_page: |
3395 | if (vm_pageout_deadlock_target) { |
3396 | VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1); |
3397 | vm_pageout_deadlock_target--; |
3398 | } |
3399 | |
3400 | DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); |
3401 | |
3402 | if (object->internal) { |
3403 | DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); |
3404 | } else { |
3405 | DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); |
3406 | } |
3407 | assert(!m->vmp_cleaning); |
3408 | assert(!m->vmp_laundry); |
3409 | |
3410 | if (!object->internal && |
3411 | object->pager != NULL && |
3412 | object->pager->mo_pager_ops == &shared_region_pager_ops) { |
3413 | shared_region_pager_reclaimed++; |
3414 | } |
3415 | |
3416 | m->vmp_busy = TRUE; |
3417 | |
3418 | /* |
3419 | * remove page from object here since we're already |
3420 | * behind the object lock... defer the rest of the work |
3421 | * we'd normally do in vm_page_free_prepare_object |
3422 | * until 'vm_page_free_list' is called |
3423 | */ |
3424 | if (m->vmp_tabled) { |
vm_page_remove(m, TRUE);
3426 | } |
3427 | |
3428 | assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0); |
3429 | m->vmp_snext = local_freeq; |
3430 | local_freeq = m; |
3431 | local_freed++; |
3432 | |
3433 | if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) { |
3434 | vm_pageout_vminfo.vm_pageout_freed_speculative++; |
3435 | } else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3436 | vm_pageout_vminfo.vm_pageout_freed_cleaned++; |
3437 | } else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) { |
3438 | vm_pageout_vminfo.vm_pageout_freed_internal++; |
3439 | } else { |
3440 | vm_pageout_vminfo.vm_pageout_freed_external++; |
3441 | } |
3442 | |
3443 | inactive_burst_count = 0; |
3444 | |
3445 | lock_yield_check = TRUE; |
3446 | continue; |
3447 | } |
3448 | if (object->vo_copy == VM_OBJECT_NULL) { |
3449 | /* |
3450 | * No one else can have any interest in this page. |
3451 | * If this is an empty purgable object, the page can be |
3452 | * reclaimed even if dirty. |
3453 | * If the page belongs to a volatile purgable object, we |
3454 | * reactivate it if the compressor isn't active. |
3455 | */ |
3456 | if (object->purgable == VM_PURGABLE_EMPTY) { |
3457 | if (m->vmp_pmapped == TRUE) { |
3458 | /* unmap the page */ |
refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
3460 | if (refmod_state & VM_MEM_MODIFIED) { |
3461 | SET_PAGE_DIRTY(m, FALSE); |
3462 | } |
3463 | } |
3464 | if (m->vmp_dirty || m->vmp_precious) { |
3465 | /* we saved the cost of cleaning this page ! */ |
3466 | vm_page_purged_count++; |
3467 | } |
3468 | goto reclaim_page; |
3469 | } |
3470 | |
3471 | if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
3472 | /* |
3473 | * With the VM compressor, the cost of |
3474 | * reclaiming a page is much lower (no I/O), |
3475 | * so if we find a "volatile" page, it's better |
3476 | * to let it get compressed rather than letting |
3477 | * it occupy a full page until it gets purged. |
3478 | * So no need to check for "volatile" here. |
3479 | */ |
3480 | } else if (object->purgable == VM_PURGABLE_VOLATILE) { |
3481 | /* |
3482 | * Avoid cleaning a "volatile" page which might |
3483 | * be purged soon. |
3484 | */ |
3485 | |
3486 | /* if it's wired, we can't put it on our queue */ |
3487 | assert(!VM_PAGE_WIRED(m)); |
3488 | |
3489 | /* just stick it back on! */ |
3490 | reactivated_this_call++; |
3491 | |
3492 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3493 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1); |
3494 | } |
3495 | |
3496 | goto reactivate_page; |
3497 | } |
3498 | } |
3499 | /* |
3500 | * If it's being used, reactivate. |
3501 | * (Fictitious pages are either busy or absent.) |
3502 | * First, update the reference and dirty bits |
3503 | * to make sure the page is unreferenced. |
3504 | */ |
3505 | refmod_state = -1; |
3506 | |
3507 | if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) { |
refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3509 | |
3510 | if (refmod_state & VM_MEM_REFERENCED) { |
3511 | m->vmp_reference = TRUE; |
3512 | } |
3513 | if (refmod_state & VM_MEM_MODIFIED) { |
3514 | SET_PAGE_DIRTY(m, FALSE); |
3515 | } |
3516 | } |
3517 | |
3518 | if (m->vmp_reference || m->vmp_dirty) { |
3519 | /* deal with a rogue "reusable" page */ |
3520 | VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object); |
3521 | } |
3522 | |
3523 | if (vm_pageout_state.vm_page_xpmapped_min_divisor == 0) { |
3524 | vm_pageout_state.vm_page_xpmapped_min = 0; |
3525 | } else { |
3526 | vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / vm_pageout_state.vm_page_xpmapped_min_divisor; |
3527 | } |
3528 | |
3529 | if (!m->vmp_no_cache && |
3530 | page_from_bg_q == FALSE && |
3531 | (m->vmp_reference || (m->vmp_xpmapped && !object->internal && |
3532 | (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) { |
3533 | /* |
3534 | * The page we pulled off the inactive list has |
3535 | * been referenced. It is possible for other |
3536 | * processors to be touching pages faster than we |
3537 | * can clear the referenced bit and traverse the |
3538 | * inactive queue, so we limit the number of |
3539 | * reactivations. |
3540 | */ |
3541 | if (++reactivated_this_call >= reactivate_limit && |
3542 | !object->object_is_shared_cache && |
3543 | !((m->vmp_realtime || |
3544 | object->for_realtime) && |
3545 | vm_pageout_protect_realtime)) { |
3546 | vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++; |
3547 | } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { |
3548 | vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++; |
3549 | if (object->object_is_shared_cache) { |
3550 | vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache++; |
3551 | } else if (m->vmp_realtime || |
3552 | object->for_realtime) { |
3553 | vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime++; |
3554 | } |
3555 | } else { |
3556 | uint32_t isinuse; |
3557 | |
3558 | if (reactivated_this_call >= reactivate_limit) { |
3559 | if (object->object_is_shared_cache) { |
3560 | vm_pageout_vminfo.vm_pageout_protected_sharedcache++; |
3561 | } else if ((m->vmp_realtime || |
3562 | object->for_realtime) && |
3563 | vm_pageout_protect_realtime) { |
3564 | vm_pageout_vminfo.vm_pageout_protected_realtime++; |
3565 | } |
3566 | } |
3567 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3568 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1); |
3569 | } |
3570 | |
3571 | vm_pageout_vminfo.vm_pageout_inactive_referenced++; |
3572 | reactivate_page: |
3573 | if (!object->internal && object->pager != MEMORY_OBJECT_NULL && |
3574 | vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { |
3575 | /* |
* no explicit mappings of this object exist
3577 | * and it's not open via the filesystem |
3578 | */ |
vm_page_deactivate(m);
3580 | VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1); |
3581 | } else { |
3582 | /* |
3583 | * The page was/is being used, so put back on active list. |
3584 | */ |
vm_page_activate(m);
3586 | counter_inc(&vm_statistics_reactivations); |
3587 | inactive_burst_count = 0; |
3588 | } |
3589 | #if DEVELOPMENT || DEBUG |
3590 | if (page_from_bg_q == TRUE) { |
3591 | if (m_object->internal) { |
3592 | vm_pageout_rejected_bq_internal++; |
3593 | } else { |
3594 | vm_pageout_rejected_bq_external++; |
3595 | } |
3596 | } |
3597 | #endif /* DEVELOPMENT || DEBUG */ |
3598 | |
3599 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3600 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1); |
3601 | } |
3602 | vm_pageout_state.vm_pageout_inactive_used++; |
3603 | |
3604 | lock_yield_check = TRUE; |
3605 | continue; |
3606 | } |
3607 | /* |
3608 | * Make sure we call pmap_get_refmod() if it |
3609 | * wasn't already called just above, to update |
3610 | * the dirty bit. |
3611 | */ |
3612 | if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) { |
refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3614 | if (refmod_state & VM_MEM_MODIFIED) { |
3615 | SET_PAGE_DIRTY(m, FALSE); |
3616 | } |
3617 | } |
3618 | } |
3619 | |
3620 | /* |
3621 | * we've got a candidate page to steal... |
3622 | * |
3623 | * m->vmp_dirty is up to date courtesy of the |
3624 | * preceding check for m->vmp_reference... if |
3625 | * we get here, then m->vmp_reference had to be |
3626 | * FALSE (or possibly "reactivate_limit" was |
3627 | * exceeded), but in either case we called |
3628 | * pmap_get_refmod() and updated both |
3629 | * m->vmp_reference and m->vmp_dirty |
3630 | * |
3631 | * if it's dirty or precious we need to |
* see if the target queue is throttled...
* if it is, we need to skip over it by moving it back
3634 | * to the end of the inactive queue |
3635 | */ |
3636 | |
3637 | inactive_throttled = FALSE; |
3638 | |
3639 | if (m->vmp_dirty || m->vmp_precious) { |
3640 | if (object->internal) { |
3641 | if (VM_PAGE_Q_THROTTLED(iq)) { |
3642 | inactive_throttled = TRUE; |
3643 | } |
3644 | } else if (VM_PAGE_Q_THROTTLED(eq)) { |
3645 | inactive_throttled = TRUE; |
3646 | } |
3647 | } |
3648 | throttle_inactive: |
3649 | if (!VM_DYNAMIC_PAGING_ENABLED() && |
3650 | object->internal && m->vmp_dirty && |
3651 | (object->purgable == VM_PURGABLE_DENY || |
3652 | object->purgable == VM_PURGABLE_NONVOLATILE || |
3653 | object->purgable == VM_PURGABLE_VOLATILE)) { |
vm_page_check_pageable_safe(m);
3655 | assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q); |
3656 | vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq); |
3657 | m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q; |
3658 | vm_page_throttled_count++; |
3659 | |
3660 | VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1); |
3661 | |
3662 | inactive_burst_count = 0; |
3663 | |
3664 | lock_yield_check = TRUE; |
3665 | continue; |
3666 | } |
3667 | if (inactive_throttled == TRUE) { |
vps_deal_with_throttled_queues(m, &object, &vm_pageout_inactive_external_forced_reactivate_limit,
    &force_anonymous, page_from_bg_q);
3670 | |
3671 | inactive_burst_count = 0; |
3672 | |
3673 | if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) { |
3674 | VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1); |
3675 | } |
3676 | |
3677 | lock_yield_check = TRUE; |
3678 | continue; |
3679 | } |
3680 | |
3681 | /* |
3682 | * we've got a page that we can steal... |
3683 | * eliminate all mappings and make sure |
3684 | * we have the up-to-date modified state |
3685 | * |
3686 | * if we need to do a pmap_disconnect then we |
3687 | * need to re-evaluate m->vmp_dirty since the pmap_disconnect |
3688 | * provides the true state atomically... the |
3689 | * page was still mapped up to the pmap_disconnect |
3690 | * and may have been dirtied at the last microsecond |
3691 | * |
3692 | * Note that if 'pmapped' is FALSE then the page is not |
3693 | * and has not been in any map, so there is no point calling |
3694 | * pmap_disconnect(). m->vmp_dirty could have been set in anticipation |
3695 | * of likely usage of the page. |
3696 | */ |
3697 | if (m->vmp_pmapped == TRUE) { |
3698 | int pmap_options; |
3699 | |
3700 | /* |
3701 | * Don't count this page as going into the compressor |
3702 | * if any of these are true: |
3703 | * 1) compressed pager isn't enabled |
3704 | * 2) Freezer enabled device with compressed pager |
3705 | * backend (exclusive use) i.e. most of the VM system |
3706 | * (including vm_pageout_scan) has no knowledge of |
3707 | * the compressor |
3708 | * 3) This page belongs to a file and hence will not be |
3709 | * sent into the compressor |
3710 | */ |
3711 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE || |
3712 | object->internal == FALSE) { |
3713 | pmap_options = 0; |
3714 | } else if (m->vmp_dirty || m->vmp_precious) { |
3715 | /* |
3716 | * VM knows that this page is dirty (or |
3717 | * precious) and needs to be compressed |
3718 | * rather than freed. |
3719 | * Tell the pmap layer to count this page |
3720 | * as "compressed". |
3721 | */ |
3722 | pmap_options = PMAP_OPTIONS_COMPRESSOR; |
3723 | } else { |
3724 | /* |
3725 | * VM does not know if the page needs to |
3726 | * be preserved but the pmap layer might tell |
3727 | * us if any mapping has "modified" it. |
* Let the pmap layer count this page
3729 | * as compressed if and only if it has been |
3730 | * modified. |
3731 | */ |
3732 | pmap_options = |
3733 | PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED; |
3734 | } |
refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
    pmap_options,
    NULL);
3738 | if (refmod_state & VM_MEM_MODIFIED) { |
3739 | SET_PAGE_DIRTY(m, FALSE); |
3740 | } |
3741 | } |
3742 | |
3743 | /* |
3744 | * reset our count of pages that have been reclaimed |
3745 | * since the last page was 'stolen' |
3746 | */ |
3747 | inactive_reclaim_run = 0; |
3748 | |
3749 | /* |
3750 | * If it's clean and not precious, we can free the page. |
3751 | */ |
3752 | if (!m->vmp_dirty && !m->vmp_precious) { |
3753 | vm_pageout_state.vm_pageout_inactive_clean++; |
3754 | |
3755 | /* |
3756 | * OK, at this point we have found a page we are going to free. |
3757 | */ |
3758 | #if CONFIG_PHANTOM_CACHE |
3759 | if (!object->internal) { |
3760 | vm_phantom_cache_add_ghost(m); |
3761 | } |
3762 | #endif |
3763 | goto reclaim_page; |
3764 | } |
3765 | |
3766 | /* |
3767 | * The page may have been dirtied since the last check |
3768 | * for a throttled target queue (which may have been skipped |
3769 | * if the page was clean then). With the dirty page |
3770 | * disconnected here, we can make one final check. |
3771 | */ |
3772 | if (object->internal) { |
3773 | if (VM_PAGE_Q_THROTTLED(iq)) { |
3774 | inactive_throttled = TRUE; |
3775 | } |
3776 | } else if (VM_PAGE_Q_THROTTLED(eq)) { |
3777 | inactive_throttled = TRUE; |
3778 | } |
3779 | |
3780 | if (inactive_throttled == TRUE) { |
3781 | goto throttle_inactive; |
3782 | } |
3783 | |
3784 | #if VM_PRESSURE_EVENTS |
3785 | #if CONFIG_JETSAM |
3786 | |
3787 | /* |
3788 | * If Jetsam is enabled, then the sending |
3789 | * of memory pressure notifications is handled |
3790 | * from the same thread that takes care of high-water |
3791 | * and other jetsams i.e. the memorystatus_thread. |
3792 | */ |
3793 | |
3794 | #else /* CONFIG_JETSAM */ |
3795 | |
3796 | vm_pressure_response(); |
3797 | |
3798 | #endif /* CONFIG_JETSAM */ |
3799 | #endif /* VM_PRESSURE_EVENTS */ |
3800 | |
3801 | if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) { |
3802 | VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1); |
3803 | } |
3804 | |
3805 | if (object->internal) { |
3806 | vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++; |
3807 | } else { |
3808 | vm_pageout_vminfo.vm_pageout_inactive_dirty_external++; |
3809 | } |
3810 | |
3811 | /* |
3812 | * internal pages will go to the compressor... |
3813 | * external pages will go to the appropriate pager to be cleaned |
3814 | * and upon completion will end up on 'vm_page_queue_cleaned' which |
3815 | * is a preferred queue to steal from |
3816 | */ |
3817 | vm_pageout_cluster(m); |
3818 | inactive_burst_count = 0; |
3819 | |
3820 | /* |
3821 | * back to top of pageout scan loop |
3822 | */ |
3823 | } |
3824 | } |
3825 | |
3826 | |
3827 | void |
3828 | vm_page_free_reserve( |
3829 | int pages) |
3830 | { |
3831 | int free_after_reserve; |
3832 | |
3833 | if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { |
3834 | if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) { |
3835 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; |
3836 | } else { |
3837 | vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); |
3838 | } |
3839 | } else { |
3840 | if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) { |
3841 | vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; |
3842 | } else { |
3843 | vm_page_free_reserved += pages; |
3844 | } |
3845 | } |
3846 | free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved; |
3847 | |
3848 | vm_page_free_min = vm_page_free_reserved + |
3849 | VM_PAGE_FREE_MIN(free_after_reserve); |
3850 | |
3851 | if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) { |
3852 | vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; |
3853 | } |
3854 | |
3855 | vm_page_free_target = vm_page_free_reserved + |
3856 | VM_PAGE_FREE_TARGET(free_after_reserve); |
3857 | |
3858 | if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) { |
3859 | vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; |
3860 | } |
3861 | |
3862 | if (vm_page_free_target < vm_page_free_min + 5) { |
3863 | vm_page_free_target = vm_page_free_min + 5; |
3864 | } |
3865 | |
3866 | vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2); |
3867 | } |
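/*
 * Worked example for the computation above.  The macro ratios and limits
 * are configuration dependent; the 10% / 15% figures and the page counts
 * below are assumptions for illustration only.
 *
 *	vm_page_free_reserved  = 100
 *	free_after_reserve     = 10000
 *	vm_page_free_min       = 100 + (10% of 10000) = 1100
 *	vm_page_free_target    = 100 + (15% of 10000) = 1600
 *	vm_page_throttle_limit = 1600 - (1600 / 2)    = 800
 *
 * The only hard relationship enforced here is
 * vm_page_free_target >= vm_page_free_min + 5 (after clamping each value
 * to its respective limit).
 */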
3868 | |
3869 | /* |
3870 | * vm_pageout is the high level pageout daemon. |
3871 | */ |
3872 | |
3873 | void |
3874 | vm_pageout_continue(void) |
3875 | { |
3876 | DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); |
3877 | VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1); |
3878 | |
3879 | vm_free_page_lock(); |
3880 | vm_pageout_running = TRUE; |
3881 | vm_free_page_unlock(); |
3882 | |
3883 | vm_pageout_scan(); |
3884 | /* |
3885 | * we hold both the vm_page_queue_free_lock |
3886 | * and the vm_page_queues_lock at this point |
3887 | */ |
3888 | assert(vm_page_free_wanted == 0); |
3889 | assert(vm_page_free_wanted_privileged == 0); |
assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3891 | |
3892 | vm_pageout_running = FALSE; |
3893 | #if XNU_TARGET_OS_OSX |
3894 | if (vm_pageout_waiter) { |
3895 | vm_pageout_waiter = FALSE; |
3896 | thread_wakeup((event_t)&vm_pageout_waiter); |
3897 | } |
3898 | #endif /* XNU_TARGET_OS_OSX */ |
3899 | |
3900 | vm_free_page_unlock(); |
3901 | vm_page_unlock_queues(); |
3902 | |
thread_block((thread_continue_t)vm_pageout_continue);
3904 | /*NOTREACHED*/ |
3905 | } |
3906 | |
3907 | #if XNU_TARGET_OS_OSX |
3908 | kern_return_t |
3909 | vm_pageout_wait(uint64_t deadline) |
3910 | { |
3911 | kern_return_t kr; |
3912 | |
3913 | vm_free_page_lock(); |
3914 | for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr);) { |
3915 | vm_pageout_waiter = TRUE; |
if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
        &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
        (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3919 | kr = KERN_OPERATION_TIMED_OUT; |
3920 | } |
3921 | } |
3922 | vm_free_page_unlock(); |
3923 | |
3924 | return kr; |
3925 | } |
3926 | #endif /* XNU_TARGET_OS_OSX */ |
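/*
 * Hedged usage sketch for vm_pageout_wait(): a caller that wants to block
 * until the current pageout pass completes, with a timeout.  The 10ms
 * deadline and the caller itself are hypothetical; only vm_pageout_wait()
 * is defined above.
 *
 *	uint64_t deadline;
 *
 *	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
 *	if (vm_pageout_wait(deadline) == KERN_OPERATION_TIMED_OUT) {
 *		// pageout was still running when the deadline expired
 *	}
 */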
3927 | |
3928 | OS_NORETURN |
3929 | static void |
3930 | vm_pageout_iothread_external_continue(struct pgo_iothread_state *ethr, __unused wait_result_t w) |
3931 | { |
3932 | vm_page_t m = NULL; |
3933 | vm_object_t object; |
3934 | vm_object_offset_t offset; |
memory_object_t pager;
3936 | struct vm_pageout_queue *q = ethr->q; |
3937 | |
3938 | /* On systems with a compressor, the external IO thread clears its |
3939 | * VM privileged bit to accommodate large allocations (e.g. bulk UPL |
3940 | * creation) |
3941 | */ |
3942 | if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { |
3943 | current_thread()->options &= ~TH_OPT_VMPRIV; |
3944 | } |
3945 | |
sched_cond_ack(&(ethr->pgo_wakeup));
3947 | |
3948 | while (true) { |
3949 | vm_page_lockspin_queues(); |
3950 | |
3951 | while (!vm_page_queue_empty(&q->pgo_pending)) { |
3952 | q->pgo_busy = TRUE; |
3953 | vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq); |
3954 | |
3955 | assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q); |
3956 | VM_PAGE_CHECK(m); |
3957 | /* |
3958 | * grab a snapshot of the object and offset this |
3959 | * page is tabled in so that we can relookup this |
3960 | * page after we've taken the object lock - these |
3961 | * fields are stable while we hold the page queues lock |
3962 | * but as soon as we drop it, there is nothing to keep |
3963 | * this page in this object... we hold an activity_in_progress |
3964 | * on this object which will keep it from terminating |
3965 | */ |
3966 | object = VM_PAGE_OBJECT(m); |
3967 | offset = m->vmp_offset; |
3968 | |
3969 | m->vmp_q_state = VM_PAGE_NOT_ON_Q; |
3970 | VM_PAGE_ZERO_PAGEQ_ENTRY(m); |
3971 | |
3972 | vm_page_unlock_queues(); |
3973 | |
3974 | vm_object_lock(object); |
3975 | |
3976 | m = vm_page_lookup(object, offset); |
3977 | |
3978 | if (m == NULL || m->vmp_busy || m->vmp_cleaning || |
3979 | !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) { |
3980 | /* |
3981 | * it's either the same page that someone else has |
3982 | * started cleaning (or it's finished cleaning or |
3983 | * been put back on the pageout queue), or |
3984 | * the page has been freed or we have found a |
3985 | * new page at this offset... in all of these cases |
3986 | * we merely need to release the activity_in_progress |
3987 | * we took when we put the page on the pageout queue |
3988 | */ |
3989 | vm_object_activity_end(object); |
3990 | vm_object_unlock(object); |
3991 | |
3992 | vm_page_lockspin_queues(); |
3993 | continue; |
3994 | } |
3995 | pager = object->pager; |
3996 | |
3997 | if (pager == MEMORY_OBJECT_NULL) { |
3998 | /* |
3999 | * This pager has been destroyed by either |
4000 | * memory_object_destroy or vm_object_destroy, and |
4001 | * so there is nowhere for the page to go. |
4002 | */ |
4003 | if (m->vmp_free_when_done) { |
4004 | /* |
4005 | * Just free the page... VM_PAGE_FREE takes |
4006 | * care of cleaning up all the state... |
4007 | * including doing the vm_pageout_throttle_up |
4008 | */ |
4009 | VM_PAGE_FREE(m); |
4010 | } else { |
4011 | vm_page_lockspin_queues(); |
4012 | |
4013 | vm_pageout_throttle_up(m); |
vm_page_activate(m);
4015 | |
4016 | vm_page_unlock_queues(); |
4017 | |
4018 | /* |
4019 | * And we are done with it. |
4020 | */ |
4021 | } |
4022 | vm_object_activity_end(object); |
4023 | vm_object_unlock(object); |
4024 | |
4025 | vm_page_lockspin_queues(); |
4026 | continue; |
4027 | } |
4028 | #if 0 |
4029 | /* |
4030 | * we don't hold the page queue lock |
4031 | * so this check isn't safe to make |
4032 | */ |
4033 | VM_PAGE_CHECK(m); |
4034 | #endif |
4035 | /* |
4036 | * give back the activity_in_progress reference we |
4037 | * took when we queued up this page and replace it |
* with a paging_in_progress reference that will
4039 | * also hold the paging offset from changing and |
4040 | * prevent the object from terminating |
4041 | */ |
4042 | vm_object_activity_end(object); |
4043 | vm_object_paging_begin(object); |
4044 | vm_object_unlock(object); |
4045 | |
4046 | /* |
4047 | * Send the data to the pager. |
4048 | * any pageout clustering happens there |
4049 | */ |
memory_object_data_return(pager,
    m->vmp_offset + object->paging_offset,
    PAGE_SIZE,
    NULL,
    NULL,
    FALSE,
    FALSE,
    0);
4058 | |
4059 | vm_object_lock(object); |
4060 | vm_object_paging_end(object); |
4061 | vm_object_unlock(object); |
4062 | |
4063 | vm_pageout_io_throttle(); |
4064 | |
4065 | vm_page_lockspin_queues(); |
4066 | } |
4067 | q->pgo_busy = FALSE; |
4068 | |
4069 | vm_page_unlock_queues(); |
sched_cond_wait_parameter(&(ethr->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_external_continue, ethr);
4071 | } |
4072 | /*NOTREACHED*/ |
4073 | } |
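/*
 * The loop above relies on a snapshot-and-revalidate pattern that recurs
 * throughout this file: capture (object, offset) while the page queues
 * lock keeps them stable, drop that lock, take the object lock, then look
 * the page up again and bail out if anything changed underneath us.
 * Minimal sketch (error paths elided):
 *
 *	object = VM_PAGE_OBJECT(m);
 *	offset = m->vmp_offset;
 *	vm_page_unlock_queues();
 *	vm_object_lock(object);
 *	m = vm_page_lookup(object, offset);
 *	if (m == NULL || m->vmp_busy || m->vmp_cleaning) {
 *		// someone else claimed the page; release our
 *		// activity_in_progress reference and move on
 *	}
 */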
4074 | |
4075 | |
4076 | #define MAX_FREE_BATCH 32 |
4077 | uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by |
4078 | * this thread. |
4079 | */ |
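/*
 * The compressor thread below batches freshly compressed pages on a local
 * singly linked list (vmp_snext) and hands them to vm_page_free_list() in
 * groups of up to MAX_FREE_BATCH, amortizing the cost of taking the free
 * list lock.  Minimal sketch of the pattern (local_freeq/local_freed mirror
 * the locals used in vm_pageout_iothread_internal_continue):
 *
 *	m->vmp_snext = local_freeq;
 *	local_freeq = m;
 *	if (++local_freed >= MAX_FREE_BATCH) {
 *		vm_page_free_list(local_freeq, TRUE);
 *		local_freeq = NULL;
 *		local_freed = 0;
 *	}
 */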
4080 | |
4081 | |
4082 | OS_NORETURN |
4083 | static void |
4084 | vm_pageout_iothread_internal_continue(struct pgo_iothread_state *cq, __unused wait_result_t w) |
4085 | { |
4086 | struct vm_pageout_queue *q; |
4087 | vm_page_t m = NULL; |
4088 | boolean_t pgo_draining; |
4089 | vm_page_t local_q; |
4090 | int local_cnt; |
4091 | vm_page_t local_freeq = NULL; |
4092 | int local_freed = 0; |
4093 | int local_batch_size; |
4094 | #if DEVELOPMENT || DEBUG |
4095 | int ncomps = 0; |
4096 | boolean_t marked_active = FALSE; |
4097 | int num_pages_processed = 0; |
4098 | #endif |
4099 | void *chead = NULL; |
4100 | |
4101 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); |
4102 | |
sched_cond_ack(&(cq->pgo_wakeup));
4104 | |
4105 | q = cq->q; |
4106 | |
4107 | while (true) { |
4108 | #if DEVELOPMENT || DEBUG |
4109 | bool benchmark_accounting = false; |
4110 | /* |
4111 | * If we're running the compressor perf test, only process the benchmark pages. |
4112 | * We'll get back to our regular queue once the benchmark is done |
4113 | */ |
4114 | if (compressor_running_perf_test) { |
4115 | q = cq->benchmark_q; |
4116 | if (!vm_page_queue_empty(&q->pgo_pending)) { |
4117 | benchmark_accounting = true; |
4118 | } else { |
4119 | q = cq->q; |
4120 | benchmark_accounting = false; |
4121 | } |
4122 | } |
4123 | #endif /* DEVELOPMENT || DEBUG */ |
4124 | |
4125 | #if __AMP__ |
4126 | if (vm_compressor_ebound && (vm_pageout_state.vm_compressor_thread_count > 1)) { |
4127 | local_batch_size = (q->pgo_maxlaundry >> 3); |
4128 | local_batch_size = MAX(local_batch_size, 16); |
4129 | } else { |
4130 | local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2); |
4131 | } |
4132 | #else |
4133 | local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2); |
4134 | #endif |
4135 | |
4136 | #if RECORD_THE_COMPRESSED_DATA |
4137 | if (q->pgo_laundry) { |
4138 | c_compressed_record_init(); |
4139 | } |
4140 | #endif |
4141 | while (true) { |
4142 | int pages_left_on_q = 0; |
4143 | |
4144 | local_cnt = 0; |
4145 | local_q = NULL; |
4146 | |
4147 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); |
4148 | |
4149 | vm_page_lock_queues(); |
4150 | #if DEVELOPMENT || DEBUG |
4151 | if (marked_active == FALSE) { |
4152 | vmct_active++; |
4153 | vmct_state[cq->id] = VMCT_ACTIVE; |
4154 | marked_active = TRUE; |
4155 | if (vmct_active == 1) { |
4156 | vm_compressor_epoch_start = mach_absolute_time(); |
4157 | } |
4158 | } |
4159 | #endif |
4160 | KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); |
4161 | |
4162 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0); |
4163 | |
4164 | while (!vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { |
4165 | vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq); |
4166 | assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q); |
4167 | VM_PAGE_CHECK(m); |
4168 | |
4169 | m->vmp_q_state = VM_PAGE_NOT_ON_Q; |
4170 | VM_PAGE_ZERO_PAGEQ_ENTRY(m); |
4171 | m->vmp_laundry = FALSE; |
4172 | |
4173 | m->vmp_snext = local_q; |
4174 | local_q = m; |
4175 | local_cnt++; |
4176 | } |
4177 | if (local_q == NULL) { |
4178 | break; |
4179 | } |
4180 | |
4181 | q->pgo_busy = TRUE; |
4182 | |
4183 | if ((pgo_draining = q->pgo_draining) == FALSE) { |
vm_pageout_throttle_up_batch(q, local_cnt);
4185 | pages_left_on_q = q->pgo_laundry; |
4186 | } else { |
4187 | pages_left_on_q = q->pgo_laundry - local_cnt; |
4188 | } |
4189 | |
4190 | vm_page_unlock_queues(); |
4191 | |
4192 | #if !RECORD_THE_COMPRESSED_DATA |
4193 | if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) { |
4194 | // wake up the next compressor thread |
sched_cond_signal(&pgo_iothread_internal_state[cq->id + 1].pgo_wakeup,
    pgo_iothread_internal_state[cq->id + 1].pgo_iothread);
4197 | } |
4198 | #endif |
4199 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0); |
4200 | |
4201 | while (local_q) { |
4202 | KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0); |
4203 | |
4204 | m = local_q; |
4205 | local_q = m->vmp_snext; |
4206 | m->vmp_snext = NULL; |
4207 | |
4208 | /* |
4209 | * Technically we need the pageq locks to manipulate this field. |
4210 | * However, this page has been removed from all queues and is only |
4211 | * known to this compressor thread dealing with this local queue. |
4212 | * |
4213 | * TODO LIONEL: Add a second localq that is the early localq and |
4214 | * put special pages like this one on that queue in the block above |
4215 | * under the pageq lock to avoid this 'works but not clean' logic. |
4216 | */ |
4217 | void *donate_queue_head; |
4218 | #if XNU_TARGET_OS_OSX |
4219 | donate_queue_head = &cq->current_early_swapout_chead; |
4220 | #else /* XNU_TARGET_OS_OSX */ |
4221 | donate_queue_head = &cq->current_late_swapout_chead; |
4222 | #endif /* XNU_TARGET_OS_OSX */ |
4223 | if (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE) { |
4224 | m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY; |
4225 | chead = donate_queue_head; |
4226 | } else { |
4227 | chead = &cq->current_regular_swapout_chead; |
4228 | } |
4229 | |
4230 | if (vm_pageout_compress_page(chead, cq->scratch_buf, m) == KERN_SUCCESS) { |
4231 | #if DEVELOPMENT || DEBUG |
4232 | ncomps++; |
4233 | #endif |
4234 | KERNEL_DEBUG(0xe0400024 | DBG_FUNC_END, local_cnt, 0, 0, 0, 0); |
4235 | |
4236 | m->vmp_snext = local_freeq; |
4237 | local_freeq = m; |
4238 | local_freed++; |
4239 | |
4240 | if (local_freed >= MAX_FREE_BATCH) { |
4241 | OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions); |
4242 | |
vm_page_free_list(local_freeq, TRUE);
4244 | |
4245 | local_freeq = NULL; |
4246 | local_freed = 0; |
4247 | } |
4248 | } |
4249 | #if DEVELOPMENT || DEBUG |
4250 | num_pages_processed++; |
4251 | #endif /* DEVELOPMENT || DEBUG */ |
4252 | #if !CONFIG_JETSAM |
4253 | while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) { |
4254 | kern_return_t wait_result; |
4255 | int need_wakeup = 0; |
4256 | |
4257 | if (local_freeq) { |
4258 | OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions); |
4259 | |
vm_page_free_list(local_freeq, TRUE);
4261 | local_freeq = NULL; |
4262 | local_freed = 0; |
4263 | |
4264 | continue; |
4265 | } |
4266 | vm_free_page_lock_spin(); |
4267 | |
4268 | if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) { |
4269 | if (vm_page_free_wanted_privileged++ == 0) { |
4270 | need_wakeup = 1; |
4271 | } |
wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
4273 | |
4274 | vm_free_page_unlock(); |
4275 | |
4276 | if (need_wakeup) { |
4277 | thread_wakeup((event_t)&vm_page_free_wanted); |
4278 | } |
4279 | |
4280 | if (wait_result == THREAD_WAITING) { |
4281 | thread_block(THREAD_CONTINUE_NULL); |
4282 | } |
4283 | } else { |
4284 | vm_free_page_unlock(); |
4285 | } |
4286 | } |
4287 | #endif |
4288 | } |
4289 | if (local_freeq) { |
4290 | OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions); |
4291 | |
vm_page_free_list(local_freeq, TRUE);
4293 | local_freeq = NULL; |
4294 | local_freed = 0; |
4295 | } |
4296 | if (pgo_draining == TRUE) { |
4297 | vm_page_lockspin_queues(); |
vm_pageout_throttle_up_batch(q, local_cnt);
4299 | vm_page_unlock_queues(); |
4300 | } |
4301 | } |
4302 | KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); |
4303 | |
4304 | /* |
4305 | * queue lock is held and our q is empty |
4306 | */ |
4307 | q->pgo_busy = FALSE; |
4308 | #if DEVELOPMENT || DEBUG |
4309 | if (marked_active == TRUE) { |
4310 | vmct_active--; |
4311 | vmct_state[cq->id] = VMCT_IDLE; |
4312 | |
4313 | if (vmct_active == 0) { |
4314 | vm_compressor_epoch_stop = mach_absolute_time(); |
4315 | assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start, |
"Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
4317 | vm_compressor_epoch_start, vm_compressor_epoch_stop); |
4318 | /* This interval includes intervals where one or more |
4319 | * compressor threads were pre-empted |
4320 | */ |
4321 | vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start; |
4322 | } |
4323 | } |
4324 | if (compressor_running_perf_test && benchmark_accounting) { |
4325 | /* |
4326 | * We could turn ON compressor_running_perf_test while still processing |
4327 | * regular non-benchmark pages. We shouldn't count them here else we |
4328 | * could overshoot. We might also still be populating that benchmark Q |
4329 | * and be under pressure. So we will go back to the regular queues. And |
4330 | * benchmark accounting will be off for that case too. |
4331 | */ |
4332 | compressor_perf_test_pages_processed += num_pages_processed; |
4333 | thread_wakeup(&compressor_perf_test_pages_processed); |
4334 | } |
4335 | #endif |
4336 | vm_page_unlock_queues(); |
4337 | #if DEVELOPMENT || DEBUG |
4338 | if (__improbable(vm_compressor_time_thread)) { |
4339 | vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self(); |
4340 | vmct_stats.vmct_pages[cq->id] += ncomps; |
4341 | vmct_stats.vmct_iterations[cq->id]++; |
4342 | if (ncomps > vmct_stats.vmct_maxpages[cq->id]) { |
4343 | vmct_stats.vmct_maxpages[cq->id] = ncomps; |
4344 | } |
4345 | if (ncomps < vmct_stats.vmct_minpages[cq->id]) { |
4346 | vmct_stats.vmct_minpages[cq->id] = ncomps; |
4347 | } |
4348 | } |
4349 | #endif |
4350 | |
4351 | KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); |
4352 | #if DEVELOPMENT || DEBUG |
4353 | if (compressor_running_perf_test && benchmark_accounting) { |
4354 | /* |
4355 | * We've been exclusively compressing pages from the benchmark queue, |
4356 | * do 1 pass over the internal queue before blocking. |
4357 | */ |
4358 | continue; |
4359 | } |
4360 | #endif |
4361 | |
sched_cond_wait_parameter(&(cq->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
4363 | } |
4364 | /*NOTREACHED*/ |
4365 | } |
4366 | |
4367 | |
4368 | kern_return_t |
4369 | vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m) |
4370 | { |
4371 | vm_object_t object; |
memory_object_t pager;
4373 | int compressed_count_delta; |
4374 | kern_return_t retval; |
4375 | |
4376 | object = VM_PAGE_OBJECT(m); |
4377 | |
4378 | assert(!m->vmp_free_when_done); |
4379 | assert(!m->vmp_laundry); |
4380 | |
4381 | pager = object->pager; |
4382 | |
4383 | if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { |
4384 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); |
4385 | |
4386 | vm_object_lock(object); |
4387 | |
4388 | /* |
4389 | * If there is no memory object for the page, create |
4390 | * one and hand it to the compression pager. |
4391 | */ |
4392 | |
4393 | if (!object->pager_initialized) { |
vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
4395 | } |
4396 | if (!object->pager_initialized) { |
4397 | vm_object_compressor_pager_create(object); |
4398 | } |
4399 | |
4400 | pager = object->pager; |
4401 | |
4402 | if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { |
4403 | /* |
4404 | * Still no pager for the object, |
4405 | * or the pager has been destroyed. |
4406 | * Reactivate the page. |
4407 | * |
4408 | * Should only happen if there is no |
4409 | * compression pager |
4410 | */ |
4411 | PAGE_WAKEUP_DONE(m); |
4412 | |
4413 | vm_page_lockspin_queues(); |
vm_page_activate(m);
4415 | VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1); |
4416 | vm_page_unlock_queues(); |
4417 | |
4418 | /* |
4419 | * And we are done with it. |
4420 | */ |
4421 | vm_object_activity_end(object); |
4422 | vm_object_unlock(object); |
4423 | |
4424 | return KERN_FAILURE; |
4425 | } |
4426 | vm_object_unlock(object); |
4427 | |
4428 | KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); |
4429 | } |
4430 | assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL); |
4431 | assert(object->activity_in_progress > 0); |
4432 | |
4433 | #if CONFIG_TRACK_UNMODIFIED_ANON_PAGES |
4434 | if (m->vmp_unmodified_ro == true) { |
4435 | os_atomic_inc(&compressor_ro_uncompressed_total_returned, relaxed); |
4436 | } |
4437 | #endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */ |
4438 | |
retval = vm_compressor_pager_put(
    pager,
    m->vmp_offset + object->paging_offset,
    VM_PAGE_GET_PHYS_PAGE(m),
#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
    m->vmp_unmodified_ro,
#else /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
    false,
#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
    current_chead,
    scratch_buf,
    &compressed_count_delta);
4451 | |
4452 | vm_object_lock(object); |
4453 | |
4454 | assert(object->activity_in_progress > 0); |
4455 | assert(VM_PAGE_OBJECT(m) == object); |
4456 | assert( !VM_PAGE_WIRED(m)); |
4457 | |
vm_compressor_pager_count(pager,
4459 | compressed_count_delta, |
4460 | FALSE, /* shared_lock */ |
4461 | object); |
4462 | |
4463 | if (retval == KERN_SUCCESS) { |
4464 | /* |
4465 | * If the object is purgeable, its owner's |
4466 | * purgeable ledgers will be updated in |
4467 | * vm_page_remove() but the page still |
4468 | * contributes to the owner's memory footprint, |
4469 | * so account for it as such. |
4470 | */ |
4471 | if ((object->purgable != VM_PURGABLE_DENY || |
4472 | object->vo_ledger_tag) && |
4473 | object->vo_owner != NULL) { |
4474 | /* one more compressed purgeable/tagged page */ |
vm_object_owner_compressed_update(object,
    compressed_count_delta);
4477 | } |
4478 | counter_inc(&vm_statistics_compressions); |
4479 | |
4480 | if (m->vmp_tabled) { |
vm_page_remove(m, TRUE);
4482 | } |
4483 | } else { |
4484 | PAGE_WAKEUP_DONE(m); |
4485 | |
4486 | vm_page_lockspin_queues(); |
4487 | |
vm_page_activate(m);
4489 | vm_pageout_vminfo.vm_compressor_failed++; |
4490 | |
4491 | vm_page_unlock_queues(); |
4492 | } |
4493 | vm_object_activity_end(object); |
4494 | vm_object_unlock(object); |
4495 | |
4496 | return retval; |
4497 | } |
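/*
 * Caller contract for vm_pageout_compress_page(), as exercised by the
 * compressor threads above: the page must hold an activity_in_progress
 * reference on its object and must already be off all page queues.  A
 * hedged sketch of a caller (variable names are illustrative):
 *
 *	if (vm_pageout_compress_page(current_chead, scratch_buf, m) == KERN_SUCCESS) {
 *		// the contents now live in the compressor; 'm' can be
 *		// batched up for vm_page_free_list()
 *	} else {
 *		// the page was reactivated on our behalf; nothing more to do
 *	}
 */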
4498 | |
4499 | |
4500 | static void |
4501 | vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *ethr, boolean_t req_lowpriority) |
4502 | { |
4503 | uint32_t policy; |
4504 | |
4505 | if (hibernate_cleaning_in_progress == TRUE) { |
4506 | req_lowpriority = FALSE; |
4507 | } |
4508 | |
4509 | if (ethr->q->pgo_inited == TRUE && ethr->q->pgo_lowpriority != req_lowpriority) { |
4510 | vm_page_unlock_queues(); |
4511 | |
4512 | if (req_lowpriority == TRUE) { |
4513 | policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; |
4514 | DTRACE_VM(laundrythrottle); |
4515 | } else { |
4516 | policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; |
4517 | DTRACE_VM(laundryunthrottle); |
4518 | } |
proc_set_thread_policy(ethr->pgo_iothread,
    TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4521 | |
4522 | vm_page_lock_queues(); |
4523 | ethr->q->pgo_lowpriority = req_lowpriority; |
4524 | } |
4525 | } |
4526 | |
4527 | OS_NORETURN |
4528 | static void |
4529 | vm_pageout_iothread_external(struct pgo_iothread_state *ethr, __unused wait_result_t w) |
4530 | { |
4531 | thread_t self = current_thread(); |
4532 | |
4533 | self->options |= TH_OPT_VMPRIV; |
4534 | |
4535 | DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); |
4536 | |
proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4538 | TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); |
4539 | |
4540 | vm_page_lock_queues(); |
4541 | |
4542 | vm_pageout_queue_external.pgo_lowpriority = TRUE; |
4543 | vm_pageout_queue_external.pgo_inited = TRUE; |
4544 | |
4545 | vm_page_unlock_queues(); |
4546 | |
4547 | #if CONFIG_THREAD_GROUPS |
4548 | thread_group_vm_add(); |
4549 | #endif /* CONFIG_THREAD_GROUPS */ |
4550 | |
vm_pageout_iothread_external_continue(ethr, 0);
4552 | /*NOTREACHED*/ |
4553 | } |
4554 | |
4555 | |
4556 | OS_NORETURN |
4557 | static void |
4558 | vm_pageout_iothread_internal(struct pgo_iothread_state *cthr, __unused wait_result_t w) |
4559 | { |
4560 | thread_t self = current_thread(); |
4561 | |
4562 | self->options |= TH_OPT_VMPRIV; |
4563 | |
4564 | vm_page_lock_queues(); |
4565 | |
4566 | vm_pageout_queue_internal.pgo_lowpriority = TRUE; |
4567 | vm_pageout_queue_internal.pgo_inited = TRUE; |
4568 | |
4569 | #if DEVELOPMENT || DEBUG |
4570 | vm_pageout_queue_benchmark.pgo_lowpriority = vm_pageout_queue_internal.pgo_lowpriority; |
4571 | vm_pageout_queue_benchmark.pgo_inited = vm_pageout_queue_internal.pgo_inited; |
4572 | vm_pageout_queue_benchmark.pgo_busy = FALSE; |
4573 | #endif /* DEVELOPMENT || DEBUG */ |
4574 | |
4575 | vm_page_unlock_queues(); |
4576 | |
4577 | if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) { |
4578 | thread_vm_bind_group_add(); |
4579 | } |
4580 | |
4581 | #if CONFIG_THREAD_GROUPS |
4582 | thread_group_vm_add(); |
4583 | #endif /* CONFIG_THREAD_GROUPS */ |
4584 | |
4585 | #if __AMP__ |
4586 | if (vm_compressor_ebound) { |
4587 | /* |
4588 | * Use the soft bound option for vm_compressor to allow it to run on |
4589 | * P-cores if E-cluster is unavailable. |
4590 | */ |
4591 | thread_bind_cluster_type(self, 'E', true); |
4592 | } |
4593 | #endif /* __AMP__ */ |
4594 | |
	thread_set_thread_name(current_thread(), "VM_compressor");
4596 | #if DEVELOPMENT || DEBUG |
4597 | vmct_stats.vmct_minpages[cthr->id] = INT32_MAX; |
4598 | #endif |
	vm_pageout_iothread_internal_continue(cthr, 0);
4600 | |
4601 | /*NOTREACHED*/ |
4602 | } |
4603 | |
4604 | kern_return_t |
4605 | vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) |
4606 | { |
4607 | if (OSCompareAndSwapPtr(NULL, ptrauth_nop_cast(void *, func), (void * volatile *) &consider_buffer_cache_collect)) { |
4608 | return KERN_SUCCESS; |
4609 | } else { |
4610 | return KERN_FAILURE; /* Already set */ |
4611 | } |
4612 | } |
4613 | |
4614 | extern boolean_t memorystatus_manual_testing_on; |
4615 | extern unsigned int memorystatus_level; |
4616 | |
4617 | |
4618 | #if VM_PRESSURE_EVENTS |
4619 | |
4620 | boolean_t vm_pressure_events_enabled = FALSE; |
4621 | |
4622 | extern uint64_t next_warning_notification_sent_at_ts; |
4623 | extern uint64_t next_critical_notification_sent_at_ts; |
4624 | |
4625 | #define PRESSURE_LEVEL_STUCK_THRESHOLD_MINS (30) /* 30 minutes. */ |
4626 | |
4627 | /* |
4628 | * The last time there was change in pressure level OR we forced a check |
4629 | * because the system is stuck in a non-normal pressure level. |
4630 | */ |
4631 | uint64_t vm_pressure_last_level_transition_abs = 0; |
4632 | |
4633 | /* |
 * This is how long the system waits 'stuck' in an unchanged non-normal pressure
 * level before resending notifications for that level again.
4636 | */ |
4637 | int vm_pressure_level_transition_threshold = PRESSURE_LEVEL_STUCK_THRESHOLD_MINS; |
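
/*
 * Worked example (assuming the default 30-minute threshold): if the last
 * level transition happened at absolute time T, vm_pressure_response()
 * decides whether to force a re-notification for a stuck non-normal level
 * roughly as follows:
 *
 *	absolutetime_to_nanoseconds(mach_absolute_time() - T, &ns);
 *	mins = (int)((ns / NSEC_PER_SEC) / 60);
 *	force_check = (mins >= vm_pressure_level_transition_threshold);
 */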
4638 | |
4639 | void |
4640 | vm_pressure_response(void) |
4641 | { |
4642 | vm_pressure_level_t old_level = kVMPressureNormal; |
4643 | int new_level = -1; |
4644 | unsigned int total_pages; |
4645 | uint64_t available_memory = 0; |
4646 | uint64_t curr_ts, abs_time_since_level_transition, time_in_ns; |
4647 | bool force_check = false; |
4648 | int time_in_mins; |
4649 | |
4650 | |
4651 | if (vm_pressure_events_enabled == FALSE) { |
4652 | return; |
4653 | } |
4654 | |
4655 | #if !XNU_TARGET_OS_OSX |
4656 | |
4657 | available_memory = (uint64_t) memorystatus_available_pages; |
4658 | |
4659 | #else /* !XNU_TARGET_OS_OSX */ |
4660 | |
4661 | available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY; |
4662 | memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY; |
4663 | |
4664 | #endif /* !XNU_TARGET_OS_OSX */ |
4665 | |
4666 | total_pages = (unsigned int) atop_64(max_mem); |
4667 | #if CONFIG_SECLUDED_MEMORY |
4668 | total_pages -= vm_page_secluded_count; |
4669 | #endif /* CONFIG_SECLUDED_MEMORY */ |
4670 | memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages); |
4671 | |
4672 | if (memorystatus_manual_testing_on) { |
4673 | return; |
4674 | } |
4675 | |
4676 | curr_ts = mach_absolute_time(); |
4677 | abs_time_since_level_transition = curr_ts - vm_pressure_last_level_transition_abs; |
4678 | |
	absolutetime_to_nanoseconds(abs_time_since_level_transition, &time_in_ns);
4680 | time_in_mins = (int) ((time_in_ns / NSEC_PER_SEC) / 60); |
4681 | force_check = (time_in_mins >= vm_pressure_level_transition_threshold); |
4682 | |
4683 | old_level = memorystatus_vm_pressure_level; |
4684 | |
4685 | switch (memorystatus_vm_pressure_level) { |
4686 | case kVMPressureNormal: |
4687 | { |
4688 | if (VM_PRESSURE_WARNING_TO_CRITICAL()) { |
4689 | new_level = kVMPressureCritical; |
4690 | } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { |
4691 | new_level = kVMPressureWarning; |
4692 | } |
4693 | break; |
4694 | } |
4695 | |
4696 | case kVMPressureWarning: |
4697 | case kVMPressureUrgent: |
4698 | { |
4699 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { |
4700 | new_level = kVMPressureNormal; |
4701 | } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { |
4702 | new_level = kVMPressureCritical; |
4703 | } else if (force_check) { |
4704 | new_level = kVMPressureWarning; |
4705 | next_warning_notification_sent_at_ts = curr_ts; |
4706 | } |
4707 | break; |
4708 | } |
4709 | |
4710 | case kVMPressureCritical: |
4711 | { |
4712 | if (VM_PRESSURE_WARNING_TO_NORMAL()) { |
4713 | new_level = kVMPressureNormal; |
4714 | } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { |
4715 | new_level = kVMPressureWarning; |
4716 | } else if (force_check) { |
4717 | new_level = kVMPressureCritical; |
4718 | next_critical_notification_sent_at_ts = curr_ts; |
4719 | } |
4720 | break; |
4721 | } |
4722 | |
4723 | default: |
4724 | return; |
4725 | } |
4726 | |
4727 | if (new_level != -1 || force_check) { |
4728 | if (new_level != -1) { |
4729 | memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; |
4730 | |
4731 | if (new_level != (int) old_level) { |
4732 | VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE, |
4733 | new_level, old_level, 0, 0); |
4734 | } |
4735 | } else { |
4736 | VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE, |
4737 | new_level, old_level, force_check, 0); |
4738 | } |
4739 | |
4740 | if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) { |
4741 | /* |
4742 | * We don't want to schedule a wakeup while hibernation is in progress |
4743 | * because that could collide with checks for non-monotonicity in the scheduler. |
4744 | * We do however do all the updates to memorystatus_vm_pressure_level because |
4745 | * we _might_ want to use that for decisions regarding which pages or how |
4746 | * many pages we want to dump in hibernation. |
4747 | */ |
4748 | return; |
4749 | } |
4750 | |
4751 | if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level) || force_check) { |
4752 | if (vm_pageout_state.vm_pressure_thread_running == FALSE) { |
4753 | thread_wakeup(&vm_pressure_thread); |
4754 | } |
4755 | |
4756 | if (old_level != memorystatus_vm_pressure_level) { |
4757 | thread_wakeup(&vm_pageout_state.vm_pressure_changed); |
4758 | } |
4759 | vm_pressure_last_level_transition_abs = curr_ts; /* renew the window of observation for a stuck pressure level */ |
4760 | } |
4761 | } |
4762 | } |
4763 | #endif /* VM_PRESSURE_EVENTS */ |
4764 | |
4765 | |
4766 | /** |
4767 | * Called by a kernel thread to ask if a number of pages may be wired. |
4768 | */ |
4769 | kern_return_t |
4770 | mach_vm_wire_level_monitor(int64_t requested_pages) |
4771 | { |
4772 | if (requested_pages <= 0) { |
4773 | return KERN_INVALID_ARGUMENT; |
4774 | } |
4775 | |
4776 | const int64_t max_wire_pages = atop_64(vm_global_user_wire_limit); |
4777 | /** |
4778 | * Available pages can be negative in the case where more system memory is |
4779 | * wired than the threshold, so we must use a signed integer. |
4780 | */ |
4781 | const int64_t available_pages = max_wire_pages - vm_page_wire_count; |
4782 | |
4783 | if (requested_pages > available_pages) { |
4784 | return KERN_RESOURCE_SHORTAGE; |
4785 | } |
4786 | return KERN_SUCCESS; |
4787 | } |
4788 | |
4789 | /* |
4790 | * Function called by a kernel thread to either get the current pressure level or |
4791 | * wait until memory pressure changes from a given level. |
4792 | */ |
4793 | kern_return_t |
4794 | mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) |
4795 | { |
4796 | #if !VM_PRESSURE_EVENTS |
4797 | |
4798 | return KERN_FAILURE; |
4799 | |
4800 | #else /* VM_PRESSURE_EVENTS */ |
4801 | |
4802 | wait_result_t wr = 0; |
4803 | vm_pressure_level_t old_level = memorystatus_vm_pressure_level; |
4804 | |
4805 | if (pressure_level == NULL) { |
4806 | return KERN_INVALID_ARGUMENT; |
4807 | } |
4808 | |
4809 | if (*pressure_level == kVMPressureJetsam) { |
4810 | if (!wait_for_pressure) { |
4811 | return KERN_INVALID_ARGUMENT; |
4812 | } |
4813 | |
		lck_mtx_lock(&memorystatus_jetsam_fg_band_lock);
		wr = assert_wait((event_t)&memorystatus_jetsam_fg_band_waiters,
		    THREAD_INTERRUPTIBLE);
		if (wr == THREAD_WAITING) {
			++memorystatus_jetsam_fg_band_waiters;
			lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
			wr = thread_block(THREAD_CONTINUE_NULL);
		} else {
			lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
4823 | } |
4824 | if (wr != THREAD_AWAKENED) { |
4825 | return KERN_ABORTED; |
4826 | } |
4827 | *pressure_level = kVMPressureJetsam; |
4828 | return KERN_SUCCESS; |
4829 | } |
4830 | |
4831 | if (wait_for_pressure == TRUE) { |
4832 | while (old_level == *pressure_level) { |
			wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4834 | THREAD_INTERRUPTIBLE); |
4835 | if (wr == THREAD_WAITING) { |
4836 | wr = thread_block(THREAD_CONTINUE_NULL); |
4837 | } |
4838 | if (wr == THREAD_INTERRUPTED) { |
4839 | return KERN_ABORTED; |
4840 | } |
4841 | |
4842 | if (wr == THREAD_AWAKENED) { |
4843 | old_level = memorystatus_vm_pressure_level; |
4844 | } |
4845 | } |
4846 | } |
4847 | |
4848 | *pressure_level = old_level; |
4849 | return KERN_SUCCESS; |
4850 | #endif /* VM_PRESSURE_EVENTS */ |
4851 | } |
4852 | |
4853 | #if VM_PRESSURE_EVENTS |
4854 | void |
4855 | vm_pressure_thread(void) |
4856 | { |
4857 | static boolean_t thread_initialized = FALSE; |
4858 | |
4859 | if (thread_initialized == TRUE) { |
4860 | vm_pageout_state.vm_pressure_thread_running = TRUE; |
4861 | consider_vm_pressure_events(); |
4862 | vm_pageout_state.vm_pressure_thread_running = FALSE; |
4863 | } |
4864 | |
4865 | #if CONFIG_THREAD_GROUPS |
4866 | thread_group_vm_add(); |
4867 | #endif /* CONFIG_THREAD_GROUPS */ |
4868 | |
	thread_set_thread_name(current_thread(), "VM_pressure");
	thread_initialized = TRUE;
	assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
	thread_block((thread_continue_t)vm_pressure_thread);
4873 | } |
4874 | #endif /* VM_PRESSURE_EVENTS */ |
4875 | |
4876 | |
4877 | /* |
4878 | * called once per-second via "compute_averages" |
4879 | */ |
4880 | void |
4881 | compute_pageout_gc_throttle(__unused void *arg) |
4882 | { |
4883 | if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) { |
4884 | vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page; |
4885 | |
4886 | thread_wakeup(VM_PAGEOUT_GC_EVENT); |
4887 | } |
4888 | } |
4889 | |
4890 | /* |
4891 | * vm_pageout_garbage_collect can also be called when the zone allocator needs |
4892 | * to call zone_gc on a different thread in order to trigger zone-map-exhaustion |
4893 | * jetsams. We need to check if the zone map size is above its jetsam limit to |
4894 | * decide if this was indeed the case. |
4895 | * |
 * We need to do this on a different thread for the following reasons:
4897 | * |
4898 | * 1. In the case of synchronous jetsams, the leaking process can try to jetsam |
4899 | * itself causing the system to hang. We perform synchronous jetsams if we're |
4900 | * leaking in the VM map entries zone, so the leaking process could be doing a |
4901 | * zalloc for a VM map entry while holding its vm_map lock, when it decides to |
4902 | * jetsam itself. We also need the vm_map lock on the process termination path, |
4903 | * which would now lead the dying process to deadlock against itself. |
4904 | * |
4905 | * 2. The jetsam path might need to allocate zone memory itself. We could try |
4906 | * using the non-blocking variant of zalloc for this path, but we can still |
4907 | * end up trying to do a kmem_alloc when the zone maps are almost full. |
4908 | */ |
4909 | __dead2 |
4910 | void |
4911 | vm_pageout_garbage_collect(void *step, wait_result_t wr __unused) |
4912 | { |
4913 | assert(step == VM_PAGEOUT_GC_INIT || step == VM_PAGEOUT_GC_COLLECT); |
4914 | |
4915 | if (step == VM_PAGEOUT_GC_INIT) { |
4916 | /* first time being called is not about GC */ |
4917 | #if CONFIG_THREAD_GROUPS |
4918 | thread_group_vm_add(); |
4919 | #endif /* CONFIG_THREAD_GROUPS */ |
4920 | } else if (zone_map_nearing_exhaustion()) { |
4921 | /* |
4922 | * Woken up by the zone allocator for zone-map-exhaustion jetsams. |
4923 | * |
4924 | * Bail out after calling zone_gc (which triggers the |
4925 | * zone-map-exhaustion jetsams). If we fall through, the subsequent |
4926 | * operations that clear out a bunch of caches might allocate zone |
		 * memory themselves (e.g. vm_map operations would need VM map
4928 | * entries). Since the zone map is almost full at this point, we |
4929 | * could end up with a panic. We just need to quickly jetsam a |
4930 | * process and exit here. |
4931 | * |
4932 | * It could so happen that we were woken up to relieve memory |
4933 | * pressure and the zone map also happened to be near its limit at |
4934 | * the time, in which case we'll skip out early. But that should be |
4935 | * ok; if memory pressure persists, the thread will simply be woken |
4936 | * up again. |
4937 | */ |
		zone_gc(ZONE_GC_JETSAM);
4939 | } else { |
4940 | /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */ |
4941 | boolean_t buf_large_zfree = FALSE; |
4942 | boolean_t first_try = TRUE; |
4943 | |
4944 | stack_collect(); |
4945 | |
4946 | consider_machine_collect(); |
4947 | #if CONFIG_MBUF_MCACHE |
4948 | mbuf_drain(FALSE); |
4949 | #endif /* CONFIG_MBUF_MCACHE */ |
4950 | |
4951 | do { |
4952 | if (consider_buffer_cache_collect != NULL) { |
4953 | buf_large_zfree = (*consider_buffer_cache_collect)(0); |
4954 | } |
4955 | if (first_try == TRUE || buf_large_zfree == TRUE) { |
4956 | /* |
4957 | * zone_gc should be last, because the other operations |
4958 | * might return memory to zones. |
4959 | */ |
				zone_gc(ZONE_GC_TRIM);
4961 | } |
4962 | first_try = FALSE; |
4963 | } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); |
4964 | |
4965 | consider_machine_adjust(); |
4966 | } |
4967 | |
4968 | assert_wait(VM_PAGEOUT_GC_EVENT, THREAD_UNINT); |
4969 | |
	thread_block_parameter(vm_pageout_garbage_collect, VM_PAGEOUT_GC_COLLECT);
4971 | __builtin_unreachable(); |
4972 | } |
4973 | |
4974 | |
4975 | #if VM_PAGE_BUCKETS_CHECK |
4976 | #if VM_PAGE_FAKE_BUCKETS |
4977 | extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; |
4978 | #endif /* VM_PAGE_FAKE_BUCKETS */ |
4979 | #endif /* VM_PAGE_BUCKETS_CHECK */ |
4980 | |
4981 | |
4982 | |
4983 | void |
4984 | vm_set_restrictions(unsigned int num_cpus) |
4985 | { |
4986 | int vm_restricted_to_single_processor = 0; |
4987 | |
	if (PE_parse_boot_argn("vm_restricted_to_single_processor", &vm_restricted_to_single_processor, sizeof(vm_restricted_to_single_processor))) {
		kprintf("Overriding vm_restricted_to_single_processor to %d\n", vm_restricted_to_single_processor);
4990 | vm_pageout_state.vm_restricted_to_single_processor = (vm_restricted_to_single_processor ? TRUE : FALSE); |
4991 | } else { |
4992 | assert(num_cpus > 0); |
4993 | |
4994 | if (num_cpus <= 3) { |
4995 | /* |
4996 | * on systems with a limited number of CPUS, bind the |
4997 | * 4 major threads that can free memory and that tend to use |
4998 | * a fair bit of CPU under pressured conditions to a single processor. |
			 * This ensures that these threads don't hog all of the available CPUs
5000 | * (important for camera launch), while allowing them to run independently |
5001 | * w/r to locks... the 4 threads are |
5002 | * vm_pageout_scan, vm_pageout_iothread_internal (compressor), |
5003 | * vm_compressor_swap_trigger_thread (minor and major compactions), |
5004 | * memorystatus_thread (jetsams). |
5005 | * |
5006 | * the first time the thread is run, it is responsible for checking the |
5007 | * state of vm_restricted_to_single_processor, and if TRUE it calls |
5008 | * thread_bind_master... someday this should be replaced with a group |
5009 | * scheduling mechanism and KPI. |
5010 | */ |
5011 | vm_pageout_state.vm_restricted_to_single_processor = TRUE; |
5012 | } else { |
5013 | vm_pageout_state.vm_restricted_to_single_processor = FALSE; |
5014 | } |
5015 | } |
5016 | } |
5017 | |
5018 | /* |
5019 | * Set up vm_config based on the vm_compressor_mode. |
5020 | * Must run BEFORE the pageout thread starts up. |
5021 | */ |
5022 | __startup_func |
5023 | void |
5024 | vm_config_init(void) |
5025 | { |
	bzero(&vm_config, sizeof(vm_config));
5027 | |
5028 | switch (vm_compressor_mode) { |
5029 | case VM_PAGER_DEFAULT: |
5030 | printf(format: "mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n" ); |
5031 | OS_FALLTHROUGH; |
5032 | |
5033 | case VM_PAGER_COMPRESSOR_WITH_SWAP: |
5034 | vm_config.compressor_is_present = TRUE; |
5035 | vm_config.swap_is_present = TRUE; |
5036 | vm_config.compressor_is_active = TRUE; |
5037 | vm_config.swap_is_active = TRUE; |
5038 | break; |
5039 | |
5040 | case VM_PAGER_COMPRESSOR_NO_SWAP: |
5041 | vm_config.compressor_is_present = TRUE; |
5042 | vm_config.swap_is_present = TRUE; |
5043 | vm_config.compressor_is_active = TRUE; |
5044 | break; |
5045 | |
5046 | case VM_PAGER_FREEZER_DEFAULT: |
5047 | printf(format: "mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n" ); |
5048 | OS_FALLTHROUGH; |
5049 | |
5050 | case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP: |
5051 | vm_config.compressor_is_present = TRUE; |
5052 | vm_config.swap_is_present = TRUE; |
5053 | break; |
5054 | |
5055 | case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP: |
5056 | vm_config.compressor_is_present = TRUE; |
5057 | vm_config.swap_is_present = TRUE; |
5058 | vm_config.compressor_is_active = TRUE; |
5059 | vm_config.freezer_swap_is_active = TRUE; |
5060 | break; |
5061 | |
5062 | case VM_PAGER_NOT_CONFIGURED: |
5063 | break; |
5064 | |
5065 | default: |
5066 | printf(format: "unknown compressor mode - %x\n" , vm_compressor_mode); |
5067 | break; |
5068 | } |
5069 | } |
5070 | |
5071 | __startup_func |
5072 | static void |
5073 | vm_pageout_create_gc_thread(void) |
5074 | { |
5075 | thread_t thread; |
5076 | |
	if (kernel_thread_create(vm_pageout_garbage_collect,
	    VM_PAGEOUT_GC_INIT, BASEPRI_DEFAULT, &thread) != KERN_SUCCESS) {
		panic("vm_pageout_garbage_collect: create failed");
	}
	thread_set_thread_name(thread, "VM_pageout_garbage_collect");
5082 | if (thread->reserved_stack == 0) { |
5083 | assert(thread->kernel_stack); |
5084 | thread->reserved_stack = thread->kernel_stack; |
5085 | } |
5086 | |
5087 | /* thread is started in vm_pageout() */ |
5088 | vm_pageout_gc_thread = thread; |
5089 | } |
5090 | STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, vm_pageout_create_gc_thread); |
5091 | |
5092 | void |
5093 | vm_pageout(void) |
5094 | { |
5095 | thread_t self = current_thread(); |
5096 | thread_t thread; |
5097 | kern_return_t result; |
5098 | spl_t s; |
5099 | |
5100 | /* |
5101 | * Set thread privileges. |
5102 | */ |
5103 | s = splsched(); |
5104 | |
5105 | #if CONFIG_VPS_DYNAMIC_PRIO |
5106 | if (vps_dynamic_priority_enabled) { |
5107 | sched_set_kernel_thread_priority(self, MAXPRI_THROTTLE); |
5108 | thread_set_eager_preempt(self); |
5109 | } else { |
5110 | sched_set_kernel_thread_priority(self, BASEPRI_VM); |
5111 | } |
5112 | #else /* CONFIG_VPS_DYNAMIC_PRIO */ |
	sched_set_kernel_thread_priority(self, BASEPRI_VM);
5114 | #endif /* CONFIG_VPS_DYNAMIC_PRIO */ |
5115 | |
5116 | thread_lock(self); |
5117 | self->options |= TH_OPT_VMPRIV; |
5118 | thread_unlock(self); |
5119 | |
5120 | if (!self->reserved_stack) { |
5121 | self->reserved_stack = self->kernel_stack; |
5122 | } |
5123 | |
5124 | if (vm_pageout_state.vm_restricted_to_single_processor == TRUE && |
5125 | !vps_dynamic_priority_enabled) { |
5126 | thread_vm_bind_group_add(); |
5127 | } |
5128 | |
5129 | |
5130 | #if CONFIG_THREAD_GROUPS |
5131 | thread_group_vm_add(); |
5132 | #endif /* CONFIG_THREAD_GROUPS */ |
5133 | |
5134 | #if __AMP__ |
5135 | PE_parse_boot_argn("vmpgo_pcluster" , &vm_pgo_pbound, sizeof(vm_pgo_pbound)); |
5136 | if (vm_pgo_pbound) { |
5137 | /* |
5138 | * Use the soft bound option for vm pageout to allow it to run on |
5139 | * E-cores if P-cluster is unavailable. |
5140 | */ |
5141 | thread_bind_cluster_type(self, 'P', true); |
5142 | } |
5143 | #endif /* __AMP__ */ |
5144 | |
5145 | PE_parse_boot_argn(arg_string: "vmpgo_protect_realtime" , |
5146 | arg_ptr: &vm_pageout_protect_realtime, |
5147 | max_arg: sizeof(vm_pageout_protect_realtime)); |
5148 | splx(s); |
5149 | |
	thread_set_thread_name(current_thread(), "VM_pageout_scan");
5151 | |
5152 | /* |
5153 | * Initialize some paging parameters. |
5154 | */ |
5155 | |
5156 | vm_pageout_state.vm_pressure_thread_running = FALSE; |
5157 | vm_pageout_state.vm_pressure_changed = FALSE; |
5158 | vm_pageout_state.memorystatus_purge_on_warning = 2; |
5159 | vm_pageout_state.memorystatus_purge_on_urgent = 5; |
5160 | vm_pageout_state.memorystatus_purge_on_critical = 8; |
5161 | vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS; |
5162 | vm_pageout_state.vm_page_speculative_percentage = 5; |
5163 | vm_pageout_state.vm_page_speculative_target = 0; |
5164 | |
5165 | vm_pageout_state.vm_pageout_swap_wait = 0; |
5166 | vm_pageout_state.vm_pageout_idle_wait = 0; |
5167 | vm_pageout_state.vm_pageout_empty_wait = 0; |
5168 | vm_pageout_state.vm_pageout_burst_wait = 0; |
5169 | vm_pageout_state.vm_pageout_deadlock_wait = 0; |
5170 | vm_pageout_state.vm_pageout_deadlock_relief = 0; |
5171 | vm_pageout_state.vm_pageout_burst_inactive_throttle = 0; |
5172 | |
5173 | vm_pageout_state.vm_pageout_inactive = 0; |
5174 | vm_pageout_state.vm_pageout_inactive_used = 0; |
5175 | vm_pageout_state.vm_pageout_inactive_clean = 0; |
5176 | |
5177 | vm_pageout_state.vm_memory_pressure = 0; |
5178 | vm_pageout_state.vm_page_filecache_min = 0; |
5179 | #if CONFIG_JETSAM |
5180 | vm_pageout_state.vm_page_filecache_min_divisor = 70; |
5181 | vm_pageout_state.vm_page_xpmapped_min_divisor = 40; |
5182 | #else |
5183 | vm_pageout_state.vm_page_filecache_min_divisor = 27; |
5184 | vm_pageout_state.vm_page_xpmapped_min_divisor = 36; |
5185 | #endif |
5186 | vm_pageout_state.vm_page_free_count_init = vm_page_free_count; |
5187 | |
5188 | vm_pageout_state.vm_pageout_considered_page_last = 0; |
5189 | |
5190 | if (vm_pageout_state.vm_pageout_swap_wait == 0) { |
5191 | vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; |
5192 | } |
5193 | |
5194 | if (vm_pageout_state.vm_pageout_idle_wait == 0) { |
5195 | vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; |
5196 | } |
5197 | |
5198 | if (vm_pageout_state.vm_pageout_burst_wait == 0) { |
5199 | vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; |
5200 | } |
5201 | |
5202 | if (vm_pageout_state.vm_pageout_empty_wait == 0) { |
5203 | vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; |
5204 | } |
5205 | |
5206 | if (vm_pageout_state.vm_pageout_deadlock_wait == 0) { |
5207 | vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; |
5208 | } |
5209 | |
5210 | if (vm_pageout_state.vm_pageout_deadlock_relief == 0) { |
5211 | vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; |
5212 | } |
5213 | |
5214 | if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0) { |
5215 | vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; |
5216 | } |
5217 | /* |
5218 | * even if we've already called vm_page_free_reserve |
	 * call it again here to ensure that the targets are
5220 | * accurately calculated (it uses vm_page_free_count_init) |
5221 | * calling it with an arg of 0 will not change the reserve |
5222 | * but will re-calculate free_min and free_target |
5223 | */ |
5224 | if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { |
		vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
	} else {
		vm_page_free_reserve(0);
5228 | } |
5229 | |
	bzero(&vm_pageout_queue_external, sizeof(struct vm_pageout_queue));
	bzero(&vm_pageout_queue_internal, sizeof(struct vm_pageout_queue));
5232 | |
5233 | vm_page_queue_init(&vm_pageout_queue_external.pgo_pending); |
5234 | vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; |
5235 | |
5236 | vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending); |
5237 | |
5238 | #if DEVELOPMENT || DEBUG |
5239 | bzero(&vm_pageout_queue_benchmark, sizeof(struct vm_pageout_queue)); |
5240 | vm_page_queue_init(&vm_pageout_queue_benchmark.pgo_pending); |
5241 | #endif /* DEVELOPMENT || DEBUG */ |
5242 | |
5243 | |
5244 | /* internal pageout thread started when default pager registered first time */ |
5245 | /* external pageout and garbage collection threads started here */ |
5246 | struct pgo_iothread_state *ethr = &pgo_iothread_external_state; |
5247 | ethr->id = 0; |
5248 | ethr->q = &vm_pageout_queue_external; |
5249 | ethr->current_early_swapout_chead = NULL; |
5250 | ethr->current_regular_swapout_chead = NULL; |
5251 | ethr->current_late_swapout_chead = NULL; |
5252 | ethr->scratch_buf = NULL; |
5253 | #if DEVELOPMENT || DEBUG |
5254 | ethr->benchmark_q = NULL; |
5255 | #endif /* DEVELOPMENT || DEBUG */ |
	sched_cond_init(&(ethr->pgo_wakeup));
5257 | |
	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external,
	    (void *)ethr, BASEPRI_VM,
	    &(ethr->pgo_iothread));
	if (result != KERN_SUCCESS) {
		panic("vm_pageout: Unable to create external thread (%d)\n", result);
	}
	thread_set_thread_name(ethr->pgo_iothread, "VM_pageout_external_iothread");

	thread_mtx_lock(vm_pageout_gc_thread);
	thread_start(vm_pageout_gc_thread);
	thread_mtx_unlock(vm_pageout_gc_thread);
5269 | |
5270 | #if VM_PRESSURE_EVENTS |
	result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
	    BASEPRI_DEFAULT,
	    &thread);

	if (result != KERN_SUCCESS) {
		panic("vm_pressure_thread: create failed");
5277 | } |
5278 | |
5279 | thread_deallocate(thread); |
5280 | #endif |
5281 | |
5282 | vm_object_reaper_init(); |
5283 | |
5284 | |
5285 | if (VM_CONFIG_COMPRESSOR_IS_PRESENT) { |
5286 | vm_compressor_init(); |
5287 | } |
5288 | |
5289 | #if VM_PRESSURE_EVENTS |
5290 | vm_pressure_events_enabled = TRUE; |
5291 | #endif /* VM_PRESSURE_EVENTS */ |
5292 | |
5293 | #if CONFIG_PHANTOM_CACHE |
5294 | vm_phantom_cache_init(); |
5295 | #endif |
5296 | #if VM_PAGE_BUCKETS_CHECK |
5297 | #if VM_PAGE_FAKE_BUCKETS |
5298 | printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n" , |
5299 | (uint64_t) vm_page_fake_buckets_start, |
5300 | (uint64_t) vm_page_fake_buckets_end); |
5301 | pmap_protect(kernel_pmap, |
5302 | vm_page_fake_buckets_start, |
5303 | vm_page_fake_buckets_end, |
5304 | VM_PROT_READ); |
5305 | // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ |
5306 | #endif /* VM_PAGE_FAKE_BUCKETS */ |
5307 | #endif /* VM_PAGE_BUCKETS_CHECK */ |
5308 | |
5309 | #if VM_OBJECT_TRACKING |
5310 | vm_object_tracking_init(); |
5311 | #endif /* VM_OBJECT_TRACKING */ |
5312 | |
5313 | #if __arm64__ |
5314 | // vm_tests(); |
5315 | #endif /* __arm64__ */ |
5316 | |
5317 | vm_pageout_continue(); |
5318 | |
5319 | /* |
5320 | * Unreached code! |
5321 | * |
5322 | * The vm_pageout_continue() call above never returns, so the code below is never |
5323 | * executed. We take advantage of this to declare several DTrace VM related probe |
5324 | * points that our kernel doesn't have an analog for. These are probe points that |
5325 | * exist in Solaris and are in the DTrace documentation, so people may have written |
5326 | * scripts that use them. Declaring the probe points here means their scripts will |
5327 | * compile and execute which we want for portability of the scripts, but since this |
5328 | * section of code is never reached, the probe points will simply never fire. Yes, |
5329 | * this is basically a hack. The problem is the DTrace probe points were chosen with |
5330 | * Solaris specific VM events in mind, not portability to different VM implementations. |
5331 | */ |
5332 | |
5333 | DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); |
5334 | DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); |
5335 | DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); |
5336 | DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); |
5337 | DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); |
5338 | DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); |
5339 | DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); |
5340 | /*NOTREACHED*/ |
5341 | } |
5342 | |
5343 | |
5344 | |
5345 | kern_return_t |
5346 | vm_pageout_internal_start(void) |
5347 | { |
5348 | kern_return_t result = KERN_SUCCESS; |
5349 | host_basic_info_data_t hinfo; |
5350 | vm_offset_t buf, bufsize; |
5351 | |
5352 | assert(VM_CONFIG_COMPRESSOR_IS_PRESENT); |
5353 | |
5354 | mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; |
5355 | #define BSD_HOST 1 |
	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
5357 | |
5358 | assert(hinfo.max_cpus > 0); |
5359 | |
5360 | #if !XNU_TARGET_OS_OSX |
5361 | vm_pageout_state.vm_compressor_thread_count = 1; |
5362 | #else /* !XNU_TARGET_OS_OSX */ |
5363 | if (hinfo.max_cpus > 4) { |
5364 | vm_pageout_state.vm_compressor_thread_count = 2; |
5365 | } else { |
5366 | vm_pageout_state.vm_compressor_thread_count = 1; |
5367 | } |
5368 | #endif /* !XNU_TARGET_OS_OSX */ |
5369 | #if __AMP__ |
5370 | if (vm_compressor_ebound) { |
5371 | vm_pageout_state.vm_compressor_thread_count = 2; |
5372 | } |
5373 | #endif |
5374 | PE_parse_boot_argn(arg_string: "vmcomp_threads" , arg_ptr: &vm_pageout_state.vm_compressor_thread_count, |
5375 | max_arg: sizeof(vm_pageout_state.vm_compressor_thread_count)); |
5376 | |
5377 | if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus) { |
5378 | vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1; |
5379 | } |
5380 | if (vm_pageout_state.vm_compressor_thread_count <= 0) { |
5381 | vm_pageout_state.vm_compressor_thread_count = 1; |
5382 | } else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT) { |
5383 | vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT; |
5384 | } |
5385 | |
5386 | vm_pageout_queue_internal.pgo_maxlaundry = |
5387 | (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; |
5388 | |
5389 | PE_parse_boot_argn(arg_string: "vmpgoi_maxlaundry" , |
5390 | arg_ptr: &vm_pageout_queue_internal.pgo_maxlaundry, |
5391 | max_arg: sizeof(vm_pageout_queue_internal.pgo_maxlaundry)); |
5392 | |
5393 | #if DEVELOPMENT || DEBUG |
5394 | // Note: this will be modified at enqueue-time such that the benchmark queue is never throttled |
5395 | vm_pageout_queue_benchmark.pgo_maxlaundry = vm_pageout_queue_internal.pgo_maxlaundry; |
5396 | #endif /* DEVELOPMENT || DEBUG */ |
5397 | |
5398 | bufsize = COMPRESSOR_SCRATCH_BUF_SIZE; |
5399 | |
	kmem_alloc(kernel_map, &buf,
	    bufsize * vm_pageout_state.vm_compressor_thread_count,
	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT,
	    VM_KERN_MEMORY_COMPRESSOR);
5404 | |
5405 | for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) { |
5406 | struct pgo_iothread_state *iq = &pgo_iothread_internal_state[i]; |
5407 | iq->id = i; |
5408 | iq->q = &vm_pageout_queue_internal; |
5409 | iq->current_early_swapout_chead = NULL; |
5410 | iq->current_regular_swapout_chead = NULL; |
5411 | iq->current_late_swapout_chead = NULL; |
5412 | iq->scratch_buf = (char *)(buf + i * bufsize); |
5413 | #if DEVELOPMENT || DEBUG |
5414 | iq->benchmark_q = &vm_pageout_queue_benchmark; |
5415 | #endif /* DEVELOPMENT || DEBUG */ |
		sched_cond_init(&(iq->pgo_wakeup));
		result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal,
		    (void *)iq, BASEPRI_VM,
		    &(iq->pgo_iothread));

		if (result != KERN_SUCCESS) {
			panic("vm_pageout: Unable to create compressor thread no. %d (%d)\n", i, result);
5423 | } |
5424 | } |
5425 | return result; |
5426 | } |
5427 | |
5428 | #if CONFIG_IOSCHED |
5429 | /* |
5430 | * To support I/O Expedite for compressed files we mark the upls with special flags. |
5431 | * The way decmpfs works is that we create a big upl which marks all the pages needed to |
5432 | * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs |
 * then issues smaller I/Os for the compressed data, inflates (decompresses) them and puts the data into the pages
 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
5438 | * unless the real I/O upl is being destroyed). |
5439 | */ |
5440 | |
5441 | |
5442 | static void |
5443 | upl_set_decmp_info(upl_t upl, upl_t src_upl) |
5444 | { |
5445 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); |
5446 | |
5447 | upl_lock(src_upl); |
5448 | if (src_upl->decmp_io_upl) { |
5449 | /* |
5450 | * If there is already an alive real I/O UPL, ignore this new UPL. |
5451 | * This case should rarely happen and even if it does, it just means |
5452 | * that we might issue a spurious expedite which the driver is expected |
5453 | * to handle. |
5454 | */ |
5455 | upl_unlock(src_upl); |
5456 | return; |
5457 | } |
5458 | src_upl->decmp_io_upl = (void *)upl; |
5459 | src_upl->ref_count++; |
5460 | |
5461 | upl->flags |= UPL_DECMP_REAL_IO; |
5462 | upl->decmp_io_upl = (void *)src_upl; |
5463 | upl_unlock(src_upl); |
5464 | } |
5465 | #endif /* CONFIG_IOSCHED */ |
5466 | |
5467 | #if UPL_DEBUG |
5468 | int upl_debug_enabled = 1; |
5469 | #else |
5470 | int upl_debug_enabled = 0; |
5471 | #endif |
5472 | |
5473 | static upl_t |
5474 | upl_create(int type, int flags, upl_size_t size) |
5475 | { |
5476 | uint32_t pages = (uint32_t)atop(round_page_32(size)); |
5477 | upl_t upl; |
5478 | |
5479 | assert(page_aligned(size)); |
5480 | |
5481 | /* |
5482 | * FIXME: this code assumes the allocation always succeeds, |
5483 | * however `pages` can be up to MAX_UPL_SIZE. |
5484 | * |
5485 | * The allocation size is above 32k (resp. 128k) |
5486 | * on 16k pages (resp. 4k), which kalloc might fail |
5487 | * to allocate. |
5488 | */ |
5489 | upl = kalloc_type(struct upl, struct upl_page_info, |
5490 | (type & UPL_CREATE_INTERNAL) ? pages : 0, Z_WAITOK | Z_ZERO); |
5491 | if (type & UPL_CREATE_INTERNAL) { |
5492 | flags |= UPL_INTERNAL; |
5493 | } |
5494 | |
5495 | if (type & UPL_CREATE_LITE) { |
5496 | flags |= UPL_LITE; |
5497 | if (pages) { |
			upl->lite_list = bitmap_alloc(pages);
5499 | } |
5500 | } |
5501 | |
5502 | upl->flags = flags; |
5503 | upl->ref_count = 1; |
5504 | upl_lock_init(upl); |
5505 | #if CONFIG_IOSCHED |
5506 | if (type & UPL_CREATE_IO_TRACKING) { |
		upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
5508 | } |
5509 | |
5510 | if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) { |
5511 | /* Only support expedite on internal UPLs */ |
5512 | thread_t curthread = current_thread(); |
5513 | upl->upl_reprio_info = kalloc_data(sizeof(uint64_t) * pages, |
5514 | Z_WAITOK | Z_ZERO); |
5515 | upl->flags |= UPL_EXPEDITE_SUPPORTED; |
5516 | if (curthread->decmp_upl != NULL) { |
			upl_set_decmp_info(upl, curthread->decmp_upl);
5518 | } |
5519 | } |
5520 | #endif |
5521 | #if CONFIG_IOSCHED || UPL_DEBUG |
5522 | if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) { |
5523 | upl->upl_creator = current_thread(); |
5524 | upl->flags |= UPL_TRACKED_BY_OBJECT; |
5525 | } |
5526 | #endif |
5527 | |
5528 | #if UPL_DEBUG |
5529 | upl->uple_create_btref = btref_get(__builtin_frame_address(0), 0); |
5530 | #endif /* UPL_DEBUG */ |
5531 | |
5532 | return upl; |
5533 | } |
5534 | |
5535 | static void |
5536 | upl_destroy(upl_t upl) |
5537 | { |
5538 | uint32_t pages; |
5539 | |
5540 | // DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object); |
5541 | |
5542 | if (upl->ext_ref_count) { |
5543 | panic("upl(%p) ext_ref_count" , upl); |
5544 | } |
5545 | |
5546 | #if CONFIG_IOSCHED |
5547 | if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) { |
5548 | upl_t src_upl; |
5549 | src_upl = upl->decmp_io_upl; |
5550 | assert((src_upl->flags & UPL_DECMP_REQ) != 0); |
5551 | upl_lock(src_upl); |
5552 | src_upl->decmp_io_upl = NULL; |
5553 | upl_unlock(src_upl); |
		upl_deallocate(src_upl);
5555 | } |
5556 | #endif /* CONFIG_IOSCHED */ |
5557 | |
5558 | #if CONFIG_IOSCHED || UPL_DEBUG |
5559 | if (((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) && |
5560 | !(upl->flags & UPL_VECTOR)) { |
5561 | vm_object_t object; |
5562 | |
5563 | if (upl->flags & UPL_SHADOWED) { |
5564 | object = upl->map_object->shadow; |
5565 | } else { |
5566 | object = upl->map_object; |
5567 | } |
5568 | |
5569 | vm_object_lock(object); |
5570 | queue_remove(&object->uplq, upl, upl_t, uplq); |
5571 | vm_object_activity_end(object); |
		vm_object_collapse(object, 0, TRUE);
5573 | vm_object_unlock(object); |
5574 | } |
5575 | #endif |
5576 | /* |
5577 | * drop a reference on the map_object whether or |
5578 | * not a pageout object is inserted |
5579 | */ |
5580 | if (upl->flags & UPL_SHADOWED) { |
		vm_object_deallocate(upl->map_object);
5582 | } |
5583 | |
5584 | if (upl->flags & UPL_DEVICE_MEMORY) { |
5585 | pages = 1; |
5586 | } else { |
5587 | pages = (uint32_t)atop(upl_adjusted_size(upl, PAGE_MASK)); |
5588 | } |
5589 | |
5590 | upl_lock_destroy(upl); |
5591 | |
5592 | #if CONFIG_IOSCHED |
5593 | if (upl->flags & UPL_EXPEDITE_SUPPORTED) { |
5594 | kfree_data(upl->upl_reprio_info, sizeof(uint64_t) * pages); |
5595 | } |
5596 | #endif |
5597 | |
5598 | #if UPL_DEBUG |
5599 | for (int i = 0; i < upl->upl_commit_index; i++) { |
5600 | btref_put(upl->upl_commit_records[i].c_btref); |
5601 | } |
5602 | btref_put(upl->uple_create_btref); |
5603 | #endif /* UPL_DEBUG */ |
5604 | |
5605 | if ((upl->flags & UPL_LITE) && pages) { |
		bitmap_free(upl->lite_list, pages);
5607 | } |
5608 | kfree_type(struct upl, struct upl_page_info, |
5609 | (upl->flags & UPL_INTERNAL) ? pages : 0, upl); |
5610 | } |
5611 | |
5612 | void |
5613 | upl_deallocate(upl_t upl) |
5614 | { |
5615 | upl_lock(upl); |
5616 | |
5617 | if (--upl->ref_count == 0) { |
5618 | if (vector_upl_is_valid(upl)) { |
5619 | vector_upl_deallocate(upl); |
5620 | } |
5621 | upl_unlock(upl); |
5622 | |
5623 | if (upl->upl_iodone) { |
5624 | upl_callout_iodone(upl); |
5625 | } |
5626 | |
5627 | upl_destroy(upl); |
5628 | } else { |
5629 | upl_unlock(upl); |
5630 | } |
5631 | } |
5632 | |
5633 | #if CONFIG_IOSCHED |
5634 | void |
5635 | upl_mark_decmp(upl_t upl) |
5636 | { |
5637 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { |
5638 | upl->flags |= UPL_DECMP_REQ; |
5639 | upl->upl_creator->decmp_upl = (void *)upl; |
5640 | } |
5641 | } |
5642 | |
5643 | void |
5644 | upl_unmark_decmp(upl_t upl) |
5645 | { |
5646 | if (upl && (upl->flags & UPL_DECMP_REQ)) { |
5647 | upl->upl_creator->decmp_upl = NULL; |
5648 | } |
5649 | } |
5650 | |
5651 | #endif /* CONFIG_IOSCHED */ |
5652 | |
5653 | #define VM_PAGE_Q_BACKING_UP(q) \ |
5654 | ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10)) |
5655 | |
5656 | boolean_t must_throttle_writes(void); |
5657 | |
5658 | boolean_t |
5659 | must_throttle_writes() |
5660 | { |
5661 | if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) && |
5662 | vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) { |
5663 | return TRUE; |
5664 | } |
5665 | |
5666 | return FALSE; |
5667 | } |
5668 | |
5669 | int vm_page_delayed_work_ctx_needed = 0; |
5670 | KALLOC_TYPE_DEFINE(dw_ctx_zone, struct vm_page_delayed_work_ctx, KT_PRIV_ACCT); |
5671 | |
5672 | __startup_func |
5673 | static void |
5674 | vm_page_delayed_work_init_ctx(void) |
5675 | { |
5676 | uint16_t min_delayed_work_ctx_allocated = 16; |
5677 | |
5678 | /* |
5679 | * try really hard to always keep NCPU elements around in the zone |
5680 | * in order for the UPL code to almost always get an element. |
5681 | */ |
5682 | if (min_delayed_work_ctx_allocated < zpercpu_count()) { |
5683 | min_delayed_work_ctx_allocated = (uint16_t)zpercpu_count(); |
5684 | } |
5685 | |
	zone_raise_reserve(dw_ctx_zone, min_delayed_work_ctx_allocated);
5687 | } |
5688 | STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_page_delayed_work_init_ctx); |
5689 | |
5690 | struct vm_page_delayed_work* |
5691 | vm_page_delayed_work_get_ctx(void) |
5692 | { |
5693 | struct vm_page_delayed_work_ctx * dw_ctx = NULL; |
5694 | |
5695 | dw_ctx = zalloc_flags(dw_ctx_zone, Z_ZERO | Z_NOWAIT); |
5696 | |
5697 | if (__probable(dw_ctx)) { |
5698 | dw_ctx->delayed_owner = current_thread(); |
5699 | } else { |
5700 | vm_page_delayed_work_ctx_needed++; |
5701 | } |
5702 | return dw_ctx ? dw_ctx->dwp : NULL; |
5703 | } |
5704 | |
5705 | void |
5706 | vm_page_delayed_work_finish_ctx(struct vm_page_delayed_work* dwp) |
5707 | { |
5708 | struct vm_page_delayed_work_ctx *ldw_ctx; |
5709 | |
5710 | ldw_ctx = (struct vm_page_delayed_work_ctx *)dwp; |
5711 | ldw_ctx->delayed_owner = NULL; |
5712 | |
5713 | zfree(dw_ctx_zone, ldw_ctx); |
5714 | } |
5715 | |
5716 | /* |
5717 | * Routine: vm_object_upl_request |
5718 | * Purpose: |
5719 | * Cause the population of a portion of a vm_object. |
5720 | * Depending on the nature of the request, the pages |
 *		returned may contain valid data or be uninitialized.
5722 | * A page list structure, listing the physical pages |
5723 | * will be returned upon request. |
5724 | * This function is called by the file system or any other |
5725 | * supplier of backing store to a pager. |
5726 | * IMPORTANT NOTE: The caller must still respect the relationship |
5727 | * between the vm_object and its backing memory object. The |
5728 | * caller MUST NOT substitute changes in the backing file |
5729 | * without first doing a memory_object_lock_request on the |
 *		target range unless it is known that the pages are not
5731 | * shared with another entity at the pager level. |
5732 | * Copy_in_to: |
5733 | * if a page list structure is present |
5734 | * return the mapped physical pages, where a |
5735 | * page is not present, return a non-initialized |
5736 | * one. If the no_sync bit is turned on, don't |
5737 | * call the pager unlock to synchronize with other |
5738 | * possible copies of the page. Leave pages busy |
5739 | * in the original object, if a page list structure |
5740 | * was specified. When a commit of the page list |
5741 | * pages is done, the dirty bit will be set for each one. |
5742 | * Copy_out_from: |
5743 | * If a page list structure is present, return |
5744 | * all mapped pages. Where a page does not exist |
5745 | * map a zero filled one. Leave pages busy in |
5746 | * the original object. If a page list structure |
5747 | * is not specified, this call is a no-op. |
5748 | * |
5749 | * Note: access of default pager objects has a rather interesting |
5750 | * twist. The caller of this routine, presumably the file system |
5751 | * page cache handling code, will never actually make a request |
5752 | * against a default pager backed object. Only the default |
 * pager will make requests on backing store related vm_objects.
 * In this way the default pager can maintain the relationship
 * between backing store files (abstract memory objects) and
 * the vm_objects (cache objects) they support.
5757 | * |
5758 | */ |
5759 | |
5760 | __private_extern__ kern_return_t |
5761 | vm_object_upl_request( |
5762 | vm_object_t object, |
5763 | vm_object_offset_t offset, |
5764 | upl_size_t size, |
5765 | upl_t *upl_ptr, |
5766 | upl_page_info_array_t user_page_list, |
5767 | unsigned int *page_list_count, |
5768 | upl_control_flags_t cntrl_flags, |
5769 | vm_tag_t tag) |
5770 | { |
5771 | vm_page_t dst_page = VM_PAGE_NULL; |
5772 | vm_object_offset_t dst_offset; |
5773 | upl_size_t xfer_size; |
5774 | unsigned int size_in_pages; |
5775 | boolean_t dirty; |
5776 | boolean_t hw_dirty; |
5777 | upl_t upl = NULL; |
5778 | unsigned int entry; |
5779 | vm_page_t alias_page = NULL; |
5780 | int refmod_state = 0; |
5781 | vm_object_t last_copy_object; |
5782 | uint32_t last_copy_version; |
5783 | struct vm_page_delayed_work dw_array; |
5784 | struct vm_page_delayed_work *dwp, *dwp_start; |
5785 | bool dwp_finish_ctx = TRUE; |
5786 | int dw_count; |
5787 | int dw_limit; |
5788 | int io_tracking_flag = 0; |
5789 | int grab_options; |
5790 | int page_grab_count = 0; |
5791 | ppnum_t phys_page; |
5792 | pmap_flush_context pmap_flush_context_storage; |
5793 | boolean_t pmap_flushes_delayed = FALSE; |
5794 | #if DEVELOPMENT || DEBUG |
5795 | task_t task = current_task(); |
5796 | #endif /* DEVELOPMENT || DEBUG */ |
5797 | |
5798 | dwp_start = dwp = NULL; |
5799 | |
5800 | if (cntrl_flags & ~UPL_VALID_FLAGS) { |
5801 | /* |
5802 | * For forward compatibility's sake, |
5803 | * reject any unknown flag. |
5804 | */ |
5805 | return KERN_INVALID_VALUE; |
5806 | } |
5807 | if ((!object->internal) && (object->paging_offset != 0)) { |
5808 | panic("vm_object_upl_request: external object with non-zero paging offset" ); |
5809 | } |
5810 | if (object->phys_contiguous) { |
5811 | panic("vm_object_upl_request: contiguous object specified" ); |
5812 | } |
5813 | |
5814 | assertf(page_aligned(offset) && page_aligned(size), |
5815 | "offset 0x%llx size 0x%x" , |
5816 | offset, size); |
5817 | |
5818 | VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0); |
5819 | |
5820 | dw_count = 0; |
5821 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); |
5822 | dwp_start = vm_page_delayed_work_get_ctx(); |
5823 | if (dwp_start == NULL) { |
5824 | dwp_start = &dw_array; |
5825 | dw_limit = 1; |
5826 | dwp_finish_ctx = FALSE; |
5827 | } |
5828 | |
5829 | dwp = dwp_start; |
5830 | |
5831 | if (size > MAX_UPL_SIZE_BYTES) { |
5832 | size = MAX_UPL_SIZE_BYTES; |
5833 | } |
5834 | |
5835 | if ((cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) { |
5836 | *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; |
5837 | } |
5838 | |
5839 | #if CONFIG_IOSCHED || UPL_DEBUG |
5840 | if (object->io_tracking || upl_debug_enabled) { |
5841 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; |
5842 | } |
5843 | #endif |
5844 | #if CONFIG_IOSCHED |
5845 | if (object->io_tracking) { |
5846 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; |
5847 | } |
5848 | #endif |
5849 | |
5850 | if (cntrl_flags & UPL_SET_INTERNAL) { |
5851 | if (cntrl_flags & UPL_SET_LITE) { |
			upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
		} else {
			upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
		}
		user_page_list = size ? upl->page_list : NULL;
	} else {
		if (cntrl_flags & UPL_SET_LITE) {
			upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
		} else {
			upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5862 | } |
5863 | } |
5864 | *upl_ptr = upl; |
5865 | |
5866 | if (user_page_list) { |
5867 | user_page_list[0].device = FALSE; |
5868 | } |
5869 | |
5870 | if (cntrl_flags & UPL_SET_LITE) { |
5871 | upl->map_object = object; |
5872 | } else { |
5873 | upl->map_object = vm_object_allocate(size); |
5874 | vm_object_lock(upl->map_object); |
5875 | /* |
		 * No need to lock the new object: nobody else knows
5877 | * about it yet, so it's all ours so far. |
5878 | */ |
5879 | upl->map_object->shadow = object; |
		VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
		VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
5882 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; |
5883 | upl->map_object->vo_shadow_offset = offset; |
5884 | upl->map_object->wimg_bits = object->wimg_bits; |
5885 | assertf(page_aligned(upl->map_object->vo_shadow_offset), |
5886 | "object %p shadow_offset 0x%llx" , |
5887 | upl->map_object, upl->map_object->vo_shadow_offset); |
5888 | vm_object_unlock(upl->map_object); |
5889 | |
5890 | alias_page = vm_page_grab_fictitious(TRUE); |
5891 | |
5892 | upl->flags |= UPL_SHADOWED; |
5893 | } |
5894 | if (cntrl_flags & UPL_FOR_PAGEOUT) { |
5895 | upl->flags |= UPL_PAGEOUT; |
5896 | } |
5897 | |
5898 | vm_object_lock(object); |
5899 | vm_object_activity_begin(object); |
5900 | |
5901 | grab_options = 0; |
5902 | #if CONFIG_SECLUDED_MEMORY |
5903 | if (object->can_grab_secluded) { |
5904 | grab_options |= VM_PAGE_GRAB_SECLUDED; |
5905 | } |
5906 | #endif /* CONFIG_SECLUDED_MEMORY */ |
5907 | |
5908 | /* |
5909 | * we can lock in the paging_offset once paging_in_progress is set |
5910 | */ |
5911 | upl->u_size = size; |
5912 | upl->u_offset = offset + object->paging_offset; |
5913 | |
5914 | #if CONFIG_IOSCHED || UPL_DEBUG |
5915 | if (object->io_tracking || upl_debug_enabled) { |
5916 | vm_object_activity_begin(object); |
5917 | queue_enter(&object->uplq, upl, upl_t, uplq); |
5918 | } |
5919 | #endif |
5920 | if ((cntrl_flags & UPL_WILL_MODIFY) && object->vo_copy != VM_OBJECT_NULL) { |
5921 | /* |
5922 | * Honor copy-on-write obligations |
5923 | * |
5924 | * The caller is gathering these pages and |
5925 | * might modify their contents. We need to |
5926 | * make sure that the copy object has its own |
5927 | * private copies of these pages before we let |
5928 | * the caller modify them. |
5929 | */ |
5930 | vm_object_update(object, |
5931 | offset, |
5932 | size, |
5933 | NULL, |
5934 | NULL, |
5935 | FALSE, /* should_return */ |
5936 | MEMORY_OBJECT_COPY_SYNC, |
5937 | VM_PROT_NO_CHANGE); |
5938 | |
5939 | VM_PAGEOUT_DEBUG(upl_cow, 1); |
5940 | VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT)); |
5941 | } |
5942 | /* |
5943 | * remember which copy object we synchronized with |
5944 | */ |
5945 | last_copy_object = object->vo_copy; |
5946 | last_copy_version = object->vo_copy_version; |
5947 | entry = 0; |
5948 | |
5949 | xfer_size = size; |
5950 | dst_offset = offset; |
5951 | size_in_pages = size / PAGE_SIZE; |
5952 | |
5953 | if (vm_page_free_count > (vm_page_free_target + size_in_pages) || |
5954 | object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) { |
5955 | object->scan_collisions = 0; |
5956 | } |
5957 | |
5958 | if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) { |
5959 | boolean_t isSSD = FALSE; |
5960 | |
5961 | #if !XNU_TARGET_OS_OSX |
5962 | isSSD = TRUE; |
5963 | #else /* !XNU_TARGET_OS_OSX */ |
5964 | vnode_pager_get_isSSD(object->pager, &isSSD); |
5965 | #endif /* !XNU_TARGET_OS_OSX */ |
5966 | vm_object_unlock(object); |
5967 | |
5968 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); |
5969 | |
5970 | if (isSSD == TRUE) { |
			delay(1000 * size_in_pages);
		} else {
			delay(5000 * size_in_pages);
5974 | } |
5975 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); |
5976 | |
5977 | vm_object_lock(object); |
5978 | } |
5979 | |
5980 | while (xfer_size) { |
5981 | dwp->dw_mask = 0; |
5982 | |
5983 | if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { |
5984 | vm_object_unlock(object); |
5985 | alias_page = vm_page_grab_fictitious(TRUE); |
5986 | vm_object_lock(object); |
5987 | } |
5988 | if (cntrl_flags & UPL_COPYOUT_FROM) { |
5989 | upl->flags |= UPL_PAGE_SYNC_DONE; |
5990 | |
			if (((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5992 | dst_page->vmp_fictitious || |
5993 | dst_page->vmp_absent || |
5994 | VMP_ERROR_GET(dst_page) || |
5995 | dst_page->vmp_cleaning || |
5996 | (VM_PAGE_WIRED(dst_page))) { |
5997 | if (user_page_list) { |
5998 | user_page_list[entry].phys_addr = 0; |
5999 | } |
6000 | |
6001 | goto try_next_page; |
6002 | } |
			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6004 | |
6005 | /* |
6006 | * grab this up front... |
			 * a high percentage of the time we're going to
6008 | * need the hardware modification state a bit later |
6009 | * anyway... so we can eliminate an extra call into |
6010 | * the pmap layer by grabbing it here and recording it |
6011 | */ |
6012 | if (dst_page->vmp_pmapped) { |
				refmod_state = pmap_get_refmod(phys_page);
6014 | } else { |
6015 | refmod_state = 0; |
6016 | } |
6017 | |
6018 | if ((refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) { |
6019 | /* |
6020 | * page is on inactive list and referenced... |
6021 | * reactivate it now... this gets it out of the |
6022 | * way of vm_pageout_scan which would have to |
6023 | * reactivate it upon tripping over it |
6024 | */ |
6025 | dwp->dw_mask |= DW_vm_page_activate; |
6026 | } |
6027 | if (cntrl_flags & UPL_RET_ONLY_DIRTY) { |
6028 | /* |
6029 | * we're only asking for DIRTY pages to be returned |
6030 | */ |
6031 | if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { |
6032 | /* |
					 * if we were the page stolen by vm_pageout_scan to be
					 * cleaned (as opposed to a buddy being clustered in),
					 * or this request is not being driven by a PAGEOUT cluster,
					 * then we only need to check for the page being dirty or
					 * precious to decide whether to return it
6038 | */ |
6039 | if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED)) { |
6040 | goto check_busy; |
6041 | } |
6042 | goto dont_return; |
6043 | } |
6044 | /* |
6045 | * this is a request for a PAGEOUT cluster and this page |
6046 | * is merely along for the ride as a 'buddy'... not only |
6047 | * does it have to be dirty to be returned, but it also |
6048 | * can't have been referenced recently... |
6049 | */ |
6050 | if ((hibernate_cleaning_in_progress == TRUE || |
6051 | (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) || |
6052 | (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) && |
6053 | ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious)) { |
6054 | goto check_busy; |
6055 | } |
6056 | dont_return: |
6057 | /* |
6058 | * if we reach here, we're not to return |
6059 | * the page... go on to the next one |
6060 | */ |
6061 | if (dst_page->vmp_laundry == TRUE) { |
6062 | /* |
6063 | * if we get here, the page is not 'cleaning' (filtered out above). |
6064 | * since it has been referenced, remove it from the laundry |
6065 | * so we don't pay the cost of an I/O to clean a page |
6066 | * we're just going to take back |
6067 | */ |
6068 | vm_page_lockspin_queues(); |
6069 | |
					vm_pageout_steal_laundry(dst_page, TRUE);
					vm_page_activate(dst_page);
6072 | |
6073 | vm_page_unlock_queues(); |
6074 | } |
6075 | if (user_page_list) { |
6076 | user_page_list[entry].phys_addr = 0; |
6077 | } |
6078 | |
6079 | goto try_next_page; |
6080 | } |
6081 | check_busy: |
6082 | if (dst_page->vmp_busy) { |
6083 | if (cntrl_flags & UPL_NOBLOCK) { |
6084 | if (user_page_list) { |
6085 | user_page_list[entry].phys_addr = 0; |
6086 | } |
6087 | dwp->dw_mask = 0; |
6088 | |
6089 | goto try_next_page; |
6090 | } |
6091 | /* |
6092 | * someone else is playing with the |
6093 | * page. We will have to wait. |
6094 | */ |
6095 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); |
6096 | |
6097 | continue; |
6098 | } |
6099 | if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) { |
6100 | vm_page_lockspin_queues(); |
6101 | |
6102 | if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) { |
6103 | /* |
6104 | * we've buddied up a page for a clustered pageout |
6105 | * that has already been moved to the pageout |
6106 | * queue by pageout_scan... we need to remove |
6107 | * it from the queue and drop the laundry count |
6108 | * on that queue |
6109 | */ |
vm_pageout_throttle_up(dst_page);
6111 | } |
6112 | vm_page_unlock_queues(); |
6113 | } |
6114 | hw_dirty = refmod_state & VM_MEM_MODIFIED; |
6115 | dirty = hw_dirty ? TRUE : dst_page->vmp_dirty; |
6116 | |
6117 | if (phys_page > upl->highest_page) { |
6118 | upl->highest_page = phys_page; |
6119 | } |
6120 | |
6121 | assert(!pmap_is_noencrypt(phys_page)); |
6122 | |
6123 | if (cntrl_flags & UPL_SET_LITE) { |
6124 | unsigned int pg_num; |
6125 | |
6126 | pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE); |
6127 | assert(pg_num == (dst_offset - offset) / PAGE_SIZE); |
bitmap_set(upl->lite_list, pg_num);
6129 | |
6130 | if (hw_dirty) { |
6131 | if (pmap_flushes_delayed == FALSE) { |
6132 | pmap_flush_context_init(&pmap_flush_context_storage); |
6133 | pmap_flushes_delayed = TRUE; |
6134 | } |
pmap_clear_refmod_options(phys_page,
6136 | VM_MEM_MODIFIED, |
6137 | PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE, |
6138 | &pmap_flush_context_storage); |
6139 | } |
6140 | |
6141 | /* |
6142 | * Mark original page as cleaning |
6143 | * in place. |
6144 | */ |
6145 | dst_page->vmp_cleaning = TRUE; |
6146 | dst_page->vmp_precious = FALSE; |
6147 | } else { |
6148 | /* |
6149 | * use pageclean setup, it is more |
6150 | * convenient even for the pageout |
6151 | * cases here |
6152 | */ |
6153 | vm_object_lock(upl->map_object); |
vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6155 | vm_object_unlock(upl->map_object); |
6156 | |
6157 | alias_page->vmp_absent = FALSE; |
6158 | alias_page = NULL; |
6159 | } |
6160 | if (dirty) { |
6161 | SET_PAGE_DIRTY(dst_page, FALSE); |
6162 | } else { |
6163 | dst_page->vmp_dirty = FALSE; |
6164 | } |
6165 | |
6166 | if (!dirty) { |
6167 | dst_page->vmp_precious = TRUE; |
6168 | } |
6169 | |
6170 | if (!(cntrl_flags & UPL_CLEAN_IN_PLACE)) { |
6171 | if (!VM_PAGE_WIRED(dst_page)) { |
6172 | dst_page->vmp_free_when_done = TRUE; |
6173 | } |
6174 | } |
6175 | } else { |
6176 | if ((cntrl_flags & UPL_WILL_MODIFY) && |
6177 | (object->vo_copy != last_copy_object || |
6178 | object->vo_copy_version != last_copy_version)) { |
6179 | /* |
6180 | * Honor copy-on-write obligations |
6181 | * |
6182 | * The copy object has changed since we |
6183 | * last synchronized for copy-on-write. |
6184 | * Another copy object might have been |
6185 | * inserted while we released the object's |
6186 | * lock. Since someone could have seen the |
6187 | * original contents of the remaining pages |
6188 | * through that new object, we have to |
6189 | * synchronize with it again for the remaining |
6190 | * pages only. The previous pages are "busy" |
6191 | * so they can not be seen through the new |
6192 | * mapping. The new mapping will see our |
6193 | * upcoming changes for those previous pages, |
6194 | * but that's OK since they couldn't see what |
6195 | * was there before. It's just a race anyway |
6196 | * and there's no guarantee of consistency or |
6197 | * atomicity. We just don't want new mappings |
6198 | * to see both the *before* and *after* pages. |
6199 | */ |
6200 | if (object->vo_copy != VM_OBJECT_NULL) { |
6201 | vm_object_update( |
6202 | object, |
dst_offset,/* current offset */
xfer_size, /* remaining size */
6205 | NULL, |
6206 | NULL, |
6207 | FALSE, /* should_return */ |
6208 | MEMORY_OBJECT_COPY_SYNC, |
6209 | VM_PROT_NO_CHANGE); |
6210 | |
6211 | VM_PAGEOUT_DEBUG(upl_cow_again, 1); |
6212 | VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT)); |
6213 | } |
6214 | /* |
6215 | * remember the copy object we synced with |
6216 | */ |
6217 | last_copy_object = object->vo_copy; |
6218 | last_copy_version = object->vo_copy_version; |
6219 | } |
dst_page = vm_page_lookup(object, dst_offset);
6221 | |
6222 | if (dst_page != VM_PAGE_NULL) { |
6223 | if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { |
6224 | /* |
6225 | * skip over pages already present in the cache |
6226 | */ |
6227 | if (user_page_list) { |
6228 | user_page_list[entry].phys_addr = 0; |
6229 | } |
6230 | |
6231 | goto try_next_page; |
6232 | } |
6233 | if (dst_page->vmp_fictitious) { |
6234 | panic("need corner case for fictitious page" ); |
6235 | } |
6236 | |
6237 | if (dst_page->vmp_busy || dst_page->vmp_cleaning) { |
6238 | /* |
6239 | * someone else is playing with the |
6240 | * page. We will have to wait. |
6241 | */ |
6242 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); |
6243 | |
6244 | continue; |
6245 | } |
6246 | if (dst_page->vmp_laundry) { |
vm_pageout_steal_laundry(dst_page, FALSE);
6248 | } |
6249 | } else { |
6250 | if (object->private) { |
6251 | /* |
* This is a nasty wrinkle for users
* of upl who encounter device or
* private memory. However, it is
* unavoidable: only a fault can
* resolve the actual backing
* physical page by asking the
* backing device.
6259 | */ |
6260 | if (user_page_list) { |
6261 | user_page_list[entry].phys_addr = 0; |
6262 | } |
6263 | |
6264 | goto try_next_page; |
6265 | } |
6266 | if (object->scan_collisions) { |
6267 | /* |
6268 | * the pageout_scan thread is trying to steal |
6269 | * pages from this object, but has run into our |
6270 | * lock... grab 2 pages from the head of the object... |
6271 | * the first is freed on behalf of pageout_scan, the |
6272 | * 2nd is for our own use... we use vm_object_page_grab |
6273 | * in both cases to avoid taking pages from the free |
6274 | * list since we are under memory pressure and our |
6275 | * lock on this object is getting in the way of |
6276 | * relieving it |
6277 | */ |
6278 | dst_page = vm_object_page_grab(object); |
6279 | |
6280 | if (dst_page != VM_PAGE_NULL) { |
vm_page_release(dst_page,
6282 | FALSE); |
6283 | } |
6284 | |
6285 | dst_page = vm_object_page_grab(object); |
6286 | } |
6287 | if (dst_page == VM_PAGE_NULL) { |
6288 | /* |
6289 | * need to allocate a page |
6290 | */ |
dst_page = vm_page_grab_options(grab_options);
6292 | if (dst_page != VM_PAGE_NULL) { |
6293 | page_grab_count++; |
6294 | } |
6295 | } |
6296 | if (dst_page == VM_PAGE_NULL) { |
6297 | if ((cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { |
6298 | /* |
* we don't want to stall waiting for pages to come onto the free list
* while we're already holding absent pages in this UPL...
* the caller will deal with the empty slots
6302 | */ |
6303 | if (user_page_list) { |
6304 | user_page_list[entry].phys_addr = 0; |
6305 | } |
6306 | |
6307 | goto try_next_page; |
6308 | } |
6309 | /* |
6310 | * no pages available... wait |
6311 | * then try again for the same |
6312 | * offset... |
6313 | */ |
6314 | vm_object_unlock(object); |
6315 | |
6316 | OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); |
6317 | |
6318 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); |
6319 | |
6320 | VM_PAGE_WAIT(); |
6321 | OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); |
6322 | |
6323 | VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); |
6324 | |
6325 | vm_object_lock(object); |
6326 | |
6327 | continue; |
6328 | } |
vm_page_insert(dst_page, object, dst_offset);
6330 | |
6331 | dst_page->vmp_absent = TRUE; |
6332 | dst_page->vmp_busy = FALSE; |
6333 | |
6334 | if (cntrl_flags & UPL_RET_ONLY_ABSENT) { |
6335 | /* |
6336 | * if UPL_RET_ONLY_ABSENT was specified, |
* then we're definitely setting up an
* upl for a clustered read/pagein
6339 | * operation... mark the pages as clustered |
6340 | * so upl_commit_range can put them on the |
6341 | * speculative list |
6342 | */ |
6343 | dst_page->vmp_clustered = TRUE; |
6344 | |
6345 | if (!(cntrl_flags & UPL_FILE_IO)) { |
6346 | counter_inc(&vm_statistics_pageins); |
6347 | } |
6348 | } |
6349 | } |
phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6351 | |
6352 | dst_page->vmp_overwriting = TRUE; |
6353 | |
6354 | if (dst_page->vmp_pmapped) { |
6355 | if (!(cntrl_flags & UPL_FILE_IO)) { |
6356 | /* |
6357 | * eliminate all mappings from the |
* original object and its progeny
6359 | */ |
refmod_state = pmap_disconnect(phys_page);
} else {
refmod_state = pmap_get_refmod(phys_page);
6363 | } |
6364 | } else { |
6365 | refmod_state = 0; |
6366 | } |
6367 | |
6368 | hw_dirty = refmod_state & VM_MEM_MODIFIED; |
6369 | dirty = hw_dirty ? TRUE : dst_page->vmp_dirty; |
6370 | |
6371 | if (cntrl_flags & UPL_SET_LITE) { |
6372 | unsigned int pg_num; |
6373 | |
6374 | pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE); |
6375 | assert(pg_num == (dst_offset - offset) / PAGE_SIZE); |
bitmap_set(upl->lite_list, pg_num);

if (hw_dirty) {
pmap_clear_modify(phys_page);
6380 | } |
6381 | |
6382 | /* |
6383 | * Mark original page as cleaning |
6384 | * in place. |
6385 | */ |
6386 | dst_page->vmp_cleaning = TRUE; |
6387 | dst_page->vmp_precious = FALSE; |
6388 | } else { |
6389 | /* |
6390 | * use pageclean setup, it is more |
6391 | * convenient even for the pageout |
6392 | * cases here |
6393 | */ |
6394 | vm_object_lock(upl->map_object); |
vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6396 | vm_object_unlock(upl->map_object); |
6397 | |
6398 | alias_page->vmp_absent = FALSE; |
6399 | alias_page = NULL; |
6400 | } |
6401 | |
6402 | if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { |
6403 | upl->flags &= ~UPL_CLEAR_DIRTY; |
6404 | upl->flags |= UPL_SET_DIRTY; |
6405 | dirty = TRUE; |
6406 | /* |
6407 | * Page belonging to a code-signed object is about to |
6408 | * be written. Mark it tainted and disconnect it from |
6409 | * all pmaps so processes have to fault it back in and |
6410 | * deal with the tainted bit. |
6411 | */ |
6412 | if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) { |
6413 | dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE; |
6414 | vm_page_upl_tainted++; |
6415 | if (dst_page->vmp_pmapped) { |
refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
6417 | if (refmod_state & VM_MEM_REFERENCED) { |
6418 | dst_page->vmp_reference = TRUE; |
6419 | } |
6420 | } |
6421 | } |
6422 | } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { |
6423 | /* |
6424 | * clean in place for read implies |
6425 | * that a write will be done on all |
6426 | * the pages that are dirty before |
6427 | * a upl commit is done. The caller |
6428 | * is obligated to preserve the |
6429 | * contents of all pages marked dirty |
6430 | */ |
6431 | upl->flags |= UPL_CLEAR_DIRTY; |
6432 | } |
6433 | dst_page->vmp_dirty = dirty; |
6434 | |
6435 | if (!dirty) { |
6436 | dst_page->vmp_precious = TRUE; |
6437 | } |
6438 | |
6439 | if (!VM_PAGE_WIRED(dst_page)) { |
6440 | /* |
6441 | * deny access to the target page while |
6442 | * it is being worked on |
6443 | */ |
6444 | dst_page->vmp_busy = TRUE; |
6445 | } else { |
6446 | dwp->dw_mask |= DW_vm_page_wire; |
6447 | } |
6448 | |
6449 | /* |
6450 | * We might be about to satisfy a fault which has been |
6451 | * requested. So no need for the "restart" bit. |
6452 | */ |
6453 | dst_page->vmp_restart = FALSE; |
6454 | if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) { |
6455 | /* |
6456 | * expect the page to be used |
6457 | */ |
6458 | dwp->dw_mask |= DW_set_reference; |
6459 | } |
6460 | if (cntrl_flags & UPL_PRECIOUS) { |
6461 | if (object->internal) { |
6462 | SET_PAGE_DIRTY(dst_page, FALSE); |
6463 | dst_page->vmp_precious = FALSE; |
6464 | } else { |
6465 | dst_page->vmp_precious = TRUE; |
6466 | } |
6467 | } else { |
6468 | dst_page->vmp_precious = FALSE; |
6469 | } |
6470 | } |
6471 | if (dst_page->vmp_busy) { |
6472 | upl->flags |= UPL_HAS_BUSY; |
6473 | } |
6474 | |
6475 | if (phys_page > upl->highest_page) { |
6476 | upl->highest_page = phys_page; |
6477 | } |
6478 | assert(!pmap_is_noencrypt(phys_page)); |
6479 | if (user_page_list) { |
6480 | user_page_list[entry].phys_addr = phys_page; |
6481 | user_page_list[entry].free_when_done = dst_page->vmp_free_when_done; |
6482 | user_page_list[entry].absent = dst_page->vmp_absent; |
6483 | user_page_list[entry].dirty = dst_page->vmp_dirty; |
6484 | user_page_list[entry].precious = dst_page->vmp_precious; |
6485 | user_page_list[entry].device = FALSE; |
6486 | user_page_list[entry].needed = FALSE; |
6487 | if (dst_page->vmp_clustered == TRUE) { |
6488 | user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE; |
6489 | } else { |
6490 | user_page_list[entry].speculative = FALSE; |
6491 | } |
6492 | user_page_list[entry].cs_validated = dst_page->vmp_cs_validated; |
6493 | user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted; |
6494 | user_page_list[entry].cs_nx = dst_page->vmp_cs_nx; |
6495 | user_page_list[entry].mark = FALSE; |
6496 | } |
6497 | /* |
6498 | * if UPL_RET_ONLY_ABSENT is set, then |
6499 | * we are working with a fresh page and we've |
6500 | * just set the clustered flag on it to |
* indicate that it was dragged in as part of a
6502 | * speculative cluster... so leave it alone |
6503 | */ |
6504 | if (!(cntrl_flags & UPL_RET_ONLY_ABSENT)) { |
6505 | /* |
6506 | * someone is explicitly grabbing this page... |
6507 | * update clustered and speculative state |
6508 | * |
6509 | */ |
6510 | if (dst_page->vmp_clustered) { |
6511 | VM_PAGE_CONSUME_CLUSTERED(dst_page); |
6512 | } |
6513 | } |
6514 | try_next_page: |
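/*
 * queue manipulations for this page are batched on the
 * delayed work list and flushed once dw_limit entries have
 * accumulated (and once more after the loop completes)
 */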
6515 | if (dwp->dw_mask) { |
6516 | if (dwp->dw_mask & DW_vm_page_activate) { |
6517 | counter_inc(&vm_statistics_reactivations); |
6518 | } |
6519 | |
6520 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); |
6521 | |
6522 | if (dw_count >= dw_limit) { |
vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6524 | |
6525 | dwp = dwp_start; |
6526 | dw_count = 0; |
6527 | } |
6528 | } |
6529 | entry++; |
6530 | dst_offset += PAGE_SIZE_64; |
6531 | xfer_size -= PAGE_SIZE; |
6532 | } |
6533 | if (dw_count) { |
vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6535 | dwp = dwp_start; |
6536 | dw_count = 0; |
6537 | } |
6538 | |
6539 | if (alias_page != NULL) { |
6540 | VM_PAGE_FREE(alias_page); |
6541 | } |
6542 | if (pmap_flushes_delayed == TRUE) { |
6543 | pmap_flush(&pmap_flush_context_storage); |
6544 | } |
6545 | |
6546 | if (page_list_count != NULL) { |
6547 | if (upl->flags & UPL_INTERNAL) { |
6548 | *page_list_count = 0; |
6549 | } else if (*page_list_count > entry) { |
6550 | *page_list_count = entry; |
6551 | } |
6552 | } |
6553 | #if UPL_DEBUG |
6554 | upl->upl_state = 1; |
6555 | #endif |
6556 | vm_object_unlock(object); |
6557 | |
6558 | VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0); |
6559 | #if DEVELOPMENT || DEBUG |
6560 | if (task != NULL) { |
6561 | ledger_credit(task->ledger, task_ledgers.pages_grabbed_upl, page_grab_count); |
6562 | } |
6563 | #endif /* DEVELOPMENT || DEBUG */ |
6564 | |
6565 | if (dwp_start && dwp_finish_ctx) { |
vm_page_delayed_work_finish_ctx(dwp_start);
6567 | dwp_start = dwp = NULL; |
6568 | } |
6569 | |
6570 | return KERN_SUCCESS; |
6571 | } |
6572 | |
6573 | /* |
6574 | * Routine: vm_object_super_upl_request |
6575 | * Purpose: |
6576 | * Cause the population of a portion of a vm_object |
6577 | * in much the same way as memory_object_upl_request. |
6578 | * Depending on the nature of the request, the pages |
* returned may contain valid data or be uninitialized.
6580 | * However, the region may be expanded up to the super |
6581 | * cluster size provided. |
6582 | */ |
6583 | |
6584 | __private_extern__ kern_return_t |
6585 | vm_object_super_upl_request( |
6586 | vm_object_t object, |
6587 | vm_object_offset_t offset, |
6588 | upl_size_t size, |
6589 | upl_size_t super_cluster, |
6590 | upl_t *upl, |
6591 | upl_page_info_t *user_page_list, |
6592 | unsigned int *page_list_count, |
6593 | upl_control_flags_t cntrl_flags, |
6594 | vm_tag_t tag) |
6595 | { |
6596 | if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR) == UPL_VECTOR)) { |
6597 | return KERN_FAILURE; |
6598 | } |
6599 | |
6600 | assert(object->paging_in_progress); |
6601 | offset = offset - object->paging_offset; |
6602 | |
6603 | if (super_cluster > size) { |
6604 | vm_object_offset_t base_offset; |
6605 | upl_size_t super_size; |
6606 | vm_object_size_t super_size_64; |
6607 | |
6608 | base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); |
6609 | super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster << 1 : super_cluster; |
6610 | super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; |
6611 | super_size = (upl_size_t) super_size_64; |
6612 | assert(super_size == super_size_64); |
6613 | |
6614 | if (offset > (base_offset + super_size)) { |
6615 | panic("vm_object_super_upl_request: Missed target pageout" |
6616 | " %#llx,%#llx, %#x, %#x, %#x, %#llx\n" , |
6617 | offset, base_offset, super_size, super_cluster, |
6618 | size, object->paging_offset); |
6619 | } |
6620 | /* |
6621 | * apparently there is a case where the vm requests a |
* page to be written out whose offset is beyond the
6623 | * object size |
6624 | */ |
6625 | if ((offset + size) > (base_offset + super_size)) { |
6626 | super_size_64 = (offset + size) - base_offset; |
6627 | super_size = (upl_size_t) super_size_64; |
6628 | assert(super_size == super_size_64); |
6629 | } |
6630 | |
6631 | offset = base_offset; |
6632 | size = super_size; |
6633 | } |
return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
6635 | } |
6636 | |
6637 | int cs_executable_create_upl = 0; |
6638 | extern int proc_selfpid(void); |
6639 | extern char *proc_name_address(void *p); |
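
/*
 * Routine:	vm_map_create_upl
 * Purpose:
 *	Look up the VM object backing [offset, offset + *upl_size) in
 *	"map" and build a UPL against it, resolving submaps, copy
 *	strategy and page-size mismatches along the way; *upl_size may
 *	be trimmed to what the map entry actually covers.
 */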
6640 | |
6641 | kern_return_t |
6642 | vm_map_create_upl( |
6643 | vm_map_t map, |
6644 | vm_map_address_t offset, |
6645 | upl_size_t *upl_size, |
6646 | upl_t *upl, |
6647 | upl_page_info_array_t page_list, |
6648 | unsigned int *count, |
6649 | upl_control_flags_t *flags, |
6650 | vm_tag_t tag) |
6651 | { |
6652 | vm_map_entry_t entry; |
6653 | upl_control_flags_t caller_flags; |
6654 | int force_data_sync; |
6655 | int sync_cow_data; |
6656 | vm_object_t local_object; |
6657 | vm_map_offset_t local_offset; |
6658 | vm_map_offset_t local_start; |
6659 | kern_return_t ret; |
6660 | vm_map_address_t original_offset; |
6661 | vm_map_size_t original_size, adjusted_size; |
6662 | vm_map_offset_t local_entry_start; |
6663 | vm_object_offset_t local_entry_offset; |
6664 | vm_object_offset_t offset_in_mapped_page; |
6665 | boolean_t release_map = FALSE; |
6666 | |
6667 | |
6668 | start_with_map: |
6669 | |
6670 | original_offset = offset; |
6671 | original_size = *upl_size; |
6672 | adjusted_size = original_size; |
6673 | |
6674 | caller_flags = *flags; |
6675 | |
6676 | if (caller_flags & ~UPL_VALID_FLAGS) { |
6677 | /* |
6678 | * For forward compatibility's sake, |
6679 | * reject any unknown flag. |
6680 | */ |
6681 | ret = KERN_INVALID_VALUE; |
6682 | goto done; |
6683 | } |
6684 | force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); |
6685 | sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); |
6686 | |
6687 | if (upl == NULL) { |
6688 | ret = KERN_INVALID_ARGUMENT; |
6689 | goto done; |
6690 | } |
6691 | |
6692 | REDISCOVER_ENTRY: |
6693 | vm_map_lock_read(map); |
6694 | |
if (!vm_map_lookup_entry(map, offset, &entry)) {
6696 | vm_map_unlock_read(map); |
6697 | ret = KERN_FAILURE; |
6698 | goto done; |
6699 | } |
6700 | |
6701 | local_entry_start = entry->vme_start; |
6702 | local_entry_offset = VME_OFFSET(entry); |
6703 | |
6704 | if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) { |
6705 | DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%x flags 0x%llx\n" , map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, *upl_size, *flags); |
6706 | } |
6707 | |
6708 | if (entry->vme_end - original_offset < adjusted_size) { |
6709 | adjusted_size = entry->vme_end - original_offset; |
6710 | assert(adjusted_size > 0); |
6711 | *upl_size = (upl_size_t) adjusted_size; |
6712 | assert(*upl_size == adjusted_size); |
6713 | } |
6714 | |
6715 | if (caller_flags & UPL_QUERY_OBJECT_TYPE) { |
6716 | *flags = 0; |
6717 | |
6718 | if (!entry->is_sub_map && |
6719 | VME_OBJECT(entry) != VM_OBJECT_NULL) { |
6720 | if (VME_OBJECT(entry)->private) { |
6721 | *flags = UPL_DEV_MEMORY; |
6722 | } |
6723 | |
6724 | if (VME_OBJECT(entry)->phys_contiguous) { |
6725 | *flags |= UPL_PHYS_CONTIG; |
6726 | } |
6727 | } |
6728 | vm_map_unlock_read(map); |
6729 | ret = KERN_SUCCESS; |
6730 | goto done; |
6731 | } |
6732 | |
6733 | offset_in_mapped_page = 0; |
6734 | if (VM_MAP_PAGE_SIZE(map) < PAGE_SIZE) { |
6735 | offset = vm_map_trunc_page(original_offset, VM_MAP_PAGE_MASK(map)); |
6736 | *upl_size = (upl_size_t) |
6737 | (vm_map_round_page(original_offset + adjusted_size, |
6738 | VM_MAP_PAGE_MASK(map)) |
6739 | - offset); |
6740 | |
6741 | offset_in_mapped_page = original_offset - offset; |
6742 | assert(offset_in_mapped_page < VM_MAP_PAGE_SIZE(map)); |
6743 | |
6744 | DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%llx flags 0x%llx -> offset 0x%llx adjusted_size 0x%llx *upl_size 0x%x offset_in_mapped_page 0x%llx\n" , map, VM_MAP_PAGE_SHIFT(map), (uint64_t)original_offset, (uint64_t)original_size, *flags, (uint64_t)offset, (uint64_t)adjusted_size, *upl_size, offset_in_mapped_page); |
6745 | } |
6746 | |
6747 | if (!entry->is_sub_map) { |
6748 | if (VME_OBJECT(entry) == VM_OBJECT_NULL || |
6749 | !VME_OBJECT(entry)->phys_contiguous) { |
6750 | if (*upl_size > MAX_UPL_SIZE_BYTES) { |
6751 | *upl_size = MAX_UPL_SIZE_BYTES; |
6752 | } |
6753 | } |
6754 | |
6755 | /* |
6756 | * Create an object if necessary. |
6757 | */ |
6758 | if (VME_OBJECT(entry) == VM_OBJECT_NULL) { |
6759 | if (vm_map_lock_read_to_write(map)) { |
6760 | goto REDISCOVER_ENTRY; |
6761 | } |
6762 | |
VME_OBJECT_SET(entry,
vm_object_allocate((vm_size_t)
vm_object_round_page((entry->vme_end - entry->vme_start))),
false, 0);
VME_OFFSET_SET(entry, 0);
6768 | assert(entry->use_pmap); |
6769 | |
6770 | vm_map_lock_write_to_read(map); |
6771 | } |
6772 | |
6773 | if (!(caller_flags & UPL_COPYOUT_FROM) && |
6774 | !(entry->protection & VM_PROT_WRITE)) { |
6775 | vm_map_unlock_read(map); |
6776 | ret = KERN_PROTECTION_FAILURE; |
6777 | goto done; |
6778 | } |
6779 | } |
6780 | |
6781 | #if !XNU_TARGET_OS_OSX |
6782 | if (map->pmap != kernel_pmap && |
6783 | (caller_flags & UPL_COPYOUT_FROM) && |
6784 | (entry->protection & VM_PROT_EXECUTE) && |
6785 | !(entry->protection & VM_PROT_WRITE)) { |
6786 | vm_offset_t kaddr; |
6787 | vm_size_t ksize; |
6788 | |
6789 | /* |
6790 | * We're about to create a read-only UPL backed by |
6791 | * memory from an executable mapping. |
6792 | * Wiring the pages would result in the pages being copied |
6793 | * (due to the "MAP_PRIVATE" mapping) and no longer |
6794 | * code-signed, so no longer eligible for execution. |
6795 | * Instead, let's copy the data into a kernel buffer and |
6796 | * create the UPL from this kernel buffer. |
6797 | * The kernel buffer is then freed, leaving the UPL holding |
6798 | * the last reference on the VM object, so the memory will |
6799 | * be released when the UPL is committed. |
6800 | */ |
6801 | |
6802 | vm_map_unlock_read(map); |
6803 | entry = VM_MAP_ENTRY_NULL; |
6804 | /* allocate kernel buffer */ |
6805 | ksize = round_page(*upl_size); |
6806 | kaddr = 0; |
6807 | ret = kmem_alloc(kernel_map, &kaddr, ksize, |
6808 | KMA_PAGEABLE | KMA_DATA, tag); |
6809 | if (ret == KERN_SUCCESS) { |
6810 | /* copyin the user data */ |
6811 | ret = copyinmap(map, offset, (void *)kaddr, *upl_size); |
6812 | } |
6813 | if (ret == KERN_SUCCESS) { |
6814 | if (ksize > *upl_size) { |
6815 | /* zero out the extra space in kernel buffer */ |
6816 | memset((void *)(kaddr + *upl_size), |
6817 | 0, |
6818 | ksize - *upl_size); |
6819 | } |
6820 | /* create the UPL from the kernel buffer */ |
6821 | vm_object_offset_t offset_in_object; |
6822 | vm_object_offset_t offset_in_object_page; |
6823 | |
6824 | offset_in_object = offset - local_entry_start + local_entry_offset; |
6825 | offset_in_object_page = offset_in_object - vm_object_trunc_page(offset_in_object); |
6826 | assert(offset_in_object_page < PAGE_SIZE); |
6827 | assert(offset_in_object_page + offset_in_mapped_page < PAGE_SIZE); |
6828 | *upl_size -= offset_in_object_page + offset_in_mapped_page; |
6829 | ret = vm_map_create_upl(kernel_map, |
6830 | (vm_map_address_t)(kaddr + offset_in_object_page + offset_in_mapped_page), |
6831 | upl_size, upl, page_list, count, flags, tag); |
6832 | } |
6833 | if (kaddr != 0) { |
6834 | /* free the kernel buffer */ |
6835 | kmem_free(kernel_map, kaddr, ksize); |
6836 | kaddr = 0; |
6837 | ksize = 0; |
6838 | } |
6839 | #if DEVELOPMENT || DEBUG |
6840 | DTRACE_VM4(create_upl_from_executable, |
6841 | vm_map_t, map, |
6842 | vm_map_address_t, offset, |
6843 | upl_size_t, *upl_size, |
6844 | kern_return_t, ret); |
6845 | #endif /* DEVELOPMENT || DEBUG */ |
6846 | goto done; |
6847 | } |
6848 | #endif /* !XNU_TARGET_OS_OSX */ |
6849 | |
6850 | if (!entry->is_sub_map) { |
6851 | local_object = VME_OBJECT(entry); |
6852 | assert(local_object != VM_OBJECT_NULL); |
6853 | } |
6854 | |
6855 | if (!entry->is_sub_map && |
6856 | !entry->needs_copy && |
6857 | *upl_size != 0 && |
6858 | local_object->vo_size > *upl_size && /* partial UPL */ |
6859 | entry->wired_count == 0 && /* No COW for entries that are wired */ |
6860 | (map->pmap != kernel_pmap) && /* alias checks */ |
6861 | (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */ |
6862 | || |
6863 | ( /* case 2 */ |
6864 | local_object->internal && |
6865 | (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) && |
6866 | local_object->ref_count > 1))) { |
6867 | vm_prot_t prot; |
6868 | |
6869 | /* |
6870 | * Case 1: |
6871 | * Set up the targeted range for copy-on-write to avoid |
6872 | * applying true_share/copy_delay to the entire object. |
6873 | * |
6874 | * Case 2: |
6875 | * This map entry covers only part of an internal |
6876 | * object. There could be other map entries covering |
6877 | * other areas of this object and some of these map |
6878 | * entries could be marked as "needs_copy", which |
6879 | * assumes that the object is COPY_SYMMETRIC. |
6880 | * To avoid marking this object as COPY_DELAY and |
6881 | * "true_share", let's shadow it and mark the new |
6882 | * (smaller) object as "true_share" and COPY_DELAY. |
6883 | */ |
6884 | |
6885 | if (vm_map_lock_read_to_write(map)) { |
6886 | goto REDISCOVER_ENTRY; |
6887 | } |
6888 | vm_map_lock_assert_exclusive(map); |
6889 | assert(VME_OBJECT(entry) == local_object); |
6890 | |
6891 | vm_map_clip_start(map, |
6892 | entry, |
6893 | vm_map_trunc_page(offset, |
6894 | VM_MAP_PAGE_MASK(map))); |
6895 | vm_map_clip_end(map, |
6896 | entry, |
6897 | vm_map_round_page(offset + *upl_size, |
6898 | VM_MAP_PAGE_MASK(map))); |
6899 | if ((entry->vme_end - offset) < *upl_size) { |
6900 | *upl_size = (upl_size_t) (entry->vme_end - offset); |
6901 | assert(*upl_size == entry->vme_end - offset); |
6902 | } |
6903 | |
6904 | prot = entry->protection & ~VM_PROT_WRITE; |
6905 | if (override_nx(map, VME_ALIAS(entry)) && prot) { |
6906 | prot |= VM_PROT_EXECUTE; |
6907 | } |
vm_object_pmap_protect(local_object,
VME_OFFSET(entry),
entry->vme_end - entry->vme_start,
((entry->is_shared ||
map->mapped_in_other_pmaps)
? PMAP_NULL
: map->pmap),
VM_MAP_PAGE_SIZE(map),
entry->vme_start,
prot);
6918 | |
6919 | assert(entry->wired_count == 0); |
6920 | |
6921 | /* |
6922 | * Lock the VM object and re-check its status: if it's mapped |
6923 | * in another address space, we could still be racing with |
6924 | * another thread holding that other VM map exclusively. |
6925 | */ |
6926 | vm_object_lock(local_object); |
6927 | if (local_object->true_share) { |
6928 | /* object is already in proper state: no COW needed */ |
6929 | assert(local_object->copy_strategy != |
6930 | MEMORY_OBJECT_COPY_SYMMETRIC); |
6931 | } else { |
6932 | /* not true_share: ask for copy-on-write below */ |
6933 | assert(local_object->copy_strategy == |
6934 | MEMORY_OBJECT_COPY_SYMMETRIC); |
6935 | entry->needs_copy = TRUE; |
6936 | } |
6937 | vm_object_unlock(local_object); |
6938 | |
6939 | vm_map_lock_write_to_read(map); |
6940 | } |
6941 | |
6942 | if (entry->needs_copy) { |
6943 | /* |
6944 | * Honor copy-on-write for COPY_SYMMETRIC |
6945 | * strategy. |
6946 | */ |
6947 | vm_map_t local_map; |
6948 | vm_object_t object; |
6949 | vm_object_offset_t new_offset; |
6950 | vm_prot_t prot; |
6951 | boolean_t wired; |
6952 | vm_map_version_t version; |
6953 | vm_map_t real_map; |
6954 | vm_prot_t fault_type; |
6955 | |
6956 | local_map = map; |
6957 | |
6958 | if (caller_flags & UPL_COPYOUT_FROM) { |
6959 | fault_type = VM_PROT_READ | VM_PROT_COPY; |
6960 | vm_counters.create_upl_extra_cow++; |
6961 | vm_counters.create_upl_extra_cow_pages += |
6962 | (entry->vme_end - entry->vme_start) / PAGE_SIZE; |
6963 | } else { |
6964 | fault_type = VM_PROT_WRITE; |
6965 | } |
if (vm_map_lookup_and_lock_object(&local_map,
offset, fault_type,
OBJECT_LOCK_EXCLUSIVE,
&version, &object,
&new_offset, &prot, &wired,
NULL,
&real_map, NULL) != KERN_SUCCESS) {
6973 | if (fault_type == VM_PROT_WRITE) { |
6974 | vm_counters.create_upl_lookup_failure_write++; |
6975 | } else { |
6976 | vm_counters.create_upl_lookup_failure_copy++; |
6977 | } |
6978 | vm_map_unlock_read(local_map); |
6979 | ret = KERN_FAILURE; |
6980 | goto done; |
6981 | } |
6982 | if (real_map != local_map) { |
6983 | vm_map_unlock(real_map); |
6984 | } |
6985 | vm_map_unlock_read(local_map); |
6986 | |
6987 | vm_object_unlock(object); |
6988 | |
6989 | goto REDISCOVER_ENTRY; |
6990 | } |
6991 | |
6992 | if (entry->is_sub_map) { |
6993 | vm_map_t submap; |
6994 | |
6995 | submap = VME_SUBMAP(entry); |
6996 | local_start = entry->vme_start; |
6997 | local_offset = (vm_map_offset_t)VME_OFFSET(entry); |
6998 | |
vm_map_reference(submap);
7000 | vm_map_unlock_read(map); |
7001 | |
7002 | DEBUG4K_UPL("map %p offset 0x%llx (0x%llx) size 0x%x (adjusted 0x%llx original 0x%llx) offset_in_mapped_page 0x%llx submap %p\n" , map, (uint64_t)offset, (uint64_t)original_offset, *upl_size, (uint64_t)adjusted_size, (uint64_t)original_size, offset_in_mapped_page, submap); |
7003 | offset += offset_in_mapped_page; |
7004 | *upl_size -= offset_in_mapped_page; |
7005 | |
7006 | if (release_map) { |
7007 | vm_map_deallocate(map); |
7008 | } |
7009 | map = submap; |
7010 | release_map = TRUE; |
7011 | offset = local_offset + (offset - local_start); |
7012 | goto start_with_map; |
7013 | } |
7014 | |
7015 | if (sync_cow_data && |
7016 | (VME_OBJECT(entry)->shadow || |
7017 | VME_OBJECT(entry)->vo_copy)) { |
7018 | local_object = VME_OBJECT(entry); |
7019 | local_start = entry->vme_start; |
7020 | local_offset = (vm_map_offset_t)VME_OFFSET(entry); |
7021 | |
7022 | vm_object_reference(local_object); |
7023 | vm_map_unlock_read(map); |
7024 | |
7025 | if (local_object->shadow && local_object->vo_copy) { |
vm_object_lock_request(local_object->shadow,
((vm_object_offset_t)
((offset - local_start) +
local_offset) +
local_object->vo_shadow_offset),
*upl_size, FALSE,
7032 | MEMORY_OBJECT_DATA_SYNC, |
7033 | VM_PROT_NO_CHANGE); |
7034 | } |
7035 | sync_cow_data = FALSE; |
vm_object_deallocate(local_object);
7037 | |
7038 | goto REDISCOVER_ENTRY; |
7039 | } |
7040 | if (force_data_sync) { |
7041 | local_object = VME_OBJECT(entry); |
7042 | local_start = entry->vme_start; |
7043 | local_offset = (vm_map_offset_t)VME_OFFSET(entry); |
7044 | |
7045 | vm_object_reference(local_object); |
7046 | vm_map_unlock_read(map); |
7047 | |
vm_object_lock_request(local_object,
((vm_object_offset_t)
((offset - local_start) +
local_offset)),
(vm_object_size_t)*upl_size,
7053 | FALSE, |
7054 | MEMORY_OBJECT_DATA_SYNC, |
7055 | VM_PROT_NO_CHANGE); |
7056 | |
7057 | force_data_sync = FALSE; |
vm_object_deallocate(local_object);
7059 | |
7060 | goto REDISCOVER_ENTRY; |
7061 | } |
7062 | if (VME_OBJECT(entry)->private) { |
7063 | *flags = UPL_DEV_MEMORY; |
7064 | } else { |
7065 | *flags = 0; |
7066 | } |
7067 | |
7068 | if (VME_OBJECT(entry)->phys_contiguous) { |
7069 | *flags |= UPL_PHYS_CONTIG; |
7070 | } |
7071 | |
7072 | local_object = VME_OBJECT(entry); |
7073 | local_offset = (vm_map_offset_t)VME_OFFSET(entry); |
7074 | local_start = entry->vme_start; |
7075 | |
7076 | /* |
7077 | * Wiring will copy the pages to the shadow object. |
7078 | * The shadow object will not be code-signed so |
7079 | * attempting to execute code from these copied pages |
7080 | * would trigger a code-signing violation. |
7081 | */ |
7082 | if (entry->protection & VM_PROT_EXECUTE) { |
7083 | #if MACH_ASSERT |
7084 | printf("pid %d[%s] create_upl out of executable range from " |
7085 | "0x%llx to 0x%llx: side effects may include " |
7086 | "code-signing violations later on\n" , |
7087 | proc_selfpid(), |
7088 | (get_bsdtask_info(current_task()) |
7089 | ? proc_name_address(get_bsdtask_info(current_task())) |
7090 | : "?" ), |
7091 | (uint64_t) entry->vme_start, |
7092 | (uint64_t) entry->vme_end); |
7093 | #endif /* MACH_ASSERT */ |
7094 | DTRACE_VM2(cs_executable_create_upl, |
7095 | uint64_t, (uint64_t)entry->vme_start, |
7096 | uint64_t, (uint64_t)entry->vme_end); |
7097 | cs_executable_create_upl++; |
7098 | } |
7099 | |
7100 | vm_object_lock(local_object); |
7101 | |
7102 | /* |
7103 | * Ensure that this object is "true_share" and "copy_delay" now, |
7104 | * while we're still holding the VM map lock. After we unlock the map, |
7105 | * anything could happen to that mapping, including some copy-on-write |
7106 | * activity. We need to make sure that the IOPL will point at the |
7107 | * same memory as the mapping. |
7108 | */ |
7109 | if (local_object->true_share) { |
7110 | assert(local_object->copy_strategy != |
7111 | MEMORY_OBJECT_COPY_SYMMETRIC); |
7112 | } else if (!is_kernel_object(local_object) && |
7113 | local_object != compressor_object && |
7114 | !local_object->phys_contiguous) { |
7115 | #if VM_OBJECT_TRACKING_OP_TRUESHARE |
7116 | if (!local_object->true_share && |
7117 | vm_object_tracking_btlog) { |
7118 | btlog_record(vm_object_tracking_btlog, local_object, |
7119 | VM_OBJECT_TRACKING_OP_TRUESHARE, |
7120 | btref_get(__builtin_frame_address(0), 0)); |
7121 | } |
7122 | #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ |
VM_OBJECT_SET_TRUE_SHARE(local_object, TRUE);
7124 | if (local_object->copy_strategy == |
7125 | MEMORY_OBJECT_COPY_SYMMETRIC) { |
7126 | local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; |
7127 | } |
7128 | } |
7129 | |
7130 | vm_object_reference_locked(local_object); |
7131 | vm_object_unlock(local_object); |
7132 | |
7133 | vm_map_unlock_read(map); |
7134 | |
7135 | offset += offset_in_mapped_page; |
7136 | assert(*upl_size > offset_in_mapped_page); |
7137 | *upl_size -= offset_in_mapped_page; |
7138 | |
ret = vm_object_iopl_request(local_object,
((vm_object_offset_t)
((offset - local_start) + local_offset)),
*upl_size,
upl,
page_list,
count,
caller_flags,
tag);
vm_object_deallocate(local_object);
7149 | |
7150 | done: |
7151 | if (release_map) { |
7152 | vm_map_deallocate(map); |
7153 | } |
7154 | |
7155 | return ret; |
7156 | } |
7157 | |
7158 | /* |
7159 | * Internal routine to enter a UPL into a VM map. |
7160 | * |
7161 | * JMM - This should just be doable through the standard |
7162 | * vm_map_enter() API. |
7163 | */ |
7164 | kern_return_t |
7165 | vm_map_enter_upl_range( |
7166 | vm_map_t map, |
7167 | upl_t upl, |
7168 | vm_object_offset_t offset_to_map, |
7169 | upl_size_t size_to_map, |
7170 | vm_prot_t prot_to_map, |
7171 | vm_map_offset_t *dst_addr) |
7172 | { |
7173 | vm_map_size_t size; |
7174 | vm_object_offset_t offset; |
7175 | vm_map_offset_t addr; |
7176 | vm_page_t m; |
7177 | kern_return_t kr; |
7178 | int isVectorUPL = 0, curr_upl = 0; |
7179 | upl_t vector_upl = NULL; |
7180 | mach_vm_offset_t vector_upl_dst_addr = 0; |
7181 | vm_map_t vector_upl_submap = NULL; |
7182 | upl_offset_t subupl_offset = 0; |
7183 | upl_size_t subupl_size = 0; |
7184 | |
7185 | if (upl == UPL_NULL) { |
7186 | return KERN_INVALID_ARGUMENT; |
7187 | } |
7188 | |
7189 | DEBUG4K_UPL("map %p upl %p flags 0x%x object %p offset 0x%llx (uploff: 0x%llx) size 0x%x (uplsz: 0x%x) \n" , map, upl, upl->flags, upl->map_object, offset_to_map, upl->u_offset, size_to_map, upl->u_size); |
7190 | assert(map == kernel_map); |
7191 | |
7192 | if ((isVectorUPL = vector_upl_is_valid(upl))) { |
7193 | int mapped = 0, valid_upls = 0; |
7194 | vector_upl = upl; |
7195 | |
7196 | upl_lock(vector_upl); |
for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
upl = vector_upl_subupl_byindex(vector_upl, curr_upl);
7199 | if (upl == NULL) { |
7200 | continue; |
7201 | } |
7202 | valid_upls++; |
7203 | if (UPL_PAGE_LIST_MAPPED & upl->flags) { |
7204 | mapped++; |
7205 | } |
7206 | } |
7207 | |
7208 | if (mapped) { |
7209 | if (mapped != valid_upls) { |
7210 | panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped" , mapped, valid_upls); |
7211 | } else { |
7212 | upl_unlock(vector_upl); |
7213 | return KERN_FAILURE; |
7214 | } |
7215 | } |
7216 | |
7217 | if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) { |
7218 | panic("TODO4K: vector UPL not implemented" ); |
7219 | } |
7220 | |
vector_upl_submap = kmem_suballoc(map, &vector_upl_dst_addr,
vector_upl->u_size, VM_MAP_CREATE_DEFAULT,
VM_FLAGS_ANYWHERE, KMS_NOFAIL | KMS_DATA,
VM_KERN_MEMORY_NONE).kmr_submap;
7225 | map = vector_upl_submap; |
7226 | vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); |
7227 | curr_upl = 0; |
7228 | } else { |
7229 | upl_lock(upl); |
7230 | } |
7231 | |
7232 | process_upl_to_enter: |
7233 | if (isVectorUPL) { |
if (curr_upl == vector_upl_max_upls(vector_upl)) {
7235 | *dst_addr = vector_upl_dst_addr; |
7236 | upl_unlock(vector_upl); |
7237 | return KERN_SUCCESS; |
7238 | } |
upl = vector_upl_subupl_byindex(vector_upl, curr_upl++);
7240 | if (upl == NULL) { |
7241 | goto process_upl_to_enter; |
7242 | } |
7243 | |
7244 | vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); |
7245 | *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); |
7246 | } else { |
7247 | /* |
7248 | * check to see if already mapped |
7249 | */ |
7250 | if (UPL_PAGE_LIST_MAPPED & upl->flags) { |
7251 | upl_unlock(upl); |
7252 | return KERN_FAILURE; |
7253 | } |
7254 | } |
7255 | |
7256 | if ((!(upl->flags & UPL_SHADOWED)) && |
7257 | ((upl->flags & UPL_HAS_BUSY) || |
7258 | !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { |
7259 | vm_object_t object; |
7260 | vm_page_t alias_page; |
7261 | vm_object_offset_t new_offset; |
7262 | unsigned int pg_num; |
7263 | |
7264 | size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)); |
7265 | object = upl->map_object; |
7266 | upl->map_object = vm_object_allocate(vm_object_round_page(size)); |
7267 | |
7268 | vm_object_lock(upl->map_object); |
7269 | |
7270 | upl->map_object->shadow = object; |
VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
7273 | upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; |
7274 | upl->map_object->vo_shadow_offset = upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset; |
7275 | assertf(page_aligned(upl->map_object->vo_shadow_offset), |
7276 | "object %p shadow_offset 0x%llx" , |
7277 | upl->map_object, |
7278 | (uint64_t)upl->map_object->vo_shadow_offset); |
7279 | upl->map_object->wimg_bits = object->wimg_bits; |
7280 | offset = upl->map_object->vo_shadow_offset; |
7281 | new_offset = 0; |
7282 | |
7283 | upl->flags |= UPL_SHADOWED; |
7284 | |
7285 | while (size) { |
7286 | pg_num = (unsigned int) (new_offset / PAGE_SIZE); |
7287 | assert(pg_num == new_offset / PAGE_SIZE); |
7288 | |
if (bitmap_test(upl->lite_list, pg_num)) {
7290 | alias_page = vm_page_grab_fictitious(TRUE); |
7291 | |
7292 | vm_object_lock(object); |
7293 | |
7294 | m = vm_page_lookup(object, offset); |
7295 | if (m == VM_PAGE_NULL) { |
7296 | panic("vm_upl_map: page missing" ); |
7297 | } |
7298 | |
7299 | /* |
7300 | * Convert the fictitious page to a private |
7301 | * shadow of the real page. |
7302 | */ |
7303 | assert(alias_page->vmp_fictitious); |
7304 | alias_page->vmp_fictitious = FALSE; |
7305 | alias_page->vmp_private = TRUE; |
7306 | alias_page->vmp_free_when_done = TRUE; |
7307 | /* |
7308 | * since m is a page in the upl it must |
7309 | * already be wired or BUSY, so it's |
7310 | * safe to assign the underlying physical |
7311 | * page to the alias |
7312 | */ |
7313 | VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m)); |
7314 | |
7315 | vm_object_unlock(object); |
7316 | |
7317 | vm_page_lockspin_queues(); |
vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
vm_page_unlock_queues();

vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
7322 | |
7323 | assert(!alias_page->vmp_wanted); |
7324 | alias_page->vmp_busy = FALSE; |
7325 | alias_page->vmp_absent = FALSE; |
7326 | } |
7327 | size -= PAGE_SIZE; |
7328 | offset += PAGE_SIZE_64; |
7329 | new_offset += PAGE_SIZE_64; |
7330 | } |
7331 | vm_object_unlock(upl->map_object); |
7332 | } |
7333 | if (upl->flags & UPL_SHADOWED) { |
7334 | if (isVectorUPL) { |
7335 | offset = 0; |
7336 | } else { |
7337 | offset = offset_to_map; |
7338 | } |
7339 | } else { |
7340 | offset = upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)) - upl->map_object->paging_offset; |
7341 | if (!isVectorUPL) { |
7342 | offset += offset_to_map; |
7343 | } |
7344 | } |
7345 | |
7346 | if (isVectorUPL) { |
7347 | size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)); |
7348 | } else { |
7349 | size = MIN(upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)), size_to_map); |
7350 | } |
7351 | |
7352 | vm_object_reference(upl->map_object); |
7353 | |
7354 | if (!isVectorUPL) { |
7355 | *dst_addr = 0; |
7356 | /* |
7357 | * NEED A UPL_MAP ALIAS |
7358 | */ |
kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(.vm_tag = VM_KERN_MEMORY_OSFMK),
upl->map_object, offset, FALSE,
prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);

if (kr != KERN_SUCCESS) {
vm_object_deallocate(upl->map_object);
7366 | upl_unlock(upl); |
7367 | return kr; |
7368 | } |
7369 | } else { |
kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
VM_MAP_KERNEL_FLAGS_FIXED(.vm_tag = VM_KERN_MEMORY_OSFMK),
upl->map_object, offset, FALSE,
prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);
if (kr) {
panic("vm_map_enter failed for a Vector UPL");
7376 | } |
7377 | } |
7378 | upl->u_mapped_size = (upl_size_t) size; /* When we allow multiple submappings of the UPL */ |
7379 | /* this will have to be an increment rather than */ |
7380 | /* an assignment. */ |
7381 | vm_object_lock(upl->map_object); |
7382 | |
7383 | for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { |
m = vm_page_lookup(upl->map_object, offset);
7385 | |
7386 | if (m) { |
7387 | m->vmp_pmapped = TRUE; |
7388 | |
7389 | /* |
7390 | * CODE SIGNING ENFORCEMENT: page has been wpmapped, |
7391 | * but only in kernel space. If this was on a user map, |
7392 | * we'd have to set the wpmapped bit. |
7393 | */ |
7394 | /* m->vmp_wpmapped = TRUE; */ |
7395 | assert(map->pmap == kernel_pmap); |
7396 | |
kr = pmap_enter_check(map->pmap, addr, m, prot_to_map, VM_PROT_NONE, 0, TRUE);
7398 | |
7399 | assert(kr == KERN_SUCCESS); |
7400 | #if KASAN |
7401 | kasan_notify_address(addr, PAGE_SIZE_64); |
7402 | #endif |
7403 | } |
7404 | offset += PAGE_SIZE_64; |
7405 | } |
7406 | vm_object_unlock(upl->map_object); |
7407 | |
7408 | /* |
7409 | * hold a reference for the mapping |
7410 | */ |
7411 | upl->ref_count++; |
7412 | upl->flags |= UPL_PAGE_LIST_MAPPED; |
7413 | upl->kaddr = (vm_offset_t) *dst_addr; |
7414 | assert(upl->kaddr == *dst_addr); |
7415 | |
7416 | if (isVectorUPL) { |
7417 | goto process_upl_to_enter; |
7418 | } |
7419 | |
7420 | if (!isVectorUPL) { |
7421 | vm_map_offset_t addr_adjustment; |
7422 | |
7423 | addr_adjustment = (vm_map_offset_t)(upl->u_offset - upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map))); |
7424 | if (addr_adjustment) { |
7425 | assert(VM_MAP_PAGE_MASK(map) != PAGE_MASK); |
7426 | DEBUG4K_UPL("dst_addr 0x%llx (+ 0x%llx) -> 0x%llx\n" , (uint64_t)*dst_addr, (uint64_t)addr_adjustment, (uint64_t)(*dst_addr + addr_adjustment)); |
7427 | *dst_addr += addr_adjustment; |
7428 | } |
7429 | } |
7430 | |
7431 | upl_unlock(upl); |
7432 | |
7433 | return KERN_SUCCESS; |
7434 | } |
7435 | |
7436 | kern_return_t |
7437 | vm_map_enter_upl( |
7438 | vm_map_t map, |
7439 | upl_t upl, |
7440 | vm_map_offset_t *dst_addr) |
7441 | { |
7442 | upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)); |
return vm_map_enter_upl_range(map, upl, 0, upl_size, VM_PROT_DEFAULT, dst_addr);
7444 | } |
7445 | |
7446 | /* |
7447 | * Internal routine to remove a UPL mapping from a VM map. |
7448 | * |
7449 | * XXX - This should just be doable through a standard |
7450 | * vm_map_remove() operation. Otherwise, implicit clean-up |
7451 | * of the target map won't be able to correctly remove |
7452 | * these (and release the reference on the UPL). Having |
7453 | * to do this means we can't map these into user-space |
7454 | * maps yet. |
7455 | */ |
7456 | kern_return_t |
7457 | vm_map_remove_upl_range( |
7458 | vm_map_t map, |
7459 | upl_t upl, |
7460 | __unused vm_object_offset_t offset_to_unmap, |
7461 | __unused upl_size_t size_to_unmap) |
7462 | { |
7463 | vm_address_t addr; |
7464 | upl_size_t size; |
7465 | int isVectorUPL = 0, curr_upl = 0; |
7466 | upl_t vector_upl = NULL; |
7467 | |
7468 | if (upl == UPL_NULL) { |
7469 | return KERN_INVALID_ARGUMENT; |
7470 | } |
7471 | |
7472 | if ((isVectorUPL = vector_upl_is_valid(upl))) { |
7473 | int unmapped = 0, valid_upls = 0; |
7474 | vector_upl = upl; |
7475 | upl_lock(vector_upl); |
for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
upl = vector_upl_subupl_byindex(vector_upl, curr_upl);
7478 | if (upl == NULL) { |
7479 | continue; |
7480 | } |
7481 | valid_upls++; |
7482 | if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) { |
7483 | unmapped++; |
7484 | } |
7485 | } |
7486 | |
7487 | if (unmapped) { |
7488 | if (unmapped != valid_upls) { |
7489 | panic("%d of the %d sub-upls within the Vector UPL is/are not mapped" , unmapped, valid_upls); |
7490 | } else { |
7491 | upl_unlock(vector_upl); |
7492 | return KERN_FAILURE; |
7493 | } |
7494 | } |
7495 | curr_upl = 0; |
7496 | } else { |
7497 | upl_lock(upl); |
7498 | } |
7499 | |
7500 | process_upl_to_remove: |
7501 | if (isVectorUPL) { |
if (curr_upl == vector_upl_max_upls(vector_upl)) {
7503 | vm_map_t v_upl_submap; |
7504 | vm_offset_t v_upl_submap_dst_addr; |
7505 | vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); |
7506 | |
kmem_free_guard(map, v_upl_submap_dst_addr,
vector_upl->u_size, KMF_NONE, KMEM_GUARD_SUBMAP);
vm_map_deallocate(v_upl_submap);
7510 | upl_unlock(vector_upl); |
7511 | return KERN_SUCCESS; |
7512 | } |
7513 | |
upl = vector_upl_subupl_byindex(vector_upl, curr_upl++);
7515 | if (upl == NULL) { |
7516 | goto process_upl_to_remove; |
7517 | } |
7518 | } |
7519 | |
7520 | if (upl->flags & UPL_PAGE_LIST_MAPPED) { |
7521 | addr = upl->kaddr; |
7522 | size = upl->u_mapped_size; |
7523 | |
7524 | assert(upl->ref_count > 1); |
7525 | upl->ref_count--; /* removing mapping ref */ |
7526 | |
7527 | upl->flags &= ~UPL_PAGE_LIST_MAPPED; |
7528 | upl->kaddr = (vm_offset_t) 0; |
7529 | upl->u_mapped_size = 0; |
7530 | |
7531 | if (isVectorUPL) { |
7532 | /* |
7533 | * If it's a Vectored UPL, we'll be removing the entire |
7534 | * submap anyways, so no need to remove individual UPL |
7535 | * element mappings from within the submap |
7536 | */ |
7537 | goto process_upl_to_remove; |
7538 | } |
7539 | |
7540 | upl_unlock(upl); |
7541 | |
7542 | vm_map_remove(map, |
7543 | vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)), |
7544 | vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map))); |
7545 | return KERN_SUCCESS; |
7546 | } |
7547 | upl_unlock(upl); |
7548 | |
7549 | return KERN_FAILURE; |
7550 | } |
7551 | |
7552 | kern_return_t |
7553 | vm_map_remove_upl( |
7554 | vm_map_t map, |
7555 | upl_t upl) |
7556 | { |
7557 | upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)); |
return vm_map_remove_upl_range(map, upl, 0, upl_size);
7559 | } |
7560 | |
7561 | kern_return_t |
7562 | upl_commit_range( |
7563 | upl_t upl, |
7564 | upl_offset_t offset, |
7565 | upl_size_t size, |
7566 | int flags, |
7567 | upl_page_info_t *page_list, |
7568 | mach_msg_type_number_t count, |
7569 | boolean_t *empty) |
7570 | { |
7571 | upl_size_t xfer_size, subupl_size; |
7572 | vm_object_t shadow_object; |
7573 | vm_object_t object; |
7574 | vm_object_t m_object; |
7575 | vm_object_offset_t target_offset; |
7576 | upl_offset_t subupl_offset = offset; |
7577 | int entry; |
7578 | int occupied; |
7579 | int clear_refmod = 0; |
7580 | int pgpgout_count = 0; |
7581 | struct vm_page_delayed_work dw_array; |
7582 | struct vm_page_delayed_work *dwp, *dwp_start; |
7583 | bool dwp_finish_ctx = TRUE; |
7584 | int dw_count; |
7585 | int dw_limit; |
7586 | int isVectorUPL = 0; |
7587 | upl_t vector_upl = NULL; |
7588 | boolean_t should_be_throttled = FALSE; |
7589 | |
7590 | vm_page_t nxt_page = VM_PAGE_NULL; |
7591 | int fast_path_possible = 0; |
7592 | int fast_path_full_commit = 0; |
7593 | int throttle_page = 0; |
7594 | int unwired_count = 0; |
7595 | int local_queue_count = 0; |
7596 | vm_page_t first_local, last_local; |
7597 | vm_object_offset_t obj_start, obj_end, obj_offset; |
7598 | kern_return_t kr = KERN_SUCCESS; |
7599 | |
7600 | // DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p offset 0x%llx size 0x%llx flags 0x%x\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object, (uint64_t)offset, (uint64_t)size, flags); |
7601 | |
7602 | dwp_start = dwp = NULL; |
7603 | |
7604 | subupl_size = size; |
7605 | *empty = FALSE; |
7606 | |
7607 | if (upl == UPL_NULL) { |
7608 | return KERN_INVALID_ARGUMENT; |
7609 | } |
7610 | |
7611 | dw_count = 0; |
7612 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); |
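/*
 * grab a delayed work context if one is available; otherwise
 * fall back to a single on-stack entry so the commit can still
 * make progress
 */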
7613 | dwp_start = vm_page_delayed_work_get_ctx(); |
7614 | if (dwp_start == NULL) { |
7615 | dwp_start = &dw_array; |
7616 | dw_limit = 1; |
7617 | dwp_finish_ctx = FALSE; |
7618 | } |
7619 | |
7620 | dwp = dwp_start; |
7621 | |
7622 | if (count == 0) { |
7623 | page_list = NULL; |
7624 | } |
7625 | |
7626 | if ((isVectorUPL = vector_upl_is_valid(upl))) { |
7627 | vector_upl = upl; |
7628 | upl_lock(vector_upl); |
7629 | } else { |
7630 | upl_lock(upl); |
7631 | } |
7632 | |
7633 | process_upl_to_commit: |
7634 | |
7635 | if (isVectorUPL) { |
7636 | size = subupl_size; |
7637 | offset = subupl_offset; |
7638 | if (size == 0) { |
7639 | upl_unlock(vector_upl); |
7640 | kr = KERN_SUCCESS; |
7641 | goto done; |
7642 | } |
7643 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); |
7644 | if (upl == NULL) { |
7645 | upl_unlock(vector_upl); |
7646 | kr = KERN_FAILURE; |
7647 | goto done; |
7648 | } |
7649 | page_list = upl->page_list; |
7650 | subupl_size -= size; |
7651 | subupl_offset += size; |
7652 | } |
7653 | |
7654 | #if UPL_DEBUG |
7655 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { |
7656 | upl->upl_commit_records[upl->upl_commit_index].c_btref = btref_get(__builtin_frame_address(0), 0); |
7657 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; |
7658 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); |
7659 | |
7660 | upl->upl_commit_index++; |
7661 | } |
7662 | #endif |
7663 | if (upl->flags & UPL_DEVICE_MEMORY) { |
7664 | xfer_size = 0; |
7665 | } else if ((offset + size) <= upl_adjusted_size(upl, PAGE_MASK)) { |
7666 | xfer_size = size; |
7667 | } else { |
7668 | if (!isVectorUPL) { |
7669 | upl_unlock(upl); |
7670 | } else { |
7671 | upl_unlock(vector_upl); |
7672 | } |
7673 | DEBUG4K_ERROR("upl %p (u_offset 0x%llx u_size 0x%x) offset 0x%x size 0x%x\n" , upl, upl->u_offset, upl->u_size, offset, size); |
7674 | kr = KERN_FAILURE; |
7675 | goto done; |
7676 | } |
7677 | if (upl->flags & UPL_SET_DIRTY) { |
7678 | flags |= UPL_COMMIT_SET_DIRTY; |
7679 | } |
7680 | if (upl->flags & UPL_CLEAR_DIRTY) { |
7681 | flags |= UPL_COMMIT_CLEAR_DIRTY; |
7682 | } |
7683 | |
7684 | object = upl->map_object; |
7685 | |
7686 | if (upl->flags & UPL_SHADOWED) { |
7687 | vm_object_lock(object); |
7688 | shadow_object = object->shadow; |
7689 | } else { |
7690 | shadow_object = object; |
7691 | } |
7692 | entry = offset / PAGE_SIZE; |
7693 | target_offset = (vm_object_offset_t)offset; |
7694 | |
7695 | if (upl->flags & UPL_KERNEL_OBJECT) { |
7696 | vm_object_lock_shared(shadow_object); |
7697 | } else { |
7698 | vm_object_lock(shadow_object); |
7699 | } |
7700 | |
7701 | VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object); |
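
/*
 * if this UPL was created with access blocked, committing it
 * unblocks the object and wakes up anyone waiting on it
 */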
7702 | |
7703 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
7704 | assert(shadow_object->blocked_access); |
7705 | shadow_object->blocked_access = FALSE; |
7706 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); |
7707 | } |
7708 | |
7709 | if (shadow_object->code_signed) { |
7710 | /* |
7711 | * CODE SIGNING: |
7712 | * If the object is code-signed, do not let this UPL tell |
7713 | * us if the pages are valid or not. Let the pages be |
7714 | * validated by VM the normal way (when they get mapped or |
7715 | * copied). |
7716 | */ |
7717 | flags &= ~UPL_COMMIT_CS_VALIDATED; |
7718 | } |
7719 | if (!page_list) { |
7720 | /* |
7721 | * No page list to get the code-signing info from !? |
7722 | */ |
7723 | flags &= ~UPL_COMMIT_CS_VALIDATED; |
7724 | } |
7725 | if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal) { |
7726 | should_be_throttled = TRUE; |
7727 | } |
7728 | |
7729 | if ((upl->flags & UPL_IO_WIRE) && |
7730 | !(flags & UPL_COMMIT_FREE_ABSENT) && |
7731 | !isVectorUPL && |
7732 | shadow_object->purgable != VM_PURGABLE_VOLATILE && |
7733 | shadow_object->purgable != VM_PURGABLE_EMPTY) { |
7734 | if (!vm_page_queue_empty(&shadow_object->memq)) { |
7735 | if (shadow_object->internal && size == shadow_object->vo_size) { |
7736 | nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq); |
7737 | fast_path_full_commit = 1; |
7738 | } |
7739 | fast_path_possible = 1; |
7740 | |
7741 | if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal && |
7742 | (shadow_object->purgable == VM_PURGABLE_DENY || |
7743 | shadow_object->purgable == VM_PURGABLE_NONVOLATILE || |
7744 | shadow_object->purgable == VM_PURGABLE_VOLATILE)) { |
7745 | throttle_page = 1; |
7746 | } |
7747 | } |
7748 | } |
7749 | first_local = VM_PAGE_NULL; |
7750 | last_local = VM_PAGE_NULL; |
7751 | |
7752 | obj_start = target_offset + upl->u_offset - shadow_object->paging_offset; |
7753 | obj_end = obj_start + xfer_size; |
7754 | obj_start = vm_object_trunc_page(obj_start); |
7755 | obj_end = vm_object_round_page(obj_end); |
7756 | for (obj_offset = obj_start; |
7757 | obj_offset < obj_end; |
7758 | obj_offset += PAGE_SIZE) { |
7759 | vm_page_t t, m; |
7760 | |
7761 | dwp->dw_mask = 0; |
7762 | clear_refmod = 0; |
7763 | |
7764 | m = VM_PAGE_NULL; |
7765 | |
7766 | if (upl->flags & UPL_LITE) { |
7767 | unsigned int pg_num; |
7768 | |
7769 | if (nxt_page != VM_PAGE_NULL) { |
7770 | m = nxt_page; |
7771 | nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq); |
7772 | target_offset = m->vmp_offset; |
7773 | } |
7774 | pg_num = (unsigned int) (target_offset / PAGE_SIZE); |
7775 | assert(pg_num == target_offset / PAGE_SIZE); |
7776 | |
			if (bitmap_test(upl->lite_list, pg_num)) {
				bitmap_clear(upl->lite_list, pg_num);

				if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) {
					m = vm_page_lookup(shadow_object, obj_offset);
				}
7782 | } |
7783 | } else { |
7784 | m = NULL; |
7785 | } |
7786 | } |
7787 | if (upl->flags & UPL_SHADOWED) { |
			if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7789 | t->vmp_free_when_done = FALSE; |
7790 | |
7791 | VM_PAGE_FREE(t); |
7792 | |
7793 | if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) { |
					m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7795 | } |
7796 | } |
7797 | } |
7798 | if (m == VM_PAGE_NULL) { |
7799 | goto commit_next_page; |
7800 | } |
7801 | |
7802 | m_object = VM_PAGE_OBJECT(m); |
7803 | |
7804 | if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) { |
7805 | assert(m->vmp_busy); |
7806 | |
7807 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); |
7808 | goto commit_next_page; |
7809 | } |
7810 | |
7811 | if (flags & UPL_COMMIT_CS_VALIDATED) { |
7812 | /* |
7813 | * CODE SIGNING: |
7814 | * Set the code signing bits according to |
7815 | * what the UPL says they should be. |
7816 | */ |
7817 | m->vmp_cs_validated |= page_list[entry].cs_validated; |
7818 | m->vmp_cs_tainted |= page_list[entry].cs_tainted; |
7819 | m->vmp_cs_nx |= page_list[entry].cs_nx; |
7820 | } |
7821 | if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) { |
7822 | m->vmp_written_by_kernel = TRUE; |
7823 | } |
7824 | |
7825 | if (upl->flags & UPL_IO_WIRE) { |
7826 | if (page_list) { |
7827 | page_list[entry].phys_addr = 0; |
7828 | } |
7829 | |
7830 | if (flags & UPL_COMMIT_SET_DIRTY) { |
7831 | SET_PAGE_DIRTY(m, FALSE); |
7832 | } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { |
7833 | m->vmp_dirty = FALSE; |
7834 | |
7835 | if (!(flags & UPL_COMMIT_CS_VALIDATED) && |
7836 | m->vmp_cs_validated && |
7837 | m->vmp_cs_tainted != VMP_CS_ALL_TRUE) { |
7838 | /* |
7839 | * CODE SIGNING: |
7840 | * This page is no longer dirty |
7841 | * but could have been modified, |
7842 | * so it will need to be |
7843 | * re-validated. |
7844 | */ |
7845 | m->vmp_cs_validated = VMP_CS_ALL_FALSE; |
7846 | |
7847 | VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1); |
7848 | |
					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7850 | } |
7851 | clear_refmod |= VM_MEM_MODIFIED; |
7852 | } |
7853 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
7854 | /* |
7855 | * We blocked access to the pages in this UPL. |
7856 | * Clear the "busy" bit and wake up any waiter |
7857 | * for this page. |
7858 | */ |
7859 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); |
7860 | } |
7861 | if (fast_path_possible) { |
7862 | assert(m_object->purgable != VM_PURGABLE_EMPTY); |
7863 | assert(m_object->purgable != VM_PURGABLE_VOLATILE); |
7864 | if (m->vmp_absent) { |
7865 | assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q); |
7866 | assert(m->vmp_wire_count == 0); |
7867 | assert(m->vmp_busy); |
7868 | |
7869 | m->vmp_absent = FALSE; |
7870 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); |
7871 | } else { |
7872 | if (m->vmp_wire_count == 0) { |
7873 | panic("wire_count == 0, m = %p, obj = %p" , m, shadow_object); |
7874 | } |
7875 | assert(m->vmp_q_state == VM_PAGE_IS_WIRED); |
7876 | |
7877 | /* |
7878 | * XXX FBDP need to update some other |
7879 | * counters here (purgeable_wired_count) |
7880 | * (ledgers), ... |
7881 | */ |
7882 | assert(m->vmp_wire_count > 0); |
7883 | m->vmp_wire_count--; |
7884 | |
7885 | if (m->vmp_wire_count == 0) { |
7886 | m->vmp_q_state = VM_PAGE_NOT_ON_Q; |
7887 | unwired_count++; |
7888 | } |
7889 | } |
7890 | if (m->vmp_wire_count == 0) { |
7891 | assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0); |
7892 | |
7893 | if (last_local == VM_PAGE_NULL) { |
7894 | assert(first_local == VM_PAGE_NULL); |
7895 | |
7896 | last_local = m; |
7897 | first_local = m; |
7898 | } else { |
7899 | assert(first_local != VM_PAGE_NULL); |
7900 | |
7901 | m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local); |
7902 | first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m); |
7903 | first_local = m; |
7904 | } |
7905 | local_queue_count++; |
7906 | |
7907 | if (throttle_page) { |
7908 | m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q; |
7909 | } else { |
7910 | if (flags & UPL_COMMIT_INACTIVATE) { |
7911 | if (shadow_object->internal) { |
7912 | m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q; |
7913 | } else { |
7914 | m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q; |
7915 | } |
7916 | } else { |
7917 | m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q; |
7918 | } |
7919 | } |
7920 | } |
7921 | } else { |
7922 | if (flags & UPL_COMMIT_INACTIVATE) { |
7923 | dwp->dw_mask |= DW_vm_page_deactivate_internal; |
7924 | clear_refmod |= VM_MEM_REFERENCED; |
7925 | } |
7926 | if (m->vmp_absent) { |
7927 | if (flags & UPL_COMMIT_FREE_ABSENT) { |
7928 | dwp->dw_mask |= DW_vm_page_free; |
7929 | } else { |
7930 | m->vmp_absent = FALSE; |
7931 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); |
7932 | |
7933 | if (!(dwp->dw_mask & DW_vm_page_deactivate_internal)) { |
7934 | dwp->dw_mask |= DW_vm_page_activate; |
7935 | } |
7936 | } |
7937 | } else { |
7938 | dwp->dw_mask |= DW_vm_page_unwire; |
7939 | } |
7940 | } |
7941 | goto commit_next_page; |
7942 | } |
7943 | assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR); |
7944 | |
7945 | if (page_list) { |
7946 | page_list[entry].phys_addr = 0; |
7947 | } |
7948 | |
7949 | /* |
7950 | * make sure to clear the hardware |
7951 | * modify or reference bits before |
7952 | * releasing the BUSY bit on this page |
7953 | * otherwise we risk losing a legitimate |
7954 | * change of state |
7955 | */ |
7956 | if (flags & UPL_COMMIT_CLEAR_DIRTY) { |
7957 | m->vmp_dirty = FALSE; |
7958 | |
7959 | clear_refmod |= VM_MEM_MODIFIED; |
7960 | } |
7961 | if (m->vmp_laundry) { |
7962 | dwp->dw_mask |= DW_vm_pageout_throttle_up; |
7963 | } |
7964 | |
7965 | if (VM_PAGE_WIRED(m)) { |
7966 | m->vmp_free_when_done = FALSE; |
7967 | } |
7968 | |
7969 | if (!(flags & UPL_COMMIT_CS_VALIDATED) && |
7970 | m->vmp_cs_validated && |
7971 | m->vmp_cs_tainted != VMP_CS_ALL_TRUE) { |
7972 | /* |
7973 | * CODE SIGNING: |
7974 | * This page is no longer dirty |
7975 | * but could have been modified, |
7976 | * so it will need to be |
7977 | * re-validated. |
7978 | */ |
7979 | m->vmp_cs_validated = VMP_CS_ALL_FALSE; |
7980 | |
7981 | VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1); |
7982 | |
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7984 | } |
7985 | if (m->vmp_overwriting) { |
7986 | /* |
7987 | * the (COPY_OUT_FROM == FALSE) request_page_list case |
7988 | */ |
7989 | if (m->vmp_busy) { |
7990 | #if CONFIG_PHANTOM_CACHE |
7991 | if (m->vmp_absent && !m_object->internal) { |
7992 | dwp->dw_mask |= DW_vm_phantom_cache_update; |
7993 | } |
7994 | #endif |
7995 | m->vmp_absent = FALSE; |
7996 | |
7997 | dwp->dw_mask |= DW_clear_busy; |
7998 | } else { |
7999 | /* |
8000 | * alternate (COPY_OUT_FROM == FALSE) page_list case |
8001 | * Occurs when the original page was wired |
8002 | * at the time of the list request |
8003 | */ |
8004 | assert(VM_PAGE_WIRED(m)); |
8005 | |
8006 | dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ |
8007 | } |
8008 | m->vmp_overwriting = FALSE; |
8009 | } |
8010 | m->vmp_cleaning = FALSE; |
8011 | |
8012 | if (m->vmp_free_when_done) { |
8013 | /* |
8014 | * With the clean queue enabled, UPL_PAGEOUT should |
8015 | * no longer set the pageout bit. Its pages now go |
8016 | * to the clean queue. |
8017 | * |
8018 | * We don't use the cleaned Q anymore and so this |
8019 | * assert isn't correct. The code for the clean Q |
8020 | * still exists and might be used in the future. If we |
8021 | * go back to the cleaned Q, we will re-enable this |
8022 | * assert. |
8023 | * |
8024 | * assert(!(upl->flags & UPL_PAGEOUT)); |
8025 | */ |
8026 | assert(!m_object->internal); |
8027 | |
8028 | m->vmp_free_when_done = FALSE; |
8029 | |
8030 | if ((flags & UPL_COMMIT_SET_DIRTY) || |
			    (m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
8032 | /* |
8033 | * page was re-dirtied after we started |
8034 | * the pageout... reactivate it since |
8035 | * we don't know whether the on-disk |
8036 | * copy matches what is now in memory |
8037 | */ |
8038 | SET_PAGE_DIRTY(m, FALSE); |
8039 | |
8040 | dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; |
8041 | |
8042 | if (upl->flags & UPL_PAGEOUT) { |
8043 | counter_inc(&vm_statistics_reactivations); |
8044 | DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); |
8045 | } |
8046 | } else if (m->vmp_busy && !(upl->flags & UPL_HAS_BUSY)) { |
8047 | /* |
8048 | * Someone else might still be handling this |
8049 | * page (vm_fault() for example), so let's not |
8050 | * free it or "un-busy" it! |
8051 | * Put that page in the "speculative" queue |
8052 | * for now (since we would otherwise have freed |
8053 | * it) and let whoever is keeping the page |
8054 | * "busy" move it if needed when they're done |
8055 | * with it. |
8056 | */ |
8057 | dwp->dw_mask |= DW_vm_page_speculate; |
8058 | } else { |
8059 | /* |
8060 | * page has been successfully cleaned |
8061 | * go ahead and free it for other use |
8062 | */ |
8063 | if (m_object->internal) { |
8064 | DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); |
8065 | } else { |
8066 | DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); |
8067 | } |
8068 | m->vmp_dirty = FALSE; |
8069 | if (!(upl->flags & UPL_HAS_BUSY)) { |
8070 | assert(!m->vmp_busy); |
8071 | } |
8072 | m->vmp_busy = TRUE; |
8073 | |
8074 | dwp->dw_mask |= DW_vm_page_free; |
8075 | } |
8076 | goto commit_next_page; |
8077 | } |
		/*
		 * It is part of the semantics of COPYOUT_FROM UPLs that a
		 * commit implies a cache sync between the vm page and the
		 * backing store; this can be used to strip the precious bit
		 * as well as clean the page.
		 */
8085 | if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) { |
8086 | m->vmp_precious = FALSE; |
8087 | } |
8088 | |
8089 | if (flags & UPL_COMMIT_SET_DIRTY) { |
8090 | SET_PAGE_DIRTY(m, FALSE); |
8091 | } else { |
8092 | m->vmp_dirty = FALSE; |
8093 | } |
8094 | |
8095 | /* with the clean queue on, move *all* cleaned pages to the clean queue */ |
8096 | if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) { |
8097 | pgpgout_count++; |
8098 | |
8099 | counter_inc(&vm_statistics_pageouts); |
8100 | DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); |
8101 | |
8102 | dwp->dw_mask |= DW_enqueue_cleaned; |
8103 | } else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) { |
8104 | /* |
8105 | * page coming back in from being 'frozen'... |
8106 | * it was dirty before it was frozen, so keep it so |
8107 | * the vm_page_activate will notice that it really belongs |
8108 | * on the throttle queue and put it there |
8109 | */ |
8110 | SET_PAGE_DIRTY(m, FALSE); |
8111 | dwp->dw_mask |= DW_vm_page_activate; |
8112 | } else { |
8113 | if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) { |
8114 | dwp->dw_mask |= DW_vm_page_deactivate_internal; |
8115 | clear_refmod |= VM_MEM_REFERENCED; |
8116 | } else if (!VM_PAGE_PAGEABLE(m)) { |
8117 | if (m->vmp_clustered || (flags & UPL_COMMIT_SPECULATE)) { |
8118 | dwp->dw_mask |= DW_vm_page_speculate; |
8119 | } else if (m->vmp_reference) { |
8120 | dwp->dw_mask |= DW_vm_page_activate; |
8121 | } else { |
8122 | dwp->dw_mask |= DW_vm_page_deactivate_internal; |
8123 | clear_refmod |= VM_MEM_REFERENCED; |
8124 | } |
8125 | } |
8126 | } |
8127 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
8128 | /* |
			 * We blocked access to the pages in this UPL.
8130 | * Clear the "busy" bit on this page before we |
8131 | * wake up any waiter. |
8132 | */ |
8133 | dwp->dw_mask |= DW_clear_busy; |
8134 | } |
8135 | /* |
		 * Wake up any thread waiting for the page to be done cleaning.
8137 | */ |
8138 | dwp->dw_mask |= DW_PAGE_WAKEUP; |
8139 | |
8140 | commit_next_page: |
8141 | if (clear_refmod) { |
			pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
8143 | } |
8144 | |
8145 | target_offset += PAGE_SIZE_64; |
8146 | xfer_size -= PAGE_SIZE; |
8147 | entry++; |
8148 | |
8149 | if (dwp->dw_mask) { |
8150 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { |
8151 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); |
8152 | |
8153 | if (dw_count >= dw_limit) { |
					vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
8155 | |
8156 | dwp = dwp_start; |
8157 | dw_count = 0; |
8158 | } |
8159 | } else { |
8160 | if (dwp->dw_mask & DW_clear_busy) { |
8161 | m->vmp_busy = FALSE; |
8162 | } |
8163 | |
8164 | if (dwp->dw_mask & DW_PAGE_WAKEUP) { |
8165 | PAGE_WAKEUP(m); |
8166 | } |
8167 | } |
8168 | } |
8169 | } |
8170 | if (dw_count) { |
		vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
8172 | dwp = dwp_start; |
8173 | dw_count = 0; |
8174 | } |
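
	/*
	 * Fast path post-processing: pages that were unwired above were
	 * collected on a private local list (first_local/last_local), so they
	 * can now be spliced onto the appropriate global page queue in one
	 * operation and the global counters adjusted once.
	 */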
8175 | |
8176 | if (fast_path_possible) { |
8177 | assert(shadow_object->purgable != VM_PURGABLE_VOLATILE); |
8178 | assert(shadow_object->purgable != VM_PURGABLE_EMPTY); |
8179 | |
8180 | if (local_queue_count || unwired_count) { |
8181 | if (local_queue_count) { |
8182 | vm_page_t first_target; |
8183 | vm_page_queue_head_t *target_queue; |
8184 | |
8185 | if (throttle_page) { |
8186 | target_queue = &vm_page_queue_throttled; |
8187 | } else { |
8188 | if (flags & UPL_COMMIT_INACTIVATE) { |
8189 | if (shadow_object->internal) { |
8190 | target_queue = &vm_page_queue_anonymous; |
8191 | } else { |
8192 | target_queue = &vm_page_queue_inactive; |
8193 | } |
8194 | } else { |
8195 | target_queue = &vm_page_queue_active; |
8196 | } |
8197 | } |
8198 | /* |
				 * Transfer the entire local queue to a regular LRU page queue.
8200 | */ |
8201 | vm_page_lockspin_queues(); |
8202 | |
8203 | first_target = (vm_page_t) vm_page_queue_first(target_queue); |
8204 | |
8205 | if (vm_page_queue_empty(target_queue)) { |
8206 | target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local); |
8207 | } else { |
8208 | first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local); |
8209 | } |
8210 | |
8211 | target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local); |
8212 | first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue); |
8213 | last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target); |
8214 | |
8215 | /* |
8216 | * Adjust the global page counts. |
8217 | */ |
8218 | if (throttle_page) { |
8219 | vm_page_throttled_count += local_queue_count; |
8220 | } else { |
8221 | if (flags & UPL_COMMIT_INACTIVATE) { |
8222 | if (shadow_object->internal) { |
8223 | vm_page_anonymous_count += local_queue_count; |
8224 | } |
8225 | vm_page_inactive_count += local_queue_count; |
8226 | |
8227 | token_new_pagecount += local_queue_count; |
8228 | } else { |
8229 | vm_page_active_count += local_queue_count; |
8230 | } |
8231 | |
8232 | if (shadow_object->internal) { |
8233 | vm_page_pageable_internal_count += local_queue_count; |
8234 | } else { |
8235 | vm_page_pageable_external_count += local_queue_count; |
8236 | } |
8237 | } |
8238 | } else { |
8239 | vm_page_lockspin_queues(); |
8240 | } |
8241 | if (unwired_count) { |
8242 | vm_page_wire_count -= unwired_count; |
8243 | VM_CHECK_MEMORYSTATUS; |
8244 | } |
8245 | vm_page_unlock_queues(); |
8246 | |
8247 | VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count); |
8248 | } |
8249 | } |
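
	/*
	 * Determine whether any pages remain associated with this UPL:
	 * device-memory UPLs never hold pages, lite UPLs track theirs in the
	 * lite_list bitmap, and shadowed UPLs keep them on the map object's
	 * memq.
	 */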
8250 | |
8251 | if (upl->flags & UPL_DEVICE_MEMORY) { |
8252 | occupied = 0; |
8253 | } else if (upl->flags & UPL_LITE) { |
8254 | uint32_t pages = (uint32_t)atop(upl_adjusted_size(upl, PAGE_MASK)); |
8255 | |
8256 | occupied = !fast_path_full_commit && |
		    !bitmap_is_empty(upl->lite_list, pages);
8258 | } else { |
8259 | occupied = !vm_page_queue_empty(&upl->map_object->memq); |
8260 | } |
8261 | if (occupied == 0) { |
8262 | /* |
8263 | * If this UPL element belongs to a Vector UPL and is |
8264 | * empty, then this is the right function to deallocate |
		 * it. So go ahead and set the *empty variable. The flag
8266 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view |
8267 | * should be considered relevant for the Vector UPL and not |
8268 | * the internal UPLs. |
8269 | */ |
8270 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) { |
8271 | *empty = TRUE; |
8272 | } |
8273 | |
8274 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { |
8275 | /* |
8276 | * this is not a paging object |
8277 | * so we need to drop the paging reference |
8278 | * that was taken when we created the UPL |
8279 | * against this object |
8280 | */ |
8281 | vm_object_activity_end(shadow_object); |
			vm_object_collapse(shadow_object, 0, TRUE);
8283 | } else { |
8284 | /* |
			 * we donated the paging reference to
8286 | * the map object... vm_pageout_object_terminate |
8287 | * will drop this reference |
8288 | */ |
8289 | } |
8290 | } |
8291 | VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag); |
8292 | vm_object_unlock(shadow_object); |
8293 | if (object != shadow_object) { |
8294 | vm_object_unlock(object); |
8295 | } |
8296 | |
8297 | if (!isVectorUPL) { |
8298 | upl_unlock(upl); |
8299 | } else { |
8300 | /* |
8301 | * If we completed our operations on an UPL that is |
8302 | * part of a Vectored UPL and if empty is TRUE, then |
8303 | * we should go ahead and deallocate this UPL element. |
8304 | * Then we check if this was the last of the UPL elements |
8305 | * within that Vectored UPL. If so, set empty to TRUE |
8306 | * so that in ubc_upl_commit_range or ubc_upl_commit, we |
8307 | * can go ahead and deallocate the Vector UPL too. |
8308 | */ |
8309 | if (*empty == TRUE) { |
8310 | *empty = vector_upl_set_subupl(vector_upl, upl, 0); |
8311 | upl_deallocate(upl); |
8312 | } |
8313 | goto process_upl_to_commit; |
8314 | } |
8315 | if (pgpgout_count) { |
8316 | DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); |
8317 | } |
8318 | |
8319 | kr = KERN_SUCCESS; |
8320 | done: |
8321 | if (dwp_start && dwp_finish_ctx) { |
		vm_page_delayed_work_finish_ctx(dwp_start);
8323 | dwp_start = dwp = NULL; |
8324 | } |
8325 | |
8326 | return kr; |
8327 | } |
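
/*
 * upl_abort_range:
 *
 * Abort the specified range of the given UPL.  Absent pages are freed or
 * marked restart/unavailable/error according to the UPL_ABORT_* flags;
 * resident pages are unbusied, optionally dumped or unwired, and returned
 * to the page queues via the delayed-work mechanism.  On return, *empty
 * reflects whether the UPL no longer holds any pages.
 */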
8328 | |
8329 | kern_return_t |
8330 | upl_abort_range( |
8331 | upl_t upl, |
8332 | upl_offset_t offset, |
8333 | upl_size_t size, |
8334 | int error, |
8335 | boolean_t *empty) |
8336 | { |
8337 | upl_size_t xfer_size, subupl_size; |
8338 | vm_object_t shadow_object; |
8339 | vm_object_t object; |
8340 | vm_object_offset_t target_offset; |
8341 | upl_offset_t subupl_offset = offset; |
8342 | int occupied; |
8343 | struct vm_page_delayed_work dw_array; |
8344 | struct vm_page_delayed_work *dwp, *dwp_start; |
8345 | bool dwp_finish_ctx = TRUE; |
8346 | int dw_count; |
8347 | int dw_limit; |
8348 | int isVectorUPL = 0; |
8349 | upl_t vector_upl = NULL; |
8350 | vm_object_offset_t obj_start, obj_end, obj_offset; |
8351 | kern_return_t kr = KERN_SUCCESS; |
8352 | |
8353 | // DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p offset 0x%llx size 0x%llx error 0x%x\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object, (uint64_t)offset, (uint64_t)size, error); |
8354 | |
8355 | dwp_start = dwp = NULL; |
8356 | |
8357 | subupl_size = size; |
8358 | *empty = FALSE; |
8359 | |
8360 | if (upl == UPL_NULL) { |
8361 | return KERN_INVALID_ARGUMENT; |
8362 | } |
8363 | |
8364 | if ((upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES)) { |
		return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
8366 | } |
8367 | |
8368 | dw_count = 0; |
8369 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); |
8370 | dwp_start = vm_page_delayed_work_get_ctx(); |
8371 | if (dwp_start == NULL) { |
8372 | dwp_start = &dw_array; |
8373 | dw_limit = 1; |
8374 | dwp_finish_ctx = FALSE; |
8375 | } |
8376 | |
8377 | dwp = dwp_start; |
8378 | |
8379 | if ((isVectorUPL = vector_upl_is_valid(upl))) { |
8380 | vector_upl = upl; |
8381 | upl_lock(vector_upl); |
8382 | } else { |
8383 | upl_lock(upl); |
8384 | } |
8385 | |
8386 | process_upl_to_abort: |
8387 | if (isVectorUPL) { |
8388 | size = subupl_size; |
8389 | offset = subupl_offset; |
8390 | if (size == 0) { |
8391 | upl_unlock(vector_upl); |
8392 | kr = KERN_SUCCESS; |
8393 | goto done; |
8394 | } |
8395 | upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); |
8396 | if (upl == NULL) { |
8397 | upl_unlock(vector_upl); |
8398 | kr = KERN_FAILURE; |
8399 | goto done; |
8400 | } |
8401 | subupl_size -= size; |
8402 | subupl_offset += size; |
8403 | } |
8404 | |
8405 | *empty = FALSE; |
8406 | |
8407 | #if UPL_DEBUG |
8408 | if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { |
8409 | upl->upl_commit_records[upl->upl_commit_index].c_btref = btref_get(__builtin_frame_address(0), 0); |
8410 | upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; |
8411 | upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); |
8412 | upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; |
8413 | |
8414 | upl->upl_commit_index++; |
8415 | } |
8416 | #endif |
8417 | if (upl->flags & UPL_DEVICE_MEMORY) { |
8418 | xfer_size = 0; |
8419 | } else if ((offset + size) <= upl_adjusted_size(upl, PAGE_MASK)) { |
8420 | xfer_size = size; |
8421 | } else { |
8422 | if (!isVectorUPL) { |
8423 | upl_unlock(upl); |
8424 | } else { |
8425 | upl_unlock(vector_upl); |
8426 | } |
		DEBUG4K_ERROR("upl %p (u_offset 0x%llx u_size 0x%x) offset 0x%x size 0x%x\n", upl, upl->u_offset, upl->u_size, offset, size);
8428 | kr = KERN_FAILURE; |
8429 | goto done; |
8430 | } |
8431 | object = upl->map_object; |
8432 | |
8433 | if (upl->flags & UPL_SHADOWED) { |
8434 | vm_object_lock(object); |
8435 | shadow_object = object->shadow; |
8436 | } else { |
8437 | shadow_object = object; |
8438 | } |
8439 | |
8440 | target_offset = (vm_object_offset_t)offset; |
8441 | |
8442 | if (upl->flags & UPL_KERNEL_OBJECT) { |
8443 | vm_object_lock_shared(shadow_object); |
8444 | } else { |
8445 | vm_object_lock(shadow_object); |
8446 | } |
8447 | |
8448 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
8449 | assert(shadow_object->blocked_access); |
8450 | shadow_object->blocked_access = FALSE; |
8451 | vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); |
8452 | } |
8453 | |
8454 | if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) { |
8455 | panic("upl_abort_range: kernel_object being DUMPED" ); |
8456 | } |
8457 | |
8458 | obj_start = target_offset + upl->u_offset - shadow_object->paging_offset; |
8459 | obj_end = obj_start + xfer_size; |
8460 | obj_start = vm_object_trunc_page(obj_start); |
8461 | obj_end = vm_object_round_page(obj_end); |
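
	/*
	 * Walk the aborted range one page at a time, undoing the state that
	 * was set up when the UPL was created and queueing any page frees,
	 * unwires or queue changes as delayed work.
	 */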
8462 | for (obj_offset = obj_start; |
8463 | obj_offset < obj_end; |
8464 | obj_offset += PAGE_SIZE) { |
8465 | vm_page_t t, m; |
8466 | unsigned int pg_num; |
8467 | boolean_t needed; |
8468 | |
8469 | pg_num = (unsigned int) (target_offset / PAGE_SIZE); |
8470 | assert(pg_num == target_offset / PAGE_SIZE); |
8471 | |
8472 | needed = FALSE; |
8473 | |
8474 | if (upl->flags & UPL_INTERNAL) { |
8475 | needed = upl->page_list[pg_num].needed; |
8476 | } |
8477 | |
8478 | dwp->dw_mask = 0; |
8479 | m = VM_PAGE_NULL; |
8480 | |
8481 | if (upl->flags & UPL_LITE) { |
			if (bitmap_test(upl->lite_list, pg_num)) {
				bitmap_clear(upl->lite_list, pg_num);

				if (!(upl->flags & UPL_KERNEL_OBJECT)) {
					m = vm_page_lookup(shadow_object, obj_offset);
				}
8487 | } |
8488 | } |
8489 | } |
8490 | if (upl->flags & UPL_SHADOWED) { |
			if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
8492 | t->vmp_free_when_done = FALSE; |
8493 | |
8494 | VM_PAGE_FREE(t); |
8495 | |
8496 | if (m == VM_PAGE_NULL) { |
					m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
8498 | } |
8499 | } |
8500 | } |
8501 | if ((upl->flags & UPL_KERNEL_OBJECT)) { |
8502 | goto abort_next_page; |
8503 | } |
8504 | |
8505 | if (m != VM_PAGE_NULL) { |
8506 | assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR); |
8507 | |
8508 | if (m->vmp_absent) { |
8509 | boolean_t must_free = TRUE; |
8510 | |
8511 | /* |
8512 | * COPYOUT = FALSE case |
8513 | * check for error conditions which must |
				 * be passed back to the page's customer
8515 | */ |
8516 | if (error & UPL_ABORT_RESTART) { |
8517 | m->vmp_restart = TRUE; |
8518 | m->vmp_absent = FALSE; |
8519 | m->vmp_unusual = TRUE; |
8520 | must_free = FALSE; |
8521 | } else if (error & UPL_ABORT_UNAVAILABLE) { |
8522 | m->vmp_restart = FALSE; |
8523 | m->vmp_unusual = TRUE; |
8524 | must_free = FALSE; |
8525 | } else if (error & UPL_ABORT_ERROR) { |
8526 | m->vmp_restart = FALSE; |
8527 | m->vmp_absent = FALSE; |
8528 | m->vmp_error = TRUE; |
8529 | m->vmp_unusual = TRUE; |
8530 | must_free = FALSE; |
8531 | } |
8532 | if (m->vmp_clustered && needed == FALSE) { |
8533 | /* |
8534 | * This page was a part of a speculative |
8535 | * read-ahead initiated by the kernel |
8536 | * itself. No one is expecting this |
8537 | * page and no one will clean up its |
8538 | * error state if it ever becomes valid |
8539 | * in the future. |
8540 | * We have to free it here. |
8541 | */ |
8542 | must_free = TRUE; |
8543 | } |
8544 | m->vmp_cleaning = FALSE; |
8545 | |
8546 | if (m->vmp_overwriting && !m->vmp_busy) { |
8547 | /* |
8548 | * this shouldn't happen since |
8549 | * this is an 'absent' page, but |
8550 | * it doesn't hurt to check for |
8551 | * the 'alternate' method of |
8552 | * stabilizing the page... |
8553 | * we will mark 'busy' to be cleared |
8554 | * in the following code which will |
					 * take care of the primary stabilization
8556 | * method (i.e. setting 'busy' to TRUE) |
8557 | */ |
8558 | dwp->dw_mask |= DW_vm_page_unwire; |
8559 | } |
8560 | m->vmp_overwriting = FALSE; |
8561 | |
8562 | dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); |
8563 | |
8564 | if (must_free == TRUE) { |
8565 | dwp->dw_mask |= DW_vm_page_free; |
8566 | } else { |
8567 | dwp->dw_mask |= DW_vm_page_activate; |
8568 | } |
8569 | } else { |
8570 | /* |
8571 | * Handle the trusted pager throttle. |
8572 | */ |
8573 | if (m->vmp_laundry) { |
8574 | dwp->dw_mask |= DW_vm_pageout_throttle_up; |
8575 | } |
8576 | |
8577 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
8578 | /* |
8579 | * We blocked access to the pages in this UPL. |
8580 | * Clear the "busy" bit and wake up any waiter |
8581 | * for this page. |
8582 | */ |
8583 | dwp->dw_mask |= DW_clear_busy; |
8584 | } |
8585 | if (m->vmp_overwriting) { |
8586 | if (m->vmp_busy) { |
8587 | dwp->dw_mask |= DW_clear_busy; |
8588 | } else { |
8589 | /* |
8590 | * deal with the 'alternate' method |
8591 | * of stabilizing the page... |
8592 | * we will either free the page |
8593 | * or mark 'busy' to be cleared |
8594 | * in the following code which will |
						 * take care of the primary stabilization
8596 | * method (i.e. setting 'busy' to TRUE) |
8597 | */ |
8598 | dwp->dw_mask |= DW_vm_page_unwire; |
8599 | } |
8600 | m->vmp_overwriting = FALSE; |
8601 | } |
8602 | m->vmp_free_when_done = FALSE; |
8603 | m->vmp_cleaning = FALSE; |
8604 | |
8605 | if (error & UPL_ABORT_DUMP_PAGES) { |
					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
8607 | |
8608 | dwp->dw_mask |= DW_vm_page_free; |
8609 | } else { |
8610 | if (!(dwp->dw_mask & DW_vm_page_unwire)) { |
8611 | if (error & UPL_ABORT_REFERENCE) { |
8612 | /* |
							 * we've been told to explicitly
8614 | * reference this page... for |
8615 | * file I/O, this is done by |
8616 | * implementing an LRU on the inactive q |
8617 | */ |
8618 | dwp->dw_mask |= DW_vm_page_lru; |
8619 | } else if (!VM_PAGE_PAGEABLE(m)) { |
8620 | dwp->dw_mask |= DW_vm_page_deactivate_internal; |
8621 | } |
8622 | } |
8623 | dwp->dw_mask |= DW_PAGE_WAKEUP; |
8624 | } |
8625 | } |
8626 | } |
8627 | abort_next_page: |
8628 | target_offset += PAGE_SIZE_64; |
8629 | xfer_size -= PAGE_SIZE; |
8630 | |
8631 | if (dwp->dw_mask) { |
8632 | if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { |
8633 | VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); |
8634 | |
8635 | if (dw_count >= dw_limit) { |
					vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
8637 | |
8638 | dwp = dwp_start; |
8639 | dw_count = 0; |
8640 | } |
8641 | } else { |
8642 | if (dwp->dw_mask & DW_clear_busy) { |
8643 | m->vmp_busy = FALSE; |
8644 | } |
8645 | |
8646 | if (dwp->dw_mask & DW_PAGE_WAKEUP) { |
8647 | PAGE_WAKEUP(m); |
8648 | } |
8649 | } |
8650 | } |
8651 | } |
8652 | if (dw_count) { |
		vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, dwp_start, dw_count);
8654 | dwp = dwp_start; |
8655 | dw_count = 0; |
8656 | } |
8657 | |
8658 | if (upl->flags & UPL_DEVICE_MEMORY) { |
8659 | occupied = 0; |
8660 | } else if (upl->flags & UPL_LITE) { |
8661 | uint32_t pages = (uint32_t)atop(upl_adjusted_size(upl, PAGE_MASK)); |
8662 | |
		occupied = !bitmap_is_empty(upl->lite_list, pages);
8664 | } else { |
8665 | occupied = !vm_page_queue_empty(&upl->map_object->memq); |
8666 | } |
8667 | if (occupied == 0) { |
8668 | /* |
8669 | * If this UPL element belongs to a Vector UPL and is |
8670 | * empty, then this is the right function to deallocate |
		 * it. So go ahead and set the *empty variable. The flag
8672 | * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view |
8673 | * should be considered relevant for the Vector UPL and |
8674 | * not the internal UPLs. |
8675 | */ |
8676 | if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) { |
8677 | *empty = TRUE; |
8678 | } |
8679 | |
8680 | if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { |
8681 | /* |
8682 | * this is not a paging object |
8683 | * so we need to drop the paging reference |
8684 | * that was taken when we created the UPL |
8685 | * against this object |
8686 | */ |
8687 | vm_object_activity_end(shadow_object); |
			vm_object_collapse(shadow_object, 0, TRUE);
8689 | } else { |
8690 | /* |
			 * we donated the paging reference to
8692 | * the map object... vm_pageout_object_terminate |
8693 | * will drop this reference |
8694 | */ |
8695 | } |
8696 | } |
8697 | vm_object_unlock(shadow_object); |
8698 | if (object != shadow_object) { |
8699 | vm_object_unlock(object); |
8700 | } |
8701 | |
8702 | if (!isVectorUPL) { |
8703 | upl_unlock(upl); |
8704 | } else { |
8705 | /* |
8706 | * If we completed our operations on an UPL that is |
8707 | * part of a Vectored UPL and if empty is TRUE, then |
8708 | * we should go ahead and deallocate this UPL element. |
8709 | * Then we check if this was the last of the UPL elements |
8710 | * within that Vectored UPL. If so, set empty to TRUE |
8711 | * so that in ubc_upl_abort_range or ubc_upl_abort, we |
8712 | * can go ahead and deallocate the Vector UPL too. |
8713 | */ |
8714 | if (*empty == TRUE) { |
8715 | *empty = vector_upl_set_subupl(vector_upl, upl, 0); |
8716 | upl_deallocate(upl); |
8717 | } |
8718 | goto process_upl_to_abort; |
8719 | } |
8720 | |
8721 | kr = KERN_SUCCESS; |
8722 | |
8723 | done: |
8724 | if (dwp_start && dwp_finish_ctx) { |
		vm_page_delayed_work_finish_ctx(dwp_start);
8726 | dwp_start = dwp = NULL; |
8727 | } |
8728 | |
8729 | return kr; |
8730 | } |
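
/*
 * upl_abort:
 *
 * Convenience wrapper that aborts the entire UPL by calling
 * upl_abort_range() over [0, u_size).
 */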
8731 | |
8732 | |
8733 | kern_return_t |
8734 | upl_abort( |
8735 | upl_t upl, |
8736 | int error) |
8737 | { |
8738 | boolean_t empty; |
8739 | |
8740 | if (upl == UPL_NULL) { |
8741 | return KERN_INVALID_ARGUMENT; |
8742 | } |
8743 | |
	return upl_abort_range(upl, 0, upl->u_size, error, &empty);
8745 | } |
8746 | |
8747 | |
8748 | /* an option on commit should be wire */ |
8749 | kern_return_t |
8750 | upl_commit( |
8751 | upl_t upl, |
8752 | upl_page_info_t *page_list, |
8753 | mach_msg_type_number_t count) |
8754 | { |
8755 | boolean_t empty; |
8756 | |
8757 | if (upl == UPL_NULL) { |
8758 | return KERN_INVALID_ARGUMENT; |
8759 | } |
8760 | |
	return upl_commit_range(upl, 0, upl->u_size, 0,
	    page_list, count, &empty);
8763 | } |
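
/*
 * iopl_valid_data:
 *
 * Mark the absent (busy) pages of an IOPL's object as valid and wired:
 * each such page has its absent state cleared, is dirtied, wired and
 * woken up, and the object's and global wired-page counts are updated.
 */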
8764 | |
8765 | |
8766 | void |
8767 | iopl_valid_data( |
8768 | upl_t upl, |
8769 | vm_tag_t tag) |
8770 | { |
8771 | vm_object_t object; |
8772 | vm_offset_t offset; |
8773 | vm_page_t m, nxt_page = VM_PAGE_NULL; |
8774 | upl_size_t size; |
8775 | int wired_count = 0; |
8776 | |
8777 | if (upl == NULL) { |
8778 | panic("iopl_valid_data: NULL upl" ); |
8779 | } |
8780 | if (vector_upl_is_valid(upl)) { |
8781 | panic("iopl_valid_data: vector upl" ); |
8782 | } |
8783 | if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_SHADOWED | UPL_ACCESS_BLOCKED | UPL_IO_WIRE | UPL_INTERNAL)) != UPL_IO_WIRE) { |
8784 | panic("iopl_valid_data: unsupported upl, flags = %x" , upl->flags); |
8785 | } |
8786 | |
8787 | object = upl->map_object; |
8788 | |
8789 | if (is_kernel_object(object) || object == compressor_object) { |
8790 | panic("iopl_valid_data: object == kernel or compressor" ); |
8791 | } |
8792 | |
8793 | if (object->purgable == VM_PURGABLE_VOLATILE || |
8794 | object->purgable == VM_PURGABLE_EMPTY) { |
8795 | panic("iopl_valid_data: object %p purgable %d" , |
8796 | object, object->purgable); |
8797 | } |
8798 | |
8799 | size = upl_adjusted_size(upl, PAGE_MASK); |
8800 | |
8801 | vm_object_lock(object); |
8802 | VM_OBJECT_WIRED_PAGE_UPDATE_START(object); |
8803 | |
8804 | bool whole_object; |
8805 | |
8806 | if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) { |
8807 | nxt_page = (vm_page_t)vm_page_queue_first(&object->memq); |
8808 | whole_object = true; |
8809 | } else { |
8810 | offset = (vm_offset_t)(upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset); |
8811 | whole_object = false; |
8812 | } |
8813 | |
8814 | while (size) { |
8815 | if (whole_object) { |
8816 | if (nxt_page != VM_PAGE_NULL) { |
8817 | m = nxt_page; |
8818 | nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq); |
8819 | } |
8820 | } else { |
8821 | m = vm_page_lookup(object, offset); |
8822 | offset += PAGE_SIZE; |
8823 | |
8824 | if (m == VM_PAGE_NULL) { |
8825 | panic("iopl_valid_data: missing expected page at offset %lx" , (long)offset); |
8826 | } |
8827 | } |
8828 | if (m->vmp_busy) { |
8829 | if (!m->vmp_absent) { |
8830 | panic("iopl_valid_data: busy page w/o absent" ); |
8831 | } |
8832 | |
8833 | if (m->vmp_pageq.next || m->vmp_pageq.prev) { |
8834 | panic("iopl_valid_data: busy+absent page on page queue" ); |
8835 | } |
8836 | if (m->vmp_reusable) { |
8837 | panic("iopl_valid_data: %p is reusable" , m); |
8838 | } |
8839 | |
8840 | m->vmp_absent = FALSE; |
8841 | m->vmp_dirty = TRUE; |
8842 | assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q); |
8843 | assert(m->vmp_wire_count == 0); |
8844 | m->vmp_wire_count++; |
8845 | assert(m->vmp_wire_count); |
8846 | if (m->vmp_wire_count == 1) { |
8847 | m->vmp_q_state = VM_PAGE_IS_WIRED; |
8848 | wired_count++; |
8849 | } else { |
8850 | panic("iopl_valid_data: %p already wired" , m); |
8851 | } |
8852 | |
8853 | PAGE_WAKEUP_DONE(m); |
8854 | } |
8855 | size -= PAGE_SIZE; |
8856 | } |
8857 | if (wired_count) { |
8858 | VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count); |
8859 | assert(object->resident_page_count >= object->wired_page_count); |
8860 | |
8861 | /* no need to adjust purgeable accounting for this object: */ |
8862 | assert(object->purgable != VM_PURGABLE_VOLATILE); |
8863 | assert(object->purgable != VM_PURGABLE_EMPTY); |
8864 | |
8865 | vm_page_lockspin_queues(); |
8866 | vm_page_wire_count += wired_count; |
8867 | vm_page_unlock_queues(); |
8868 | } |
8869 | VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag); |
8870 | vm_object_unlock(object); |
8871 | } |
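
/*
 * vm_object_set_pmap_cache_attr:
 *
 * Propagate the object's WIMG cache attributes to the physical pages
 * described in the page list, unless the object uses the default setting.
 */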
8872 | |
8873 | |
8874 | void |
8875 | vm_object_set_pmap_cache_attr( |
8876 | vm_object_t object, |
8877 | upl_page_info_array_t user_page_list, |
8878 | unsigned int num_pages, |
8879 | boolean_t batch_pmap_op) |
8880 | { |
8881 | unsigned int cache_attr = 0; |
8882 | |
8883 | cache_attr = object->wimg_bits & VM_WIMG_MASK; |
8884 | assert(user_page_list); |
8885 | if (cache_attr != VM_WIMG_USE_DEFAULT) { |
8886 | PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); |
8887 | } |
8888 | } |
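
/*
 * vm_object_iopl_wire_full:
 *
 * Fast path used when every page of the object is already resident: walk
 * the object's page list, wire each page and record it in the UPL's lite
 * list and (optionally) the caller's page list.  Returns FALSE if any page
 * is in a state (busy, absent, error, cleaning, ...) that this path cannot
 * handle, in which case the caller falls back to the slow path.
 */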
8889 | |
8890 | |
8891 | static bool |
8892 | vm_object_iopl_wire_full( |
8893 | vm_object_t object, |
8894 | upl_t upl, |
8895 | upl_page_info_array_t user_page_list, |
8896 | upl_control_flags_t cntrl_flags, |
8897 | vm_tag_t tag) |
8898 | { |
8899 | vm_page_t dst_page; |
8900 | unsigned int entry; |
8901 | int page_count; |
8902 | int delayed_unlock = 0; |
8903 | boolean_t retval = TRUE; |
8904 | ppnum_t phys_page; |
8905 | |
8906 | vm_object_lock_assert_exclusive(object); |
8907 | assert(object->purgable != VM_PURGABLE_VOLATILE); |
8908 | assert(object->purgable != VM_PURGABLE_EMPTY); |
8909 | assert(object->pager == NULL); |
8910 | assert(object->vo_copy == NULL); |
8911 | assert(object->shadow == NULL); |
8912 | |
8913 | page_count = object->resident_page_count; |
8914 | dst_page = (vm_page_t)vm_page_queue_first(&object->memq); |
8915 | |
8916 | vm_page_lock_queues(); |
8917 | |
8918 | while (page_count--) { |
8919 | if (dst_page->vmp_busy || |
8920 | dst_page->vmp_fictitious || |
8921 | dst_page->vmp_absent || |
8922 | VMP_ERROR_GET(dst_page) || |
8923 | dst_page->vmp_cleaning || |
8924 | dst_page->vmp_restart || |
8925 | dst_page->vmp_laundry) { |
8926 | retval = FALSE; |
8927 | goto done; |
8928 | } |
8929 | if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) { |
8930 | retval = FALSE; |
8931 | goto done; |
8932 | } |
8933 | dst_page->vmp_reference = TRUE; |
8934 | |
		vm_page_wire(dst_page, tag, FALSE);
8936 | |
8937 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { |
8938 | SET_PAGE_DIRTY(dst_page, FALSE); |
8939 | } |
8940 | entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE); |
8941 | assert(entry >= 0 && entry < object->resident_page_count); |
		bitmap_set(upl->lite_list, entry);

		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8945 | |
8946 | if (phys_page > upl->highest_page) { |
8947 | upl->highest_page = phys_page; |
8948 | } |
8949 | |
8950 | if (user_page_list) { |
8951 | user_page_list[entry].phys_addr = phys_page; |
8952 | user_page_list[entry].absent = dst_page->vmp_absent; |
8953 | user_page_list[entry].dirty = dst_page->vmp_dirty; |
8954 | user_page_list[entry].free_when_done = dst_page->vmp_free_when_done; |
8955 | user_page_list[entry].precious = dst_page->vmp_precious; |
8956 | user_page_list[entry].device = FALSE; |
8957 | user_page_list[entry].speculative = FALSE; |
8958 | user_page_list[entry].cs_validated = FALSE; |
8959 | user_page_list[entry].cs_tainted = FALSE; |
8960 | user_page_list[entry].cs_nx = FALSE; |
8961 | user_page_list[entry].needed = FALSE; |
8962 | user_page_list[entry].mark = FALSE; |
8963 | } |
8964 | if (delayed_unlock++ > 256) { |
8965 | delayed_unlock = 0; |
			lck_mtx_yield(&vm_page_queue_lock);
8967 | |
8968 | VM_CHECK_MEMORYSTATUS; |
8969 | } |
8970 | dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq); |
8971 | } |
8972 | done: |
8973 | vm_page_unlock_queues(); |
8974 | |
8975 | VM_CHECK_MEMORYSTATUS; |
8976 | |
8977 | return retval; |
8978 | } |
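
/*
 * vm_object_iopl_wire_empty:
 *
 * Fast path used when the object has no resident pages: grab fresh pages
 * (zero-filled unless UPL_NOZEROFILL* is set), insert them into the object,
 * wire them and record them in the UPL.  Ledger updates for the newly
 * inserted pages are applied in one shot at the end.
 */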
8979 | |
8980 | |
8981 | static kern_return_t |
8982 | vm_object_iopl_wire_empty( |
8983 | vm_object_t object, |
8984 | upl_t upl, |
8985 | upl_page_info_array_t user_page_list, |
8986 | upl_control_flags_t cntrl_flags, |
8987 | vm_tag_t tag, |
8988 | vm_object_offset_t *dst_offset, |
8989 | int page_count, |
8990 | int *page_grab_count) |
8991 | { |
8992 | vm_page_t dst_page; |
8993 | boolean_t no_zero_fill = FALSE; |
8994 | int interruptible; |
8995 | int pages_wired = 0; |
8996 | int pages_inserted = 0; |
8997 | int entry = 0; |
8998 | uint64_t delayed_ledger_update = 0; |
8999 | kern_return_t ret = KERN_SUCCESS; |
9000 | int grab_options; |
9001 | ppnum_t phys_page; |
9002 | |
9003 | vm_object_lock_assert_exclusive(object); |
9004 | assert(object->purgable != VM_PURGABLE_VOLATILE); |
9005 | assert(object->purgable != VM_PURGABLE_EMPTY); |
9006 | assert(object->pager == NULL); |
9007 | assert(object->vo_copy == NULL); |
9008 | assert(object->shadow == NULL); |
9009 | |
9010 | if (cntrl_flags & UPL_SET_INTERRUPTIBLE) { |
9011 | interruptible = THREAD_ABORTSAFE; |
9012 | } else { |
9013 | interruptible = THREAD_UNINT; |
9014 | } |
9015 | |
9016 | if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) { |
9017 | no_zero_fill = TRUE; |
9018 | } |
9019 | |
9020 | grab_options = 0; |
9021 | #if CONFIG_SECLUDED_MEMORY |
9022 | if (object->can_grab_secluded) { |
9023 | grab_options |= VM_PAGE_GRAB_SECLUDED; |
9024 | } |
9025 | #endif /* CONFIG_SECLUDED_MEMORY */ |
9026 | |
9027 | while (page_count--) { |
		while ((dst_page = vm_page_grab_options(grab_options))
9029 | == VM_PAGE_NULL) { |
9030 | OSAddAtomic(page_count, &vm_upl_wait_for_pages); |
9031 | |
9032 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); |
9033 | |
9034 | if (vm_page_wait(interruptible) == FALSE) { |
9035 | /* |
9036 | * interrupted case |
9037 | */ |
9038 | OSAddAtomic(-page_count, &vm_upl_wait_for_pages); |
9039 | |
9040 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); |
9041 | |
9042 | ret = MACH_SEND_INTERRUPTED; |
9043 | goto done; |
9044 | } |
9045 | OSAddAtomic(-page_count, &vm_upl_wait_for_pages); |
9046 | |
9047 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); |
9048 | } |
9049 | if (no_zero_fill == FALSE) { |
			vm_page_zero_fill(dst_page);
9051 | } else { |
9052 | dst_page->vmp_absent = TRUE; |
9053 | } |
9054 | |
9055 | dst_page->vmp_reference = TRUE; |
9056 | |
9057 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { |
9058 | SET_PAGE_DIRTY(dst_page, FALSE); |
9059 | } |
9060 | if (dst_page->vmp_absent == FALSE) { |
9061 | assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q); |
9062 | assert(dst_page->vmp_wire_count == 0); |
9063 | dst_page->vmp_wire_count++; |
9064 | dst_page->vmp_q_state = VM_PAGE_IS_WIRED; |
9065 | assert(dst_page->vmp_wire_count); |
9066 | pages_wired++; |
9067 | PAGE_WAKEUP_DONE(dst_page); |
9068 | } |
9069 | pages_inserted++; |
9070 | |
		vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);

		bitmap_set(upl->lite_list, entry);

		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
9076 | |
9077 | if (phys_page > upl->highest_page) { |
9078 | upl->highest_page = phys_page; |
9079 | } |
9080 | |
9081 | if (user_page_list) { |
9082 | user_page_list[entry].phys_addr = phys_page; |
9083 | user_page_list[entry].absent = dst_page->vmp_absent; |
9084 | user_page_list[entry].dirty = dst_page->vmp_dirty; |
9085 | user_page_list[entry].free_when_done = FALSE; |
9086 | user_page_list[entry].precious = FALSE; |
9087 | user_page_list[entry].device = FALSE; |
9088 | user_page_list[entry].speculative = FALSE; |
9089 | user_page_list[entry].cs_validated = FALSE; |
9090 | user_page_list[entry].cs_tainted = FALSE; |
9091 | user_page_list[entry].cs_nx = FALSE; |
9092 | user_page_list[entry].needed = FALSE; |
9093 | user_page_list[entry].mark = FALSE; |
9094 | } |
9095 | entry++; |
9096 | *dst_offset += PAGE_SIZE_64; |
9097 | } |
9098 | done: |
9099 | if (pages_wired) { |
9100 | vm_page_lockspin_queues(); |
9101 | vm_page_wire_count += pages_wired; |
9102 | vm_page_unlock_queues(); |
9103 | } |
9104 | if (pages_inserted) { |
9105 | if (object->internal) { |
9106 | OSAddAtomic(pages_inserted, &vm_page_internal_count); |
9107 | } else { |
9108 | OSAddAtomic(pages_inserted, &vm_page_external_count); |
9109 | } |
9110 | } |
9111 | if (delayed_ledger_update) { |
9112 | task_t owner; |
9113 | int ledger_idx_volatile; |
9114 | int ledger_idx_nonvolatile; |
9115 | int ledger_idx_volatile_compressed; |
9116 | int ledger_idx_nonvolatile_compressed; |
		boolean_t do_footprint;
9118 | |
9119 | owner = VM_OBJECT_OWNER(object); |
9120 | assert(owner); |
9121 | |
		vm_object_ledger_tag_ledgers(object,
		    &ledger_idx_volatile,
		    &ledger_idx_nonvolatile,
		    &ledger_idx_volatile_compressed,
		    &ledger_idx_nonvolatile_compressed,
		    &do_footprint);
9128 | |
9129 | /* more non-volatile bytes */ |
		ledger_credit(owner->ledger,
		    ledger_idx_nonvolatile,
		    delayed_ledger_update);
9133 | if (do_footprint) { |
9134 | /* more footprint */ |
			ledger_credit(owner->ledger,
			    task_ledgers.phys_footprint,
			    delayed_ledger_update);
9138 | } |
9139 | } |
9140 | |
9141 | assert(page_grab_count); |
9142 | *page_grab_count = pages_inserted; |
9143 | |
9144 | return ret; |
9145 | } |
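
/*
 * vm_object_iopl_request:
 *
 * Create an I/O UPL against the given object/offset/size: the covered pages
 * are faulted in if necessary, wired, and described in the returned UPL
 * (and optional page list) so they can be used for I/O.  Device
 * (phys_contiguous) objects are handled without touching any vm_page
 * structures.
 */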
9146 | |
9147 | |
9148 | |
9149 | kern_return_t |
9150 | vm_object_iopl_request( |
9151 | vm_object_t object, |
9152 | vm_object_offset_t offset, |
9153 | upl_size_t size, |
9154 | upl_t *upl_ptr, |
9155 | upl_page_info_array_t user_page_list, |
9156 | unsigned int *page_list_count, |
9157 | upl_control_flags_t cntrl_flags, |
9158 | vm_tag_t tag) |
9159 | { |
9160 | vm_page_t dst_page; |
9161 | vm_object_offset_t dst_offset; |
9162 | upl_size_t xfer_size; |
9163 | upl_t upl = NULL; |
9164 | unsigned int entry; |
9165 | int no_zero_fill = FALSE; |
9166 | unsigned int size_in_pages; |
9167 | int page_grab_count = 0; |
9168 | u_int32_t psize; |
9169 | kern_return_t ret; |
9170 | vm_prot_t prot; |
9171 | struct vm_object_fault_info fault_info = {}; |
9172 | struct vm_page_delayed_work dw_array; |
9173 | struct vm_page_delayed_work *dwp, *dwp_start; |
9174 | bool dwp_finish_ctx = TRUE; |
9175 | int dw_count; |
9176 | int dw_limit; |
9177 | int dw_index; |
9178 | boolean_t caller_lookup; |
9179 | int io_tracking_flag = 0; |
9180 | int interruptible; |
9181 | ppnum_t phys_page; |
9182 | |
9183 | boolean_t set_cache_attr_needed = FALSE; |
9184 | boolean_t free_wired_pages = FALSE; |
9185 | boolean_t fast_path_empty_req = FALSE; |
9186 | boolean_t fast_path_full_req = FALSE; |
9187 | |
9188 | #if DEVELOPMENT || DEBUG |
9189 | task_t task = current_task(); |
9190 | #endif /* DEVELOPMENT || DEBUG */ |
9191 | |
9192 | dwp_start = dwp = NULL; |
9193 | |
9194 | vm_object_offset_t original_offset = offset; |
9195 | upl_size_t original_size = size; |
9196 | |
9197 | // DEBUG4K_UPL("object %p offset 0x%llx size 0x%llx cntrl_flags 0x%llx\n", object, (uint64_t)offset, (uint64_t)size, cntrl_flags); |
9198 | |
9199 | size = (upl_size_t)(vm_object_round_page(offset + size) - vm_object_trunc_page(offset)); |
9200 | offset = vm_object_trunc_page(offset); |
9201 | if (size != original_size || offset != original_offset) { |
		DEBUG4K_IOKIT("flags 0x%llx object %p offset 0x%llx size 0x%x -> offset 0x%llx size 0x%x\n", cntrl_flags, object, original_offset, original_size, offset, size);
9203 | } |
9204 | |
9205 | if (cntrl_flags & ~UPL_VALID_FLAGS) { |
9206 | /* |
9207 | * For forward compatibility's sake, |
9208 | * reject any unknown flag. |
9209 | */ |
9210 | return KERN_INVALID_VALUE; |
9211 | } |
9212 | if (vm_lopage_needed == FALSE) { |
9213 | cntrl_flags &= ~UPL_NEED_32BIT_ADDR; |
9214 | } |
9215 | |
9216 | if (cntrl_flags & UPL_NEED_32BIT_ADDR) { |
9217 | if ((cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) { |
9218 | return KERN_INVALID_VALUE; |
9219 | } |
9220 | |
9221 | if (object->phys_contiguous) { |
9222 | if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) { |
9223 | return KERN_INVALID_ADDRESS; |
9224 | } |
9225 | |
9226 | if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) { |
9227 | return KERN_INVALID_ADDRESS; |
9228 | } |
9229 | } |
9230 | } |
9231 | if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) { |
9232 | no_zero_fill = TRUE; |
9233 | } |
9234 | |
9235 | if (cntrl_flags & UPL_COPYOUT_FROM) { |
9236 | prot = VM_PROT_READ; |
9237 | } else { |
9238 | prot = VM_PROT_READ | VM_PROT_WRITE; |
9239 | } |
9240 | |
9241 | if ((!object->internal) && (object->paging_offset != 0)) { |
9242 | panic("vm_object_iopl_request: external object with non-zero paging offset" ); |
9243 | } |
9244 | |
9245 | |
9246 | VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0); |
9247 | |
9248 | #if CONFIG_IOSCHED || UPL_DEBUG |
9249 | if ((object->io_tracking && !is_kernel_object(object)) || upl_debug_enabled) { |
9250 | io_tracking_flag |= UPL_CREATE_IO_TRACKING; |
9251 | } |
9252 | #endif |
9253 | |
9254 | #if CONFIG_IOSCHED |
9255 | if (object->io_tracking) { |
9256 | /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */ |
9257 | if (!is_kernel_object(object)) { |
9258 | io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; |
9259 | } |
9260 | } |
9261 | #endif |
9262 | |
9263 | if (object->phys_contiguous) { |
9264 | psize = PAGE_SIZE; |
9265 | } else { |
9266 | psize = size; |
9267 | |
9268 | dw_count = 0; |
9269 | dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); |
9270 | dwp_start = vm_page_delayed_work_get_ctx(); |
9271 | if (dwp_start == NULL) { |
9272 | dwp_start = &dw_array; |
9273 | dw_limit = 1; |
9274 | dwp_finish_ctx = FALSE; |
9275 | } |
9276 | |
9277 | dwp = dwp_start; |
9278 | } |
9279 | |
9280 | if (cntrl_flags & UPL_SET_INTERNAL) { |
		upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
9282 | user_page_list = size ? upl->page_list : NULL; |
9283 | } else { |
		upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
9285 | } |
9286 | if (user_page_list) { |
9287 | user_page_list[0].device = FALSE; |
9288 | } |
9289 | *upl_ptr = upl; |
9290 | |
9291 | if (cntrl_flags & UPL_NOZEROFILLIO) { |
9292 | DTRACE_VM4(upl_nozerofillio, |
9293 | vm_object_t, object, |
9294 | vm_object_offset_t, offset, |
9295 | upl_size_t, size, |
9296 | upl_t, upl); |
9297 | } |
9298 | |
9299 | upl->map_object = object; |
9300 | upl->u_offset = original_offset; |
9301 | upl->u_size = original_size; |
9302 | |
9303 | size_in_pages = size / PAGE_SIZE; |
9304 | |
9305 | if (is_kernel_object(object) && |
9306 | !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { |
9307 | upl->flags |= UPL_KERNEL_OBJECT; |
9308 | #if UPL_DEBUG |
9309 | vm_object_lock(object); |
9310 | #else |
9311 | vm_object_lock_shared(object); |
9312 | #endif |
9313 | } else { |
9314 | vm_object_lock(object); |
9315 | vm_object_activity_begin(object); |
9316 | } |
9317 | /* |
9318 | * paging in progress also protects the paging_offset |
9319 | */ |
9320 | upl->u_offset = original_offset + object->paging_offset; |
9321 | |
9322 | if (cntrl_flags & UPL_BLOCK_ACCESS) { |
9323 | /* |
9324 | * The user requested that access to the pages in this UPL |
		 * be blocked until the UPL is committed or aborted.
9326 | */ |
9327 | upl->flags |= UPL_ACCESS_BLOCKED; |
9328 | } |
9329 | |
9330 | #if CONFIG_IOSCHED || UPL_DEBUG |
9331 | if ((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) { |
9332 | vm_object_activity_begin(object); |
9333 | queue_enter(&object->uplq, upl, upl_t, uplq); |
9334 | } |
9335 | #endif |
9336 | |
9337 | if (object->phys_contiguous) { |
9338 | if (upl->flags & UPL_ACCESS_BLOCKED) { |
9339 | assert(!object->blocked_access); |
9340 | object->blocked_access = TRUE; |
9341 | } |
9342 | |
9343 | vm_object_unlock(object); |
9344 | |
9345 | /* |
9346 | * don't need any shadow mappings for this one |
9347 | * since it is already I/O memory |
9348 | */ |
9349 | upl->flags |= UPL_DEVICE_MEMORY; |
9350 | |
9351 | upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1) >> PAGE_SHIFT); |
9352 | |
9353 | if (user_page_list) { |
9354 | user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset) >> PAGE_SHIFT); |
9355 | user_page_list[0].device = TRUE; |
9356 | } |
9357 | if (page_list_count != NULL) { |
9358 | if (upl->flags & UPL_INTERNAL) { |
9359 | *page_list_count = 0; |
9360 | } else { |
9361 | *page_list_count = 1; |
9362 | } |
9363 | } |
9364 | |
9365 | VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0); |
9366 | #if DEVELOPMENT || DEBUG |
9367 | if (task != NULL) { |
9368 | ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count); |
9369 | } |
9370 | #endif /* DEVELOPMENT || DEBUG */ |
9371 | return KERN_SUCCESS; |
9372 | } |
9373 | if (!is_kernel_object(object) && object != compressor_object) { |
9374 | /* |
9375 | * Protect user space from future COW operations |
9376 | */ |
9377 | #if VM_OBJECT_TRACKING_OP_TRUESHARE |
9378 | if (!object->true_share && |
9379 | vm_object_tracking_btlog) { |
9380 | btlog_record(vm_object_tracking_btlog, object, |
9381 | VM_OBJECT_TRACKING_OP_TRUESHARE, |
9382 | btref_get(__builtin_frame_address(0), 0)); |
9383 | } |
9384 | #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ |
9385 | |
9386 | vm_object_lock_assert_exclusive(object); |
9387 | VM_OBJECT_SET_TRUE_SHARE(object, TRUE); |
9388 | |
9389 | if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { |
9390 | object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; |
9391 | } |
9392 | } |
9393 | |
9394 | if (!(cntrl_flags & UPL_COPYOUT_FROM) && |
9395 | object->vo_copy != VM_OBJECT_NULL) { |
9396 | /* |
9397 | * Honor copy-on-write obligations |
9398 | * |
9399 | * The caller is gathering these pages and |
9400 | * might modify their contents. We need to |
9401 | * make sure that the copy object has its own |
9402 | * private copies of these pages before we let |
9403 | * the caller modify them. |
9404 | * |
9405 | * NOTE: someone else could map the original object |
9406 | * after we've done this copy-on-write here, and they |
9407 | * could then see an inconsistent picture of the memory |
9408 | * while it's being modified via the UPL. To prevent this, |
9409 | * we would have to block access to these pages until the |
9410 | * UPL is released. We could use the UPL_BLOCK_ACCESS |
9411 | * code path for that... |
9412 | */ |
9413 | vm_object_update(object, |
9414 | offset, |
9415 | size, |
9416 | NULL, |
9417 | NULL, |
9418 | FALSE, /* should_return */ |
9419 | MEMORY_OBJECT_COPY_SYNC, |
9420 | VM_PROT_NO_CHANGE); |
9421 | VM_PAGEOUT_DEBUG(iopl_cow, 1); |
9422 | VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT)); |
9423 | } |
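	/*
	 * See if one of the two fast paths can be used: the object must be
	 * wholly covered by the request, unshadowed, pager-less and not
	 * volatile.  "Full" means every page is already resident; "empty"
	 * means none are.
	 */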
9424 | if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) && |
9425 | object->purgable != VM_PURGABLE_VOLATILE && |
9426 | object->purgable != VM_PURGABLE_EMPTY && |
9427 | object->vo_copy == NULL && |
9428 | size == object->vo_size && |
9429 | offset == 0 && |
9430 | object->shadow == NULL && |
9431 | object->pager == NULL) { |
9432 | if (object->resident_page_count == size_in_pages) { |
9433 | assert(object != compressor_object); |
9434 | assert(!is_kernel_object(object)); |
9435 | fast_path_full_req = TRUE; |
9436 | } else if (object->resident_page_count == 0) { |
9437 | assert(object != compressor_object); |
9438 | assert(!is_kernel_object(object)); |
9439 | fast_path_empty_req = TRUE; |
9440 | set_cache_attr_needed = TRUE; |
9441 | } |
9442 | } |
9443 | |
9444 | if (cntrl_flags & UPL_SET_INTERRUPTIBLE) { |
9445 | interruptible = THREAD_ABORTSAFE; |
9446 | } else { |
9447 | interruptible = THREAD_UNINT; |
9448 | } |
9449 | |
9450 | entry = 0; |
9451 | |
9452 | xfer_size = size; |
9453 | dst_offset = offset; |
9454 | |
9455 | if (fast_path_full_req) { |
9456 | if (vm_object_iopl_wire_full(object, upl, user_page_list, cntrl_flags, tag) == TRUE) { |
9457 | goto finish; |
9458 | } |
9459 | /* |
9460 | * we couldn't complete the processing of this request on the fast path |
9461 | * so fall through to the slow path and finish up |
9462 | */ |
9463 | } else if (fast_path_empty_req) { |
9464 | if (cntrl_flags & UPL_REQUEST_NO_FAULT) { |
9465 | ret = KERN_MEMORY_ERROR; |
9466 | goto return_err; |
9467 | } |
9468 | ret = vm_object_iopl_wire_empty(object, upl, user_page_list, |
cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
9470 | |
9471 | if (ret) { |
9472 | free_wired_pages = TRUE; |
9473 | goto return_err; |
9474 | } |
9475 | goto finish; |
9476 | } |
9477 | |
9478 | fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; |
9479 | fault_info.lo_offset = offset; |
9480 | fault_info.hi_offset = offset + xfer_size; |
9481 | fault_info.mark_zf_absent = TRUE; |
9482 | fault_info.interruptible = interruptible; |
9483 | fault_info.batch_pmap_op = TRUE; |
9484 | |
9485 | while (xfer_size) { |
9486 | vm_fault_return_t result; |
9487 | |
9488 | dwp->dw_mask = 0; |
9489 | |
9490 | if (fast_path_full_req) { |
9491 | /* |
9492 | * if we get here, it means that we ran into a page |
9493 | * state we couldn't handle in the fast path and |
9494 | * bailed out to the slow path... since the order |
9495 | * we look at pages is different between the 2 paths, |
9496 | * the following check is needed to determine whether |
9497 | * this page was already processed in the fast path |
9498 | */ |
if (bitmap_test(upl->lite_list, entry)) {
9500 | goto skip_page; |
9501 | } |
9502 | } |
dst_page = vm_page_lookup(object, dst_offset);
9504 | |
9505 | if (dst_page == VM_PAGE_NULL || |
9506 | dst_page->vmp_busy || |
9507 | VMP_ERROR_GET(dst_page) || |
9508 | dst_page->vmp_restart || |
9509 | dst_page->vmp_absent || |
9510 | dst_page->vmp_fictitious) { |
9511 | if (is_kernel_object(object)) { |
panic("vm_object_iopl_request: missing/bad page in kernel object");
}
if (object == compressor_object) {
panic("vm_object_iopl_request: missing/bad page in compressor object");
9516 | } |
9517 | |
9518 | if (cntrl_flags & UPL_REQUEST_NO_FAULT) { |
9519 | ret = KERN_MEMORY_ERROR; |
9520 | goto return_err; |
9521 | } |
9522 | set_cache_attr_needed = TRUE; |
9523 | |
9524 | /* |
9525 | * We just looked up the page and the result remains valid |
* until the object lock is released, so send it to
9527 | * vm_fault_page() (as "dst_page"), to avoid having to |
9528 | * look it up again there. |
9529 | */ |
9530 | caller_lookup = TRUE; |
9531 | |
9532 | do { |
9533 | vm_page_t top_page; |
9534 | kern_return_t error_code; |
9535 | |
9536 | fault_info.cluster_size = xfer_size; |
9537 | |
9538 | vm_object_paging_begin(object); |
9539 | |
result = vm_fault_page(object, dst_offset,
prot | VM_PROT_WRITE, FALSE,
caller_lookup,
&prot, &dst_page, &top_page,
(int *)0,
&error_code, no_zero_fill,
&fault_info);
9547 | |
9548 | /* our lookup is no longer valid at this point */ |
9549 | caller_lookup = FALSE; |
9550 | |
9551 | switch (result) { |
9552 | case VM_FAULT_SUCCESS: |
9553 | page_grab_count++; |
9554 | |
9555 | if (!dst_page->vmp_absent) { |
9556 | PAGE_WAKEUP_DONE(dst_page); |
9557 | } else { |
9558 | /* |
9559 | * we only get back an absent page if we |
9560 | * requested that it not be zero-filled |
9561 | * because we are about to fill it via I/O |
9562 | * |
9563 | * absent pages should be left BUSY |
9564 | * to prevent them from being faulted |
9565 | * into an address space before we've |
9566 | * had a chance to complete the I/O on |
9567 | * them since they may contain info that |
9568 | * shouldn't be seen by the faulting task |
9569 | */ |
9570 | } |
9571 | /* |
9572 | * Release paging references and |
9573 | * top-level placeholder page, if any. |
9574 | */ |
9575 | if (top_page != VM_PAGE_NULL) { |
9576 | vm_object_t local_object; |
9577 | |
9578 | local_object = VM_PAGE_OBJECT(top_page); |
9579 | |
9580 | /* |
9581 | * comparing 2 packed pointers |
9582 | */ |
9583 | if (top_page->vmp_object != dst_page->vmp_object) { |
9584 | vm_object_lock(local_object); |
9585 | VM_PAGE_FREE(top_page); |
9586 | vm_object_paging_end(local_object); |
9587 | vm_object_unlock(local_object); |
9588 | } else { |
9589 | VM_PAGE_FREE(top_page); |
9590 | vm_object_paging_end(local_object); |
9591 | } |
9592 | } |
9593 | vm_object_paging_end(object); |
9594 | break; |
9595 | |
9596 | case VM_FAULT_RETRY: |
9597 | vm_object_lock(object); |
9598 | break; |
9599 | |
9600 | case VM_FAULT_MEMORY_SHORTAGE: |
9601 | OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages); |
9602 | |
9603 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); |
9604 | |
9605 | if (vm_page_wait(interruptible)) { |
9606 | OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages); |
9607 | |
9608 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); |
9609 | vm_object_lock(object); |
9610 | |
9611 | break; |
9612 | } |
9613 | OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages); |
9614 | |
9615 | VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); |
ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_FAULT_OBJIOPLREQ_MEMORY_SHORTAGE), 0 /* arg */);
9617 | OS_FALLTHROUGH; |
9618 | |
9619 | case VM_FAULT_INTERRUPTED: |
9620 | error_code = MACH_SEND_INTERRUPTED; |
9621 | OS_FALLTHROUGH; |
9622 | case VM_FAULT_MEMORY_ERROR: |
9623 | memory_error: |
ret = (error_code ? error_code : KERN_MEMORY_ERROR);
9625 | |
9626 | vm_object_lock(object); |
9627 | goto return_err; |
9628 | |
9629 | case VM_FAULT_SUCCESS_NO_VM_PAGE: |
9630 | /* success but no page: fail */ |
9631 | vm_object_paging_end(object); |
9632 | vm_object_unlock(object); |
9633 | goto memory_error; |
9634 | |
9635 | default: |
panic("vm_object_iopl_request: unexpected error"
" 0x%x from vm_fault_page()\n", result);
9638 | } |
9639 | } while (result != VM_FAULT_SUCCESS); |
9640 | } |
phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
9642 | |
9643 | if (upl->flags & UPL_KERNEL_OBJECT) { |
9644 | goto record_phys_addr; |
9645 | } |
9646 | |
9647 | if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) { |
9648 | dst_page->vmp_busy = TRUE; |
9649 | goto record_phys_addr; |
9650 | } |
9651 | |
9652 | if (dst_page->vmp_cleaning) { |
9653 | /* |
9654 | * Someone else is cleaning this page in place. |
* In theory, we could proceed and use this page, but
* they will probably clear its "busy" bit in
* upl_commit_range() even though they did not set it,
* which would clear our "busy" bit and expose us to
* race conditions.
9660 | * We'd better wait for the cleaning to complete and |
9661 | * then try again. |
9662 | */ |
9663 | VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1); |
9664 | PAGE_SLEEP(object, dst_page, THREAD_UNINT); |
9665 | continue; |
9666 | } |
9667 | if (dst_page->vmp_laundry) { |
vm_pageout_steal_laundry(dst_page, FALSE);
9669 | } |
9670 | |
9671 | if ((cntrl_flags & UPL_NEED_32BIT_ADDR) && |
9672 | phys_page >= (max_valid_dma_address >> PAGE_SHIFT)) { |
9673 | vm_page_t low_page; |
9674 | int refmod; |
9675 | |
9676 | /* |
9677 | * support devices that can't DMA above 32 bits |
9678 | * by substituting pages from a pool of low address |
* memory for any pages we find above the 4G mark...
* we can't substitute if the page is already wired because
9681 | * we don't know whether that physical address has been |
9682 | * handed out to some other 64 bit capable DMA device to use |
9683 | */ |
9684 | if (VM_PAGE_WIRED(dst_page)) { |
9685 | ret = KERN_PROTECTION_FAILURE; |
9686 | goto return_err; |
9687 | } |
9688 | low_page = vm_page_grablo(); |
9689 | |
9690 | if (low_page == VM_PAGE_NULL) { |
9691 | ret = KERN_RESOURCE_SHORTAGE; |
9692 | goto return_err; |
9693 | } |
9694 | /* |
9695 | * from here until the vm_page_replace completes |
* we mustn't drop the object lock... we don't
9697 | * want anyone refaulting this page in and using |
9698 | * it after we disconnect it... we want the fault |
9699 | * to find the new page being substituted. |
9700 | */ |
9701 | if (dst_page->vmp_pmapped) { |
refmod = pmap_disconnect(phys_page);
9703 | } else { |
9704 | refmod = 0; |
9705 | } |
9706 | |
9707 | if (!dst_page->vmp_absent) { |
vm_page_copy(dst_page, low_page);
9709 | } |
9710 | |
9711 | low_page->vmp_reference = dst_page->vmp_reference; |
9712 | low_page->vmp_dirty = dst_page->vmp_dirty; |
9713 | low_page->vmp_absent = dst_page->vmp_absent; |
9714 | |
9715 | if (refmod & VM_MEM_REFERENCED) { |
9716 | low_page->vmp_reference = TRUE; |
9717 | } |
9718 | if (refmod & VM_MEM_MODIFIED) { |
9719 | SET_PAGE_DIRTY(low_page, FALSE); |
9720 | } |
9721 | |
vm_page_replace(low_page, object, dst_offset);
9723 | |
9724 | dst_page = low_page; |
9725 | /* |
9726 | * vm_page_grablo returned the page marked |
9727 | * BUSY... we don't need a PAGE_WAKEUP_DONE |
9728 | * here, because we've never dropped the object lock |
9729 | */ |
9730 | if (!dst_page->vmp_absent) { |
9731 | dst_page->vmp_busy = FALSE; |
9732 | } |
9733 | |
phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
9735 | } |
9736 | if (!dst_page->vmp_busy) { |
9737 | dwp->dw_mask |= DW_vm_page_wire; |
9738 | } |
9739 | |
9740 | if (cntrl_flags & UPL_BLOCK_ACCESS) { |
9741 | /* |
9742 | * Mark the page "busy" to block any future page fault |
9743 | * on this page in addition to wiring it. |
9744 | * We'll also remove the mapping |
9745 | * of all these pages before leaving this routine. |
9746 | */ |
9747 | assert(!dst_page->vmp_fictitious); |
9748 | dst_page->vmp_busy = TRUE; |
9749 | } |
9750 | /* |
9751 | * expect the page to be used |
9752 | * page queues lock must be held to set 'reference' |
9753 | */ |
9754 | dwp->dw_mask |= DW_set_reference; |
9755 | |
9756 | if (!(cntrl_flags & UPL_COPYOUT_FROM)) { |
9757 | SET_PAGE_DIRTY(dst_page, TRUE); |
9758 | /* |
9759 | * Page belonging to a code-signed object is about to |
9760 | * be written. Mark it tainted and disconnect it from |
9761 | * all pmaps so processes have to fault it back in and |
9762 | * deal with the tainted bit. |
9763 | */ |
9764 | if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) { |
9765 | dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE; |
9766 | vm_page_iopl_tainted++; |
9767 | if (dst_page->vmp_pmapped) { |
int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
9769 | if (refmod & VM_MEM_REFERENCED) { |
9770 | dst_page->vmp_reference = TRUE; |
9771 | } |
9772 | } |
9773 | } |
9774 | } |
9775 | if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) { |
pmap_sync_page_attributes_phys(phys_page);
9777 | dst_page->vmp_written_by_kernel = FALSE; |
9778 | } |
9779 | |
9780 | record_phys_addr: |
9781 | if (dst_page->vmp_busy) { |
9782 | upl->flags |= UPL_HAS_BUSY; |
9783 | } |
9784 | |
bitmap_set(upl->lite_list, entry);
9786 | |
9787 | if (phys_page > upl->highest_page) { |
9788 | upl->highest_page = phys_page; |
9789 | } |
9790 | |
9791 | if (user_page_list) { |
9792 | user_page_list[entry].phys_addr = phys_page; |
9793 | user_page_list[entry].free_when_done = dst_page->vmp_free_when_done; |
9794 | user_page_list[entry].absent = dst_page->vmp_absent; |
9795 | user_page_list[entry].dirty = dst_page->vmp_dirty; |
9796 | user_page_list[entry].precious = dst_page->vmp_precious; |
9797 | user_page_list[entry].device = FALSE; |
9798 | user_page_list[entry].needed = FALSE; |
9799 | if (dst_page->vmp_clustered == TRUE) { |
9800 | user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE; |
9801 | } else { |
9802 | user_page_list[entry].speculative = FALSE; |
9803 | } |
9804 | user_page_list[entry].cs_validated = dst_page->vmp_cs_validated; |
9805 | user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted; |
9806 | user_page_list[entry].cs_nx = dst_page->vmp_cs_nx; |
9807 | user_page_list[entry].mark = FALSE; |
9808 | } |
9809 | if (!is_kernel_object(object) && object != compressor_object) { |
9810 | /* |
9811 | * someone is explicitly grabbing this page... |
9812 | * update clustered and speculative state |
9813 | * |
9814 | */ |
9815 | if (dst_page->vmp_clustered) { |
9816 | VM_PAGE_CONSUME_CLUSTERED(dst_page); |
9817 | } |
9818 | } |
9819 | skip_page: |
9820 | entry++; |
9821 | dst_offset += PAGE_SIZE_64; |
9822 | xfer_size -= PAGE_SIZE; |
9823 | |
9824 | if (dwp->dw_mask) { |
9825 | VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); |
9826 | |
9827 | if (dw_count >= dw_limit) { |
vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
9829 | |
9830 | dwp = dwp_start; |
9831 | dw_count = 0; |
9832 | } |
9833 | } |
9834 | } |
9835 | assert(entry == size_in_pages); |
9836 | |
9837 | if (dw_count) { |
vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
9839 | dwp = dwp_start; |
9840 | dw_count = 0; |
9841 | } |
9842 | finish: |
9843 | if (user_page_list && set_cache_attr_needed == TRUE) { |
vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
9845 | } |
9846 | |
9847 | if (page_list_count != NULL) { |
9848 | if (upl->flags & UPL_INTERNAL) { |
9849 | *page_list_count = 0; |
9850 | } else if (*page_list_count > size_in_pages) { |
9851 | *page_list_count = size_in_pages; |
9852 | } |
9853 | } |
9854 | vm_object_unlock(object); |
9855 | |
9856 | if (cntrl_flags & UPL_BLOCK_ACCESS) { |
9857 | /* |
9858 | * We've marked all the pages "busy" so that future |
9859 | * page faults will block. |
9860 | * Now remove the mapping for these pages, so that they |
9861 | * can't be accessed without causing a page fault. |
9862 | */ |
vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
PMAP_NULL,
PAGE_SIZE,
0, VM_PROT_NONE);
9867 | assert(!object->blocked_access); |
9868 | object->blocked_access = TRUE; |
9869 | } |
9870 | |
9871 | VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0); |
9872 | #if DEVELOPMENT || DEBUG |
9873 | if (task != NULL) { |
9874 | ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count); |
9875 | } |
9876 | #endif /* DEVELOPMENT || DEBUG */ |
9877 | |
9878 | if (dwp_start && dwp_finish_ctx) { |
vm_page_delayed_work_finish_ctx(dwp_start);
9880 | dwp_start = dwp = NULL; |
9881 | } |
9882 | |
9883 | return KERN_SUCCESS; |
9884 | |
9885 | return_err: |
9886 | dw_index = 0; |
9887 | |
9888 | for (; offset < dst_offset; offset += PAGE_SIZE) { |
9889 | boolean_t need_unwire; |
9890 | |
9891 | dst_page = vm_page_lookup(object, offset); |
9892 | |
9893 | if (dst_page == VM_PAGE_NULL) { |
panic("vm_object_iopl_request: Wired page missing.");
9895 | } |
9896 | |
9897 | /* |
9898 | * if we've already processed this page in an earlier |
9899 | * dw_do_work, we need to undo the wiring... we will |
9900 | * leave the dirty and reference bits on if they |
9901 | * were set, since we don't have a good way of knowing |
9902 | * what the previous state was and we won't get here |
9903 | * under any normal circumstances... we will always |
9904 | * clear BUSY and wakeup any waiters via vm_page_free |
9905 | * or PAGE_WAKEUP_DONE |
9906 | */ |
9907 | need_unwire = TRUE; |
9908 | |
9909 | if (dw_count) { |
9910 | if ((dwp_start)[dw_index].dw_m == dst_page) { |
9911 | /* |
9912 | * still in the deferred work list |
9913 | * which means we haven't yet called |
9914 | * vm_page_wire on this page |
9915 | */ |
9916 | need_unwire = FALSE; |
9917 | |
9918 | dw_index++; |
9919 | dw_count--; |
9920 | } |
9921 | } |
9922 | vm_page_lock_queues(); |
9923 | |
9924 | if (dst_page->vmp_absent || free_wired_pages == TRUE) { |
vm_page_free(dst_page);
9926 | |
9927 | need_unwire = FALSE; |
9928 | } else { |
9929 | if (need_unwire == TRUE) { |
vm_page_unwire(dst_page, TRUE);
9931 | } |
9932 | |
9933 | PAGE_WAKEUP_DONE(dst_page); |
9934 | } |
9935 | vm_page_unlock_queues(); |
9936 | |
9937 | if (need_unwire == TRUE) { |
9938 | counter_inc(&vm_statistics_reactivations); |
9939 | } |
9940 | } |
9941 | #if UPL_DEBUG |
9942 | upl->upl_state = 2; |
9943 | #endif |
9944 | if (!(upl->flags & UPL_KERNEL_OBJECT)) { |
9945 | vm_object_activity_end(object); |
vm_object_collapse(object, 0, TRUE);
9947 | } |
9948 | vm_object_unlock(object); |
9949 | upl_destroy(upl); |
9950 | |
9951 | VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0); |
9952 | #if DEVELOPMENT || DEBUG |
9953 | if (task != NULL) { |
9954 | ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count); |
9955 | } |
9956 | #endif /* DEVELOPMENT || DEBUG */ |
9957 | |
9958 | if (dwp_start && dwp_finish_ctx) { |
vm_page_delayed_work_finish_ctx(dwp_start);
9960 | dwp_start = dwp = NULL; |
9961 | } |
9962 | return ret; |
9963 | } |
9964 | |
9965 | kern_return_t |
9966 | upl_transpose( |
9967 | upl_t upl1, |
9968 | upl_t upl2) |
9969 | { |
9970 | kern_return_t retval; |
9971 | boolean_t upls_locked; |
9972 | vm_object_t object1, object2; |
9973 | |
9974 | /* LD: Should mapped UPLs be eligible for a transpose? */ |
9975 | if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR) == UPL_VECTOR) || ((upl2->flags & UPL_VECTOR) == UPL_VECTOR)) { |
9976 | return KERN_INVALID_ARGUMENT; |
9977 | } |
9978 | |
9979 | upls_locked = FALSE; |
9980 | |
9981 | /* |
9982 | * Since we need to lock both UPLs at the same time, |
9983 | * avoid deadlocks by always taking locks in the same order. |
9984 | */ |
9985 | if (upl1 < upl2) { |
9986 | upl_lock(upl1); |
9987 | upl_lock(upl2); |
9988 | } else { |
9989 | upl_lock(upl2); |
9990 | upl_lock(upl1); |
9991 | } |
9992 | upls_locked = TRUE; /* the UPLs will need to be unlocked */ |
9993 | |
9994 | object1 = upl1->map_object; |
9995 | object2 = upl2->map_object; |
9996 | |
9997 | if (upl1->u_offset != 0 || upl2->u_offset != 0 || |
9998 | upl1->u_size != upl2->u_size) { |
9999 | /* |
10000 | * We deal only with full objects, not subsets. |
10001 | * That's because we exchange the entire backing store info |
10002 | * for the objects: pager, resident pages, etc... We can't do |
10003 | * only part of it. |
10004 | */ |
10005 | retval = KERN_INVALID_VALUE; |
10006 | goto done; |
10007 | } |
10008 | |
10009 | /* |
* Transpose the VM objects' backing store.
10011 | */ |
10012 | retval = vm_object_transpose(object1, object2, |
upl_adjusted_size(upl1, PAGE_MASK));
10014 | |
10015 | if (retval == KERN_SUCCESS) { |
10016 | /* |
10017 | * Make each UPL point to the correct VM object, i.e. the |
10018 | * object holding the pages that the UPL refers to... |
10019 | */ |
10020 | #if CONFIG_IOSCHED || UPL_DEBUG |
10021 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { |
10022 | vm_object_lock(object1); |
10023 | vm_object_lock(object2); |
10024 | } |
10025 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) { |
10026 | queue_remove(&object1->uplq, upl1, upl_t, uplq); |
10027 | } |
10028 | if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) { |
10029 | queue_remove(&object2->uplq, upl2, upl_t, uplq); |
10030 | } |
10031 | #endif |
10032 | upl1->map_object = object2; |
10033 | upl2->map_object = object1; |
10034 | |
10035 | #if CONFIG_IOSCHED || UPL_DEBUG |
10036 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) { |
10037 | queue_enter(&object2->uplq, upl1, upl_t, uplq); |
10038 | } |
10039 | if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) { |
10040 | queue_enter(&object1->uplq, upl2, upl_t, uplq); |
10041 | } |
10042 | if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { |
10043 | vm_object_unlock(object2); |
10044 | vm_object_unlock(object1); |
10045 | } |
10046 | #endif |
10047 | } |
10048 | |
10049 | done: |
10050 | /* |
10051 | * Cleanup. |
10052 | */ |
10053 | if (upls_locked) { |
10054 | upl_unlock(upl1); |
10055 | upl_unlock(upl2); |
10056 | upls_locked = FALSE; |
10057 | } |
10058 | |
10059 | return retval; |
10060 | } |
10061 | |
10062 | void |
10063 | upl_range_needed( |
10064 | upl_t upl, |
10065 | int index, |
10066 | int count) |
10067 | { |
10068 | int size_in_pages; |
10069 | |
10070 | if (!(upl->flags & UPL_INTERNAL) || count <= 0) { |
10071 | return; |
10072 | } |
10073 | |
10074 | size_in_pages = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE; |
10075 | |
10076 | while (count-- && index < size_in_pages) { |
10077 | upl->page_list[index++].needed = TRUE; |
10078 | } |
10079 | } |
10080 | |
10081 | |
10082 | /* |
10083 | * Reserve of virtual addresses in the kernel address space. |
10084 | * We need to map the physical pages in the kernel, so that we |
10085 | * can call the code-signing or slide routines with a kernel |
10086 | * virtual address. We keep this pool of pre-allocated kernel |
10087 | * virtual addresses so that we don't have to scan the kernel's |
* virtual address space each time we need to work with
10089 | * a physical page. |
10090 | */ |
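/*
 * Illustrative sketch of the pool layout (not additional kernel code;
 * the base address below is hypothetical): slot i of the pool covers
 * the kernel virtual range
 *     [vm_paging_base_address + i * PAGE_SIZE,
 *      vm_paging_base_address + (i + 1) * PAGE_SIZE)
 * so, with 16KB pages and a base of 0xffffff8000400000, slot 3 would
 * map 0xffffff800040c000..0xffffff800040ffff.  vm_paging_unmap_object()
 * recovers the slot index as
 *     i = (start - vm_paging_base_address) >> PAGE_SHIFT;
 * which matches the lookup loop in vm_paging_map_object() below.
 */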
10091 | SIMPLE_LOCK_DECLARE(vm_paging_lock, 0); |
10092 | #define VM_PAGING_NUM_PAGES 64 |
10093 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_paging_base_address = 0; |
10094 | bool vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; |
10095 | int vm_paging_max_index = 0; |
10096 | int vm_paging_page_waiter = 0; |
10097 | int vm_paging_page_waiter_total = 0; |
10098 | |
10099 | unsigned long vm_paging_no_kernel_page = 0; |
10100 | unsigned long vm_paging_objects_mapped = 0; |
10101 | unsigned long vm_paging_pages_mapped = 0; |
10102 | unsigned long vm_paging_objects_mapped_slow = 0; |
10103 | unsigned long vm_paging_pages_mapped_slow = 0; |
10104 | |
10105 | __startup_func |
10106 | static void |
10107 | vm_paging_map_init(void) |
10108 | { |
kmem_alloc(kernel_map, &vm_paging_base_address,
ptoa(VM_PAGING_NUM_PAGES),
KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_PAGEABLE,
10112 | VM_KERN_MEMORY_NONE); |
10113 | } |
10114 | STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_paging_map_init); |
10115 | |
10116 | /* |
10117 | * vm_paging_map_object: |
10118 | * Maps part of a VM object's pages in the kernel |
10119 | * virtual address space, using the pre-allocated |
10120 | * kernel virtual addresses, if possible. |
10121 | * Context: |
10122 | * The VM object is locked. This lock will get |
10123 | * dropped and re-acquired though, so the caller |
10124 | * must make sure the VM object is kept alive |
10125 | * (by holding a VM map that has a reference |
10126 | * on it, for example, or taking an extra reference). |
10127 | * The page should also be kept busy to prevent |
10128 | * it from being reclaimed. |
10129 | */ |
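/*
 * Minimal usage sketch (illustrative only, under the assumptions stated
 * above: the object is locked and referenced, the page is busy; "kva"
 * and "map_size" are hypothetical local names, not kernel API):
 *
 *	vm_map_size_t   map_size = PAGE_SIZE;
 *	vm_map_offset_t kva;
 *	boolean_t       need_unmap;
 *
 *	kr = vm_paging_map_object(page, object, offset, VM_PROT_READ,
 *	    FALSE, &map_size, &kva, &need_unmap);
 *	if (kr == KERN_SUCCESS) {
 *		... access the page contents through "kva" ...
 *		if (need_unmap) {
 *			vm_paging_unmap_object(object, kva, kva + map_size);
 *		}
 *	}
 */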
10130 | kern_return_t |
10131 | vm_paging_map_object( |
10132 | vm_page_t page, |
10133 | vm_object_t object, |
10134 | vm_object_offset_t offset, |
10135 | vm_prot_t protection, |
10136 | boolean_t can_unlock_object, |
10137 | vm_map_size_t *size, /* IN/OUT */ |
10138 | vm_map_offset_t *address, /* OUT */ |
10139 | boolean_t *need_unmap) /* OUT */ |
10140 | { |
10141 | kern_return_t kr; |
10142 | vm_map_offset_t page_map_offset; |
10143 | vm_map_size_t map_size; |
10144 | vm_object_offset_t object_offset; |
10145 | int i; |
10146 | |
10147 | if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { |
10148 | /* use permanent 1-to-1 kernel mapping of physical memory ? */ |
10149 | *address = (vm_map_offset_t) |
phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
10151 | *need_unmap = FALSE; |
10152 | return KERN_SUCCESS; |
10153 | |
10154 | assert(page->vmp_busy); |
10155 | /* |
10156 | * Use one of the pre-allocated kernel virtual addresses |
10157 | * and just enter the VM page in the kernel address space |
10158 | * at that virtual address. |
10159 | */ |
10160 | simple_lock(&vm_paging_lock, &vm_pageout_lck_grp); |
10161 | |
10162 | /* |
10163 | * Try and find an available kernel virtual address |
10164 | * from our pre-allocated pool. |
10165 | */ |
10166 | page_map_offset = 0; |
10167 | for (;;) { |
10168 | for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { |
10169 | if (vm_paging_page_inuse[i] == FALSE) { |
10170 | page_map_offset = |
10171 | vm_paging_base_address + |
10172 | (i * PAGE_SIZE); |
10173 | break; |
10174 | } |
10175 | } |
10176 | if (page_map_offset != 0) { |
10177 | /* found a space to map our page ! */ |
10178 | break; |
10179 | } |
10180 | |
10181 | if (can_unlock_object) { |
10182 | /* |
10183 | * If we can afford to unlock the VM object, |
10184 | * let's take the slow path now... |
10185 | */ |
10186 | break; |
10187 | } |
10188 | /* |
10189 | * We can't afford to unlock the VM object, so |
10190 | * let's wait for a space to become available... |
10191 | */ |
10192 | vm_paging_page_waiter_total++; |
10193 | vm_paging_page_waiter++; |
kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
10195 | if (kr == THREAD_WAITING) { |
10196 | simple_unlock(&vm_paging_lock); |
10197 | kr = thread_block(THREAD_CONTINUE_NULL); |
10198 | simple_lock(&vm_paging_lock, &vm_pageout_lck_grp); |
10199 | } |
10200 | vm_paging_page_waiter--; |
10201 | /* ... and try again */ |
10202 | } |
10203 | |
10204 | if (page_map_offset != 0) { |
10205 | /* |
10206 | * We found a kernel virtual address; |
10207 | * map the physical page to that virtual address. |
10208 | */ |
10209 | if (i > vm_paging_max_index) { |
10210 | vm_paging_max_index = i; |
10211 | } |
10212 | vm_paging_page_inuse[i] = TRUE; |
10213 | simple_unlock(&vm_paging_lock); |
10214 | |
10215 | page->vmp_pmapped = TRUE; |
10216 | |
10217 | /* |
10218 | * Keep the VM object locked over the PMAP_ENTER |
10219 | * and the actual use of the page by the kernel, |
10220 | * or this pmap mapping might get undone by a |
10221 | * vm_object_pmap_protect() call... |
10222 | */ |
kr = pmap_enter_check(kernel_pmap,
page_map_offset,
page,
protection,
VM_PROT_NONE,
0,
10229 | TRUE); |
10230 | assert(kr == KERN_SUCCESS); |
10231 | vm_paging_objects_mapped++; |
10232 | vm_paging_pages_mapped++; |
10233 | *address = page_map_offset; |
10234 | *need_unmap = TRUE; |
10235 | |
10236 | #if KASAN |
10237 | kasan_notify_address(page_map_offset, PAGE_SIZE); |
10238 | #endif |
10239 | |
10240 | /* all done and mapped, ready to use ! */ |
10241 | return KERN_SUCCESS; |
10242 | } |
10243 | |
10244 | /* |
10245 | * We ran out of pre-allocated kernel virtual |
10246 | * addresses. Just map the page in the kernel |
10247 | * the slow and regular way. |
10248 | */ |
10249 | vm_paging_no_kernel_page++; |
10250 | simple_unlock(&vm_paging_lock); |
10251 | } |
10252 | |
10253 | if (!can_unlock_object) { |
10254 | *address = 0; |
10255 | *size = 0; |
10256 | *need_unmap = FALSE; |
10257 | return KERN_NOT_SUPPORTED; |
10258 | } |
10259 | |
10260 | object_offset = vm_object_trunc_page(offset); |
10261 | map_size = vm_map_round_page(*size, |
10262 | VM_MAP_PAGE_MASK(kernel_map)); |
10263 | |
10264 | /* |
10265 | * Try and map the required range of the object |
10266 | * in the kernel_map. Given that allocation is |
10267 | * for pageable memory, it shouldn't contain |
10268 | * pointers and is mapped into the data range. |
10269 | */ |
10270 | |
10271 | vm_object_reference_locked(object); /* for the map entry */ |
10272 | vm_object_unlock(object); |
10273 | |
kr = vm_map_enter(kernel_map,
address,
map_size,
0,
VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(),
object,
object_offset,
FALSE,
protection,
10283 | VM_PROT_ALL, |
10284 | VM_INHERIT_NONE); |
10285 | if (kr != KERN_SUCCESS) { |
10286 | *address = 0; |
10287 | *size = 0; |
10288 | *need_unmap = FALSE; |
10289 | vm_object_deallocate(object); /* for the map entry */ |
10290 | vm_object_lock(object); |
10291 | return kr; |
10292 | } |
10293 | |
10294 | *size = map_size; |
10295 | |
10296 | /* |
10297 | * Enter the mapped pages in the page table now. |
10298 | */ |
10299 | vm_object_lock(object); |
10300 | /* |
10301 | * VM object must be kept locked from before PMAP_ENTER() |
10302 | * until after the kernel is done accessing the page(s). |
10303 | * Otherwise, the pmap mappings in the kernel could be |
10304 | * undone by a call to vm_object_pmap_protect(). |
10305 | */ |
10306 | |
10307 | for (page_map_offset = 0; |
10308 | map_size != 0; |
10309 | map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { |
page = vm_page_lookup(object, offset + page_map_offset);
10311 | if (page == VM_PAGE_NULL) { |
printf("vm_paging_map_object: no page !?");
10313 | vm_object_unlock(object); |
vm_map_remove(kernel_map, *address, *size);
10315 | *address = 0; |
10316 | *size = 0; |
10317 | *need_unmap = FALSE; |
10318 | vm_object_lock(object); |
10319 | return KERN_MEMORY_ERROR; |
10320 | } |
10321 | page->vmp_pmapped = TRUE; |
10322 | |
kr = pmap_enter_check(kernel_pmap,
*address + page_map_offset,
page,
protection,
VM_PROT_NONE,
0,
10329 | TRUE); |
10330 | assert(kr == KERN_SUCCESS); |
10331 | #if KASAN |
10332 | kasan_notify_address(*address + page_map_offset, PAGE_SIZE); |
10333 | #endif |
10334 | } |
10335 | |
10336 | vm_paging_objects_mapped_slow++; |
10337 | vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); |
10338 | |
10339 | *need_unmap = TRUE; |
10340 | |
10341 | return KERN_SUCCESS; |
10342 | } |
10343 | |
10344 | /* |
10345 | * vm_paging_unmap_object: |
10346 | * Unmaps part of a VM object's pages from the kernel |
10347 | * virtual address space. |
10348 | * Context: |
10349 | * The VM object is locked. This lock will get |
10350 | * dropped and re-acquired though. |
10351 | */ |
10352 | void |
10353 | vm_paging_unmap_object( |
10354 | vm_object_t object, |
10355 | vm_map_offset_t start, |
10356 | vm_map_offset_t end) |
10357 | { |
10358 | int i; |
10359 | |
10360 | if ((vm_paging_base_address == 0) || |
10361 | (start < vm_paging_base_address) || |
10362 | (end > (vm_paging_base_address |
10363 | + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { |
10364 | /* |
10365 | * We didn't use our pre-allocated pool of |
* kernel virtual addresses. Deallocate the
10367 | * virtual memory. |
10368 | */ |
10369 | if (object != VM_OBJECT_NULL) { |
10370 | vm_object_unlock(object); |
10371 | } |
vm_map_remove(kernel_map, start, end);
10373 | if (object != VM_OBJECT_NULL) { |
10374 | vm_object_lock(object); |
10375 | } |
10376 | } else { |
10377 | /* |
10378 | * We used a kernel virtual address from our |
10379 | * pre-allocated pool. Put it back in the pool |
10380 | * for next time. |
10381 | */ |
10382 | assert(end - start == PAGE_SIZE); |
10383 | i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); |
10384 | assert(i >= 0 && i < VM_PAGING_NUM_PAGES); |
10385 | |
10386 | /* undo the pmap mapping */ |
pmap_remove(kernel_pmap, start, end);
10388 | |
10389 | simple_lock(&vm_paging_lock, &vm_pageout_lck_grp); |
10390 | vm_paging_page_inuse[i] = FALSE; |
10391 | if (vm_paging_page_waiter) { |
10392 | thread_wakeup(&vm_paging_page_waiter); |
10393 | } |
10394 | simple_unlock(&vm_paging_lock); |
10395 | } |
10396 | } |
10397 | |
10398 | |
10399 | /* |
10400 | * page->vmp_object must be locked |
10401 | */ |
10402 | void |
10403 | vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) |
10404 | { |
10405 | if (!queues_locked) { |
10406 | vm_page_lockspin_queues(); |
10407 | } |
10408 | |
10409 | page->vmp_free_when_done = FALSE; |
10410 | /* |
10411 | * need to drop the laundry count... |
10412 | * we may also need to remove it |
10413 | * from the I/O paging queue... |
10414 | * vm_pageout_throttle_up handles both cases |
10415 | * |
10416 | * the laundry and pageout_queue flags are cleared... |
10417 | */ |
vm_pageout_throttle_up(page);
10419 | |
10420 | if (!queues_locked) { |
10421 | vm_page_unlock_queues(); |
10422 | } |
10423 | } |
10424 | |
10425 | #define VECTOR_UPL_ELEMENTS_UPPER_LIMIT 64 |
10426 | |
10427 | upl_t |
10428 | vector_upl_create(vm_offset_t upl_offset, uint32_t max_upls) |
10429 | { |
10430 | int i = 0; |
10431 | upl_t upl; |
10432 | |
10433 | assert(max_upls > 0); |
10434 | if (max_upls == 0) { |
10435 | return NULL; |
10436 | } |
10437 | |
10438 | if (max_upls > VECTOR_UPL_ELEMENTS_UPPER_LIMIT) { |
10439 | max_upls = VECTOR_UPL_ELEMENTS_UPPER_LIMIT; |
10440 | } |
10441 | vector_upl_t vector_upl = kalloc_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, Z_WAITOK | Z_NOFAIL); |
10442 | |
upl = upl_create(0, UPL_VECTOR, 0);
10444 | upl->vector_upl = vector_upl; |
10445 | upl->u_offset = upl_offset; |
10446 | vector_upl->size = 0; |
10447 | vector_upl->offset = upl_offset; |
10448 | vector_upl->invalid_upls = 0; |
10449 | vector_upl->num_upls = 0; |
10450 | vector_upl->pagelist = NULL; |
10451 | vector_upl->max_upls = max_upls; |
10452 | |
10453 | for (i = 0; i < max_upls; i++) { |
10454 | vector_upl->upls[i].iostate.size = 0; |
10455 | vector_upl->upls[i].iostate.offset = 0; |
10456 | } |
10457 | return upl; |
10458 | } |
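/*
 * Illustrative sketch of how a caller might assemble a vectored UPL
 * (the names "io_upl", "sub_upl", "io_size" and "cur_offset" are
 * hypothetical; the cluster I/O layer is the intended real caller).
 * It only shows the expected ordering of the vector_upl_* calls
 * defined below:
 *
 *	upl_t io_upl = vector_upl_create(0, 8);
 *	for each sub-request {
 *		... build "sub_upl" covering "io_size" bytes ...
 *		vector_upl_set_subupl(io_upl, sub_upl, io_size);
 *		vector_upl_set_iostate(io_upl, sub_upl, cur_offset, io_size);
 *	}
 *	vector_upl_set_pagelist(io_upl);   * after all sub-UPLs are added *
 */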
10459 | |
10460 | uint32_t |
10461 | vector_upl_max_upls(const upl_t upl) |
10462 | { |
10463 | if (!vector_upl_is_valid(upl)) { |
10464 | return 0; |
10465 | } |
10466 | return ((vector_upl_t)(upl->vector_upl))->max_upls; |
10467 | } |
10468 | |
10469 | void |
10470 | vector_upl_deallocate(upl_t upl) |
10471 | { |
10472 | vector_upl_t vector_upl = upl->vector_upl; |
10473 | |
10474 | assert(vector_upl_is_valid(upl)); |
10475 | |
10476 | if (vector_upl->invalid_upls != vector_upl->num_upls) { |
panic("Deallocating non-empty Vectored UPL");
10478 | } |
10479 | uint32_t max_upls = vector_upl->max_upls; |
10480 | kfree_type(struct upl_page_info, atop(vector_upl->size), vector_upl->pagelist); |
10481 | kfree_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, vector_upl); |
10482 | upl->vector_upl = NULL; |
10483 | } |
10484 | |
10485 | boolean_t |
10486 | vector_upl_is_valid(upl_t upl) |
10487 | { |
10488 | return upl && (upl->flags & UPL_VECTOR) && upl->vector_upl; |
10489 | } |
10490 | |
10491 | boolean_t |
10492 | vector_upl_set_subupl(upl_t upl, upl_t subupl, uint32_t io_size) |
10493 | { |
10494 | if (vector_upl_is_valid(upl)) { |
10495 | vector_upl_t vector_upl = upl->vector_upl; |
10496 | |
10497 | if (vector_upl) { |
10498 | if (subupl) { |
10499 | if (io_size) { |
10500 | if (io_size < PAGE_SIZE) { |
10501 | io_size = PAGE_SIZE; |
10502 | } |
10503 | subupl->vector_upl = (void*)vector_upl; |
10504 | vector_upl->upls[vector_upl->num_upls++].elem = subupl; |
10505 | vector_upl->size += io_size; |
10506 | upl->u_size += io_size; |
10507 | } else { |
10508 | uint32_t i = 0, invalid_upls = 0; |
10509 | for (i = 0; i < vector_upl->num_upls; i++) { |
10510 | if (vector_upl->upls[i].elem == subupl) { |
10511 | break; |
10512 | } |
10513 | } |
10514 | if (i == vector_upl->num_upls) { |
panic("Trying to remove sub-upl when none exists");
10516 | } |
10517 | |
10518 | vector_upl->upls[i].elem = NULL; |
10519 | invalid_upls = os_atomic_inc(&(vector_upl)->invalid_upls, |
10520 | relaxed); |
10521 | if (invalid_upls == vector_upl->num_upls) { |
10522 | return TRUE; |
10523 | } else { |
10524 | return FALSE; |
10525 | } |
10526 | } |
10527 | } else { |
panic("vector_upl_set_subupl was passed a NULL upl element");
}
} else {
panic("vector_upl_set_subupl was passed a non-vectored upl");
}
} else {
panic("vector_upl_set_subupl was passed a NULL upl");
10535 | } |
10536 | |
10537 | return FALSE; |
10538 | } |
10539 | |
10540 | void |
10541 | vector_upl_set_pagelist(upl_t upl) |
10542 | { |
10543 | if (vector_upl_is_valid(upl)) { |
10544 | uint32_t i = 0; |
10545 | vector_upl_t vector_upl = upl->vector_upl; |
10546 | |
10547 | if (vector_upl) { |
10548 | vm_offset_t pagelist_size = 0, cur_upl_pagelist_size = 0; |
10549 | |
10550 | vector_upl->pagelist = kalloc_type(struct upl_page_info, |
10551 | atop(vector_upl->size), Z_WAITOK); |
10552 | |
10553 | for (i = 0; i < vector_upl->num_upls; i++) { |
cur_upl_pagelist_size = sizeof(struct upl_page_info) * upl_adjusted_size(vector_upl->upls[i].elem, PAGE_MASK) / PAGE_SIZE;
bcopy(vector_upl->upls[i].elem->page_list, (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
10556 | pagelist_size += cur_upl_pagelist_size; |
10557 | if (vector_upl->upls[i].elem->highest_page > upl->highest_page) { |
10558 | upl->highest_page = vector_upl->upls[i].elem->highest_page; |
10559 | } |
10560 | } |
10561 | assert( pagelist_size == (sizeof(struct upl_page_info) * (vector_upl->size / PAGE_SIZE))); |
10562 | } else { |
panic("vector_upl_set_pagelist was passed a non-vectored upl");
}
} else {
panic("vector_upl_set_pagelist was passed a NULL upl");
10567 | } |
10568 | } |
10569 | |
10570 | upl_t |
10571 | vector_upl_subupl_byindex(upl_t upl, uint32_t index) |
10572 | { |
10573 | if (vector_upl_is_valid(upl)) { |
10574 | vector_upl_t vector_upl = upl->vector_upl; |
10575 | if (vector_upl) { |
10576 | if (index < vector_upl->num_upls) { |
10577 | return vector_upl->upls[index].elem; |
10578 | } |
10579 | } else { |
panic("vector_upl_subupl_byindex was passed a non-vectored upl");
10581 | } |
10582 | } |
10583 | return NULL; |
10584 | } |
10585 | |
10586 | upl_t |
10587 | vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) |
10588 | { |
10589 | if (vector_upl_is_valid(upl)) { |
10590 | uint32_t i = 0; |
10591 | vector_upl_t vector_upl = upl->vector_upl; |
10592 | |
10593 | if (vector_upl) { |
10594 | upl_t subupl = NULL; |
10595 | vector_upl_iostates_t subupl_state; |
10596 | |
10597 | for (i = 0; i < vector_upl->num_upls; i++) { |
10598 | subupl = vector_upl->upls[i].elem; |
10599 | subupl_state = vector_upl->upls[i].iostate; |
10600 | if (*upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { |
10601 | /* We could have been passed an offset/size pair that belongs |
* to a UPL element that has already been committed/aborted.
10603 | * If so, return NULL. |
10604 | */ |
10605 | if (subupl == NULL) { |
10606 | return NULL; |
10607 | } |
10608 | if ((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { |
10609 | *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; |
10610 | if (*upl_size > subupl_state.size) { |
10611 | *upl_size = subupl_state.size; |
10612 | } |
10613 | } |
10614 | if (*upl_offset >= subupl_state.offset) { |
10615 | *upl_offset -= subupl_state.offset; |
10616 | } else if (i) { |
panic("Vector UPL offset miscalculation");
10618 | } |
10619 | return subupl; |
10620 | } |
10621 | } |
10622 | } else { |
panic("vector_upl_subupl_byoffset was passed a non-vectored UPL");
10624 | } |
10625 | } |
10626 | return NULL; |
10627 | } |
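/*
 * Worked example for vector_upl_subupl_byoffset() (hypothetical numbers):
 * with two sub-UPLs whose iostates are {offset 0x0000, size 0x4000} and
 * {offset 0x4000, size 0x4000}, a request with *upl_offset == 0x5000
 * falls within the second entry, so the function returns that sub-UPL,
 * rewrites *upl_offset to 0x1000 (0x5000 - 0x4000), and clamps *upl_size
 * so it does not run past the end of that sub-UPL's iostate.
 */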
10628 | |
10629 | void |
10630 | vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) |
10631 | { |
10632 | *v_upl_submap = NULL; |
10633 | |
10634 | if (vector_upl_is_valid(upl)) { |
10635 | vector_upl_t vector_upl = upl->vector_upl; |
10636 | if (vector_upl) { |
10637 | *v_upl_submap = vector_upl->submap; |
10638 | *submap_dst_addr = vector_upl->submap_dst_addr; |
10639 | } else { |
panic("vector_upl_get_submap was passed a non-vectored UPL");
}
} else {
panic("vector_upl_get_submap was passed a NULL UPL");
10644 | } |
10645 | } |
10646 | |
10647 | void |
10648 | vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) |
10649 | { |
10650 | if (vector_upl_is_valid(upl)) { |
10651 | vector_upl_t vector_upl = upl->vector_upl; |
10652 | if (vector_upl) { |
10653 | vector_upl->submap = submap; |
10654 | vector_upl->submap_dst_addr = submap_dst_addr; |
10655 | } else { |
panic("vector_upl_set_submap was passed a non-vectored UPL");
}
} else {
panic("vector_upl_set_submap was passed a NULL UPL");
10660 | } |
10661 | } |
10662 | |
10663 | void |
10664 | vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) |
10665 | { |
10666 | if (vector_upl_is_valid(upl)) { |
10667 | uint32_t i = 0; |
10668 | vector_upl_t vector_upl = upl->vector_upl; |
10669 | |
10670 | if (vector_upl) { |
10671 | for (i = 0; i < vector_upl->num_upls; i++) { |
10672 | if (vector_upl->upls[i].elem == subupl) { |
10673 | break; |
10674 | } |
10675 | } |
10676 | |
10677 | if (i == vector_upl->num_upls) { |
panic("setting sub-upl iostate when none exists");
10679 | } |
10680 | |
10681 | vector_upl->upls[i].iostate.offset = offset; |
10682 | if (size < PAGE_SIZE) { |
10683 | size = PAGE_SIZE; |
10684 | } |
10685 | vector_upl->upls[i].iostate.size = size; |
10686 | } else { |
panic("vector_upl_set_iostate was passed a non-vectored UPL");
}
} else {
panic("vector_upl_set_iostate was passed a NULL UPL");
10691 | } |
10692 | } |
10693 | |
10694 | void |
10695 | vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size) |
10696 | { |
10697 | if (vector_upl_is_valid(upl)) { |
10698 | uint32_t i = 0; |
10699 | vector_upl_t vector_upl = upl->vector_upl; |
10700 | |
10701 | if (vector_upl) { |
10702 | for (i = 0; i < vector_upl->num_upls; i++) { |
10703 | if (vector_upl->upls[i].elem == subupl) { |
10704 | break; |
10705 | } |
10706 | } |
10707 | |
10708 | if (i == vector_upl->num_upls) { |
panic("getting sub-upl iostate when none exists");
10710 | } |
10711 | |
10712 | *offset = vector_upl->upls[i].iostate.offset; |
10713 | *size = vector_upl->upls[i].iostate.size; |
10714 | } else { |
panic("vector_upl_get_iostate was passed a non-vectored UPL");
}
} else {
panic("vector_upl_get_iostate was passed a NULL UPL");
10719 | } |
10720 | } |
10721 | |
10722 | void |
10723 | vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size) |
10724 | { |
10725 | if (vector_upl_is_valid(upl)) { |
10726 | vector_upl_t vector_upl = upl->vector_upl; |
10727 | if (vector_upl) { |
10728 | if (index < vector_upl->num_upls) { |
10729 | *offset = vector_upl->upls[index].iostate.offset; |
10730 | *size = vector_upl->upls[index].iostate.size; |
10731 | } else { |
10732 | *offset = *size = 0; |
10733 | } |
10734 | } else { |
panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL");
}
} else {
panic("vector_upl_get_iostate_byindex was passed a NULL UPL");
10739 | } |
10740 | } |
10741 | |
10742 | void * |
10743 | upl_get_internal_vectorupl(upl_t upl) |
10744 | { |
10745 | return upl->vector_upl; |
10746 | } |
10747 | |
10748 | upl_page_info_t * |
10749 | upl_get_internal_vectorupl_pagelist(upl_t upl) |
10750 | { |
10751 | return upl->vector_upl->pagelist; |
10752 | } |
10753 | |
10754 | upl_page_info_t * |
10755 | upl_get_internal_page_list(upl_t upl) |
10756 | { |
10757 | return upl->vector_upl ? upl->vector_upl->pagelist : upl->page_list; |
10758 | } |
10759 | |
10760 | void |
10761 | upl_clear_dirty( |
10762 | upl_t upl, |
10763 | boolean_t value) |
10764 | { |
10765 | if (value) { |
10766 | upl->flags |= UPL_CLEAR_DIRTY; |
10767 | } else { |
10768 | upl->flags &= ~UPL_CLEAR_DIRTY; |
10769 | } |
10770 | } |
10771 | |
10772 | void |
10773 | upl_set_referenced( |
10774 | upl_t upl, |
10775 | boolean_t value) |
10776 | { |
10777 | upl_lock(upl); |
10778 | if (value) { |
10779 | upl->ext_ref_count++; |
10780 | } else { |
10781 | if (!upl->ext_ref_count) { |
panic("upl_set_referenced not %p", upl);
10783 | } |
10784 | upl->ext_ref_count--; |
10785 | } |
10786 | upl_unlock(upl); |
10787 | } |
10788 | |
10789 | #if CONFIG_IOSCHED |
10790 | void |
10791 | upl_set_blkno( |
10792 | upl_t upl, |
10793 | vm_offset_t upl_offset, |
10794 | int io_size, |
10795 | int64_t blkno) |
10796 | { |
10797 | int i, j; |
10798 | if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) { |
10799 | return; |
10800 | } |
10801 | |
10802 | assert(upl->upl_reprio_info != 0); |
10803 | for (i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) { |
10804 | UPL_SET_REPRIO_INFO(upl, i, blkno, io_size); |
10805 | } |
10806 | } |
10807 | #endif |
10808 | |
10809 | void inline |
10810 | memoryshot(unsigned int event, unsigned int control) |
10811 | { |
10812 | if (vm_debug_events) { |
10813 | KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control, |
10814 | vm_page_active_count, vm_page_inactive_count, |
10815 | vm_page_free_count, vm_page_speculative_count, |
10816 | vm_page_throttled_count); |
10817 | } else { |
10818 | (void) event; |
10819 | (void) control; |
10820 | } |
10821 | } |
10822 | |
10823 | #ifdef MACH_BSD |
10824 | |
10825 | boolean_t |
10826 | upl_device_page(upl_page_info_t *upl) |
10827 | { |
10828 | return UPL_DEVICE_PAGE(upl); |
10829 | } |
10830 | boolean_t |
10831 | upl_page_present(upl_page_info_t *upl, int index) |
10832 | { |
10833 | return UPL_PAGE_PRESENT(upl, index); |
10834 | } |
10835 | boolean_t |
10836 | upl_speculative_page(upl_page_info_t *upl, int index) |
10837 | { |
10838 | return UPL_SPECULATIVE_PAGE(upl, index); |
10839 | } |
10840 | boolean_t |
10841 | upl_dirty_page(upl_page_info_t *upl, int index) |
10842 | { |
10843 | return UPL_DIRTY_PAGE(upl, index); |
10844 | } |
10845 | boolean_t |
10846 | upl_valid_page(upl_page_info_t *upl, int index) |
10847 | { |
10848 | return UPL_VALID_PAGE(upl, index); |
10849 | } |
10850 | ppnum_t |
10851 | upl_phys_page(upl_page_info_t *upl, int index) |
10852 | { |
10853 | return UPL_PHYS_PAGE(upl, index); |
10854 | } |
10855 | |
10856 | void |
10857 | upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v) |
10858 | { |
10859 | upl[index].mark = v; |
10860 | } |
10861 | |
10862 | boolean_t |
10863 | upl_page_get_mark(upl_page_info_t *upl, int index) |
10864 | { |
10865 | return upl[index].mark; |
10866 | } |
10867 | |
10868 | void |
10869 | vm_countdirtypages(void) |
10870 | { |
10871 | vm_page_t m; |
10872 | int dpages; |
10873 | int pgopages; |
10874 | int precpages; |
10875 | |
10876 | |
10877 | dpages = 0; |
10878 | pgopages = 0; |
10879 | precpages = 0; |
10880 | |
10881 | vm_page_lock_queues(); |
10882 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive); |
10883 | do { |
10884 | if (m == (vm_page_t)0) { |
10885 | break; |
10886 | } |
10887 | |
10888 | if (m->vmp_dirty) { |
10889 | dpages++; |
10890 | } |
10891 | if (m->vmp_free_when_done) { |
10892 | pgopages++; |
10893 | } |
10894 | if (m->vmp_precious) { |
10895 | precpages++; |
10896 | } |
10897 | |
10898 | assert(!is_kernel_object(VM_PAGE_OBJECT(m))); |
10899 | m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq); |
10900 | if (m == (vm_page_t)0) { |
10901 | break; |
10902 | } |
10903 | } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m)); |
10904 | vm_page_unlock_queues(); |
10905 | |
10906 | vm_page_lock_queues(); |
10907 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled); |
10908 | do { |
10909 | if (m == (vm_page_t)0) { |
10910 | break; |
10911 | } |
10912 | |
10913 | dpages++; |
10914 | assert(m->vmp_dirty); |
10915 | assert(!m->vmp_free_when_done); |
10916 | assert(!is_kernel_object(VM_PAGE_OBJECT(m))); |
10917 | m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq); |
10918 | if (m == (vm_page_t)0) { |
10919 | break; |
10920 | } |
10921 | } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m)); |
10922 | vm_page_unlock_queues(); |
10923 | |
10924 | vm_page_lock_queues(); |
10925 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous); |
10926 | do { |
10927 | if (m == (vm_page_t)0) { |
10928 | break; |
10929 | } |
10930 | |
10931 | if (m->vmp_dirty) { |
10932 | dpages++; |
10933 | } |
10934 | if (m->vmp_free_when_done) { |
10935 | pgopages++; |
10936 | } |
10937 | if (m->vmp_precious) { |
10938 | precpages++; |
10939 | } |
10940 | |
10941 | assert(!is_kernel_object(VM_PAGE_OBJECT(m))); |
10942 | m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq); |
10943 | if (m == (vm_page_t)0) { |
10944 | break; |
10945 | } |
10946 | } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m)); |
10947 | vm_page_unlock_queues(); |
10948 | |
printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
10950 | |
10951 | dpages = 0; |
10952 | pgopages = 0; |
10953 | precpages = 0; |
10954 | |
10955 | vm_page_lock_queues(); |
10956 | m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active); |
10957 | |
10958 | do { |
10959 | if (m == (vm_page_t)0) { |
10960 | break; |
10961 | } |
10962 | if (m->vmp_dirty) { |
10963 | dpages++; |
10964 | } |
10965 | if (m->vmp_free_when_done) { |
10966 | pgopages++; |
10967 | } |
10968 | if (m->vmp_precious) { |
10969 | precpages++; |
10970 | } |
10971 | |
10972 | assert(!is_kernel_object(VM_PAGE_OBJECT(m))); |
10973 | m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq); |
10974 | if (m == (vm_page_t)0) { |
10975 | break; |
10976 | } |
10977 | } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m)); |
10978 | vm_page_unlock_queues(); |
10979 | |
printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
10981 | } |
10982 | #endif /* MACH_BSD */ |
10983 | |
10984 | |
10985 | #if CONFIG_IOSCHED |
10986 | int |
10987 | upl_get_cached_tier(upl_t upl) |
10988 | { |
10989 | assert(upl); |
10990 | if (upl->flags & UPL_TRACKED_BY_OBJECT) { |
10991 | return upl->upl_priority; |
10992 | } |
10993 | return -1; |
10994 | } |
10995 | #endif /* CONFIG_IOSCHED */ |
10996 | |
10997 | |
10998 | void |
10999 | upl_callout_iodone(upl_t upl) |
11000 | { |
11001 | struct upl_io_completion *upl_ctx = upl->upl_iodone; |
11002 | |
11003 | if (upl_ctx) { |
11004 | void (*iodone_func)(void *, int) = upl_ctx->io_done; |
11005 | |
11006 | assert(upl_ctx->io_done); |
11007 | |
11008 | (*iodone_func)(upl_ctx->io_context, upl_ctx->io_error); |
11009 | } |
11010 | } |
11011 | |
11012 | void |
11013 | upl_set_iodone(upl_t upl, void *upl_iodone) |
11014 | { |
11015 | upl->upl_iodone = (struct upl_io_completion *)upl_iodone; |
11016 | } |
11017 | |
11018 | void |
11019 | upl_set_iodone_error(upl_t upl, int error) |
11020 | { |
11021 | struct upl_io_completion *upl_ctx = upl->upl_iodone; |
11022 | |
11023 | if (upl_ctx) { |
11024 | upl_ctx->io_error = error; |
11025 | } |
11026 | } |
11027 | |
11028 | |
11029 | ppnum_t |
11030 | upl_get_highest_page( |
11031 | upl_t upl) |
11032 | { |
11033 | return upl->highest_page; |
11034 | } |
11035 | |
11036 | upl_size_t |
11037 | upl_get_size( |
11038 | upl_t upl) |
11039 | { |
11040 | return upl_adjusted_size(upl, PAGE_MASK); |
11041 | } |
11042 | |
11043 | upl_size_t |
11044 | upl_adjusted_size( |
11045 | upl_t upl, |
11046 | vm_map_offset_t pgmask) |
11047 | { |
11048 | vm_object_offset_t start_offset, end_offset; |
11049 | |
11050 | start_offset = trunc_page_mask_64(upl->u_offset, pgmask); |
11051 | end_offset = round_page_mask_64(upl->u_offset + upl->u_size, pgmask); |
11052 | |
11053 | return (upl_size_t)(end_offset - start_offset); |
11054 | } |
11055 | |
11056 | vm_object_offset_t |
11057 | upl_adjusted_offset( |
11058 | upl_t upl, |
11059 | vm_map_offset_t pgmask) |
11060 | { |
11061 | return trunc_page_mask_64(upl->u_offset, pgmask); |
11062 | } |
11063 | |
11064 | vm_object_offset_t |
11065 | upl_get_data_offset( |
11066 | upl_t upl) |
11067 | { |
11068 | return upl->u_offset - upl_adjusted_offset(upl, PAGE_MASK); |
11069 | } |
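/*
 * Worked example for the three accessors above (hypothetical values):
 * with u_offset == 0x1800 and u_size == 0x2000 on a 4KB-page
 * configuration (PAGE_MASK == 0xfff), upl_adjusted_offset() truncates
 * to 0x1000, upl_adjusted_size() spans round_page(0x3800) -
 * trunc_page(0x1800) == 0x3000, and upl_get_data_offset() reports the
 * intra-page start 0x800 (0x1800 - 0x1000).
 */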
11070 | |
11071 | upl_t |
11072 | upl_associated_upl(upl_t upl) |
11073 | { |
11074 | return upl->associated_upl; |
11075 | } |
11076 | |
11077 | void |
11078 | upl_set_associated_upl(upl_t upl, upl_t associated_upl) |
11079 | { |
11080 | upl->associated_upl = associated_upl; |
11081 | } |
11082 | |
11083 | struct vnode * |
11084 | upl_lookup_vnode(upl_t upl) |
11085 | { |
11086 | if (!upl->map_object->internal) { |
11087 | return vnode_pager_lookup_vnode(upl->map_object->pager); |
11088 | } else { |
11089 | return NULL; |
11090 | } |
11091 | } |
11092 | |
11093 | #if UPL_DEBUG |
11094 | kern_return_t |
11095 | upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2) |
11096 | { |
11097 | upl->ubc_alias1 = alias1; |
11098 | upl->ubc_alias2 = alias2; |
11099 | return KERN_SUCCESS; |
11100 | } |
11101 | int |
11102 | upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2) |
11103 | { |
11104 | if (al) { |
11105 | *al = upl->ubc_alias1; |
11106 | } |
11107 | if (al2) { |
11108 | *al2 = upl->ubc_alias2; |
11109 | } |
11110 | return KERN_SUCCESS; |
11111 | } |
11112 | #endif /* UPL_DEBUG */ |
11113 | |
11114 | #if VM_PRESSURE_EVENTS |
11115 | /* |
11116 | * Upward trajectory. |
11117 | */ |
11118 | extern boolean_t vm_compressor_low_on_space(void); |
11119 | |
11120 | boolean_t |
11121 | VM_PRESSURE_NORMAL_TO_WARNING(void) |
11122 | { |
11123 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
11124 | /* Available pages below our threshold */ |
11125 | if (memorystatus_available_pages < memorystatus_available_pages_pressure) { |
11126 | /* No frozen processes to kill */ |
11127 | if (memorystatus_frozen_count == 0) { |
11128 | /* Not enough suspended processes available. */ |
11129 | if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) { |
11130 | return TRUE; |
11131 | } |
11132 | } |
11133 | } |
11134 | return FALSE; |
11135 | } else { |
11136 | return (AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0; |
11137 | } |
11138 | } |
11139 | |
11140 | boolean_t |
11141 | VM_PRESSURE_WARNING_TO_CRITICAL(void) |
11142 | { |
11143 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
11144 | /* Available pages below our threshold */ |
11145 | if (memorystatus_available_pages < memorystatus_available_pages_critical) { |
11146 | return TRUE; |
11147 | } |
11148 | return FALSE; |
11149 | } else { |
11150 | return vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0; |
11151 | } |
11152 | } |
11153 | |
11154 | /* |
11155 | * Downward trajectory. |
11156 | */ |
11157 | boolean_t |
11158 | VM_PRESSURE_WARNING_TO_NORMAL(void) |
11159 | { |
11160 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
11161 | /* Available pages above our threshold */ |
11162 | unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100)); |
11163 | if (memorystatus_available_pages > target_threshold) { |
11164 | return TRUE; |
11165 | } |
11166 | return FALSE; |
11167 | } else { |
11168 | return (AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0; |
11169 | } |
11170 | } |
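/*
 * Worked example of the 15% hysteresis above (hypothetical numbers,
 * non-compressor path): if memorystatus_available_pages_pressure is
 * 10000 pages, VM_PRESSURE_NORMAL_TO_WARNING() can fire once the
 * available count drops below 10000 (subject to its frozen/suspended
 * checks), but VM_PRESSURE_WARNING_TO_NORMAL() waits until it climbs
 * back above 10000 + 1500 == 11500, so the system does not oscillate
 * between the two states around a single threshold.
 */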
11171 | |
11172 | boolean_t |
11173 | VM_PRESSURE_CRITICAL_TO_WARNING(void) |
11174 | { |
11175 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
11176 | /* Available pages above our threshold */ |
11177 | unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100)); |
11178 | if (memorystatus_available_pages > target_threshold) { |
11179 | return TRUE; |
11180 | } |
11181 | return FALSE; |
11182 | } else { |
11183 | return (AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0; |
11184 | } |
11185 | } |
11186 | #endif /* VM_PRESSURE_EVENTS */ |
11187 | |
11188 | #if DEVELOPMENT || DEBUG |
11189 | bool compressor_running_perf_test; |
11190 | uint64_t compressor_perf_test_pages_processed; |
11191 | |
11192 | kern_return_t |
11193 | run_compressor_perf_test( |
11194 | user_addr_t buf, |
11195 | size_t buffer_size, |
11196 | uint64_t *time, |
11197 | uint64_t *bytes_compressed, |
11198 | uint64_t *compressor_growth); |
11199 | |
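/*
 * Walk [start_addr, start_addr + buffer_size) in `map', pull every resident
 * page in that range off its paging queue and collect it on `queue' for the
 * benchmark. Only top-level, unwired, anonymous memory is accepted; anything
 * else fails with KERN_INVALID_ARGUMENT.
 */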
11200 | static kern_return_t |
11201 | move_pages_to_queue( |
11202 | vm_map_t map, |
11203 | user_addr_t start_addr, |
11204 | size_t buffer_size, |
11205 | vm_page_queue_head_t *queue, |
11206 | size_t *pages_moved) |
11207 | { |
11208 | kern_return_t err = KERN_SUCCESS; |
11209 | vm_map_entry_t curr_entry = VM_MAP_ENTRY_NULL; |
11210 | boolean_t addr_in_map = FALSE; |
11211 | user_addr_t end_addr = USER_ADDR_NULL, curr_addr = USER_ADDR_NULL; |
11212 | vm_object_t curr_object = VM_OBJECT_NULL; |
11213 | *pages_moved = 0; |
11214 | |
11215 | |
11216 | if (VM_MAP_PAGE_SIZE(map) != PAGE_SIZE_64) { |
11217 | /* |
11218 | * We don't currently support benchmarking maps with a different page size |
11219 | * than the kernel. |
11220 | */ |
11221 | return KERN_INVALID_ARGUMENT; |
11222 | } |
11223 | |
11224 | if (os_add_overflow(start_addr, buffer_size, &end_addr)) { |
11225 | return KERN_INVALID_ARGUMENT; |
11226 | } |
11227 | |
11228 | vm_map_lock_read(map); |
11229 | curr_addr = vm_map_trunc_page_mask(start_addr, VM_MAP_PAGE_MASK(map)); |
11230 | end_addr = vm_map_round_page_mask(start_addr + buffer_size, VM_MAP_PAGE_MASK(map)); |
11231 | |
11232 | |
11233 | while (curr_addr < end_addr) { |
11234 | addr_in_map = vm_map_lookup_entry(map, curr_addr, &curr_entry); |
11235 | if (!addr_in_map) { |
11236 | err = KERN_INVALID_ARGUMENT; |
11237 | break; |
11238 | } |
11239 | curr_object = VME_OBJECT(curr_entry); |
11240 | if (curr_object) { |
11241 | vm_object_lock(curr_object); |
11242 | /* We really only want anonymous memory that's in the top level map and object here. */ |
11243 | if (curr_entry->is_sub_map || curr_entry->wired_count != 0 || |
11244 | curr_object->shadow != VM_OBJECT_NULL || !curr_object->internal) { |
11245 | err = KERN_INVALID_ARGUMENT; |
11246 | vm_object_unlock(curr_object); |
11247 | break; |
11248 | } |
11249 | vm_map_offset_t start_offset = (curr_addr - curr_entry->vme_start) + VME_OFFSET(curr_entry); |
			/*
			 * Mirror the start_offset computation above: the object offset
			 * corresponding to the end of this entry's portion of the range.
			 */
			vm_map_offset_t end_offset = (MIN(curr_entry->vme_end, end_addr) -
			    curr_entry->vme_start) + VME_OFFSET(curr_entry);
11252 | vm_map_offset_t curr_offset = start_offset; |
11253 | vm_page_t curr_page; |
11254 | while (curr_offset < end_offset) { |
11255 | curr_page = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset)); |
11256 | if (curr_page != VM_PAGE_NULL) { |
11257 | vm_page_lock_queues(); |
11258 | if (curr_page->vmp_laundry) { |
11259 | vm_pageout_steal_laundry(curr_page, TRUE); |
11260 | } |
11261 | /* |
11262 | * we've already factored out pages in the laundry which |
11263 | * means this page can't be on the pageout queue so it's |
11264 | * safe to do the vm_page_queues_remove |
11265 | */ |
11266 | bool donate = (curr_page->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE); |
11267 | vm_page_queues_remove(curr_page, TRUE); |
11268 | if (donate) { |
11269 | /* |
11270 | * The compressor needs to see this bit to know |
11271 | * where this page needs to land. Also if stolen, |
11272 | * this bit helps put the page back in the right |
11273 | * special queue where it belongs. |
11274 | */ |
11275 | curr_page->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE; |
11276 | } |
11277 | // Clear the referenced bit so we ensure this gets paged out |
11278 | curr_page->vmp_reference = false; |
11279 | if (curr_page->vmp_pmapped) { |
11280 | pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(curr_page), |
11281 | VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void*)NULL); |
11282 | } |
11283 | vm_page_queue_enter(queue, curr_page, vmp_pageq); |
11284 | vm_page_unlock_queues(); |
11285 | *pages_moved += 1; |
11286 | } |
11287 | curr_offset += PAGE_SIZE_64; |
11288 | curr_addr += PAGE_SIZE_64; |
11289 | } |
11290 | } |
11291 | vm_object_unlock(curr_object); |
11292 | } |
11293 | vm_map_unlock_read(map); |
11294 | return err; |
11295 | } |
11296 | |
11297 | /* |
11298 | * Local queue for processing benchmark pages. |
11299 | * Can't be allocated on the stack because the pointer has to |
11300 | * be packable. |
11301 | */ |
11302 | vm_page_queue_head_t compressor_perf_test_queue VM_PAGE_PACKED_ALIGNED; |
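/*
 * Push the caller's buffer through the compressor via the normal pageout
 * path and report the elapsed time, the number of bytes submitted, and how
 * much the compressor pool grew. DEVELOPMENT/DEBUG kernels only, and only
 * for user tasks with the compressor active.
 */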
11303 | kern_return_t |
11304 | run_compressor_perf_test( |
11305 | user_addr_t buf, |
11306 | size_t buffer_size, |
11307 | uint64_t *time, |
11308 | uint64_t *bytes_compressed, |
11309 | uint64_t *compressor_growth) |
11310 | { |
11311 | kern_return_t err = KERN_SUCCESS; |
11312 | if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) { |
11313 | return KERN_NOT_SUPPORTED; |
11314 | } |
11315 | if (current_task() == kernel_task) { |
11316 | return KERN_INVALID_ARGUMENT; |
11317 | } |
11318 | vm_page_lock_queues(); |
11319 | if (compressor_running_perf_test) { |
11320 | /* Only run one instance of the benchmark at a time. */ |
11321 | vm_page_unlock_queues(); |
11322 | return KERN_RESOURCE_SHORTAGE; |
11323 | } |
11324 | vm_page_unlock_queues(); |
11325 | size_t page_count = 0; |
11326 | vm_map_t map; |
11327 | vm_page_t p, next; |
11328 | uint64_t compressor_perf_test_start = 0, compressor_perf_test_end = 0; |
11329 | uint64_t compressed_bytes_start = 0, compressed_bytes_end = 0; |
11330 | *bytes_compressed = *compressor_growth = 0; |
11331 | |
11332 | vm_page_queue_init(&compressor_perf_test_queue); |
11333 | map = current_task()->map; |
11334 | err = move_pages_to_queue(map, buf, buffer_size, &compressor_perf_test_queue, &page_count); |
11335 | if (err != KERN_SUCCESS) { |
11336 | goto out; |
11337 | } |
11338 | |
11339 | vm_page_lock_queues(); |
11340 | compressor_running_perf_test = true; |
11341 | compressor_perf_test_pages_processed = 0; |
11342 | /* |
11343 | * At this point the compressor threads should only process the benchmark queue |
11344 | * so we can look at the difference in c_segment_compressed_bytes while the perf test is running |
11345 | * to determine how many compressed bytes we ended up using. |
11346 | */ |
11347 | compressed_bytes_start = c_segment_compressed_bytes; |
11348 | vm_page_unlock_queues(); |
11349 | |
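	/*
	 * Hand the collected pages to the pageout/compressor path. The returned
	 * count (which may differ from the number we collected) is what the wait
	 * loop below compares against.
	 */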
11350 | page_count = vm_pageout_page_queue(&compressor_perf_test_queue, page_count, true); |
11351 | |
11352 | vm_page_lock_queues(); |
11353 | compressor_perf_test_start = mach_absolute_time(); |
11354 | |
11355 | // Wake up the compressor thread(s) |
11356 | sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup, |
11357 | pgo_iothread_internal_state[0].pgo_iothread); |
11358 | |
11359 | /* |
11360 | * Depending on when this test is run we could overshoot or be right on the mark |
11361 | * with our page_count. So the comparison is of the _less than_ variety. |
11362 | */ |
11363 | while (compressor_perf_test_pages_processed < page_count) { |
11364 | assert_wait((event_t) &compressor_perf_test_pages_processed, THREAD_UNINT); |
11365 | vm_page_unlock_queues(); |
11366 | thread_block(THREAD_CONTINUE_NULL); |
11367 | vm_page_lock_queues(); |
11368 | } |
11369 | compressor_perf_test_end = mach_absolute_time(); |
11370 | compressed_bytes_end = c_segment_compressed_bytes; |
11371 | vm_page_unlock_queues(); |
11372 | |
11373 | |
11374 | out: |
11375 | /* |
11376 | * If we errored out above, then we could still have some pages |
11377 | * on the local queue. Make sure to put them back on the active queue before |
11378 | * returning so they're not orphaned. |
11379 | */ |
11380 | vm_page_lock_queues(); |
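	/*
	 * On the error path both timestamps are still zero, so this simply
	 * reports an elapsed time of zero.
	 */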
11381 | absolutetime_to_nanoseconds(compressor_perf_test_end - compressor_perf_test_start, time); |
11382 | p = (vm_page_t) vm_page_queue_first(&compressor_perf_test_queue); |
11383 | while (p && !vm_page_queue_end(&compressor_perf_test_queue, (vm_page_queue_entry_t)p)) { |
11384 | next = (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next); |
11385 | |
11386 | vm_page_enqueue_active(p, FALSE); |
11387 | p = next; |
11388 | } |
11389 | |
11390 | compressor_running_perf_test = false; |
11391 | vm_page_unlock_queues(); |
11392 | if (err == KERN_SUCCESS) { |
11393 | *bytes_compressed = page_count * PAGE_SIZE_64; |
11394 | *compressor_growth = compressed_bytes_end - compressed_bytes_start; |
11395 | } |
11396 | |
11397 | /* |
11398 | * pageout_scan will consider waking the compactor swapper |
11399 | * before it blocks. Do the same thing here before we return |
11400 | * to ensure that back to back benchmark runs can't overly fragment the |
11401 | * compressor pool. |
11402 | */ |
11403 | vm_consider_waking_compactor_swapper(); |
11404 | return err; |
11405 | } |
11406 | #endif /* DEVELOPMENT || DEBUG */ |
11407 | |