/*
 * Copyright (c) 2014-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>
#include <mach/mach_vm.h>

#include <kern/host.h>
#include <kern/kalloc.h>
#include <kern/page_decrypt.h>
#include <kern/queue.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>

#include <sys/kdebug_triage.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_kern.h>


/*
 * 4K MEMORY PAGER
 *
 * This external memory manager (EMM) handles memory mappings that are
 * 4K-aligned but not page-aligned and that can therefore not be mapped
 * directly.
 *
 * It mostly handles page-in requests (from memory_object_data_request()) by
 * getting the data needed to fill in each 4K chunk.  That can require
 * getting data from one or two pages of its backing VM object
 * (a file or an "apple-protected" pager backed by an encrypted file), and
 * copying the data into another page so that it is aligned as expected by
 * the mapping.
 *
 * Returned pages can never be dirtied and must always be mapped copy-on-write,
 * so the memory manager does not need to handle page-out requests (from
 * memory_object_data_return()).
 */
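
/*
 * Illustrative layout: each pager covers one 16K "fourk" mapping with four
 * 4K slots; offsets beyond the first 16K are served by extending the last
 * slot's backing object (see fourk_pager_data_request()):
 *
 *	memory object offset	backed by
 *	[0x0000, 0x1000)	slots[0].backing_object @ backing_offset
 *	[0x1000, 0x2000)	slots[1].backing_object @ backing_offset
 *	[0x2000, 0x3000)	slots[2].backing_object @ backing_offset
 *	[0x3000, 0x4000)	slots[3].backing_object @ backing_offset
 *	[0x4000, ...)		slots[3].backing_object, continuing linearly
 *				past its backing_offset
 */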

/* forward declarations */
void fourk_pager_reference(memory_object_t mem_obj);
void fourk_pager_deallocate(memory_object_t mem_obj);
kern_return_t fourk_pager_init(memory_object_t mem_obj,
    memory_object_control_t control,
    memory_object_cluster_size_t pg_size);
kern_return_t fourk_pager_terminate(memory_object_t mem_obj);
kern_return_t fourk_pager_data_request(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t length,
    vm_prot_t protection_required,
    memory_object_fault_info_t fault_info);
kern_return_t fourk_pager_data_return(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt,
    memory_object_offset_t *resid_offset,
    int *io_error,
    boolean_t dirty,
    boolean_t kernel_copy,
    int upl_flags);
kern_return_t fourk_pager_data_initialize(memory_object_t mem_obj,
    memory_object_offset_t offset,
    memory_object_cluster_size_t data_cnt);
kern_return_t fourk_pager_map(memory_object_t mem_obj,
    vm_prot_t prot);
kern_return_t fourk_pager_last_unmap(memory_object_t mem_obj);

/*
 * Vector of VM operations for this EMM.
 * These routines are invoked by VM via the memory_object_*() interfaces.
 */
const struct memory_object_pager_ops fourk_pager_ops = {
	.memory_object_reference = fourk_pager_reference,
	.memory_object_deallocate = fourk_pager_deallocate,
	.memory_object_init = fourk_pager_init,
	.memory_object_terminate = fourk_pager_terminate,
	.memory_object_data_request = fourk_pager_data_request,
	.memory_object_data_return = fourk_pager_data_return,
	.memory_object_data_initialize = fourk_pager_data_initialize,
	.memory_object_map = fourk_pager_map,
	.memory_object_last_unmap = fourk_pager_last_unmap,
	.memory_object_backing_object = NULL,
	.memory_object_pager_name = "fourk_pager"
};

/*
 * The "fourk_pager" describes a memory object backed by
 * the "4K" EMM.
 */
#define FOURK_PAGER_SLOTS 4	/* 16K / 4K */
typedef struct fourk_pager_backing {
	vm_object_t		backing_object;
	vm_object_offset_t	backing_offset;
} *fourk_pager_backing_t;
typedef struct fourk_pager {
	/* mandatory generic header */
	struct memory_object fourk_pgr_hdr;

	/* pager-specific data */
	queue_chain_t		pager_queue;	/* next & prev pagers */
#if MEMORY_OBJECT_HAS_REFCOUNT
#define fourk_pgr_hdr_ref	fourk_pgr_hdr.mo_ref
#else
	os_ref_atomic_t		fourk_pgr_hdr_ref;
#endif
	bool			is_ready;	/* is this pager ready? */
	bool			is_mapped;	/* is this mem_obj mapped? */
	struct fourk_pager_backing slots[FOURK_PAGER_SLOTS]; /* backing for each
	                                                      * 4K chunk */
} *fourk_pager_t;
#define FOURK_PAGER_NULL	((fourk_pager_t) NULL)

/*
 * List of memory objects managed by this EMM.
 * The list is protected by the "fourk_pager_lock" lock.
 */
int fourk_pager_count = 0;		/* number of pagers */
int fourk_pager_count_mapped = 0;	/* number of mapped pagers */
queue_head_t fourk_pager_queue = QUEUE_HEAD_INITIALIZER(fourk_pager_queue);
LCK_GRP_DECLARE(fourk_pager_lck_grp, "4K-pager");
LCK_MTX_DECLARE(fourk_pager_lock, &fourk_pager_lck_grp);

/*
 * Maximum number of unmapped pagers we're willing to keep around.
 */
int fourk_pager_cache_limit = 0;

/*
 * Statistics & counters.
 */
int fourk_pager_count_max = 0;
int fourk_pager_count_unmapped_max = 0;
int fourk_pager_num_trim_max = 0;
int fourk_pager_num_trim_total = 0;

/* internal prototypes */
fourk_pager_t fourk_pager_lookup(memory_object_t mem_obj);
void fourk_pager_dequeue(fourk_pager_t pager);
void fourk_pager_deallocate_internal(fourk_pager_t pager,
    boolean_t locked);
void fourk_pager_terminate_internal(fourk_pager_t pager);
void fourk_pager_trim(void);


#if DEBUG
int fourk_pagerdebug = 0;
#define PAGER_ALL		0xffffffff
#define PAGER_INIT		0x00000001
#define PAGER_PAGEIN		0x00000002

#define PAGER_DEBUG(LEVEL, A)						\
	MACRO_BEGIN							\
	if ((fourk_pagerdebug & (LEVEL)) == (LEVEL)) {			\
	        printf A;						\
	}								\
	MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
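
/*
 * Example (illustrative): on a DEBUG kernel, tracing can be enabled by
 * setting "fourk_pagerdebug" (e.g. from the kernel debugger) before the
 * mappings of interest are faulted in:
 *
 *	fourk_pagerdebug = PAGER_PAGEIN;	// page-in requests only
 *	fourk_pagerdebug = PAGER_ALL;		// everything
 *
 * PAGER_DEBUG() then expands to printf() calls; on non-DEBUG kernels it
 * compiles away entirely.
 */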


/*
 * fourk_pager_init()
 *
 * Initializes the memory object and makes it ready to be used and mapped.
 */
kern_return_t
fourk_pager_init(
	memory_object_t mem_obj,
	memory_object_control_t control,
#if !DEBUG
	__unused
#endif
	memory_object_cluster_size_t pg_size)
{
	fourk_pager_t pager;
	kern_return_t kr;
	memory_object_attr_info_data_t attributes;

	PAGER_DEBUG(PAGER_ALL,
	    ("fourk_pager_init: %p, %p, %x\n",
	    mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	pager = fourk_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	pager->fourk_pgr_hdr.mo_control = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT)); */
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = FALSE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
		control,
		MEMORY_OBJECT_ATTRIBUTE_INFO,
		(memory_object_info_t) &attributes,
		MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS) {
		panic("fourk_pager_init: "
		    "memory_object_change_attributes() failed");
	}

#if CONFIG_SECLUDED_MEMORY
	if (secluded_for_filecache) {
		memory_object_mark_eligible_for_secluded(control, TRUE);
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	return KERN_SUCCESS;
}

/*
 * fourk_pager_data_return()
 *
 * Handles page-out requests from VM.  This should never happen since
 * the pages provided by this EMM are not supposed to be dirty or dirtied
 * and VM should simply discard the contents and reclaim the pages if it
 * needs to.
 */
kern_return_t
fourk_pager_data_return(
	__unused memory_object_t mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t data_cnt,
	__unused memory_object_offset_t *resid_offset,
	__unused int *io_error,
	__unused boolean_t dirty,
	__unused boolean_t kernel_copy,
	__unused int upl_flags)
{
	panic("fourk_pager_data_return: should never get called");
	return KERN_FAILURE;
}

kern_return_t
fourk_pager_data_initialize(
	__unused memory_object_t mem_obj,
	__unused memory_object_offset_t offset,
	__unused memory_object_cluster_size_t data_cnt)
{
	panic("fourk_pager_data_initialize: should never get called");
	return KERN_FAILURE;
}

/*
 * fourk_pager_reference()
 *
 * Get a reference on this memory object.
 * For external usage only.  Assumes that the initial reference count is not 0,
 * i.e. one should not "revive" a dead pager this way.
 */
void
fourk_pager_reference(
	memory_object_t mem_obj)
{
	fourk_pager_t pager;

	pager = fourk_pager_lookup(mem_obj);

	lck_mtx_lock(&fourk_pager_lock);
	os_ref_retain_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
	lck_mtx_unlock(&fourk_pager_lock);
}


/*
 * fourk_pager_dequeue:
 *
 * Removes a pager from the list of pagers.
 *
 * The caller must hold "fourk_pager_lock".
 */
void
fourk_pager_dequeue(
	fourk_pager_t pager)
{
	assert(!pager->is_mapped);

	queue_remove(&fourk_pager_queue,
	    pager,
	    fourk_pager_t,
	    pager_queue);
	pager->pager_queue.next = NULL;
	pager->pager_queue.prev = NULL;

	fourk_pager_count--;
}

/*
 * fourk_pager_terminate_internal:
 *
 * Trigger the asynchronous termination of the memory object associated
 * with this pager.
 * When the memory object is terminated, there will be one more call
 * to memory_object_deallocate() (i.e. fourk_pager_deallocate())
 * to finish the clean up.
 *
 * "fourk_pager_lock" should not be held by the caller.
 * We don't need the lock because the pager has already been removed from
 * the pagers' list and is now ours exclusively.
 */
void
fourk_pager_terminate_internal(
	fourk_pager_t pager)
{
	int i;

	assert(pager->is_ready);
	assert(!pager->is_mapped);

	for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
		if (pager->slots[i].backing_object != VM_OBJECT_NULL &&
		    pager->slots[i].backing_object != (vm_object_t) -1) {
			vm_object_deallocate(pager->slots[i].backing_object);
			pager->slots[i].backing_object = (vm_object_t) -1;
			pager->slots[i].backing_offset = (vm_object_offset_t) -1;
		}
	}

	/* trigger the destruction of the memory object */
	memory_object_destroy(pager->fourk_pgr_hdr.mo_control, 0);
}

/*
 * fourk_pager_deallocate_internal()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 * Can be called with fourk_pager_lock held or not but always returns
 * with it unlocked.
 */
void
fourk_pager_deallocate_internal(
	fourk_pager_t pager,
	boolean_t locked)
{
	boolean_t needs_trimming;
	int count_unmapped;
	os_ref_count_t ref_count;

	if (!locked) {
		lck_mtx_lock(&fourk_pager_lock);
	}

	count_unmapped = (fourk_pager_count -
	    fourk_pager_count_mapped);
	if (count_unmapped > fourk_pager_cache_limit) {
		/* we have too many unmapped pagers: trim some */
		needs_trimming = TRUE;
	} else {
		needs_trimming = FALSE;
	}

	/* drop a reference on this pager */
	ref_count = os_ref_release_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);

	if (ref_count == 1) {
		/*
		 * Only the "named" reference is left, which means that
		 * no one is really holding on to this pager anymore.
		 * Terminate it.
		 */
		fourk_pager_dequeue(pager);
		/* the pager is all ours: no need for the lock now */
		lck_mtx_unlock(&fourk_pager_lock);
		fourk_pager_terminate_internal(pager);
	} else if (ref_count == 0) {
		/*
		 * Dropped the existence reference; the memory object has
		 * been terminated.  Do some final cleanup and release the
		 * pager structure.
		 */
		lck_mtx_unlock(&fourk_pager_lock);
		if (pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_deallocate(pager->fourk_pgr_hdr.mo_control);
			pager->fourk_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;
		}
		kfree_type(struct fourk_pager, pager);
		pager = FOURK_PAGER_NULL;
	} else {
		/* there are still plenty of references: keep going... */
		lck_mtx_unlock(&fourk_pager_lock);
	}

	if (needs_trimming) {
		fourk_pager_trim();
	}
	/* caution: lock is not held on return... */
}
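
/*
 * Note on the reference-count lifecycle of a "fourk_pager" (summary of the
 * logic above and in fourk_pager_create() below):
 *
 *  - fourk_pager_create() initializes the count to 2 (what its comment calls
 *    the existence and setup references).
 *  - the first fourk_pager_map() takes one extra reference that stands for
 *    all outstanding mappings; fourk_pager_last_unmap() drops it.
 *  - when the count falls to 1, only the "named" reference is left: the pager
 *    is dequeued and its memory object destroyed ("ref_count == 1" above).
 *  - the resulting memory_object_deallocate() drops the count to 0 and the
 *    pager structure is freed ("ref_count == 0" above).
 */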

/*
 * fourk_pager_deallocate()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 */
void
fourk_pager_deallocate(
	memory_object_t mem_obj)
{
	fourk_pager_t pager;

	PAGER_DEBUG(PAGER_ALL, ("fourk_pager_deallocate: %p\n", mem_obj));
	pager = fourk_pager_lookup(mem_obj);
	fourk_pager_deallocate_internal(pager, FALSE);
}

/*
 * fourk_pager_terminate()
 *
 * Called by VM when the memory object is terminated: nothing to do here
 * beyond the optional debug trace.
 */
kern_return_t
fourk_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("fourk_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 * fourk_pager_map()
 *
 * This allows VM to let us, the EMM, know that this memory object
 * is currently mapped one or more times.  This is called by VM each time
 * the memory object gets mapped and we take one extra reference on the
 * memory object to account for all its mappings.
 */
kern_return_t
fourk_pager_map(
	memory_object_t mem_obj,
	__unused vm_prot_t prot)
{
	fourk_pager_t pager;

	PAGER_DEBUG(PAGER_ALL, ("fourk_pager_map: %p\n", mem_obj));

	pager = fourk_pager_lookup(mem_obj);

	lck_mtx_lock(&fourk_pager_lock);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0); /* pager is alive */
	if (pager->is_mapped == FALSE) {
		/*
		 * First mapping of this pager: take an extra reference
		 * that will remain until all the mappings of this pager
		 * are removed.
		 */
		pager->is_mapped = TRUE;
		os_ref_retain_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
		fourk_pager_count_mapped++;
	}
	lck_mtx_unlock(&fourk_pager_lock);

	return KERN_SUCCESS;
}

/*
 * fourk_pager_last_unmap()
 *
 * This is called by VM when this memory object is no longer mapped anywhere.
 */
kern_return_t
fourk_pager_last_unmap(
	memory_object_t mem_obj)
{
	fourk_pager_t pager;
	int count_unmapped;

	PAGER_DEBUG(PAGER_ALL,
	    ("fourk_pager_last_unmap: %p\n", mem_obj));

	pager = fourk_pager_lookup(mem_obj);

	lck_mtx_lock(&fourk_pager_lock);
	if (pager->is_mapped) {
		/*
		 * All the mappings are gone, so let go of the one extra
		 * reference that represents all the mappings of this pager.
		 */
		fourk_pager_count_mapped--;
		count_unmapped = (fourk_pager_count -
		    fourk_pager_count_mapped);
		if (count_unmapped > fourk_pager_count_unmapped_max) {
			fourk_pager_count_unmapped_max = count_unmapped;
		}
		pager->is_mapped = FALSE;
		fourk_pager_deallocate_internal(pager, TRUE);
		/* caution: deallocate_internal() released the lock! */
	} else {
		lck_mtx_unlock(&fourk_pager_lock);
	}

	return KERN_SUCCESS;
}


/*
 * fourk_pager_lookup()
 *
 * Convert from a memory object to its "fourk_pager" structure.
 */
fourk_pager_t
fourk_pager_lookup(
	memory_object_t mem_obj)
{
	fourk_pager_t pager;

	assert(mem_obj->mo_pager_ops == &fourk_pager_ops);
	pager = (fourk_pager_t) mem_obj;
	assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
	return pager;
}

void
fourk_pager_trim(void)
{
	fourk_pager_t pager, prev_pager;
	queue_head_t trim_queue;
	int num_trim;
	int count_unmapped;

	lck_mtx_lock(&fourk_pager_lock);

	/*
	 * We have too many pagers, try and trim some unused ones,
	 * starting with the oldest pager at the end of the queue.
	 */
	queue_init(&trim_queue);
	num_trim = 0;

	for (pager = (fourk_pager_t)
	    queue_last(&fourk_pager_queue);
	    !queue_end(&fourk_pager_queue,
	    (queue_entry_t) pager);
	    pager = prev_pager) {
		/* get prev elt before we dequeue */
		prev_pager = (fourk_pager_t)
		    queue_prev(&pager->pager_queue);

		if (os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) == 2 &&
		    pager->is_ready &&
		    !pager->is_mapped) {
			/* this pager can be trimmed */
			num_trim++;
			/* remove this pager from the main list ... */
			fourk_pager_dequeue(pager);
			/* ... and add it to our trim queue */
			queue_enter_first(&trim_queue,
			    pager,
			    fourk_pager_t,
			    pager_queue);

			count_unmapped = (fourk_pager_count -
			    fourk_pager_count_mapped);
			if (count_unmapped <= fourk_pager_cache_limit) {
				/* we have trimmed enough pagers */
				break;
			}
		}
	}
	if (num_trim > fourk_pager_num_trim_max) {
		fourk_pager_num_trim_max = num_trim;
	}
	fourk_pager_num_trim_total += num_trim;

	lck_mtx_unlock(&fourk_pager_lock);

	/* terminate the trimmed pagers */
	while (!queue_empty(&trim_queue)) {
		queue_remove_first(&trim_queue,
		    pager,
		    fourk_pager_t,
		    pager_queue);
		pager->pager_queue.next = NULL;
		pager->pager_queue.prev = NULL;
		assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) == 2);
		/*
		 * We can't call deallocate_internal() because the pager
		 * has already been dequeued, but we still need to remove
		 * a reference.
		 */
		(void)os_ref_release_locked_raw(&pager->fourk_pgr_hdr_ref, NULL);
		fourk_pager_terminate_internal(pager);
	}
}


vm_object_t
fourk_pager_to_vm_object(
	memory_object_t mem_obj)
{
	fourk_pager_t pager;
	vm_object_t object;

	pager = fourk_pager_lookup(mem_obj);
	if (pager == NULL) {
		return VM_OBJECT_NULL;
	}

	assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
	assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);
	object = memory_object_control_to_vm_object(pager->fourk_pgr_hdr.mo_control);
	assert(object != VM_OBJECT_NULL);
	return object;
}

memory_object_t
fourk_pager_create(void)
{
	fourk_pager_t pager;
	memory_object_control_t control;
	kern_return_t kr;
	int i;

#if 00
	if (PAGE_SIZE_64 == FOURK_PAGE_SIZE) {
		panic("fourk_pager_create: page size is 4K !?");
	}
#endif

	pager = kalloc_type(struct fourk_pager, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	pager->fourk_pgr_hdr.mo_ikot = IKOT_MEMORY_OBJECT;
	pager->fourk_pgr_hdr.mo_pager_ops = &fourk_pager_ops;
	pager->fourk_pgr_hdr.mo_control = MEMORY_OBJECT_CONTROL_NULL;

	os_ref_init_count_raw(&pager->fourk_pgr_hdr_ref, NULL, 2); /* existence + setup reference */
	pager->is_ready = FALSE; /* not ready until it has a "name" */
	pager->is_mapped = FALSE;

	for (i = 0; i < FOURK_PAGER_SLOTS; i++) {
		pager->slots[i].backing_object = (vm_object_t) -1;
		pager->slots[i].backing_offset = (vm_object_offset_t) -1;
	}

	lck_mtx_lock(&fourk_pager_lock);

	/* enter new pager at the head of our list of pagers */
	queue_enter_first(&fourk_pager_queue,
	    pager,
	    fourk_pager_t,
	    pager_queue);
	fourk_pager_count++;
	if (fourk_pager_count > fourk_pager_count_max) {
		fourk_pager_count_max = fourk_pager_count;
	}
	lck_mtx_unlock(&fourk_pager_lock);

	kr = memory_object_create_named((memory_object_t) pager,
	    0,
	    &control);
	assert(kr == KERN_SUCCESS);

	memory_object_mark_trusted(control);

	lck_mtx_lock(&fourk_pager_lock);
	/* the new pager is now ready to be used */
	pager->is_ready = TRUE;
	lck_mtx_unlock(&fourk_pager_lock);

	/* wakeup anyone waiting for this pager to be ready */
	thread_wakeup(&pager->is_ready);

	return (memory_object_t) pager;
}
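
/*
 * Note: a freshly created pager has all of its slots[] set to the
 * (vm_object_t) -1 / (vm_object_offset_t) -1 sentinel values.  Each slot
 * must be pointed at its backing object via fourk_pager_populate() before
 * it can provide real data; slots still holding the sentinel are
 * zero-filled when paged in (see fourk_pager_data_request()).
 */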

/*
 * fourk_pager_data_request()
 *
 * Handles page-in requests from VM.
 */
int fourk_pager_data_request_debug = 0;
kern_return_t
fourk_pager_data_request(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
#if !DEBUG
	__unused
#endif
	vm_prot_t protection_required,
	memory_object_fault_info_t mo_fault_info)
{
	fourk_pager_t pager;
	memory_object_control_t mo_control;
	upl_t upl;
	int upl_flags;
	upl_size_t upl_size;
	upl_page_info_t *upl_pl;
	unsigned int pl_count;
	vm_object_t dst_object;
	kern_return_t kr, retval;
	vm_offset_t kernel_mapping;
	vm_offset_t src_vaddr, dst_vaddr;
	vm_offset_t cur_offset;
	int sub_page;
	int sub_page_idx, sub_page_cnt;

	pager = fourk_pager_lookup(mem_obj);
	assert(pager->is_ready);
	assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 1); /* pager is alive and mapped */

	PAGER_DEBUG(PAGER_PAGEIN, ("fourk_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));

	retval = KERN_SUCCESS;
	kernel_mapping = 0;

	offset = memory_object_trunc_page(offset);

	/*
	 * Gather in a UPL all the VM pages requested by VM.
	 */
	mo_control = pager->fourk_pgr_hdr.mo_control;

	upl_size = length;
	upl_flags =
	    UPL_RET_ONLY_ABSENT |
	    UPL_SET_LITE |
	    UPL_NO_SYNC |
	    UPL_CLEAN_IN_PLACE |	/* triggers UPL_CLEAR_DIRTY */
	    UPL_SET_INTERNAL;
	pl_count = 0;
	kr = memory_object_upl_request(mo_control,
	    offset, upl_size,
	    &upl, NULL, NULL, upl_flags, VM_KERN_MEMORY_NONE);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	dst_object = memory_object_control_to_vm_object(mo_control);
	assert(dst_object != VM_OBJECT_NULL);

#if __x86_64__ || __arm64__
	/* use the 1-to-1 mapping of physical memory */
#else /* __x86_64__ || __arm64__ */
	/*
	 * Reserve 2 virtual pages in the kernel address space to map the
	 * source and destination physical pages when it's their turn to
	 * be processed.
	 */

	kr = kmem_alloc(kernel_map, &kernel_mapping, ptoa(2),
	    KMA_DATA | KMA_KOBJECT | KMA_PAGEABLE, VM_KERN_MEMORY_NONE);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	src_vaddr = kernel_mapping;
	dst_vaddr = kernel_mapping + PAGE_SIZE;
#endif /* __x86_64__ || __arm64__ */

	/*
	 * Fill in the contents of the pages requested by VM.
	 */
	upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	pl_count = length / PAGE_SIZE;
	for (cur_offset = 0;
	    retval == KERN_SUCCESS && cur_offset < length;
	    cur_offset += PAGE_SIZE) {
		ppnum_t dst_pnum;
		int num_subpg_signed, num_subpg_validated;
		int num_subpg_tainted, num_subpg_nx;

		if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
			/* this page is not in the UPL: skip it */
			continue;
		}

		/*
		 * Establish an explicit pmap mapping of the destination
		 * physical page.
		 * We can't do a regular VM mapping because the VM page
		 * is "busy".
		 */
		dst_pnum = (ppnum_t)
		    upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
		assert(dst_pnum != 0);
		dst_vaddr = (vm_map_offset_t)
		    phystokv((pmap_paddr_t)dst_pnum << PAGE_SHIFT);

		/* retrieve appropriate data for each 4K-page in this page */
		if (PAGE_SHIFT == FOURK_PAGE_SHIFT &&
		    page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
			/*
			 * Find the slot for the requested 4KB page in
			 * the 16K page...
			 */
			assert(PAGE_SHIFT == FOURK_PAGE_SHIFT);
			assert(page_shift_user32 == SIXTEENK_PAGE_SHIFT);
			sub_page_idx = ((offset & SIXTEENK_PAGE_MASK) /
			    PAGE_SIZE);
			/*
			 * ... and provide only that one 4KB page.
			 */
			sub_page_cnt = 1;
		} else {
			/*
			 * Iterate over all slots, i.e. retrieve all four 4KB
			 * pages in the requested 16KB page.
			 */
			assert(PAGE_SHIFT == SIXTEENK_PAGE_SHIFT);
			sub_page_idx = 0;
			sub_page_cnt = FOURK_PAGER_SLOTS;
		}
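
		/*
		 * Worked example (illustrative): with 4K kernel pages and a
		 * 16K user page size, a request at offset 0xB000 gives
		 * sub_page_idx = (0xB000 & SIXTEENK_PAGE_MASK) / PAGE_SIZE = 3,
		 * so only slot 3 is filled in.  With 16K kernel pages, all
		 * four 4K sub-pages of the destination page are filled in.
		 */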

		num_subpg_signed = 0;
		num_subpg_validated = 0;
		num_subpg_tainted = 0;
		num_subpg_nx = 0;

		/* retrieve appropriate data for each 4K-page in this page */
		for (sub_page = sub_page_idx;
		    sub_page < sub_page_idx + sub_page_cnt;
		    sub_page++) {
			vm_object_t src_object;
			memory_object_offset_t src_offset;
			vm_offset_t offset_in_src_page;
			kern_return_t error_code;
			vm_object_t src_page_object;
			vm_page_t src_page;
			vm_page_t top_page;
			vm_prot_t prot;
			int interruptible;
			struct vm_object_fault_info fault_info;
			boolean_t subpg_validated;
			unsigned subpg_tainted;


			if (offset < SIXTEENK_PAGE_SIZE) {
				/*
				 * The 1st 16K-page can cover multiple
				 * sub-mappings, as described in the
				 * pager->slots[] array.
				 */
				src_object =
				    pager->slots[sub_page].backing_object;
				src_offset =
				    pager->slots[sub_page].backing_offset;
			} else {
				fourk_pager_backing_t slot;

				/*
				 * Beyond the 1st 16K-page in the pager is
				 * an extension of the last "sub page" in
				 * the pager->slots[] array.
				 */
				slot = &pager->slots[FOURK_PAGER_SLOTS - 1];
				src_object = slot->backing_object;
				src_offset = slot->backing_offset;
				src_offset += FOURK_PAGE_SIZE;
				src_offset +=
				    (vm_map_trunc_page(offset,
				    SIXTEENK_PAGE_MASK)
				    - SIXTEENK_PAGE_SIZE);
				src_offset += sub_page * FOURK_PAGE_SIZE;
			}
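			/*
			 * Worked example (illustrative, assuming 4K kernel
			 * pages): a request at pager offset 0x8000 falls
			 * beyond the first 16K, so it is backed by slot 3:
			 *   src_offset = slots[3].backing_offset
			 *              + FOURK_PAGE_SIZE            (0x1000)
			 *              + (0x8000 - 0x4000)          (0x4000)
			 *              + sub_page * FOURK_PAGE_SIZE (0),
			 * i.e. slots[3].backing_offset + 0x5000: the backing
			 * object simply continues linearly past slot 3.
			 */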
			offset_in_src_page = src_offset & PAGE_MASK_64;
			src_offset = vm_object_trunc_page(src_offset);

			if (src_object == VM_OBJECT_NULL ||
			    src_object == (vm_object_t) -1) {
				/* zero-fill */
				bzero((char *)(dst_vaddr +
				    ((sub_page - sub_page_idx)
				    * FOURK_PAGE_SIZE)),
				    FOURK_PAGE_SIZE);
				if (fourk_pager_data_request_debug) {
					printf("fourk_pager_data_request"
					    "(%p,0x%llx+0x%lx+0x%04x): "
					    "ZERO\n",
					    pager,
					    offset,
					    cur_offset,
					    ((sub_page - sub_page_idx)
					    * FOURK_PAGE_SIZE));
				}
				continue;
			}

			/* fault in the source page from src_object */
retry_src_fault:
			src_page = VM_PAGE_NULL;
			top_page = VM_PAGE_NULL;
			fault_info = *((struct vm_object_fault_info *)
			    (uintptr_t)mo_fault_info);
			fault_info.stealth = TRUE;
			fault_info.io_sync = FALSE;
			fault_info.mark_zf_absent = FALSE;
			fault_info.batch_pmap_op = FALSE;
			interruptible = fault_info.interruptible;
			prot = VM_PROT_READ;
			error_code = 0;

			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);
			kr = vm_fault_page(src_object,
			    src_offset,
			    VM_PROT_READ,
			    FALSE,
			    FALSE,              /* src_page not looked up */
			    &prot,
			    &src_page,
			    &top_page,
			    NULL,
			    &error_code,
			    FALSE,
			    &fault_info);
			switch (kr) {
			case VM_FAULT_SUCCESS:
				break;
			case VM_FAULT_RETRY:
				goto retry_src_fault;
			case VM_FAULT_MEMORY_SHORTAGE:
				if (vm_page_wait(interruptible)) {
					goto retry_src_fault;
				}
				ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_FOURK_PAGER, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_FOURK_PAGER_MEMORY_SHORTAGE), 0 /* arg */);
				OS_FALLTHROUGH;
			case VM_FAULT_INTERRUPTED:
				retval = MACH_SEND_INTERRUPTED;
				goto src_fault_done;
			case VM_FAULT_SUCCESS_NO_VM_PAGE:
				/* success but no VM page: fail */
				vm_object_paging_end(src_object);
				vm_object_unlock(src_object);
				OS_FALLTHROUGH;
			case VM_FAULT_MEMORY_ERROR:
				/* the page is not there! */
				if (error_code) {
					retval = error_code;
				} else {
					retval = KERN_MEMORY_ERROR;
				}
				goto src_fault_done;
			default:
				panic("fourk_pager_data_request: "
				    "vm_fault_page() unexpected error 0x%x\n",
				    kr);
			}
			assert(src_page != VM_PAGE_NULL);
			assert(src_page->vmp_busy);

			src_page_object = VM_PAGE_OBJECT(src_page);

			if ((!VM_PAGE_PAGEABLE(src_page)) &&
			    !VM_PAGE_WIRED(src_page)) {
				vm_page_lockspin_queues();
				if ((!VM_PAGE_PAGEABLE(src_page)) &&
				    !VM_PAGE_WIRED(src_page)) {
					vm_page_deactivate(src_page);
				}
				vm_page_unlock_queues();
			}

			src_vaddr = (vm_map_offset_t)
			    phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(src_page)
			    << PAGE_SHIFT);

			/*
			 * Validate the 4K page we want from
			 * this source page...
			 */
			subpg_validated = FALSE;
			subpg_tainted = 0;
			if (src_page_object->code_signed) {
				vm_page_validate_cs_mapped_chunk(
					src_page,
					(const void *) src_vaddr,
					offset_in_src_page,
					FOURK_PAGE_SIZE,
					&subpg_validated,
					&subpg_tainted);
				num_subpg_signed++;
				if (subpg_validated) {
					num_subpg_validated++;
				}
				if (subpg_tainted & CS_VALIDATE_TAINTED) {
					num_subpg_tainted++;
				}
				if (subpg_tainted & CS_VALIDATE_NX) {
					/* subpg should not be executable */
					if (sub_page_cnt > 1) {
						/*
						 * The destination page has
						 * more than 1 subpage and its
						 * other subpages might need
						 * EXEC, so we do not propagate
						 * CS_VALIDATE_NX to the
						 * destination page...
						 */
					} else {
						num_subpg_nx++;
					}
				}
			}

			/*
			 * Copy the relevant portion of the source page
			 * into the appropriate part of the destination page.
			 */
			bcopy((const char *)(src_vaddr + offset_in_src_page),
			    (char *)(dst_vaddr +
			    ((sub_page - sub_page_idx) *
			    FOURK_PAGE_SIZE)),
			    FOURK_PAGE_SIZE);
			if (fourk_pager_data_request_debug) {
				printf("fourk_data_request"
				    "(%p,0x%llx+0x%lx+0x%04x): "
				    "backed by [%p:0x%llx]: "
				    "[0x%016llx 0x%016llx] "
				    "code_signed=%d "
				    "cs_valid=%d cs_tainted=%d cs_nx=%d\n",
				    pager,
				    offset, cur_offset,
				    (sub_page - sub_page_idx) * FOURK_PAGE_SIZE,
				    src_page_object,
				    src_page->vmp_offset + offset_in_src_page,
				    *(uint64_t *)(dst_vaddr +
				    ((sub_page - sub_page_idx) *
				    FOURK_PAGE_SIZE)),
				    *(uint64_t *)(dst_vaddr +
				    ((sub_page - sub_page_idx) *
				    FOURK_PAGE_SIZE) +
				    8),
				    src_page_object->code_signed,
				    subpg_validated,
				    !!(subpg_tainted & CS_VALIDATE_TAINTED),
				    !!(subpg_tainted & CS_VALIDATE_NX));
			}

#if __x86_64__ || __arm64__
			/* we used the 1-to-1 mapping of physical memory */
			src_vaddr = 0;
#else /* __x86_64__ || __arm64__ */
			/*
			 * Remove the pmap mapping of the source page
			 * in the kernel.
			 */
			pmap_remove(kernel_pmap,
			    (addr64_t) src_vaddr,
			    (addr64_t) src_vaddr + PAGE_SIZE_64);
#endif /* __x86_64__ || __arm64__ */

src_fault_done:
			/*
			 * Cleanup the result of vm_fault_page().
			 */
			if (src_page) {
				assert(VM_PAGE_OBJECT(src_page) == src_page_object);

				PAGE_WAKEUP_DONE(src_page);
				src_page = VM_PAGE_NULL;
				vm_object_paging_end(src_page_object);
				vm_object_unlock(src_page_object);
				if (top_page) {
					vm_object_t top_object;

					top_object = VM_PAGE_OBJECT(top_page);
					vm_object_lock(top_object);
					VM_PAGE_FREE(top_page);
					top_page = VM_PAGE_NULL;
					vm_object_paging_end(top_object);
					vm_object_unlock(top_object);
				}
			}
		}
		if (num_subpg_signed > 0) {
			/* some code-signing involved with this 16K page */
			if (num_subpg_tainted > 0) {
				/* a tainted subpage taints the entire 16K page */
				UPL_SET_CS_TAINTED(upl_pl,
				    cur_offset / PAGE_SIZE,
				    VMP_CS_ALL_TRUE);
				/* also mark as "validated" for consistency */
				UPL_SET_CS_VALIDATED(upl_pl,
				    cur_offset / PAGE_SIZE,
				    VMP_CS_ALL_TRUE);
			} else if (num_subpg_validated == num_subpg_signed) {
				/*
				 * All the code-signed 4K subpages of this
				 * 16K page are validated: our 16K page is
				 * considered validated.
				 */
				UPL_SET_CS_VALIDATED(upl_pl,
				    cur_offset / PAGE_SIZE,
				    VMP_CS_ALL_TRUE);
			}
			if (num_subpg_nx > 0) {
				UPL_SET_CS_NX(upl_pl,
				    cur_offset / PAGE_SIZE,
				    VMP_CS_ALL_TRUE);
			}
		}
	}

done:
	if (upl != NULL) {
		/* clean up the UPL */

		/*
		 * The pages are currently dirty because we've just been
		 * writing on them, but as far as we're concerned, they're
		 * clean since they contain their "original" contents as
		 * provided by us, the pager.
		 * Tell the UPL to mark them "clean".
		 */
		upl_clear_dirty(upl, TRUE);

		/* abort or commit the UPL */
		if (retval != KERN_SUCCESS) {
			upl_abort(upl, 0);
			if (retval == KERN_ABORTED) {
				wait_result_t wait_result;

				/*
				 * We aborted the fault and did not provide
				 * any contents for the requested pages but
				 * the pages themselves are not invalid, so
				 * let's return success and let the caller
				 * retry the fault, in case it might succeed
				 * later (when the decryption code is up and
				 * running in the kernel, for example).
				 */
				retval = KERN_SUCCESS;
				/*
				 * Wait a little bit first to avoid using
				 * too much CPU time retrying and failing
				 * the same fault over and over again.
				 */
				wait_result = assert_wait_timeout(
					(event_t) fourk_pager_data_request,
					THREAD_UNINT,
					10000,  /* 10ms */
					NSEC_PER_USEC);
				assert(wait_result == THREAD_WAITING);
				wait_result = thread_block(THREAD_CONTINUE_NULL);
				assert(wait_result == THREAD_TIMED_OUT);
			}
		} else {
			boolean_t empty;
			assertf(page_aligned(upl->u_offset) && page_aligned(upl->u_size),
			    "upl %p offset 0x%llx size 0x%x",
			    upl, upl->u_offset, upl->u_size);
			upl_commit_range(upl, 0, upl->u_size,
			    UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_WRITTEN_BY_KERNEL,
			    upl_pl, pl_count, &empty);
		}

		/* and deallocate the UPL */
		upl_deallocate(upl);
		upl = NULL;
	}
	if (kernel_mapping != 0) {
		/* clean up the mapping of the source and destination pages */
		kmem_free(kernel_map, kernel_mapping, ptoa(2));
		kernel_mapping = 0;
		src_vaddr = 0;
		dst_vaddr = 0;
	}

	return retval;
}


kern_return_t
fourk_pager_populate(
	memory_object_t mem_obj,
	boolean_t overwrite,
	int index,
	vm_object_t new_backing_object,
	vm_object_offset_t new_backing_offset,
	vm_object_t *old_backing_object,
	vm_object_offset_t *old_backing_offset)
{
	fourk_pager_t pager;

	pager = fourk_pager_lookup(mem_obj);
	if (pager == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	assert(os_ref_get_count_raw(&pager->fourk_pgr_hdr_ref) > 0);
	assert(pager->fourk_pgr_hdr.mo_control != MEMORY_OBJECT_CONTROL_NULL);

	/* valid slots are [0, FOURK_PAGER_SLOTS) */
	if (index < 0 || index >= FOURK_PAGER_SLOTS) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!overwrite &&
	    (pager->slots[index].backing_object != (vm_object_t) -1 ||
	    pager->slots[index].backing_offset != (vm_object_offset_t) -1)) {
		return KERN_INVALID_ADDRESS;
	}

	*old_backing_object = pager->slots[index].backing_object;
	*old_backing_offset = pager->slots[index].backing_offset;

	pager->slots[index].backing_object = new_backing_object;
	pager->slots[index].backing_offset = new_backing_offset;

	return KERN_SUCCESS;
}
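
/*
 * Illustrative usage sketch (not an actual call site; the real callers live
 * in the VM map-entry and shared-region code).  A hypothetical caller would
 * create a pager, point each 4K slot at its backing VM object, and then map
 * the resulting memory object copy-on-write:
 *
 *	memory_object_t         mo;
 *	vm_object_t             old_obj;
 *	vm_object_offset_t      old_off;
 *	kern_return_t           kr;
 *
 *	mo = fourk_pager_create();
 *	kr = fourk_pager_populate(mo,
 *	        FALSE,                  // don't overwrite a populated slot
 *	        2,                      // 3rd 4K chunk of the first 16K
 *	        backing_object,         // backing VM object for that chunk
 *	        backing_offset,         // offset of that chunk in the object
 *	        &old_obj, &old_off);
 *	assert(kr == KERN_SUCCESS);
 *	// ... populate the remaining slots, then map "mo" ...
 *
 * Slots left at the (vm_object_t) -1 sentinel are zero-filled at page-in
 * time (see fourk_pager_data_request() above).
 */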