1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_kern.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Kernel memory management.
64 */
65
66#include <mach/kern_return.h>
67#include <mach/vm_param.h>
68#include <kern/assert.h>
69#include <kern/thread.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_map_internal.h>
72#include <vm/vm_object.h>
73#include <vm/vm_page.h>
74#include <vm/vm_compressor.h>
75#include <vm/vm_pageout.h>
76#include <vm/vm_init.h>
77#include <vm/vm_fault.h>
78#include <vm/vm_memtag.h>
79#include <kern/misc_protos.h>
80#include <vm/cpm.h>
81#include <kern/ledger.h>
82#include <kern/bits.h>
83#include <kern/startup.h>
84
85#include <string.h>
86
87#include <libkern/OSDebug.h>
88#include <libkern/crypto/sha2.h>
89#include <libkern/section_keywords.h>
90#include <sys/kdebug.h>
91#include <sys/kdebug_triage.h>
92
93#include <san/kasan.h>
94#include <kern/kext_alloc.h>
95#include <kern/backtrace.h>
96#include <os/hash.h>
97#include <kern/zalloc_internal.h>
98#include <libkern/crypto/rand.h>
99
100/*
101 * Variables exported by this module.
102 */
103
104SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
105SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
106SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];
107
108static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
109 KMEM_RANGE_ID_NUM_PTR);
110#define KMEM_GOBJ_THRESHOLD (32ULL << 20)
111#if DEBUG || DEVELOPMENT
112#define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
113#define KMEM_OUTLIER_SIZE 0
114#define KMEM_OUTLIER_ALIGN 1
115btlog_t kmem_outlier_log;
116#endif /* DEBUG || DEVELOPMENT */
117
118__startup_data static vm_map_size_t data_range_size;
119__startup_data static vm_map_size_t ptr_range_size;
120__startup_data static vm_map_size_t sprayqtn_range_size;
121
122#pragma mark helpers
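
/*
 * The ANYF() overloads below reinterpret the typed flag enums
 * (kma_flags_t, kmr_flags_t, kmf_flags_t) as the common kmem_flags_t,
 * so that the shared __kmem_*() helpers can accept any of them.
 */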
123
124__attribute__((overloadable))
125__header_always_inline kmem_flags_t
126ANYF(kma_flags_t flags)
127{
128 return (kmem_flags_t)flags;
129}
130
131__attribute__((overloadable))
132__header_always_inline kmem_flags_t
133ANYF(kmr_flags_t flags)
134{
135 return (kmem_flags_t)flags;
136}
137
138__attribute__((overloadable))
139__header_always_inline kmem_flags_t
140ANYF(kmf_flags_t flags)
141{
142 return (kmem_flags_t)flags;
143}
144
145__abortlike
146static void
147__kmem_invalid_size_panic(
148 vm_map_t map,
149 vm_size_t size,
150 uint32_t flags)
151{
152 panic("kmem(map=%p, flags=0x%x): invalid size %zd",
153 map, flags, (size_t)size);
154}
155
156__abortlike
157static void
158__kmem_invalid_arguments_panic(
159 const char *what,
160 vm_map_t map,
161 vm_address_t address,
162 vm_size_t size,
163 uint32_t flags)
164{
165 panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
166 "invalid arguments passed",
167 what, map, (void *)address, (size_t)size, flags);
168}
169
170__abortlike
171static void
172__kmem_failed_panic(
173 vm_map_t map,
174 vm_size_t size,
175 uint32_t flags,
176 kern_return_t kr,
177 const char *what)
178{
179 panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
180 what, map, (size_t)size, flags, kr);
181}
182
183__abortlike
184static void
185__kmem_entry_not_found_panic(
186 vm_map_t map,
187 vm_offset_t addr)
188{
189 panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
190}
191
192static inline vm_object_t
193__kmem_object(kmem_flags_t flags)
194{
195 if (flags & KMEM_COMPRESSOR) {
196 if (flags & KMEM_KOBJECT) {
197 panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
198 }
199 return compressor_object;
200 }
201 if (!(flags & KMEM_KOBJECT)) {
202 panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
203 }
204 return kernel_object_default;
205}
206
207static inline pmap_mapping_type_t
208__kmem_mapping_type(kmem_flags_t flags)
209{
210 if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) {
211 return PMAP_MAPPING_TYPE_DEFAULT;
212 } else {
213 return PMAP_MAPPING_TYPE_RESTRICTED;
214 }
215}
216
217static inline vm_size_t
218__kmem_guard_left(kmem_flags_t flags)
219{
220 return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
221}
222
223static inline vm_size_t
224__kmem_guard_right(kmem_flags_t flags)
225{
226 return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
227}
228
229static inline vm_size_t
230__kmem_guard_size(kmem_flags_t flags)
231{
232 return __kmem_guard_left(flags) + __kmem_guard_right(flags);
233}
234
235__pure2
236static inline vm_size_t
237__kmem_entry_orig_size(vm_map_entry_t entry)
238{
239 vm_object_t object = VME_OBJECT(entry);
240
241 if (entry->vme_kernel_object) {
242 return entry->vme_end - entry->vme_start -
243 entry->vme_object_or_delta;
244 } else {
245 return object->vo_size - object->vo_size_delta;
246 }
247}
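
/*
 * Worked example for __kmem_entry_orig_size() (illustrative, assuming 4K
 * pages and a KASAN-enabled build, where the rounding delta is recorded):
 * a request for 0x1840 bytes is rounded to a 0x2000-byte entry, and
 * kmem_alloc_guard_internal() stores (-0x1840 & PAGE_MASK) == 0x7c0 in
 * vme_object_or_delta, so the original size is recovered as
 * 0x2000 - 0x7c0 == 0x1840.  Non-kernel objects keep the equivalent
 * information in vo_size / vo_size_delta.
 */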
248
249
250#pragma mark kmem range methods
251
252#if __arm64__
253// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
254#define mach_vm_range_load(r, r_min, r_max) \
255 asm("ldp %[rmin], %[rmax], [%[range]]" \
256 : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
257 : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
258#else
259#define mach_vm_range_load(r, rmin, rmax) \
260 ({ rmin = (r)->min_address; rmax = (r)->max_address; })
261#endif
262
263__abortlike
264static void
265__mach_vm_range_overflow(
266 mach_vm_offset_t addr,
267 mach_vm_offset_t size)
268{
269 panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
270 addr, addr, size);
271}
272
273__abortlike
274static void
275__mach_vm_range_invalid(
276 mach_vm_offset_t min_address,
277 mach_vm_offset_t max_address)
278{
279 panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
280 min_address, max_address);
281}
282
283__header_always_inline mach_vm_size_t
284mach_vm_range_size(const struct mach_vm_range *r)
285{
286 mach_vm_offset_t rmin, rmax;
287
288 mach_vm_range_load(r, rmin, rmax);
289 return rmax - rmin;
290}
291
292__attribute__((overloadable))
293__header_always_inline bool
294mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
295{
296 mach_vm_offset_t rmin, rmax;
297
298#if CONFIG_KERNEL_TAGGING
299 if (VM_KERNEL_ADDRESS(addr)) {
300 addr = vm_memtag_canonicalize_address(addr);
301 }
302#endif /* CONFIG_KERNEL_TAGGING */
303
304 /*
305 * The `&` is not a typo: we really expect the check to pass,
306 * so encourage the compiler to eagerly load and test without branches
307 */
308 mach_vm_range_load(r, rmin, rmax);
309 return (addr >= rmin) & (addr < rmax);
310}
311
312__attribute__((overloadable))
313__header_always_inline bool
314mach_vm_range_contains(
315 const struct mach_vm_range *r,
316 mach_vm_offset_t addr,
317 mach_vm_offset_t size)
318{
319 mach_vm_offset_t rmin, rmax;
320
321#if CONFIG_KERNEL_TAGGING
322 if (VM_KERNEL_ADDRESS(addr)) {
323 addr = vm_memtag_canonicalize_address(addr);
324 }
325#endif /* CONFIG_KERNEL_TAGGING */
326
327 /*
328 * The `&` is not a typo: we really expect the check to pass,
329 * so encourage the compiler to eagerly load and test without branches
330 */
331 mach_vm_range_load(r, rmin, rmax);
332 return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
333}
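
/*
 * Note on the size-taking overload above: the (addr + size >= rmin) term
 * rejects ranges whose end wraps around the address space.  For example,
 * with addr near the top of the address space and a size large enough to
 * wrap, addr + size becomes a small value below rmin, so the check fails
 * instead of spuriously passing the (addr + size <= rmax) comparison.
 */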
334
335__attribute__((overloadable))
336__header_always_inline bool
337mach_vm_range_intersects(
338 const struct mach_vm_range *r1,
339 const struct mach_vm_range *r2)
340{
341 mach_vm_offset_t r1_min, r1_max;
342 mach_vm_offset_t r2_min, r2_max;
343
344 mach_vm_range_load(r1, r1_min, r1_max);
345 r2_min = r2->min_address;
346 r2_max = r2->max_address;
347
	if (r1_min > r1_max) {
		__mach_vm_range_invalid(r1_min, r1_max);
	}

	if (r2_min > r2_max) {
		__mach_vm_range_invalid(r2_min, r2_max);
	}
355
356 return r1_max > r2_min && r1_min < r2_max;
357}
358
359__attribute__((overloadable))
360__header_always_inline bool
361mach_vm_range_intersects(
362 const struct mach_vm_range *r1,
363 mach_vm_offset_t addr,
364 mach_vm_offset_t size)
365{
366 struct mach_vm_range r2;
367
368 addr = VM_KERNEL_STRIP_UPTR(addr);
369 r2.min_address = addr;
370 if (os_add_overflow(addr, size, &r2.max_address)) {
371 __mach_vm_range_overflow(addr, size);
372 }
373
	return mach_vm_range_intersects(r1, &r2);
375}
376
377bool
378kmem_range_id_contains(
379 kmem_range_id_t range_id,
380 vm_map_offset_t addr,
381 vm_map_size_t size)
382{
	return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
384}
385
386__abortlike
387static void
388kmem_range_invalid_panic(
389 kmem_range_id_t range_id,
390 vm_map_offset_t addr,
391 vm_map_size_t size)
392{
393 const struct mach_vm_range *r = &kmem_ranges[range_id];
394 mach_vm_offset_t rmin, rmax;
395
396 mach_vm_range_load(r, rmin, rmax);
397 if (addr + size < rmin) {
398 panic("addr %p + size %llu overflows %p", (void *)addr, size,
399 (void *)(addr + size));
400 }
401 panic("addr %p + size %llu doesnt fit in one range (id: %u min: %p max: %p)",
402 (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
403}
404
405/*
406 * Return whether the entire allocation is contained in the given range
407 */
408static bool
409kmem_range_contains_fully(
410 kmem_range_id_t range_id,
411 vm_map_offset_t addr,
412 vm_map_size_t size)
413{
414 const struct mach_vm_range *r = &kmem_ranges[range_id];
415 mach_vm_offset_t rmin, rmax;
416 bool result = false;
417
418 if (VM_KERNEL_ADDRESS(addr)) {
419 addr = vm_memtag_canonicalize_address(addr);
420 }
421
422 /*
423 * The `&` is not a typo: we really expect the check to pass,
424 * so encourage the compiler to eagerly load and test without branches
425 */
426 mach_vm_range_load(r, rmin, rmax);
427 result = (addr >= rmin) & (addr < rmax);
428 if (__improbable(result
429 && ((addr + size < rmin) || (addr + size > rmax)))) {
430 kmem_range_invalid_panic(range_id, addr, size);
431 }
432 return result;
433}
434
435vm_map_size_t
436kmem_range_id_size(kmem_range_id_t range_id)
437{
	return mach_vm_range_size(&kmem_ranges[range_id]);
439}
440
441kmem_range_id_t
442kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
443{
444 kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
445
446 for (; range_id < KMEM_RANGE_COUNT; range_id++) {
447 if (kmem_range_contains_fully(range_id, addr, size)) {
448 return range_id;
449 }
450 }
451 return KMEM_RANGE_ID_NONE;
452}
453
454bool
455kmem_is_ptr_range(vm_map_range_id_t range_id)
456{
457 return (range_id >= KMEM_RANGE_ID_FIRST) &&
458 (range_id <= KMEM_RANGE_ID_NUM_PTR);
459}
460
461__abortlike
462static void
463kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
464{
465 panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
466 (void *)addr);
467}
468
469mach_vm_range_t
470kmem_validate_range_for_overwrite(
471 vm_map_offset_t addr,
472 vm_map_size_t size)
473{
474 vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);
475
476 if (kmem_is_ptr_range(range_id)) {
477 kmem_range_invalid_for_overwrite(addr);
478 }
479
480 return &kmem_ranges[range_id];
481}
482
483
484#pragma mark entry parameters
485
486
487__abortlike
488static void
489__kmem_entry_validate_panic(
490 vm_map_t map,
491 vm_map_entry_t entry,
492 vm_offset_t addr,
493 vm_size_t size,
494 uint32_t flags,
495 kmem_guard_t guard)
496{
497 const char *what = "???";
498
499 if (entry->vme_atomic != guard.kmg_atomic) {
500 what = "atomicity";
501 } else if (entry->is_sub_map != guard.kmg_submap) {
502 what = "objectness";
503 } else if (addr != entry->vme_start) {
504 what = "left bound";
505 } else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
506 what = "right bound";
507 } else if (guard.kmg_context != entry->vme_context) {
508 what = "guard";
509 }
510
511 panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
512 "entry:%p %s mismatch guard(0x%08x)",
513 map, (void *)addr, size, flags, entry,
514 what, guard.kmg_context);
515}
516
517static bool
518__kmem_entry_validate_guard(
519 vm_map_entry_t entry,
520 vm_offset_t addr,
521 vm_size_t size,
522 kmem_flags_t flags,
523 kmem_guard_t guard)
524{
525 if (entry->vme_atomic != guard.kmg_atomic) {
526 return false;
527 }
528
529 if (!guard.kmg_atomic) {
530 return true;
531 }
532
533 if (entry->is_sub_map != guard.kmg_submap) {
534 return false;
535 }
536
537 if (addr != entry->vme_start) {
538 return false;
539 }
540
541 if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
542 return false;
543 }
544
545 if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
546 return false;
547 }
548
549 return true;
550}
551
552void
553kmem_entry_validate_guard(
554 vm_map_t map,
555 vm_map_entry_t entry,
556 vm_offset_t addr,
557 vm_size_t size,
558 kmem_guard_t guard)
559{
	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
562 }
563}
564
565__abortlike
566static void
567__kmem_entry_validate_object_panic(
568 vm_map_t map,
569 vm_map_entry_t entry,
570 kmem_flags_t flags)
571{
572 const char *what;
573 const char *verb;
574
575 if (entry->is_sub_map) {
576 panic("kmem(map=%p) entry %p is a submap", map, entry);
577 }
578
579 if (flags & KMEM_KOBJECT) {
580 what = "kernel";
581 verb = "isn't";
582 } else if (flags & KMEM_COMPRESSOR) {
583 what = "compressor";
584 verb = "isn't";
585 } else if (entry->vme_kernel_object) {
586 what = "kernel";
587 verb = "is unexpectedly";
588 } else {
589 what = "compressor";
590 verb = "is unexpectedly";
591 }
592
593 panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
594 map, flags, entry, verb, what);
595}
596
597static bool
598__kmem_entry_validate_object(
599 vm_map_entry_t entry,
600 kmem_flags_t flags)
601{
602 if (entry->is_sub_map) {
603 return false;
604 }
605 if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
606 return false;
607 }
608
609 return (bool)(flags & KMEM_COMPRESSOR) ==
610 (VME_OBJECT(entry) == compressor_object);
611}
612
613vm_size_t
614kmem_size_guard(
615 vm_map_t map,
616 vm_offset_t addr,
617 kmem_guard_t guard)
618{
619 kmem_flags_t flags = KMEM_GUESS_SIZE;
620 vm_map_entry_t entry;
621 vm_size_t size;
622
623 vm_map_lock_read(map);
624
625#if KASAN_CLASSIC
626 addr -= PAGE_SIZE;
627#endif /* KASAN_CLASSIC */
628 addr = vm_memtag_canonicalize_address(addr);
629
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		__kmem_entry_not_found_panic(map, addr);
	}

	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
	}
636 }
637
638 size = __kmem_entry_orig_size(entry);
639
640 vm_map_unlock_read(map);
641
642 return size;
643}
644
645static inline uint16_t
646kmem_hash_backtrace(
647 void *fp)
648{
649 uint64_t bt_count;
650 uintptr_t bt[8] = {};
651
652 struct backtrace_control ctl = {
653 .btc_frame_addr = (uintptr_t)fp,
654 };
655
	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
658}
659
660static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
661 "Insufficient bits to represent ptr ranges");
662
663kmem_range_id_t
664kmem_adjust_range_id(
665 uint32_t hash)
666{
667 return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
668 (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
669}
670
671static bool
672kmem_use_sprayqtn(
673 kma_flags_t kma_flags,
674 vm_map_size_t map_size,
675 vm_offset_t mask)
676{
677 /*
	 * Pointer allocations that are above the guard-object threshold, or that
	 * combine leading guard pages with a non-standard alignment request, are
	 * redirected to the sprayqtn range.
681 */
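	/*
	 * For example (illustrative numbers): a 64MB kernel-object allocation
	 * exceeds KMEM_GOBJ_THRESHOLD (32MB) and is placed in
	 * KMEM_RANGE_ID_SPRAYQTN, as is a KMA_GUARD_FIRST request whose
	 * alignment mask exceeds PAGE_MASK.
	 */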
682#if DEBUG || DEVELOPMENT
683 btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
684 BTREF_GET_NOWAIT : 0;
685
686 if ((kma_flags & KMA_SPRAYQTN) == 0) {
687 if (map_size > KMEM_GOBJ_THRESHOLD) {
688 btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
689 btref_get(__builtin_frame_address(0), flags));
690 } else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
691 btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
692 btref_get(__builtin_frame_address(0), flags));
693 }
694 }
695#endif /* DEBUG || DEVELOPMENT */
696
697 return (kma_flags & KMA_SPRAYQTN) ||
698 (map_size > KMEM_GOBJ_THRESHOLD) ||
699 ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
700}
701
702static void
703kmem_apply_security_policy(
704 vm_map_t map,
705 kma_flags_t kma_flags,
706 kmem_guard_t guard,
707 vm_map_size_t map_size,
708 vm_offset_t mask,
709 vm_map_kernel_flags_t *vmk_flags,
710 bool assert_dir __unused)
711{
712 kmem_range_id_t range_id;
713 bool from_right;
714 uint16_t type_hash = guard.kmg_type_hash;
715
716 if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
717 return;
718 }
719
720 /*
721 * A non-zero type-hash must be passed by krealloc_type
722 */
723#if (DEBUG || DEVELOPMENT)
724 if (assert_dir && !(kma_flags & KMA_DATA)) {
725 assert(type_hash != 0);
726 }
727#endif
728
729 if (kma_flags & KMA_DATA) {
730 range_id = KMEM_RANGE_ID_DATA;
731 /*
732 * As an optimization in KMA_DATA to avoid fragmentation,
733 * allocate static carveouts at the end of the DATA range.
734 */
735 from_right = (bool)(kma_flags & KMA_PERMANENT);
736 } else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
737 range_id = KMEM_RANGE_ID_SPRAYQTN;
738 from_right = (bool)(kma_flags & KMA_PERMANENT);
739 } else if (type_hash) {
740 range_id = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
741 from_right = type_hash & KMEM_DIRECTION_MASK;
742 } else {
743 /*
744 * Range id needs to correspond to one of the PTR ranges
745 */
		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
		range_id = kmem_adjust_range_id(type_hash);
748 from_right = type_hash & KMEM_DIRECTION_MASK;
749 }
750
751 vmk_flags->vmkf_range_id = range_id;
752 vmk_flags->vmkf_last_free = from_right;
753}
754
755#pragma mark allocation
756
757static kmem_return_t
758kmem_alloc_guard_internal(
759 vm_map_t map,
760 vm_size_t size,
761 vm_offset_t mask,
762 kma_flags_t flags,
763 kmem_guard_t guard,
764 kern_return_t (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
765{
766 vm_object_t object;
767 vm_offset_t delta = 0;
768 vm_map_entry_t entry = NULL;
769 vm_map_offset_t map_addr, fill_start;
770 vm_map_size_t map_size, fill_size;
771 vm_page_t guard_left = VM_PAGE_NULL;
772 vm_page_t guard_right = VM_PAGE_NULL;
773 vm_page_t wired_page_list = VM_PAGE_NULL;
774 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
775 bool skip_guards;
776 kmem_return_t kmr = { };
777
778 assert(kernel_map && map->pmap == kernel_pmap);
779
780#if DEBUG || DEVELOPMENT
781 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
782 size, 0, 0, 0);
783#endif
784
785 if (size == 0 ||
786 (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
787 (size < __kmem_guard_size(flags: ANYF(flags)))) {
788 __kmem_invalid_size_panic(map, size, flags);
789 }
790
791 /*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down.
	 * The limit is raised to 2GB at the 128GB max physical limit,
	 * and scaled by installed memory above this.
797 *
798 * Note: kmem_alloc_contig_guard() is immune to this check.
799 */
800 if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
801 alloc_pages == NULL &&
802 size > MAX(1ULL << 31, sane_size / 64))) {
803 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
804 goto out_error;
805 }
806
807 /*
808 * Guard pages:
809 *
810 * Guard pages are implemented as fictitious pages.
811 *
812 * However, some maps, and some objects are known
813 * to manage their memory explicitly, and do not need
814 * those to be materialized, which saves memory.
815 *
816 * By placing guard pages on either end of a stack,
817 * they can help detect cases where a thread walks
818 * off either end of its stack.
819 *
820 * They are allocated and set up here and attempts
821 * to access those pages are trapped in vm_fault_page().
822 *
823 * The map_size we were passed may include extra space for
824 * guard pages. fill_size represents the actual size to populate.
825 * Similarly, fill_start indicates where the actual pages
826 * will begin in the range.
827 */
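
	/*
	 * Illustrative layout for a request with both guard pages
	 * (one box per page):
	 *
	 *   map_addr
	 *   +---------+---------------------------------+---------+
	 *   |  guard  | fill_size bytes of backing pages|  guard  |
	 *   |  first  | (wired unless KMA_PAGEABLE)     |  last   |
	 *   +---------+---------------------------------+---------+
	 *             ^
	 *             fill_start (PAGE_SIZE when KMA_GUARD_FIRST is set)
	 */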
828
	map_size = round_page(size);
	fill_start = 0;
	fill_size = map_size - __kmem_guard_size(ANYF(flags));
832
833#if KASAN_CLASSIC
834 if (flags & KMA_KASAN_GUARD) {
835 assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
837 delta = ptoa(2);
838 map_size += delta;
839 }
840#else
841 (void)delta;
842#endif /* KASAN_CLASSIC */
843
844 skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
845 map->never_faults;
846
847 if (flags & KMA_GUARD_FIRST) {
848 vmk_flags.vmkf_guard_before = true;
849 fill_start += PAGE_SIZE;
850 }
851 if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
		guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
853 if (__improbable(guard_left == VM_PAGE_NULL)) {
854 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
855 goto out_error;
856 }
857 }
858 if ((flags & KMA_GUARD_LAST) && !skip_guards) {
		guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
860 if (__improbable(guard_right == VM_PAGE_NULL)) {
861 kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
862 goto out_error;
863 }
864 }
865
866 if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
867 if (alloc_pages) {
868 kmr.kmr_return = alloc_pages(fill_size, flags,
869 &wired_page_list);
870 } else {
			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
			    &wired_page_list);
873 }
874 if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
875 goto out_error;
876 }
877 }
878
879 /*
880 * Allocate a new object (if necessary). We must do this before
881 * locking the map, or risk deadlock with the default pager.
882 */
883 if (flags & KMA_KOBJECT) {
884 object = kernel_object_default;
885 vm_object_reference(object);
886 } else if (flags & KMA_COMPRESSOR) {
887 object = compressor_object;
888 vm_object_reference(object);
889 } else {
		object = vm_object_allocate(map_size);
		vm_object_lock(object);
		vm_object_set_size(object, map_size, size);
893 /* stabilize the object to prevent shadowing */
894 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
895 VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
896 vm_object_unlock(object);
897 }
898
899 if (flags & KMA_LAST_FREE) {
900 vmk_flags.vmkf_last_free = true;
901 }
902 if (flags & KMA_PERMANENT) {
903 vmk_flags.vmf_permanent = true;
904 }
	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
	    false);

	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
	    vmk_flags, &entry);
910 if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
911 vm_object_deallocate(object);
912 goto out_error;
913 }
914
915 map_addr = entry->vme_start;
	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(entry, guard.kmg_tag);
	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		VME_OFFSET_SET(entry, map_addr);
920 }
921
922#if KASAN
923 if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
924 entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
925 }
926#endif /* KASAN */
927
928 if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
929 entry->wired_count = 1;
		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
931 }
932
933 if (guard_left || guard_right || wired_page_list) {
934 vm_object_offset_t offset = 0ull;
935
936 vm_object_lock(object);
937 vm_map_unlock(map);
938
939 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
940 offset = map_addr;
941 }
942
943 if (guard_left) {
			vm_page_insert(guard_left, object, offset);
945 guard_left->vmp_busy = FALSE;
946 guard_left = VM_PAGE_NULL;
947 }
948
949 if (guard_right) {
			vm_page_insert(guard_right, object,
			    offset + fill_start + fill_size);
952 guard_right->vmp_busy = FALSE;
953 guard_right = VM_PAGE_NULL;
954 }
955
956 if (wired_page_list) {
			kernel_memory_populate_object_and_unlock(object,
			    map_addr + fill_start, offset + fill_start, fill_size,
			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
			    __kmem_mapping_type(ANYF(flags)));
961 } else {
962 vm_object_unlock(object);
963 }
964 } else {
965 vm_map_unlock(map);
966 }
967
968 /*
969 * now that the pages are wired, we no longer have to fear coalesce
970 */
971 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		vm_map_simplify(map, map_addr);
973 }
974
975#if DEBUG || DEVELOPMENT
976 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
977 atop(fill_size), 0, 0, 0);
978#endif /* DEBUG || DEVELOPMENT */
979 kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
980
981#if KASAN
982 if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
983 /*
984 * We need to allow the range for pageable memory,
985 * or faulting will not be allowed.
986 */
987 kasan_notify_address(map_addr, map_size);
988 }
989#endif /* KASAN */
990#if KASAN_CLASSIC
991 if (flags & KMA_KASAN_GUARD) {
992 kmr.kmr_address += PAGE_SIZE;
993 kasan_alloc_large(kmr.kmr_address, size);
994 }
995#endif /* KASAN_CLASSIC */
996#if CONFIG_KERNEL_TAGGING
997 if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
998 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size);
999 vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size);
1000#if KASAN_TBI
1001 kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size);
1002#endif /* KASAN_TBI */
1003 }
1004#endif /* CONFIG_KERNEL_TAGGING */
1005 return kmr;
1006
1007out_error:
1008 if (flags & KMA_NOFAIL) {
		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
1010 }
1011 if (guard_left) {
1012 guard_left->vmp_snext = wired_page_list;
1013 wired_page_list = guard_left;
1014 }
1015 if (guard_right) {
1016 guard_right->vmp_snext = wired_page_list;
1017 wired_page_list = guard_right;
1018 }
1019 if (wired_page_list) {
		vm_page_free_list(wired_page_list, FALSE);
1021 }
1022
1023#if DEBUG || DEVELOPMENT
1024 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
1025 0, 0, 0, 0);
1026#endif /* DEBUG || DEVELOPMENT */
1027
1028 return kmr;
1029}
1030
1031kmem_return_t
1032kmem_alloc_guard(
1033 vm_map_t map,
1034 vm_size_t size,
1035 vm_offset_t mask,
1036 kma_flags_t flags,
1037 kmem_guard_t guard)
1038{
1039 return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
1040}
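
/*
 * Illustrative usage sketch (not part of the original file; the tag, sizes
 * and guard context below are arbitrary): an atomic, guarded, zero-filled
 * kernel-object allocation paired with a matching kmem_free_guard().
 */
#if 0 /* example only */
static void
kmem_alloc_guard_example(void)
{
	kmem_guard_t guard = {
		.kmg_atomic  = true,
		.kmg_tag     = VM_KERN_MEMORY_KALLOC,
		.kmg_context = 0x1234,
	};
	kmem_return_t kmr;

	kmr = kmem_alloc_guard(kernel_map, 3 * PAGE_SIZE, /* mask */ 0,
	    KMA_KOBJECT | KMA_ZERO | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard);
	if (kmr.kmr_return == KERN_SUCCESS) {
		/* ... use (void *)kmr.kmr_address ... */
		kmem_free_guard(kernel_map, kmr.kmr_address, 3 * PAGE_SIZE,
		    KMF_NONE, guard);
	}
}
#endif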
1041
1042kmem_return_t
1043kmem_alloc_contig_guard(
1044 vm_map_t map,
1045 vm_size_t size,
1046 vm_offset_t mask,
1047 ppnum_t max_pnum,
1048 ppnum_t pnum_mask,
1049 kma_flags_t flags,
1050 kmem_guard_t guard)
1051{
1052 __auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
1054 };
1055
1056 return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
1057}
1058
1059kmem_return_t
1060kmem_suballoc(
1061 vm_map_t parent,
1062 mach_vm_offset_t *addr,
1063 vm_size_t size,
1064 vm_map_create_options_t vmc_options,
1065 int vm_flags,
1066 kms_flags_t flags,
1067 vm_tag_t tag)
1068{
1069 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
1070 vm_map_offset_t map_addr = 0;
1071 kmem_return_t kmr = { };
1072 vm_map_t map;
1073
1074 assert(page_aligned(size));
1075 assert(parent->pmap == kernel_pmap);
1076
	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
1078
1079 if (parent == kernel_map) {
1080 assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
1081 }
1082
1083 if (vmk_flags.vmf_fixed) {
1084 map_addr = trunc_page(*addr);
1085 }
1086
1087 pmap_reference(vm_map_pmap(parent));
	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
1089
1090 /*
1091 * 1. vm_map_enter() will consume one ref on success.
1092 *
1093 * 2. make the entry atomic as kernel submaps should never be split.
1094 *
1095 * 3. instruct vm_map_enter() that it is a fresh submap
	 *    that needs to be taught its bounds as it is inserted.
1097 */
1098 vm_map_reference(map);
1099
1100 vmk_flags.vmkf_submap = true;
1101 if ((flags & KMS_DATA) == 0) {
1102 /* FIXME: IOKit submaps get fragmented and can't be atomic */
1103 vmk_flags.vmkf_submap_atomic = true;
1104 }
1105 vmk_flags.vmkf_submap_adjust = true;
1106 if (flags & KMS_LAST_FREE) {
1107 vmk_flags.vmkf_last_free = true;
1108 }
1109 if (flags & KMS_PERMANENT) {
1110 vmk_flags.vmf_permanent = true;
1111 }
1112 if (flags & KMS_DATA) {
1113 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1114 }
1115
	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
	    vmk_flags, (vm_object_t)map, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
1119
1120 if (kmr.kmr_return != KERN_SUCCESS) {
1121 if (flags & KMS_NOFAIL) {
1122 panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
1123 parent, size, kmr.kmr_return);
1124 }
1125 assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
1126 vm_map_deallocate(map);
1127 vm_map_deallocate(map); /* also removes ref to pmap */
1128 return kmr;
1129 }
1130
1131 /*
1132 * For kmem_suballocs that register a claim and are assigned a range, ensure
1133 * that the exact same range is returned.
1134 */
1135 if (*addr != 0 && parent == kernel_map &&
1136 startup_phase > STARTUP_SUB_KMEM) {
1137 assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
1138 } else {
1139 *addr = map_addr;
1140 }
1141
1142 kmr.kmr_submap = map;
1143 return kmr;
1144}
1145
1146/*
1147 * kmem_alloc:
1148 *
1149 * Allocate wired-down memory in the kernel's address map
1150 * or a submap. The memory is not zero-filled.
1151 */
1152
1153__exported kern_return_t
1154kmem_alloc_external(
1155 vm_map_t map,
1156 vm_offset_t *addrp,
1157 vm_size_t size);
1158kern_return_t
1159kmem_alloc_external(
1160 vm_map_t map,
1161 vm_offset_t *addrp,
1162 vm_size_t size)
1163{
1164 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
1166 }
1167 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1168 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1169}
1170
1171
1172/*
1173 * kmem_alloc_kobject:
1174 *
1175 * Allocate wired-down memory in the kernel's address map
1176 * or a submap. The memory is not zero-filled.
1177 *
1178 * The memory is allocated in the kernel_object.
1179 * It may not be copied with vm_map_copy, and
1180 * it may not be reallocated with kmem_realloc.
1181 */
1182
1183__exported kern_return_t
1184kmem_alloc_kobject_external(
1185 vm_map_t map,
1186 vm_offset_t *addrp,
1187 vm_size_t size);
1188kern_return_t
1189kmem_alloc_kobject_external(
1190 vm_map_t map,
1191 vm_offset_t *addrp,
1192 vm_size_t size)
1193{
1194 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
1196 }
1197 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1198 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1199}
1200
1201/*
1202 * kmem_alloc_pageable:
1203 *
1204 * Allocate pageable memory in the kernel's address map.
1205 */
1206
1207__exported kern_return_t
1208kmem_alloc_pageable_external(
1209 vm_map_t map,
1210 vm_offset_t *addrp,
1211 vm_size_t size);
1212kern_return_t
1213kmem_alloc_pageable_external(
1214 vm_map_t map,
1215 vm_offset_t *addrp,
1216 vm_size_t size)
1217{
1218 if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
1220 }
1221 /* Maintain ABI compatibility: invalid sizes used to be allowed */
1222 return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
1223}
1224
1225
1226#pragma mark population
1227
1228static void
1229kernel_memory_populate_pmap_enter(
1230 vm_object_t object,
1231 vm_address_t addr,
1232 vm_object_offset_t offset,
1233 vm_page_t mem,
1234 vm_prot_t prot,
1235 int pe_flags,
1236 pmap_mapping_type_t mapping_type)
1237{
1238 kern_return_t pe_result;
1239 int pe_options;
1240
1241 if (VMP_ERROR_GET(mem)) {
1242 panic("VM page %p should not have an error", mem);
1243 }
1244
1245 pe_options = PMAP_OPTIONS_NOWAIT;
1246 if (object->internal) {
1247 pe_options |= PMAP_OPTIONS_INTERNAL;
1248 }
1249 if (mem->vmp_reusable || object->all_reusable) {
1250 pe_options |= PMAP_OPTIONS_REUSABLE;
1251 }
1252
	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1256
1257 if (pe_result == KERN_RESOURCE_SHORTAGE) {
1258 vm_object_unlock(object);
1259
1260 pe_options &= ~PMAP_OPTIONS_NOWAIT;
1261
		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
1265
1266 vm_object_lock(object);
1267 }
1268
1269 assert(pe_result == KERN_SUCCESS);
1270}
1271
1272void
1273kernel_memory_populate_object_and_unlock(
1274 vm_object_t object, /* must be locked */
1275 vm_address_t addr,
1276 vm_offset_t offset,
1277 vm_size_t size,
1278 vm_page_t page_list,
1279 kma_flags_t flags,
1280 vm_tag_t tag,
1281 vm_prot_t prot,
1282 pmap_mapping_type_t mapping_type)
1283{
1284 vm_page_t mem;
1285 int pe_flags;
1286 bool gobbled_list = page_list && page_list->vmp_gobbled;
1287
1288 assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
1289 assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
1290 if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
1291 assert3u(offset, ==, addr);
1292 } else {
1293 /*
1294 * kernel_memory_populate_pmap_enter() might drop the object
1295 * lock, and the caller might not own a reference anymore
1296 * and rely on holding the vm object lock for liveness.
1297 */
1298 vm_object_reference_locked(object);
1299 }
1300
1301 if (flags & KMA_KSTACK) {
1302 pe_flags = VM_MEM_STACK;
1303 } else {
1304 pe_flags = 0;
1305 }
1306
1307
1308 for (vm_object_offset_t pg_offset = 0;
1309 pg_offset < size;
1310 pg_offset += PAGE_SIZE_64) {
1311 if (page_list == NULL) {
1312 panic("%s: page_list too short", __func__);
1313 }
1314
1315 mem = page_list;
1316 page_list = mem->vmp_snext;
1317 mem->vmp_snext = NULL;
1318
1319 assert(mem->vmp_wire_count == 0);
1320 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
1321 assert(!mem->vmp_fictitious && !mem->vmp_private);
1322
1323 if (flags & KMA_COMPRESSOR) {
1324 mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
1325 /*
1326 * Background processes doing I/O accounting can call
1327 * into NVME driver to do some work which results in
1328 * an allocation here and so we want to make sure
1329 * that the pages used by compressor, regardless of
1330 * process context, are never on the special Q.
1331 */
1332 mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
1333
			vm_page_insert(mem, object, offset + pg_offset);
1335 } else {
1336 mem->vmp_q_state = VM_PAGE_IS_WIRED;
1337 mem->vmp_wire_count = 1;
1338
			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
1340 }
1341
1342 mem->vmp_gobbled = false;
1343 mem->vmp_busy = false;
1344 mem->vmp_pmapped = true;
1345 mem->vmp_wpmapped = true;
1346
1347 /*
1348 * Manual PMAP_ENTER_OPTIONS() with shortcuts
1349 * for the kernel and compressor objects.
1350 */
		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
		    mem, prot, pe_flags, mapping_type);

		if (flags & KMA_NOENCRYPT) {
			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
1356 }
1357 }
1358
1359 if (page_list) {
1360 panic("%s: page_list too long", __func__);
1361 }
1362
1363 vm_object_unlock(object);
1364 if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
1365 vm_object_deallocate(object);
1366 }
1367
1368 /*
1369 * Update the accounting:
1370 * - the compressor "wired" pages don't really count as wired
1371 * - kmem_alloc_contig_guard() gives gobbled pages,
1372 * which already count as wired but need to be ungobbled.
1373 */
1374 if (gobbled_list) {
1375 vm_page_lockspin_queues();
1376 if (flags & KMA_COMPRESSOR) {
1377 vm_page_wire_count -= atop(size);
1378 }
1379 vm_page_gobble_count -= atop(size);
1380 vm_page_unlock_queues();
1381 } else if ((flags & KMA_COMPRESSOR) == 0) {
1382 vm_page_lockspin_queues();
1383 vm_page_wire_count += atop(size);
1384 vm_page_unlock_queues();
1385 }
1386
1387 if (flags & KMA_KOBJECT) {
1388 /* vm_page_insert_wired() handles regular objects already */
1389 vm_tag_update_size(tag, size, NULL);
1390 }
1391
1392#if KASAN
1393 if (flags & KMA_COMPRESSOR) {
1394 kasan_notify_address_nopoison(addr, size);
1395 } else {
1396 kasan_notify_address(addr, size);
1397 }
1398#endif /* KASAN */
1399}
1400
1401
1402kern_return_t
1403kernel_memory_populate(
1404 vm_offset_t addr,
1405 vm_size_t size,
1406 kma_flags_t flags,
1407 vm_tag_t tag)
1408{
1409 kern_return_t kr = KERN_SUCCESS;
1410 vm_page_t page_list = NULL;
1411 vm_size_t page_count = atop_64(size);
	vm_object_t object = __kmem_object(ANYF(flags));
1413
1414#if DEBUG || DEVELOPMENT
1415 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_START,
1416 size, 0, 0, 0);
1417#endif /* DEBUG || DEVELOPMENT */
1418
	kr = vm_page_alloc_list(page_count, flags, &page_list);
	if (kr == KERN_SUCCESS) {
		vm_object_lock(object);
		kernel_memory_populate_object_and_unlock(object, addr,
		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
		    __kmem_mapping_type(ANYF(flags)));
1425 }
1426
1427#if DEBUG || DEVELOPMENT
1428 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
1429 page_count, 0, 0, 0);
1430#endif /* DEBUG || DEVELOPMENT */
1431 return kr;
1432}
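
/*
 * Illustrative sketch (not part of the original file; the tag is arbitrary):
 * a common pattern is to reserve virtual space with KMA_VAONLY | KMA_KOBJECT
 * and then back and unback sub-ranges on demand with kernel_memory_populate()
 * above and kernel_memory_depopulate() below.
 */
#if 0 /* example only */
static void
kernel_memory_populate_example(vm_offset_t va_base)
{
	/* back the first page of an existing KMA_VAONLY | KMA_KOBJECT range */
	kern_return_t kr = kernel_memory_populate(va_base, PAGE_SIZE,
	    KMA_KOBJECT | KMA_ZERO, VM_KERN_MEMORY_DIAG);

	if (kr == KERN_SUCCESS) {
		/* ... use the freshly wired page ... */
		kernel_memory_depopulate(va_base, PAGE_SIZE,
		    KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
	}
}
#endif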
1433
1434void
1435kernel_memory_depopulate(
1436 vm_offset_t addr,
1437 vm_size_t size,
1438 kma_flags_t flags,
1439 vm_tag_t tag)
1440{
	vm_object_t object = __kmem_object(ANYF(flags));
1442 vm_object_offset_t offset = addr;
1443 vm_page_t mem;
1444 vm_page_t local_freeq = NULL;
1445 unsigned int pages_unwired = 0;
1446
1447 vm_object_lock(object);
1448
	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
1450
1451 for (vm_object_offset_t pg_offset = 0;
1452 pg_offset < size;
1453 pg_offset += PAGE_SIZE_64) {
		mem = vm_page_lookup(object, offset + pg_offset);
1455
1456 assert(mem);
1457
1458 if (flags & KMA_COMPRESSOR) {
1459 assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
1460 } else {
1461 assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
1463 pages_unwired++;
1464 }
1465
1466 mem->vmp_busy = TRUE;
1467
1468 assert(mem->vmp_tabled);
		vm_page_remove(mem, TRUE);
1470 assert(mem->vmp_busy);
1471
1472 assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
1473
1474 mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
1475 mem->vmp_snext = local_freeq;
1476 local_freeq = mem;
1477 }
1478
1479 vm_object_unlock(object);
1480
	vm_page_free_list(local_freeq, TRUE);
1482
1483 if (!(flags & KMA_COMPRESSOR)) {
1484 vm_page_lockspin_queues();
1485 vm_page_wire_count -= pages_unwired;
1486 vm_page_unlock_queues();
1487 }
1488
1489 if (flags & KMA_KOBJECT) {
1490 /* vm_page_remove() handles regular objects already */
		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
1492 }
1493}
1494
1495#pragma mark reallocation
1496
1497__abortlike
1498static void
1499__kmem_realloc_invalid_object_size_panic(
1500 vm_map_t map,
1501 vm_address_t address,
1502 vm_size_t size,
1503 vm_map_entry_t entry)
1504{
1505 vm_object_t object = VME_OBJECT(entry);
1506 vm_size_t objsize = __kmem_entry_orig_size(entry);
1507
1508 panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1509 "object %p has unexpected size %ld",
1510 map, (void *)address, (size_t)size, entry, object, objsize);
1511}
1512
1513__abortlike
1514static void
1515__kmem_realloc_invalid_pager_panic(
1516 vm_map_t map,
1517 vm_address_t address,
1518 vm_size_t size,
1519 vm_map_entry_t entry)
1520{
1521 vm_object_t object = VME_OBJECT(entry);
1522 memory_object_t pager = object->pager;
1523 bool pager_created = object->pager_created;
1524 bool pager_initialized = object->pager_initialized;
1525 bool pager_ready = object->pager_ready;
1526
1527 panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
1528 "object %p has unexpected pager %p (%d,%d,%d)",
1529 map, (void *)address, (size_t)size, entry, object,
1530 pager, pager_created, pager_initialized, pager_ready);
1531}
1532
1533static kmem_return_t
1534kmem_realloc_shrink_guard(
1535 vm_map_t map,
1536 vm_offset_t req_oldaddr,
1537 vm_size_t req_oldsize,
1538 vm_size_t req_newsize,
1539 kmr_flags_t flags,
1540 kmem_guard_t guard,
1541 vm_map_entry_t entry)
1542{
1543 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
1544 vm_object_t object;
1545 vm_offset_t delta = 0;
1546 kmem_return_t kmr;
1547 bool was_atomic;
	vm_size_t oldsize = round_page(req_oldsize);
	vm_size_t newsize = round_page(req_newsize);
1550 vm_address_t oldaddr = req_oldaddr;
1551
1552#if KASAN_CLASSIC
1553 if (flags & KMR_KASAN_GUARD) {
1554 assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
1555 flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1556 oldaddr -= PAGE_SIZE;
1557 delta = ptoa(2);
1558 oldsize += delta;
1559 newsize += delta;
1560 }
1561#endif /* KASAN_CLASSIC */
1562
1563 if (flags & KMR_TAG) {
1564 oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
1565 }
1566
1567 vm_map_lock_assert_exclusive(map);
1568
1569 if ((flags & KMR_KOBJECT) == 0) {
1570 object = VME_OBJECT(entry);
1571 vm_object_reference(object);
1572 }
1573
1574 /*
1575 * Shrinking an atomic entry starts with splitting it,
1576 * and removing the second half.
1577 */
1578 was_atomic = entry->vme_atomic;
1579 entry->vme_atomic = false;
	vm_map_clip_end(map, entry, entry->vme_start + newsize);
1581 entry->vme_atomic = was_atomic;
1582
1583#if KASAN
1584 if (entry->vme_kernel_object && was_atomic) {
1585 entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
1586 }
1587#if KASAN_CLASSIC
1588 if (flags & KMR_KASAN_GUARD) {
1589 kasan_poison_range(oldaddr + newsize, oldsize - newsize,
1590 ASAN_VALID);
1591 }
1592#endif
1593#if KASAN_TBI
1594 if (flags & KMR_TAG) {
1595 kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize);
1596 }
1597#endif /* KASAN_TBI */
1598#endif /* KASAN */
	(void)vm_map_remove_and_unlock(map,
	    oldaddr + newsize, oldaddr + oldsize,
	    vmr_flags, KMEM_GUARD_NONE);
1602
1603
1604 /*
1605 * Lastly, if there are guard pages, deal with them.
1606 *
1607 * The kernel object just needs to depopulate,
1608 * regular objects require freeing the last page
1609 * and replacing it with a guard.
1610 */
1611 if (flags & KMR_KOBJECT) {
1612 if (flags & KMR_GUARD_LAST) {
			kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
			    PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
1615 }
1616 } else {
1617 vm_page_t guard_right = VM_PAGE_NULL;
1618 vm_offset_t remove_start = newsize;
1619
1620 if (flags & KMR_GUARD_LAST) {
1621 if (!map->never_faults) {
1622 guard_right = vm_page_grab_guard(true);
1623 }
1624 remove_start -= PAGE_SIZE;
1625 }
1626
1627 vm_object_lock(object);
1628
1629 if (object->vo_size != oldsize) {
			__kmem_realloc_invalid_object_size_panic(map,
			    req_oldaddr, req_oldsize + delta, entry);
		}
		vm_object_set_size(object, newsize, req_newsize);

		vm_object_page_remove(object, remove_start, oldsize);

		if (guard_right) {
			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
1639 guard_right->vmp_busy = false;
1640 }
1641 vm_object_unlock(object);
1642 vm_object_deallocate(object);
1643 }
1644
1645 kmr.kmr_address = req_oldaddr;
1646 kmr.kmr_return = 0;
1647#if KASAN_CLASSIC
1648 if (flags & KMA_KASAN_GUARD) {
1649 kasan_alloc_large(kmr.kmr_address, req_newsize);
1650 }
1651#endif /* KASAN_CLASSIC */
1652#if KASAN_TBI
1653 if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1654 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
1655 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
1656 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
1657 }
1658#endif /* KASAN_TBI */
1659
1660 return kmr;
1661}
1662
1663kmem_return_t
1664kmem_realloc_guard(
1665 vm_map_t map,
1666 vm_offset_t req_oldaddr,
1667 vm_size_t req_oldsize,
1668 vm_size_t req_newsize,
1669 kmr_flags_t flags,
1670 kmem_guard_t guard)
1671{
1672 vm_object_t object;
1673 vm_size_t oldsize;
1674 vm_size_t newsize;
1675 vm_offset_t delta = 0;
1676 vm_map_offset_t oldaddr;
1677 vm_map_offset_t newaddr;
1678 vm_object_offset_t newoffs;
1679 vm_map_entry_t oldentry;
1680 vm_map_entry_t newentry;
1681 vm_page_t page_list = NULL;
1682 bool needs_wakeup = false;
1683 kmem_return_t kmr = { };
1684 unsigned int last_timestamp;
1685 vm_map_kernel_flags_t vmk_flags = {
1686 .vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
1687 };
1688
1689 assert(KMEM_REALLOC_FLAGS_VALID(flags));
1690 if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
		__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
		    req_oldsize, flags);
1693 }
1694
1695 if (req_oldaddr == 0ul) {
		return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
1697 }
1698
1699 if (req_newsize == 0ul) {
		kmem_free_guard(map, req_oldaddr, req_oldsize,
		    (kmf_flags_t)flags, guard);
1702 return kmr;
1703 }
1704
1705 if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}
	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}

	oldsize = round_page(req_oldsize);
	newsize = round_page(req_newsize);
1714 oldaddr = req_oldaddr;
1715#if KASAN_CLASSIC
1716 if (flags & KMR_KASAN_GUARD) {
1717 flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
1718 oldaddr -= PAGE_SIZE;
1719 delta = ptoa(2);
1720 oldsize += delta;
1721 newsize += delta;
1722 }
1723#endif /* KASAN_CLASSIC */
1724#if CONFIG_KERNEL_TAGGING
1725 if (flags & KMR_TAG) {
1726 vm_memtag_verify_tag(req_oldaddr);
1727 oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
1728 }
1729#endif /* CONFIG_KERNEL_TAGGING */
1730
1731#if !KASAN
1732 /*
1733 * If not on a KASAN variant and no difference in requested size,
1734 * just return.
1735 *
1736 * Otherwise we want to validate the size and re-tag for KASAN_TBI.
1737 */
1738 if (oldsize == newsize) {
1739 kmr.kmr_address = req_oldaddr;
1740 return kmr;
1741 }
1742#endif /* !KASAN */
1743
1744 /*
1745 * If we're growing the allocation,
1746 * then reserve the pages we'll need,
1747 * and find a spot for its new place.
1748 */
1749 if (oldsize < newsize) {
1750#if DEBUG || DEVELOPMENT
1751 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1752 VM_KERN_REQUEST, DBG_FUNC_START,
1753 newsize - oldsize, 0, 0, 0);
1754#endif /* DEBUG || DEVELOPMENT */
		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
		    (kma_flags_t)flags, &page_list);
		if (kmr.kmr_return == KERN_SUCCESS) {
			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
			    newsize, 0, &vmk_flags, true);
			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
			    vmk_flags, &newentry);
1762 }
1763 if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
1764 if (flags & KMR_REALLOCF) {
				kmem_free_guard(map, req_oldaddr, req_oldsize,
				    KMF_NONE, guard);
			}
			if (page_list) {
				vm_page_free_list(page_list, FALSE);
1770 }
1771#if DEBUG || DEVELOPMENT
1772 VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
1773 VM_KERN_REQUEST, DBG_FUNC_END,
1774 0, 0, 0, 0);
1775#endif /* DEBUG || DEVELOPMENT */
1776 return kmr;
1777 }
1778
1779 /* map is locked */
1780 } else {
1781 vm_map_lock(map);
1782 }
1783
1784
1785 /*
1786 * Locate the entry:
1787 * - wait for it to quiesce.
1788 * - validate its guard,
1789 * - learn its correct tag,
1790 */
1791again:
	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
		__kmem_entry_not_found_panic(map, req_oldaddr);
	}
	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
		oldentry->needs_wakeup = true;
		vm_map_entry_wait(map, THREAD_UNINT);
		goto again;
	}
	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
1803 }
1804 /*
1805 * TODO: We should validate for non atomic entries that the range
1806 * we are acting on is what we expect here.
1807 */
1808#if KASAN
1809 if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
1810 __kmem_realloc_invalid_object_size_panic(map,
1811 req_oldaddr, req_oldsize + delta, oldentry);
1812 }
1813
1814 if (oldsize == newsize) {
1815 kmr.kmr_address = req_oldaddr;
1816 if (oldentry->vme_kernel_object) {
1817 oldentry->vme_object_or_delta = delta +
1818 (-req_newsize & PAGE_MASK);
1819 } else {
1820 object = VME_OBJECT(oldentry);
1821 vm_object_lock(object);
1822 vm_object_set_size(object, newsize, req_newsize);
1823 vm_object_unlock(object);
1824 }
1825 vm_map_unlock(map);
1826
1827#if KASAN_CLASSIC
1828 if (flags & KMA_KASAN_GUARD) {
1829 kasan_alloc_large(kmr.kmr_address, req_newsize);
1830 }
1831#endif /* KASAN_CLASSIC */
1832#if KASAN_TBI
1833 if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
1834 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
1835 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
1836 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
1837 }
1838#endif /* KASAN_TBI */
1839 return kmr;
1840 }
1841#endif /* KASAN */
1842
1843 guard.kmg_tag = VME_ALIAS(oldentry);
1844
1845 if (newsize < oldsize) {
		return kmem_realloc_shrink_guard(map, req_oldaddr,
		    req_oldsize, req_newsize, flags, guard, oldentry);
1848 }
1849
1850
1851 /*
1852 * We are growing the entry
1853 *
1854 * For regular objects we use the object `vo_size` updates
1855 * as a guarantee that no 2 kmem_realloc() can happen
	 * concurrently (by doing it before the map is unlocked).
1857 *
1858 * For the kernel object, prevent the entry from being
1859 * reallocated or changed by marking it "in_transition".
1860 */
1861
1862 object = VME_OBJECT(oldentry);
1863 vm_object_lock(object);
1864 vm_object_reference_locked(object);
1865
1866 newaddr = newentry->vme_start;
1867 newoffs = oldsize;
1868
	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(newentry, guard.kmg_tag);
	if (flags & KMR_KOBJECT) {
		oldentry->in_transition = true;
		VME_OFFSET_SET(newentry, newaddr);
		newentry->wired_count = 1;
		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
1876 newoffs = newaddr + oldsize;
1877 } else {
1878 if (object->pager_created || object->pager) {
1879 /*
1880 * We can't "realloc/grow" the pager, so pageable
1881 * allocations should not go through this path.
1882 */
			__kmem_realloc_invalid_pager_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		if (object->vo_size != oldsize) {
			__kmem_realloc_invalid_object_size_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		vm_object_set_size(object, newsize, req_newsize);
1891 }
1892
1893 last_timestamp = map->timestamp;
1894 vm_map_unlock(map);
1895
1896
1897 /*
1898 * Now proceed with the population of pages.
1899 *
1900 * Kernel objects can use the kmem population helpers.
1901 *
1902 * Regular objects will insert pages manually,
1903 * then wire the memory into the new range.
1904 */
1905
	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));

	if (flags & KMR_KOBJECT) {
		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));

		pmap_protect(kernel_pmap,
		    oldaddr, oldaddr + oldsize - guard_right_size,
		    VM_PROT_NONE);
1914
1915 for (vm_object_offset_t offset = 0;
1916 offset < oldsize - guard_right_size;
1917 offset += PAGE_SIZE_64) {
1918 vm_page_t mem;
1919
			mem = vm_page_lookup(object, oldaddr + offset);
			if (mem == VM_PAGE_NULL) {
				continue;
			}

			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));

			mem->vmp_busy = true;
			vm_page_remove(mem, true);
			vm_page_insert_wired(mem, object, newaddr + offset,
			    guard.kmg_tag);
			mem->vmp_busy = false;

			kernel_memory_populate_pmap_enter(object, newaddr,
			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
1935 }
1936
		kernel_memory_populate_object_and_unlock(object,
		    newaddr + oldsize - guard_right_size,
		    newoffs - guard_right_size,
		    newsize - oldsize,
		    page_list, (kma_flags_t)flags,
		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
1943 } else {
1944 vm_page_t guard_right = VM_PAGE_NULL;
1945
1946 /*
1947 * Note: we are borrowing the new entry reference
1948 * on the object for the duration of this code,
1949 * which works because we keep the object locked
1950 * throughout.
1951 */
1952 if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
			assert(guard_right->vmp_fictitious);
			guard_right->vmp_busy = true;
			vm_page_remove(guard_right, true);
1957 }
1958
1959 if (flags & KMR_FREEOLD) {
1960 /*
1961 * Freeing the old mapping will make
1962 * the old pages become pageable until
1963 * the new mapping makes them wired again.
1964 * Let's take an extra "wire_count" to
1965 * prevent any accidental "page out".
1966 * We'll have to undo that after wiring
1967 * the new mapping.
1968 */
1969 vm_object_reference_locked(object); /* keep object alive */
1970 for (vm_object_offset_t offset = 0;
1971 offset < oldsize - guard_right_size;
1972 offset += PAGE_SIZE_64) {
1973 vm_page_t mem;
1974
1975 mem = vm_page_lookup(object, offset);
1976 assert(mem != VM_PAGE_NULL);
1977 assertf(!VM_PAGE_PAGEABLE(mem),
1978 "mem %p qstate %d",
1979 mem, mem->vmp_q_state);
1980 if (VM_PAGE_GET_PHYS_PAGE(m: mem) == vm_page_guard_addr) {
1981 /* guard pages are not wired */
1982 } else {
1983 assertf(VM_PAGE_WIRED(mem),
1984 "mem %p qstate %d wirecount %d",
1985 mem,
1986 mem->vmp_q_state,
1987 mem->vmp_wire_count);
1988 assertf(mem->vmp_wire_count >= 1,
1989 "mem %p wirecount %d",
1990 mem, mem->vmp_wire_count);
1991 mem->vmp_wire_count++;
1992 }
1993 }
1994 }
1995
1996 for (vm_object_offset_t offset = oldsize - guard_right_size;
1997 offset < newsize - guard_right_size;
1998 offset += PAGE_SIZE_64) {
1999 vm_page_t mem = page_list;
2000
2001 page_list = mem->vmp_snext;
2002 mem->vmp_snext = VM_PAGE_NULL;
2003 assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
2004 assert(!VM_PAGE_PAGEABLE(mem));
2005
2006 vm_page_insert(page: mem, object, offset);
2007 mem->vmp_busy = false;
2008 }
2009
2010 if (guard_right) {
2011 vm_page_insert(page: guard_right, object, offset: newsize - PAGE_SIZE);
2012 guard_right->vmp_busy = false;
2013 }
2014
2015 vm_object_unlock(object);
2016 }
2017
2018 /*
2019 * Mark the entry as idle again,
2020 * and honor KMR_FREEOLD if needed.
2021 */
2022
2023 vm_map_lock(map);
2024 if (last_timestamp + 1 != map->timestamp &&
2025 !vm_map_lookup_entry(map, address: oldaddr, entry: &oldentry)) {
2026 __kmem_entry_not_found_panic(map, addr: req_oldaddr);
2027 }
2028
2029 if (flags & KMR_KOBJECT) {
2030 assert(oldentry->in_transition);
2031 oldentry->in_transition = false;
2032 if (oldentry->needs_wakeup) {
2033 needs_wakeup = true;
2034 oldentry->needs_wakeup = false;
2035 }
2036 }
2037
2038 if (flags & KMR_FREEOLD) {
2039 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2040
2041#if KASAN_CLASSIC
2042 if (flags & KMR_KASAN_GUARD) {
2043 kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
2044 }
2045#endif
2046#if KASAN_TBI
2047 if (flags & KMR_TAG) {
2048 kasan_tbi_mark_free_space(req_oldaddr, oldsize);
2049 }
2050#endif /* KASAN_TBI */
2051 if (flags & KMR_GUARD_LAST) {
2052 vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
2053 }
2054 (void)vm_map_remove_and_unlock(map,
2055 start: oldaddr, end: oldaddr + oldsize,
2056 flags: vmr_flags, guard);
2057 } else {
2058 vm_map_unlock(map);
2059 }
2060
2061 if ((flags & KMR_KOBJECT) == 0) {
2062 kern_return_t kr;
2063 /*
2064 * This must happen _after_ we do the KMR_FREEOLD,
2065 * because wiring the pages will call into the pmap,
2066 * and if the pages are typed XNU_KERNEL_RESTRICTED,
2067 * this would cause a second mapping of the page and panic.
2068 */
2069 kr = vm_map_wire_kernel(map, start: newaddr, end: newaddr + newsize,
2070 VM_PROT_DEFAULT, tag: guard.kmg_tag, FALSE);
2071 assert(kr == KERN_SUCCESS);
2072
2073 if (flags & KMR_FREEOLD) {
2074 /*
2075 * Undo the extra "wiring" we made above
2076 * and release the extra reference we took
2077 * on the object.
2078 */
2079 vm_object_lock(object);
2080 for (vm_object_offset_t offset = 0;
2081 offset < oldsize - guard_right_size;
2082 offset += PAGE_SIZE_64) {
2083 vm_page_t mem;
2084
2085 mem = vm_page_lookup(object, offset);
2086 assert(mem != VM_PAGE_NULL);
2087 assertf(!VM_PAGE_PAGEABLE(mem),
2088 "mem %p qstate %d",
2089 mem, mem->vmp_q_state);
2090 if (VM_PAGE_GET_PHYS_PAGE(m: mem) == vm_page_guard_addr) {
2091 /* guard pages are not wired */
2092 } else {
2093 assertf(VM_PAGE_WIRED(mem),
2094 "mem %p qstate %d wirecount %d",
2095 mem,
2096 mem->vmp_q_state,
2097 mem->vmp_wire_count);
2098 assertf(mem->vmp_wire_count >= 2,
2099 "mem %p wirecount %d",
2100 mem, mem->vmp_wire_count);
2101 mem->vmp_wire_count--;
2102 assert(VM_PAGE_WIRED(mem));
2103 assert(mem->vmp_wire_count >= 1);
2104 }
2105 }
2106 vm_object_unlock(object);
2107 vm_object_deallocate(object); /* release extra ref */
2108 }
2109 }
2110
2111 if (needs_wakeup) {
2112 vm_map_entry_wakeup(map);
2113 }
2114
2115#if DEBUG || DEVELOPMENT
2116 VM_DEBUG_CONSTANT_EVENT(vm_kern_request, VM_KERN_REQUEST, DBG_FUNC_END,
2117 atop(newsize - oldsize), 0, 0, 0);
2118#endif /* DEBUG || DEVELOPMENT */
2119 kmr.kmr_address = newaddr;
2120
2121#if KASAN
2122 kasan_notify_address(kmr.kmr_address, newsize);
2123#endif /* KASAN */
2124#if KASAN_CLASSIC
2125 if (flags & KMR_KASAN_GUARD) {
2126 kmr.kmr_address += PAGE_SIZE;
2127 kasan_alloc_large(kmr.kmr_address, req_newsize);
2128 }
2129#endif /* KASAN_CLASSIC */
2130#if KASAN_TBI
2131 if (flags & KMR_TAG) {
2132 kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
2133 vm_memtag_set_tag(kmr.kmr_address, req_newsize);
2134 kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
2135 }
2136#endif /* KASAN_TBI */
2137
2138 return kmr;
2139}
2140
2141
2142#pragma mark free
2143
2144#if KASAN
2145
2146__abortlike
2147static void
2148__kmem_free_invalid_object_size_panic(
2149 vm_map_t map,
2150 vm_address_t address,
2151 vm_size_t size,
2152 vm_map_entry_t entry)
2153{
2154 vm_object_t object = VME_OBJECT(entry);
2155 vm_size_t objsize = __kmem_entry_orig_size(entry);
2156
2157 panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
2158 "object %p has unexpected size %ld",
2159 map, (void *)address, (size_t)size, entry, object, objsize);
2160}
2161
2162#endif /* KASAN */
2163
2164vm_size_t
2165kmem_free_guard(
2166 vm_map_t map,
2167 vm_offset_t req_addr,
2168 vm_size_t req_size,
2169 kmf_flags_t flags,
2170 kmem_guard_t guard)
2171{
2172 vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
2173 vm_address_t addr = req_addr;
2174 vm_offset_t delta = 0;
2175 vm_size_t size;
2176#if KASAN
2177 vm_map_entry_t entry;
2178#endif /* KASAN */
2179
2180 assert(map->pmap == kernel_pmap);
2181
2182#if KASAN_CLASSIC
2183 if (flags & KMF_KASAN_GUARD) {
2184 addr -= PAGE_SIZE;
2185 delta = ptoa(2);
2186 }
2187#endif /* KASAN_CLASSIC */
2188#if CONFIG_KERNEL_TAGGING
2189 if (flags & KMF_TAG) {
2190 vm_memtag_verify_tag(req_addr);
2191 addr = vm_memtag_canonicalize_address(req_addr);
2192 }
2193#endif /* CONFIG_KERNEL_TAGGING */
2194
2195 if (flags & KMF_GUESS_SIZE) {
2196 vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
2197 size = PAGE_SIZE;
2198 } else if (req_size == 0) {
2199 __kmem_invalid_size_panic(map, size: req_size, flags);
2200 } else {
2201 size = round_page(x: req_size) + delta;
2202 }
2203
2204 vm_map_lock(map);
2205
2206#if KASAN
2207 if (!vm_map_lookup_entry(map, addr, &entry)) {
2208 __kmem_entry_not_found_panic(map, req_addr);
2209 }
2210 if (flags & KMF_GUESS_SIZE) {
2211 vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
2212 req_size = __kmem_entry_orig_size(entry);
2213 size = round_page(req_size + delta);
2214 } else if (guard.kmg_atomic && entry->vme_kernel_object &&
2215 __kmem_entry_orig_size(entry) != req_size) {
2216 /*
2217 * We can't make a strict check for regular
2218 * VM objects because it could be:
2219 *
2220		 * - the kmem_free_guard() of a kmem_realloc_guard() without
2221 * KMR_FREEOLD, and in that case the object size won't match.
2222 *
2223 * - a submap, in which case there is no "orig size".
2224 */
2225 __kmem_free_invalid_object_size_panic(map,
2226 req_addr, req_size + delta, entry);
2227 }
2228#endif /* KASAN */
2229#if KASAN_CLASSIC
2230 if (flags & KMR_KASAN_GUARD) {
2231 kasan_poison_range(addr, size, ASAN_VALID);
2232 }
2233#endif
2234#if KASAN_TBI
2235 if (flags & KMF_TAG) {
2236 kasan_tbi_mark_free_space(req_addr, size);
2237 }
2238#endif /* KASAN_TBI */
2239
2240 /*
2241 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
2242	 * unwires the kernel mapping. The page won't be mapped any longer, so
2243	 * no extra step is required for memory tagging to "clear"
2244	 * it -- the page will later be laundered when it is reused.
2245 */
2246 return vm_map_remove_and_unlock(map, start: addr, end: addr + size,
2247 flags: vmr_flags, guard).kmr_size - delta;
2248}
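/*
 * Usage sketch (illustrative only, not taken from a caller in this file):
 * the guard supplied here must match the one used at allocation time for
 * atomic entries.  The tag and context values below are examples, and
 * KMF_NONE is assumed to be the "no flags" value of kmf_flags_t.
 *
 *	kmem_guard_t guard = {
 *		.kmg_atomic  = true,
 *		.kmg_tag     = VM_KERN_MEMORY_OSFMK,	/+ example VM tag +/
 *		.kmg_context = my_context,		/+ hypothetical value +/
 *	};
 *
 *	(void)kmem_free_guard(kernel_map, addr, size, KMF_NONE, guard);
 *
 * KMF_GUESS_SIZE lets the entry's recorded size drive the removal instead
 * of req_size, and KMF_TAG is required for tagged allocations so that the
 * address is canonicalized before the lookup.
 */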
2249
2250__exported void
2251kmem_free_external(
2252 vm_map_t map,
2253 vm_offset_t addr,
2254 vm_size_t size);
2255void
2256kmem_free_external(
2257 vm_map_t map,
2258 vm_offset_t addr,
2259 vm_size_t size)
2260{
2261 if (size) {
2262 kmem_free(map, trunc_page(addr), size);
2263#if MACH_ASSERT
2264 } else {
2265 printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
2266 map, (void *)addr, __builtin_return_address(0));
2267#endif
2268 }
2269}
2270
2271#pragma mark kmem metadata
2272
2273/*
2274 * Guard objects for kmem pointer allocation:
2275 *
2276 * Guard objects introduce size slabs to kmem pointer allocations, which are
2277 * allocated in chunks of n * sizeclass. When an allocation of a specific
2278 * sizeclass is requested, a random slot from [0, n) is returned.
2279 * Allocations are served from that chunk until only m slots are left. The
2280 * remaining m slots are referred to as guard objects: they don't get
2281 * allocated, and the chunk is then considered full. When an allocation is
2282 * freed back to the chunk, one of the resulting m + 1 free slots becomes
2283 * available for the next allocation of that sizeclass.
2284 *
2285 * Guard objects are intended to make exploitation of use-after-frees harder,
2286 * as allocations that are freed can no longer be reliably reallocated.
2287 * They also make exploitation of OOBs harder, as overflowing out of an
2288 * allocation is no longer safe even with sufficient spraying.
2289 */
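/*
 * Worked example (illustrative numbers, assuming the 16k-page configuration
 * defined below): the 32k sizeclass uses a single KMEM_CHUNK_SIZE_MIN chunk
 * of 256k, i.e. n = 8 slots per chunk.  With KMEM_NUM_GUARDS = 2, at most 6
 * of those slots are live at once: once only 2 free slots remain,
 * kmem_locate_space() moves the chunk to the "full" list and no further
 * slots are handed out from it.  A single kmem_free_slot() then brings the
 * free count to 3 and requeues the chunk on the "partial" list, so the next
 * allocation of that sizeclass can again pick a random free slot.
 */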
2290
2291#define KMEM_META_PRIMARY UINT8_MAX
2292#define KMEM_META_START (UINT8_MAX - 1)
2293#define KMEM_META_FREE (UINT8_MAX - 2)
2294#if __ARM_16K_PG__
2295#define KMEM_MIN_SIZE PAGE_SIZE
2296#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
2297#else /* __ARM_16K_PG__ */
2298/*
2299 * PAGE_SIZE isn't a compile-time constant on some arm64 devices. Those
2300 * devices use a 4k page size when their RAM is <= 1GB and 16k otherwise.
2301 * Therefore populate sizeclasses starting from 4k on those devices.
2302 */
2303#define KMEM_MIN_SIZE (4 * 1024)
2304#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
2305#endif /* __ARM_16K_PG__ */
2306#define KMEM_MAX_SIZE (32ULL << 20)
2307#define KMEM_START_IDX (kmem_log2down(KMEM_MIN_SIZE))
2308#define KMEM_LAST_IDX (kmem_log2down(KMEM_MAX_SIZE))
2309#define KMEM_NUM_SIZECLASS (KMEM_LAST_IDX - KMEM_START_IDX + 1)
2310#define KMEM_FRONTS (KMEM_RANGE_ID_NUM_PTR * 2)
2311#define KMEM_NUM_GUARDS 2
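/*
 * Illustrative arithmetic for the constants above: with a 16k KMEM_MIN_SIZE,
 * KMEM_START_IDX = log2(16k) = 14 and KMEM_LAST_IDX = log2(32M) = 25, so
 * KMEM_NUM_SIZECLASS = 25 - 14 + 1 = 12 size classes (16k, 32k, ..., 32M).
 * With the 4k KMEM_MIN_SIZE fallback, KMEM_START_IDX = 12 and there are
 * 14 size classes instead.
 */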
2312
2313struct kmem_page_meta {
2314 union {
2315 /*
2316 * On primary allocated chunk with KMEM_META_PRIMARY marker
2317 */
2318 uint32_t km_bitmap;
2319 /*
2320 * On start and end of free chunk with KMEM_META_FREE marker
2321 */
2322 uint32_t km_free_chunks;
2323 };
2324 /*
2325 * KMEM_META_PRIMARY: Start meta of allocated chunk
2326 * KMEM_META_FREE : Start and end meta of free chunk
2327 * KMEM_META_START : Meta region start and end
2328 */
2329 uint8_t km_page_marker;
2330 uint8_t km_sizeclass;
2331 union {
2332 /*
2333 * On primary allocated chunk with KMEM_META_PRIMARY marker
2334 */
2335 uint16_t km_chunk_len;
2336 /*
2337 * On secondary allocated chunks
2338 */
2339 uint16_t km_page_idx;
2340 };
2341 LIST_ENTRY(kmem_page_meta) km_link;
2342} kmem_page_meta_t;
2343
2344typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
2345struct kmem_sizeclass {
2346 vm_map_size_t ks_size;
2347 uint32_t ks_num_chunk;
2348 uint32_t ks_num_elem;
2349 crypto_random_ctx_t __zpercpu ks_rng_ctx;
2350 kmem_list_head_t ks_allfree_head[KMEM_FRONTS];
2351 kmem_list_head_t ks_partial_head[KMEM_FRONTS];
2352 kmem_list_head_t ks_full_head[KMEM_FRONTS];
2353};
2354
2355static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
2356
2357/*
2358 * Locks to synchronize metadata population
2359 */
2360static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
2361static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
2362#define kmem_meta_lock() lck_mtx_lock(&kmem_meta_region_lck)
2363#define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
2364
2365static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
2366kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
2367static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
2368kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
2369/*
2370 * Keeps track of metadata high water mark for each front
2371 */
2372static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
2373static SECURITY_READ_ONLY_LATE(vm_map_t)
2374kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
2375static vm_map_size_t kmem_meta_size;
2376
2377static uint32_t
2378kmem_get_front(
2379 kmem_range_id_t range_id,
2380 bool from_right)
2381{
2382 assert((range_id >= KMEM_RANGE_ID_FIRST) &&
2383 (range_id <= KMEM_RANGE_ID_NUM_PTR));
2384 return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
2385}
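/*
 * Example: the first pointer range uses fronts 0 (left) and 1 (right), the
 * second uses 2 and 3, and so on, which is why KMEM_FRONTS is
 * KMEM_RANGE_ID_NUM_PTR * 2.  Each front gets its own set of freelist heads
 * in struct kmem_sizeclass.
 */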
2386
2387static inline uint32_t
2388kmem_slot_idx_to_bit(
2389 uint32_t slot_idx,
2390 uint32_t size_idx __unused)
2391{
2392 assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
2393 return 1ull << slot_idx;
2394}
2395
2396static uint32_t
2397kmem_get_idx_from_size(vm_map_size_t size)
2398{
2399 assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
2400 return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
2401}
2402
2403__abortlike
2404static void
2405kmem_invalid_size_idx(uint32_t idx)
2406{
2407 panic("Invalid sizeclass idx %u", idx);
2408}
2409
2410static vm_map_size_t
2411kmem_get_size_from_idx(uint32_t idx)
2412{
2413 if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
2414 kmem_invalid_size_idx(idx);
2415 }
2416 return 1ul << (idx + KMEM_START_IDX);
2417}
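/*
 * Illustrative round trip (assuming the 16k KMEM_MIN_SIZE configuration):
 * kmem_get_idx_from_size() rounds a request up to the next size class, so a
 * 20k request yields idx = log2down(20k - 1) - 14 + 1 = 1 and
 * kmem_get_size_from_idx(1) = 32k, while an exact 16k request yields idx 0
 * and maps back to 16k.  Requests in (16k, 32k] therefore all share the
 * 32k class.
 */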
2418
2419static inline uint16_t
2420kmem_get_page_idx(struct kmem_page_meta *meta)
2421{
2422 uint8_t page_marker = meta->km_page_marker;
2423
2424 return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
2425}
2426
2427__abortlike
2428static void
2429kmem_invalid_chunk_len(struct kmem_page_meta *meta)
2430{
2431	panic("Reading chunk length for meta %p where marker != KMEM_META_PRIMARY",
2432 meta);
2433}
2434
2435static inline uint16_t
2436kmem_get_chunk_len(struct kmem_page_meta *meta)
2437{
2438 if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
2439 kmem_invalid_chunk_len(meta);
2440 }
2441
2442 return meta->km_chunk_len;
2443}
2444
2445__abortlike
2446static void
2447kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
2448{
2449 panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
2450 meta);
2451}
2452
2453static inline uint32_t
2454kmem_get_free_chunk_len(struct kmem_page_meta *meta)
2455{
2456 if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
2457 kmem_invalid_free_chunk_len(meta);
2458 }
2459
2460 return meta->km_free_chunks;
2461}
2462
2463/*
2464 * Return the metadata corresponding to the specified address
2465 */
2466static struct kmem_page_meta *
2467kmem_addr_to_meta(
2468 vm_map_offset_t addr,
2469 vm_map_range_id_t range_id,
2470 vm_map_offset_t *range_start,
2471 uint64_t *meta_idx)
2472{
2473 struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
2474
2475 *range_start = kmem_ranges[range_id].min_address;
2476 *meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
2477 return &meta_base[*meta_idx];
2478}
2479
2480/*
2481 * Return the metadata start of the chunk that the address belongs to
2482 */
2483static struct kmem_page_meta *
2484kmem_addr_to_meta_start(
2485 vm_address_t addr,
2486 vm_map_range_id_t range_id,
2487 vm_map_offset_t *chunk_start)
2488{
2489 vm_map_offset_t range_start;
2490 uint64_t meta_idx;
2491 struct kmem_page_meta *meta;
2492
2493 meta = kmem_addr_to_meta(addr, range_id, range_start: &range_start, meta_idx: &meta_idx);
2494 meta_idx -= kmem_get_page_idx(meta);
2495 meta -= kmem_get_page_idx(meta);
2496 assert(meta->km_page_marker == KMEM_META_PRIMARY);
2497 *chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
2498 return meta;
2499}
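/*
 * Worked example (illustrative, 16k-page configuration where
 * KMEM_CHUNK_SIZE_MIN is 16 * 16k = 256k): an address 1M + 16k past the
 * start of its pointer range has meta_idx = (1M + 16k) / 256k = 4.  If that
 * chunk is the second chunk of a two-chunk allocation, its metadata has
 * km_page_idx = 1, so kmem_addr_to_meta_start() walks back to the
 * KMEM_META_PRIMARY metadata at index 3 and reports chunk_start as
 * range start + 3 * 256k.
 */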
2500
2501__startup_func
2502static void
2503kmem_init_meta_front(
2504 struct kmem_page_meta *meta,
2505 kmem_range_id_t range_id,
2506 bool from_right)
2507{
2508 kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
2509 flags: KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
2510 meta->km_page_marker = KMEM_META_START;
2511 if (!from_right) {
2512 meta++;
2513 kmem_meta_base[range_id] = meta;
2514 }
2515 kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
2516}
2517
2518__startup_func
2519static void
2520kmem_metadata_init(void)
2521{
2522 for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
2523 vm_map_offset_t addr = kmem_meta_range[i].min_address;
2524 struct kmem_page_meta *meta;
2525 uint64_t meta_idx;
2526
2527 vm_map_will_allocate_early_map(map_owner: &kmem_meta_map[i]);
2528 kmem_meta_map[i] = kmem_suballoc(parent: kernel_map, addr: &addr, size: kmem_meta_size,
2529 vmc_options: VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
2530 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, flags: KMS_PERMANENT | KMS_NOFAIL,
2531 VM_KERN_MEMORY_OSFMK).kmr_submap;
2532
2533 kmem_meta_range[i].min_address = addr;
2534 kmem_meta_range[i].max_address = addr + kmem_meta_size;
2535
2536 meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
2537 kmem_init_meta_front(meta, range_id: i, from_right: 0);
2538
2539 meta = kmem_addr_to_meta(addr: kmem_ranges[i].max_address, range_id: i, range_start: &addr,
2540 meta_idx: &meta_idx);
2541 kmem_init_meta_front(meta, range_id: i, from_right: 1);
2542 }
2543}
2544
2545__startup_func
2546static void
2547kmem_init_front_head(
2548 struct kmem_sizeclass *ks,
2549 uint32_t front)
2550{
2551 LIST_INIT(&ks->ks_allfree_head[front]);
2552 LIST_INIT(&ks->ks_partial_head[front]);
2553 LIST_INIT(&ks->ks_full_head[front]);
2554}
2555
2556__startup_func
2557static void
2558kmem_sizeclass_init(void)
2559{
2560 for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2561 struct kmem_sizeclass *ks = &kmem_size_array[i];
2562 kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
2563
2564 ks->ks_size = kmem_get_size_from_idx(idx: i);
2565 ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
2566 KMEM_CHUNK_SIZE_MIN;
2567 ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
2568 assert(ks->ks_num_elem <=
2569 (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
2570 for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
2571 kmem_init_front_head(ks, front: kmem_get_front(range_id, from_right: 0));
2572 kmem_init_front_head(ks, front: kmem_get_front(range_id, from_right: 1));
2573 }
2574 }
2575}
2576
2577/*
2578 * This is done during EARLY_BOOT as it needs the corecrypto module to be
2579 * set up.
2580 */
2581__startup_func
2582static void
2583kmem_crypto_init(void)
2584{
2585 vm_size_t ctx_size = crypto_random_kmem_ctx_size();
2586
2587 for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
2588 struct kmem_sizeclass *ks = &kmem_size_array[i];
2589
2590 ks->ks_rng_ctx = zalloc_percpu_permanent(size: ctx_size, ZALIGN_PTR);
2591 zpercpu_foreach(ctx, ks->ks_rng_ctx) {
2592 crypto_random_kmem_init(ctx);
2593 }
2594 }
2595}
2596STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
2597
2598__abortlike
2599static void
2600kmem_validate_slot_panic(
2601 vm_map_offset_t addr,
2602 struct kmem_page_meta *meta,
2603 uint32_t slot_idx,
2604 uint32_t size_idx)
2605{
2606 if (meta->km_page_marker != KMEM_META_PRIMARY) {
2607 panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
2608 }
2609 if (meta->km_sizeclass != size_idx) {
2610 panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
2611 meta, meta->km_sizeclass, size_idx);
2612 }
2613 panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
2614 slot_idx, meta, (void *)addr);
2615}
2616
2617__abortlike
2618static void
2619kmem_invalid_slot_for_addr(
2620 mach_vm_range_t slot,
2621 vm_map_offset_t start,
2622 vm_map_offset_t end)
2623{
2624 panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
2625 (void *)slot->min_address, (void *)slot->max_address,
2626 (void *)start, (void *)end);
2627}
2628
2629void
2630kmem_validate_slot(
2631 vm_map_offset_t addr,
2632 struct kmem_page_meta *meta,
2633 uint32_t size_idx,
2634 uint32_t slot_idx)
2635{
2636 if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
2637 (meta->km_sizeclass != size_idx) ||
2638 ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
2639 kmem_validate_slot_panic(addr, meta, slot_idx: size_idx, size_idx: slot_idx);
2640 }
2641}
2642
2643static void
2644kmem_validate_slot_initial(
2645 mach_vm_range_t slot,
2646 vm_map_offset_t start,
2647 vm_map_offset_t end,
2648 struct kmem_page_meta *meta,
2649 uint32_t size_idx,
2650 uint32_t slot_idx)
2651{
2652 if ((slot->min_address == 0) || (slot->max_address == 0) ||
2653 (start < slot->min_address) || (start >= slot->max_address) ||
2654 (end > slot->max_address)) {
2655 kmem_invalid_slot_for_addr(slot, start, end);
2656 }
2657
2658 kmem_validate_slot(addr: start, meta, size_idx, slot_idx);
2659}
2660
2661uint32_t
2662kmem_addr_get_slot_idx(
2663 vm_map_offset_t start,
2664 vm_map_offset_t end,
2665 vm_map_range_id_t range_id,
2666 struct kmem_page_meta **meta,
2667 uint32_t *size_idx,
2668 mach_vm_range_t slot)
2669{
2670 vm_map_offset_t chunk_start;
2671 vm_map_size_t slot_size;
2672 uint32_t slot_idx;
2673
2674 *meta = kmem_addr_to_meta_start(addr: start, range_id, chunk_start: &chunk_start);
2675 *size_idx = (*meta)->km_sizeclass;
2676 slot_size = kmem_get_size_from_idx(idx: *size_idx);
2677 slot_idx = (start - chunk_start) / slot_size;
2678 slot->min_address = chunk_start + slot_idx * slot_size;
2679 slot->max_address = slot->min_address + slot_size;
2680
2681 kmem_validate_slot_initial(slot, start, end, meta: *meta, size_idx: *size_idx, slot_idx);
2682
2683 return slot_idx;
2684}
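/*
 * Worked example (illustrative, 16k-page configuration): for a chunk serving
 * the 32k sizeclass, an allocation starting 80k past chunk_start has
 * slot_idx = 80k / 32k = 2 and its slot spans
 * [chunk_start + 64k, chunk_start + 96k).  kmem_validate_slot_initial() then
 * checks that the freed range [start, end) lies entirely within that slot
 * and that the slot is still marked allocated in km_bitmap.
 */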
2685
2686static bool
2687kmem_populate_needed(vm_offset_t from, vm_offset_t to)
2688{
2689#if KASAN
2690#pragma unused(from, to)
2691 return true;
2692#else
2693 vm_offset_t page_addr = trunc_page(from);
2694
2695 for (; page_addr < to; page_addr += PAGE_SIZE) {
2696 /*
2697 * This can race with another thread doing a populate on the same metadata
2698 * page, where we see an updated pmap but unmapped KASan shadow, causing a
2699 * fault in the shadow when we first access the metadata page. Avoid this
2700 * by always synchronizing on the kmem_meta_lock with KASan.
2701 */
2702 if (!pmap_find_phys(map: kernel_pmap, va: page_addr)) {
2703 return true;
2704 }
2705 }
2706
2707 return false;
2708#endif /* !KASAN */
2709}
2710
2711static void
2712kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
2713{
2714 vm_offset_t page_addr = trunc_page(from);
2715
2716 vm_map_unlock(kernel_map);
2717
2718 for (; page_addr < to; page_addr += PAGE_SIZE) {
2719 for (;;) {
2720 kern_return_t ret = KERN_SUCCESS;
2721
2722 /*
2723 * All updates to kmem metadata are done under the kmem_meta_lock
2724 */
2725 kmem_meta_lock();
2726 if (0 == pmap_find_phys(map: kernel_pmap, va: page_addr)) {
2727 ret = kernel_memory_populate(addr: page_addr,
2728 PAGE_SIZE, flags: KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
2729 VM_KERN_MEMORY_OSFMK);
2730 }
2731 kmem_meta_unlock();
2732
2733 if (ret == KERN_SUCCESS) {
2734 break;
2735 }
2736
2737 /*
2738 * We can't pass KMA_NOPAGEWAIT under a global lock as it leads
2739 * to bad system deadlocks, so if the allocation failed,
2740 * we need to do the VM_PAGE_WAIT() outside of the lock.
2741 */
2742 VM_PAGE_WAIT();
2743 }
2744 }
2745
2746 vm_map_lock(kernel_map);
2747}
2748
2749__abortlike
2750static void
2751kmem_invalid_meta_panic(
2752 struct kmem_page_meta *meta,
2753 uint32_t slot_idx,
2754 struct kmem_sizeclass sizeclass)
2755{
2756 uint32_t size_idx = kmem_get_idx_from_size(size: sizeclass.ks_size);
2757
2758 if (slot_idx >= sizeclass.ks_num_elem) {
2759 panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
2760 sizeclass.ks_num_elem, meta);
2761 }
2762 if (meta->km_sizeclass != size_idx) {
2763 panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
2764 meta->km_sizeclass, meta);
2765 }
2766 panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
2767}
2768
2769__abortlike
2770static void
2771kmem_slot_has_entry_panic(
2772 vm_map_entry_t entry,
2773 vm_map_offset_t addr)
2774{
2775 panic("Entry (%p) already exists for addr (%p) being returned",
2776 entry, (void *)addr);
2777}
2778
2779__abortlike
2780static void
2781kmem_slot_not_found(
2782 struct kmem_page_meta *meta,
2783 uint32_t slot_idx)
2784{
2785 panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
2786 meta->km_bitmap);
2787}
2788
2789/*
2790 * Returns a 16-bit random number between 0 and
2791 * upper_limit (inclusive)
2792 */
2793__startup_func
2794uint16_t
2795kmem_get_random16(
2796 uint16_t upper_limit)
2797{
2798 static uint64_t random_entropy;
2799 assert(upper_limit < UINT16_MAX);
2800 if (random_entropy == 0) {
2801 random_entropy = early_random();
2802 }
2803 uint32_t result = random_entropy & UINT32_MAX;
2804 random_entropy >>= 32;
2805 return (uint16_t)(result % (upper_limit + 1));
2806}
2807
2808static uint32_t
2809kmem_get_nth_free_slot(
2810 struct kmem_page_meta *meta,
2811 uint32_t n,
2812 uint32_t bitmap)
2813{
2814 uint32_t zeros_seen = 0, ones_seen = 0;
2815
2816 while (bitmap) {
2817 uint32_t count = __builtin_ctz(bitmap);
2818
2819 zeros_seen += count;
2820 bitmap >>= count;
2821 if (__probable(~bitmap)) {
2822 count = __builtin_ctz(~bitmap);
2823 } else {
2824 count = 32;
2825 }
2826 if (count + ones_seen > n) {
2827 return zeros_seen + n;
2828 }
2829 ones_seen += count;
2830 bitmap >>= count;
2831 }
2832
2833 kmem_slot_not_found(meta, slot_idx: n);
2834}
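/*
 * Example trace (a set bit in km_bitmap means the slot is free, see
 * kmem_get_addr_from_meta() and kmem_free_slot()): with bitmap = 0b101100
 * the free slots are 2, 3 and 5.  For n = 1 the loop skips two zeros
 * (zeros_seen = 2), finds a run of two ones, and since 2 + 0 > n returns
 * zeros_seen + n = 3.  For n = 2 it consumes that run (ones_seen = 2),
 * skips one more zero and returns 3 + 2 = 5.
 */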
2835
2836
2837static uint32_t
2838kmem_get_next_slot(
2839 struct kmem_page_meta *meta,
2840 struct kmem_sizeclass sizeclass,
2841 uint32_t bitmap)
2842{
2843 uint32_t num_slots = __builtin_popcount(bitmap);
2844 uint64_t slot_idx = 0;
2845
2846 assert(num_slots > 0);
2847 if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
2848 /*
2849		 * Use early random prior to early boot, as the ks_rng_ctx requires
2850		 * the corecrypto module to be set up before it is initialized and
2851		 * used.
2852 *
2853 * num_slots can't be 0 as we take this path when we have more than
2854 * one slot left.
2855 */
2856 slot_idx = kmem_get_random16(upper_limit: (uint16_t)num_slots - 1);
2857 } else {
2858 crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), bound: num_slots,
2859 random: &slot_idx);
2860 }
2861
2862 return kmem_get_nth_free_slot(meta, n: slot_idx, bitmap);
2863}
2864
2865/*
2866 * Returns an unallocated slot from the given metadata
2867 */
2868static vm_map_offset_t
2869kmem_get_addr_from_meta(
2870 struct kmem_page_meta *meta,
2871 vm_map_range_id_t range_id,
2872 struct kmem_sizeclass sizeclass,
2873 vm_map_entry_t *entry)
2874{
2875 vm_map_offset_t addr;
2876 vm_map_size_t size = sizeclass.ks_size;
2877 uint32_t size_idx = kmem_get_idx_from_size(size);
2878 uint64_t meta_idx = meta - kmem_meta_base[range_id];
2879 mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
2880 uint32_t slot_bit;
2881 uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, bitmap: meta->km_bitmap);
2882
2883 if ((slot_idx >= sizeclass.ks_num_elem) ||
2884 (meta->km_sizeclass != size_idx) ||
2885 (meta->km_page_marker != KMEM_META_PRIMARY)) {
2886 kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
2887 }
2888
2889 slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
2890 meta->km_bitmap &= ~slot_bit;
2891
2892 addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
2893 assert(kmem_range_contains_fully(range_id, addr, size));
2894 if (vm_map_lookup_entry(map: kernel_map, address: addr, entry)) {
2895 kmem_slot_has_entry_panic(entry: *entry, addr);
2896 }
2897 if ((*entry != vm_map_to_entry(kernel_map)) &&
2898 ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
2899 ((*entry)->vme_next->vme_start < (addr + size))) {
2900 kmem_slot_has_entry_panic(entry: *entry, addr);
2901 }
2902 return addr;
2903}
2904
2905__abortlike
2906static void
2907kmem_range_out_of_va(
2908 kmem_range_id_t range_id,
2909 uint32_t num_chunks)
2910{
2911 panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
2912}
2913
2914static void
2915kmem_init_allocated_chunk(
2916 struct kmem_page_meta *meta,
2917 struct kmem_sizeclass sizeclass,
2918 uint32_t size_idx)
2919{
2920 uint32_t meta_num = sizeclass.ks_num_chunk;
2921 uint32_t num_elem = sizeclass.ks_num_elem;
2922
2923 meta->km_bitmap = (1ull << num_elem) - 1;
2924 meta->km_chunk_len = (uint16_t)meta_num;
2925 assert(LIST_NEXT(meta, km_link) == NULL);
2926 assert(meta->km_link.le_prev == NULL);
2927 meta->km_sizeclass = (uint8_t)size_idx;
2928 meta->km_page_marker = KMEM_META_PRIMARY;
2929 meta++;
2930 for (uint32_t i = 1; i < meta_num; i++) {
2931 meta->km_page_idx = (uint16_t)i;
2932 meta->km_sizeclass = (uint8_t)size_idx;
2933 meta->km_page_marker = 0;
2934 meta->km_bitmap = 0;
2935 meta++;
2936 }
2937}
2938
2939static uint32_t
2940kmem_get_additional_meta(
2941 struct kmem_page_meta *meta,
2942 uint32_t meta_req,
2943 bool from_right,
2944 struct kmem_page_meta **adj_free_meta)
2945{
2946 struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
2947
2948 if (meta_prev->km_page_marker == KMEM_META_FREE) {
2949 uint32_t chunk_len = kmem_get_free_chunk_len(meta: meta_prev);
2950
2951 *adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
2952 meta_req -= chunk_len;
2953 } else {
2954 *adj_free_meta = NULL;
2955 }
2956
2957 return meta_req;
2958}
2959
2960
2961static struct kmem_page_meta *
2962kmem_get_new_chunk(
2963 vm_map_range_id_t range_id,
2964 bool from_right,
2965 uint32_t size_idx)
2966{
2967 struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
2968 struct kmem_page_meta *start, *end, *meta_update;
2969 struct kmem_page_meta *adj_free_meta = NULL;
2970 uint32_t meta_req = sizeclass.ks_num_chunk;
2971
2972 for (;;) {
2973 struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)];
2974 struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)];
2975 struct kmem_page_meta *meta;
2976 vm_offset_t start_addr, end_addr;
2977 uint32_t meta_num;
2978
2979 meta = from_right ? metab : metaf;
2980 meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
2981 adj_free_meta: &adj_free_meta);
2982
2983 if (metaf + meta_num >= metab) {
2984 kmem_range_out_of_va(range_id, num_chunks: meta_num);
2985 }
2986
2987 start = from_right ? (metab - meta_num) : metaf;
2988 end = from_right ? metab : (metaf + meta_num);
2989
2990 start_addr = (vm_offset_t)start;
2991 end_addr = (vm_offset_t)end;
2992
2993 /*
2994 * If the new high watermark stays on the same page,
2995 * no need to populate and drop the lock.
2996 */
2997 if (!page_aligned(from_right ? end_addr : start_addr) &&
2998 trunc_page(start_addr) == trunc_page(end_addr - 1)) {
2999 break;
3000 }
3001 if (!kmem_populate_needed(from: start_addr, to: end_addr)) {
3002 break;
3003 }
3004
3005 kmem_populate_meta_locked(from: start_addr, to: end_addr);
3006
3007 /*
3008 * Since we dropped the lock, reassess conditions still hold:
3009 * - the HWM we are changing must not have moved
3010 * - the other HWM must not intersect with ours
3011 * - in case of coalescing, the adjacent free meta must still
3012 * be free and of the same size.
3013 *
3014 * If we failed to grow, reevaluate whether freelists have
3015 * entries now by returning NULL.
3016 */
3017 metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)];
3018 metab = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)];
3019 if (meta != (from_right ? metab : metaf)) {
3020 return NULL;
3021 }
3022 if (metaf + meta_num >= metab) {
3023 kmem_range_out_of_va(range_id, num_chunks: meta_num);
3024 }
3025 if (adj_free_meta) {
3026 if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
3027 kmem_get_free_chunk_len(meta: adj_free_meta) !=
3028 meta_req - meta_num) {
3029 return NULL;
3030 }
3031 }
3032
3033 break;
3034 }
3035
3036 /*
3037 * If there is an adjacent free chunk remove it from free list
3038 */
3039 if (adj_free_meta) {
3040 LIST_REMOVE(adj_free_meta, km_link);
3041 LIST_NEXT(adj_free_meta, km_link) = NULL;
3042 adj_free_meta->km_link.le_prev = NULL;
3043 }
3044
3045 /*
3046 * Update hwm
3047 */
3048 meta_update = from_right ? start : end;
3049 kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
3050
3051 /*
3052 * Initialize metadata
3053 */
3054 start = from_right ? start : (end - meta_req);
3055 kmem_init_allocated_chunk(meta: start, sizeclass, size_idx);
3056
3057 return start;
3058}
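/*
 * Worked example (illustrative): a sizeclass with ks_num_chunk = 2 growing
 * the left front while a single free chunk sits right below the HWM.
 * kmem_get_additional_meta() reduces the request to one new chunk, the free
 * neighbour is pulled off its freelist, the HWM advances by one metadata
 * entry, and kmem_init_allocated_chunk() then initializes a two-chunk
 * allocation starting at the reclaimed neighbour, so only one chunk of
 * fresh VA is consumed.
 */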
3059
3060static void
3061kmem_requeue_meta(
3062 struct kmem_page_meta *meta,
3063 struct kmem_list_head *head)
3064{
3065 LIST_REMOVE(meta, km_link);
3066 LIST_INSERT_HEAD(head, meta, km_link);
3067}
3068
3069/*
3070 * Return corresponding sizeclass to stash free chunks in
3071 */
3072__abortlike
3073static void
3074kmem_invalid_chunk_num(uint32_t chunks)
3075{
3076 panic("Invalid number of chunks %u\n", chunks);
3077}
3078
3079static uint32_t
3080kmem_get_size_idx_for_chunks(uint32_t chunks)
3081{
3082 for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
3083 if (chunks >= kmem_size_array[i].ks_num_chunk) {
3084 return i;
3085 }
3086 }
3087 kmem_invalid_chunk_num(chunks);
3088}
3089
3090static void
3091kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
3092{
3093 bzero(s: meta, n: count * sizeof(struct kmem_page_meta));
3094}
3095
3096static void
3097kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
3098{
3099#if MACH_ASSERT
3100 size_t size = count * sizeof(struct kmem_page_meta);
3101
3102 assert(memcmp_zero_ptr_aligned(meta, size) == 0);
3103#else
3104#pragma unused(meta, count)
3105#endif
3106}
3107
3108/*!
3109 * @function kmem_init_free_chunk()
3110 *
3111 * @discussion
3112 * This function prepares a range of chunks to be put on a free list.
3113 * The first and last metadata might be dirty, but the "inner" ones
3114 * must be zero filled by the caller prior to calling this function.
3115 */
3116static void
3117kmem_init_free_chunk(
3118 struct kmem_page_meta *meta,
3119 uint32_t num_chunks,
3120 uint32_t front)
3121{
3122 struct kmem_sizeclass *sizeclass;
3123 uint32_t size_idx = kmem_get_size_idx_for_chunks(chunks: num_chunks);
3124
3125 if (num_chunks > 2) {
3126 kmem_check_meta_range_is_clear(meta: meta + 1, count: num_chunks - 2);
3127 }
3128
3129 meta[0] = (struct kmem_page_meta){
3130 .km_free_chunks = num_chunks,
3131 .km_page_marker = KMEM_META_FREE,
3132 .km_sizeclass = (uint8_t)size_idx,
3133 };
3134 if (num_chunks > 1) {
3135 meta[num_chunks - 1] = (struct kmem_page_meta){
3136 .km_free_chunks = num_chunks,
3137 .km_page_marker = KMEM_META_FREE,
3138 .km_sizeclass = (uint8_t)size_idx,
3139 };
3140 }
3141
3142 sizeclass = &kmem_size_array[size_idx];
3143 LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
3144}
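/*
 * Example: freeing a run of 5 chunks writes KMEM_META_FREE markers with
 * km_free_chunks = 5 into meta[0] and meta[4] (the inner metadata having
 * been zeroed by the caller), picks the largest sizeclass whose ks_num_chunk
 * fits via kmem_get_size_idx_for_chunks(), and queues the run on that
 * sizeclass's all-free list for the given front.
 */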
3145
3146static struct kmem_page_meta *
3147kmem_get_free_chunk_from_list(
3148 struct kmem_sizeclass *org_sizeclass,
3149 uint32_t size_idx,
3150 uint32_t front)
3151{
3152 struct kmem_sizeclass *sizeclass;
3153 uint32_t num_chunks = org_sizeclass->ks_num_chunk;
3154 struct kmem_page_meta *meta;
3155 uint32_t idx = size_idx;
3156
3157 while (idx < KMEM_NUM_SIZECLASS) {
3158 sizeclass = &kmem_size_array[idx];
3159 meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
3160 if (meta) {
3161 break;
3162 }
3163 idx++;
3164 }
3165
3166 /*
3167 * Trim if larger in size
3168 */
3169 if (meta) {
3170 uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
3171
3172 assert(meta->km_page_marker == KMEM_META_FREE);
3173 LIST_REMOVE(meta, km_link);
3174 LIST_NEXT(meta, km_link) = NULL;
3175 meta->km_link.le_prev = NULL;
3176 if (num_chunks_free > num_chunks) {
3177 num_chunks_free -= num_chunks;
3178 kmem_init_free_chunk(meta: meta + num_chunks, num_chunks: num_chunks_free, front);
3179 }
3180
3181 kmem_init_allocated_chunk(meta, sizeclass: *org_sizeclass, size_idx);
3182 }
3183
3184 return meta;
3185}
3186
3187kern_return_t
3188kmem_locate_space(
3189 vm_map_size_t size,
3190 vm_map_range_id_t range_id,
3191 bool from_right,
3192 vm_map_offset_t *start_inout,
3193 vm_map_entry_t *entry_out)
3194{
3195 vm_map_entry_t entry;
3196 uint32_t size_idx = kmem_get_idx_from_size(size);
3197 uint32_t front = kmem_get_front(range_id, from_right);
3198 struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
3199 struct kmem_page_meta *meta;
3200
3201 assert(size <= sizeclass->ks_size);
3202again:
3203 if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
3204 *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry);
3205 /*
3206 * Requeue to full if necessary
3207 */
3208 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3209 if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
3210 kmem_requeue_meta(meta, head: &sizeclass->ks_full_head[front]);
3211 }
3212 } else if ((meta = kmem_get_free_chunk_from_list(org_sizeclass: sizeclass, size_idx,
3213 front)) != NULL) {
3214 *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry);
3215 /*
3216 * Queue to partial
3217 */
3218 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3219 assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
3220 LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3221 } else {
3222 meta = kmem_get_new_chunk(range_id, from_right, size_idx);
3223 if (meta == NULL) {
3224 goto again;
3225 }
3226 *start_inout = kmem_get_addr_from_meta(meta, range_id, sizeclass: *sizeclass, entry: &entry);
3227 assert(meta->km_page_marker == KMEM_META_PRIMARY);
3228 LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
3229 }
3230
3231 if (entry_out) {
3232 *entry_out = entry;
3233 }
3234
3235 return KERN_SUCCESS;
3236}
3237
3238/*
3239 * Determine whether the given metadata was allocated from the right
3240 */
3241static bool
3242kmem_meta_is_from_right(
3243 kmem_range_id_t range_id,
3244 struct kmem_page_meta *meta)
3245{
3246 struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)];
3247#if DEBUG || DEVELOPMENT
3248 struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
3249#endif
3250 struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
3251 struct kmem_page_meta *meta_end;
3252
3253 meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
3254
3255 if ((meta >= meta_base) && (meta < metaf)) {
3256 return false;
3257 }
3258
3259 assert(meta >= metab && meta < meta_end);
3260 return true;
3261}
3262
3263static void
3264kmem_free_chunk(
3265 kmem_range_id_t range_id,
3266 struct kmem_page_meta *meta,
3267 bool from_right)
3268{
3269 struct kmem_page_meta *meta_coalesce = meta - 1;
3270 struct kmem_page_meta *meta_start = meta;
3271 uint32_t num_chunks = kmem_get_chunk_len(meta);
3272 uint32_t add_chunks;
3273 struct kmem_page_meta *meta_end = meta + num_chunks;
3274 struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
3275 uint32_t front = kmem_get_front(range_id, from_right);
3276
3277 meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, from_right: 0)];
3278 meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, from_right: 1)];
3279
3280 LIST_REMOVE(meta, km_link);
3281 kmem_clear_meta_range(meta, count: num_chunks);
3282
3283 /*
3284 * Coalesce left
3285 */
3286 if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
3287 (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
3288 meta_start = meta_coalesce - kmem_get_free_chunk_len(meta: meta_coalesce) + 1;
3289 add_chunks = kmem_get_free_chunk_len(meta: meta_start);
3290 num_chunks += add_chunks;
3291 LIST_REMOVE(meta_start, km_link);
3292 kmem_clear_meta_range(meta: meta_start + add_chunks - 1, count: 1);
3293 }
3294
3295 /*
3296 * Coalesce right
3297 */
3298 if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
3299 (meta_end->km_page_marker == KMEM_META_FREE)) {
3300 add_chunks = kmem_get_free_chunk_len(meta: meta_end);
3301 LIST_REMOVE(meta_end, km_link);
3302 kmem_clear_meta_range(meta: meta_end, count: 1);
3303 meta_end = meta_end + add_chunks;
3304 num_chunks += add_chunks;
3305 }
3306
3307 kmem_init_free_chunk(meta: meta_start, num_chunks, front);
3308}
3309
3310static void
3311kmem_free_slot(
3312 kmem_range_id_t range_id,
3313 mach_vm_range_t slot)
3314{
3315 struct kmem_page_meta *meta;
3316 vm_map_offset_t chunk_start;
3317 uint32_t size_idx, chunk_elem, slot_idx, num_elem;
3318 struct kmem_sizeclass *sizeclass;
3319 vm_map_size_t slot_size;
3320
3321 meta = kmem_addr_to_meta_start(addr: slot->min_address, range_id, chunk_start: &chunk_start);
3322 size_idx = meta->km_sizeclass;
3323 slot_size = kmem_get_size_from_idx(idx: size_idx);
3324 slot_idx = (slot->min_address - chunk_start) / slot_size;
3325 assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
3326 meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
3327
3328 sizeclass = &kmem_size_array[size_idx];
3329 chunk_elem = sizeclass->ks_num_elem;
3330 num_elem = __builtin_popcount(meta->km_bitmap);
3331
3332 if (num_elem == chunk_elem) {
3333 /*
3334		 * If the entire chunk is now empty, add it to the all-free list
3335 */
3336 bool from_right = kmem_meta_is_from_right(range_id, meta);
3337
3338 kmem_free_chunk(range_id, meta, from_right);
3339 } else if (num_elem == KMEM_NUM_GUARDS + 1) {
3340 /*
3341		 * If we freed into a full chunk, move it to the partial list
3342 */
3343 uint32_t front = kmem_get_front(range_id,
3344 from_right: kmem_meta_is_from_right(range_id, meta));
3345
3346 kmem_requeue_meta(meta, head: &sizeclass->ks_partial_head[front]);
3347 }
3348}
3349
3350void
3351kmem_free_space(
3352 vm_map_offset_t start,
3353 vm_map_offset_t end,
3354 vm_map_range_id_t range_id,
3355 mach_vm_range_t slot)
3356{
3357 bool entry_present = false;
3358 vm_map_entry_t prev_entry;
3359 vm_map_entry_t next_entry;
3360
3361 if ((slot->min_address == start) && (slot->max_address == end)) {
3362 /*
3363 * Entire slot is being freed at once
3364 */
3365 return kmem_free_slot(range_id, slot);
3366 }
3367
3368 entry_present = vm_map_lookup_entry(map: kernel_map, address: start, entry: &prev_entry);
3369 assert(!entry_present);
3370 next_entry = prev_entry->vme_next;
3371
3372 if (((prev_entry == vm_map_to_entry(kernel_map) ||
3373 prev_entry->vme_end <= slot->min_address)) &&
3374 (next_entry == vm_map_to_entry(kernel_map) ||
3375 (next_entry->vme_start >= slot->max_address))) {
3376 /*
3377 * Free entire slot
3378 */
3379 kmem_free_slot(range_id, slot);
3380 }
3381}
3382
3383#pragma mark kmem init
3384
3385/*
3386 * The default percentage of memory that can be mlocked is scaled based on the total
3387 * amount of memory in the system. These percentages are calculated
3388 * offline and stored in this table. We index this table by
3389 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
3390 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
3391 *
3392 * Note that these values were picked for mac.
3393 * If we ever have very large memory config arm devices, we may want to revisit
3394 * since the kernel overhead is smaller there due to the larger page size.
3395 */
3396
3397/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
3398#define VM_USER_WIREABLE_MIN_CONFIG 32
3399#if CONFIG_JETSAM
3400/* Systems with jetsam can wire a bit more b/c the system can relieve wired
3401 * pressure.
3402 */
3403static vm_map_size_t wire_limit_percents[] =
3404{ 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
3405#else
3406static vm_map_size_t wire_limit_percents[] =
3407{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
3408#endif /* CONFIG_JETSAM */
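/*
 * Illustrative arithmetic for the scaling described above: a 16GB
 * configuration has log2(max_mem) = 34, so the table is indexed at
 * 34 - VM_USER_WIREABLE_MIN_CONFIG = 2, i.e. 76% without jetsam (80% with),
 * allowing roughly 12.2GB to be wired via user mlock() unless the
 * VM_NOT_USER_WIREABLE_MAX cap applied in kmem_set_user_wire_limits()
 * takes effect.
 */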
3409
3410/*
3411 * Sets the default global user wire limit which limits the amount of
3412 * memory that can be locked via mlock(), based on the above algorithm.
3413 * This can be overridden via a sysctl.
3414 */
3415static void
3416kmem_set_user_wire_limits(void)
3417{
3418 uint64_t available_mem_log;
3419 uint64_t max_wire_percent;
3420 size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
3421 sizeof(vm_map_size_t);
3422 vm_map_size_t limit;
3423 uint64_t config_memsize = max_mem;
3424#if defined(XNU_TARGET_OS_OSX)
3425 config_memsize = max_mem_actual;
3426#endif /* defined(XNU_TARGET_OS_OSX) */
3427
3428 available_mem_log = bit_floor(n: config_memsize);
3429
3430 if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
3431 available_mem_log = 0;
3432 } else {
3433 available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
3434 }
3435 if (available_mem_log >= wire_limit_percents_length) {
3436 available_mem_log = wire_limit_percents_length - 1;
3437 }
3438 max_wire_percent = wire_limit_percents[available_mem_log];
3439
3440 limit = config_memsize * max_wire_percent / 100;
3441 /* Cap the number of non lockable bytes at VM_NOT_USER_WIREABLE_MAX */
3442 if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
3443 limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
3444 }
3445
3446 vm_global_user_wire_limit = limit;
3447 /* the default per task limit is the same as the global limit */
3448 vm_per_task_user_wire_limit = limit;
3449 vm_add_wire_count_over_global_limit = 0;
3450 vm_add_wire_count_over_user_limit = 0;
3451}
3452
3453#define KMEM_MAX_CLAIMS 50
3454__startup_data
3455struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
3456__startup_data
3457uint32_t kmem_claim_count = 0;
3458
3459__startup_func
3460void
3461kmem_range_startup_init(
3462 struct kmem_range_startup_spec *sp)
3463{
3464 assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
3465 if (sp->kc_calculate_sz) {
3466 sp->kc_size = (sp->kc_calculate_sz)();
3467 }
3468 if (sp->kc_size) {
3469 kmem_claims[kmem_claim_count] = *sp;
3470 kmem_claim_count++;
3471 }
3472}
3473
3474static vm_offset_t
3475kmem_fuzz_start(void)
3476{
3477 vm_offset_t kmapoff_kaddr = 0;
3478 uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
3479 vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
3480
3481 kmem_alloc(map: kernel_map, addrp: &kmapoff_kaddr, size: kmapoff_size,
3482 flags: KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
3483 VM_KERN_MEMORY_OSFMK);
3484 return kmapoff_kaddr + kmapoff_size;
3485}
3486
3487/*
3488 * Generate a randomly shuffled array of indices from 0 to count - 1
3489 */
3490__startup_func
3491void
3492kmem_shuffle(
3493 uint16_t *shuffle_buf,
3494 uint16_t count)
3495{
3496 for (uint16_t i = 0; i < count; i++) {
3497 uint16_t j = kmem_get_random16(upper_limit: i);
3498 if (j != i) {
3499 shuffle_buf[i] = shuffle_buf[j];
3500 }
3501 shuffle_buf[j] = i;
3502 }
3503}
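/*
 * This is the "inside-out" variant of the Fisher-Yates shuffle: at step i a
 * uniform j in [0, i] is drawn, the value previously at j is moved to i, and
 * i is written at j.  For example, with count = 3 and draws 0, 0, 1 the
 * buffer evolves as [0], [1, 0], [1, 2, 0].
 */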
3504
3505__startup_func
3506static void
3507kmem_shuffle_claims(void)
3508{
3509 uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
3510 uint16_t limit = (uint16_t)kmem_claim_count;
3511
3512 kmem_shuffle(shuffle_buf: &shuffle_buf[0], count: limit);
3513 for (uint16_t i = 0; i < limit; i++) {
3514 struct kmem_range_startup_spec tmp = kmem_claims[i];
3515 kmem_claims[i] = kmem_claims[shuffle_buf[i]];
3516 kmem_claims[shuffle_buf[i]] = tmp;
3517 }
3518}
3519
3520__startup_func
3521static void
3522kmem_readjust_ranges(
3523 uint32_t cur_idx)
3524{
3525 assert(cur_idx != 0);
3526 uint32_t j = cur_idx - 1, random;
3527 struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
3528 struct mach_vm_range *sp_range = sp.kc_range;
3529
3530 /*
3531 * Find max index where restriction is met
3532 */
3533 for (; j > 0; j--) {
3534 struct kmem_range_startup_spec spj = kmem_claims[j];
3535 vm_map_offset_t max_start = spj.kc_range->min_address;
3536 if (spj.kc_flags & KC_NO_MOVE) {
3537 panic("kmem_range_init: Can't scramble with multiple constraints");
3538 }
3539 if (max_start <= sp_range->min_address) {
3540 break;
3541 }
3542 }
3543
3544 /*
3545 * Pick a random index from 0 to max index and shift claims to the right
3546 * to make room for restricted claim
3547 */
3548 random = kmem_get_random16(upper_limit: (uint16_t)j);
3549 assert(random <= j);
3550
3551 sp_range->min_address = kmem_claims[random].kc_range->min_address;
3552 sp_range->max_address = sp_range->min_address + sp.kc_size;
3553
3554 for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
3555 struct kmem_range_startup_spec spj = kmem_claims[j];
3556 struct mach_vm_range *range = spj.kc_range;
3557 range->min_address += sp.kc_size;
3558 range->max_address += sp.kc_size;
3559 kmem_claims[j + 1] = spj;
3560 }
3561
3562 sp.kc_flags = KC_NO_MOVE;
3563 kmem_claims[random] = sp;
3564}
3565
3566__startup_func
3567static vm_map_size_t
3568kmem_add_ptr_claims(void)
3569{
3570 uint64_t kmem_meta_num, kmem_ptr_chunks;
3571 vm_map_size_t org_ptr_range_size = ptr_range_size;
3572
3573 ptr_range_size -= PAGE_SIZE;
3574 ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
3575 ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
3576
3577 kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
3578 ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
3579
3580 kmem_meta_num = kmem_ptr_chunks + 2;
3581 kmem_meta_size = round_page(x: kmem_meta_num * sizeof(struct kmem_page_meta));
3582
3583 assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
3584 /*
3585 * Add claims for kmem's ranges
3586 */
3587 for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
3588 struct kmem_range_startup_spec kmem_spec = {
3589 .kc_name = "kmem_ptr_range",
3590 .kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
3591 .kc_size = ptr_range_size,
3592 .kc_flags = KC_NO_ENTRY,
3593 };
3594 kmem_claims[kmem_claim_count++] = kmem_spec;
3595
3596 struct kmem_range_startup_spec kmem_meta_spec = {
3597 .kc_name = "kmem_ptr_range_meta",
3598 .kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
3599 .kc_size = kmem_meta_size,
3600 .kc_flags = KC_NONE,
3601 };
3602 kmem_claims[kmem_claim_count++] = kmem_meta_spec;
3603 }
3604 return (org_ptr_range_size - ptr_range_size - kmem_meta_size) *
3605 kmem_ptr_ranges;
3606}
3607
3608__startup_func
3609static void
3610kmem_add_extra_claims(void)
3611{
3612 vm_map_size_t largest_free_size = 0, total_claims = 0;
3613
3614 vm_map_sizes(map: kernel_map, NULL, NULL, plargest_free: &largest_free_size);
3615 largest_free_size = trunc_page(largest_free_size);
3616
3617 /*
3618 * kasan and configs w/o *TRR need to have just one ptr range due to
3619 * resource constraints.
3620 */
3621#if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
3622 kmem_ptr_ranges = 1;
3623#endif
3624 /*
3625 * Determine size of data and pointer kmem_ranges
3626 */
3627 for (uint32_t i = 0; i < kmem_claim_count; i++) {
3628 total_claims += kmem_claims[i].kc_size;
3629 }
3630 assert((total_claims & PAGE_MASK) == 0);
3631 largest_free_size -= total_claims;
3632
3633 /*
3634 * Use half the total available VA for all pointer allocations (this
3635 * includes the kmem_sprayqtn range). Given that we have 4 total
3636	 * ranges, divide the available VA by 8.
3637 */
3638 ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
3639 sprayqtn_range_size = ptr_range_size;
3640
3641 if (sprayqtn_range_size > (sane_size / 2)) {
3642 sprayqtn_range_size = sane_size / 2;
3643 }
3644
3645 ptr_range_size = round_page(x: ptr_range_size);
3646 sprayqtn_range_size = round_page(x: sprayqtn_range_size);
3647
3648
3649 data_range_size = largest_free_size
3650 - (ptr_range_size * kmem_ptr_ranges)
3651 - sprayqtn_range_size;
3652
3653 /*
3654 * Add claims for kmem's ranges
3655 */
3656 data_range_size += kmem_add_ptr_claims();
3657 assert(data_range_size + sprayqtn_range_size +
3658 ((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <=
3659 largest_free_size);
3660
3661 struct kmem_range_startup_spec kmem_spec_sprayqtn = {
3662 .kc_name = "kmem_sprayqtn_range",
3663 .kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
3664 .kc_size = sprayqtn_range_size,
3665 .kc_flags = KC_NO_ENTRY,
3666 };
3667 kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
3668
3669 struct kmem_range_startup_spec kmem_spec_data = {
3670 .kc_name = "kmem_data_range",
3671 .kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
3672 .kc_size = data_range_size,
3673 .kc_flags = KC_NO_ENTRY,
3674 };
3675 kmem_claims[kmem_claim_count++] = kmem_spec_data;
3676}
3677
3678__startup_func
3679static void
3680kmem_scramble_ranges(void)
3681{
3682 vm_map_offset_t start = 0;
3683
3684 /*
3685	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
3686 * the vm can find the requested ranges.
3687 */
3688 kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
3689 VM_MAP_PAGE_SIZE(kernel_map));
3690 kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
3691
3692 /*
3693	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
3694	 * this map is 2G in size and starts at the end of kernel_text on x86. It
3695	 * could otherwise overflow into the heap.
3696 */
3697 kext_alloc_init();
3698
3699 /*
3700 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
3701 * stack addresses. (With a 4K page and 9 bits of randomness, this
3702 * eats about 2M of VA from the map)
3703 *
	 * Note that we always need to slide by at least one page because the VM
	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
	 * do not admit this address to be part of any zone submap.
	 */
	start = kmem_fuzz_start();

	/*
	 * Add claims for ptr and data kmem_ranges
	 */
	kmem_add_extra_claims();

	/*
	 * Shuffle registered claims
	 */
	assert(kmem_claim_count < UINT16_MAX);
	kmem_shuffle_claims();

	/*
	 * Apply restrictions and determine range for each claim
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		vm_map_offset_t end = 0;
		struct kmem_range_startup_spec sp = kmem_claims[i];
		struct mach_vm_range *sp_range = sp.kc_range;
		if (vm_map_locate_space(kernel_map, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_locate_space failing for claim %s",
			    sp.kc_name);
		}

		end = start + sp.kc_size;
		/*
		 * Re-adjust ranges if restriction not met
		 */
		if (sp_range->min_address && start > sp_range->min_address) {
			kmem_readjust_ranges(i);
		} else {
			sp_range->min_address = start;
			sp_range->max_address = end;
		}
		start = end;
	}

	/*
	 * We have settled on the ranges, now create temporary entries for the
	 * claims
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];
		vm_map_entry_t entry = NULL;
		if (sp.kc_flags & KC_NO_ENTRY) {
			continue;
		}
		if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_find_space failing for claim %s",
			    sp.kc_name);
		}
		vm_object_reference(kernel_object_default);
		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
		VME_OFFSET_SET(entry, entry->vme_start);
		vm_map_unlock(kernel_map);
	}
	/*
	 * Now that we are done assigning all the ranges, reset
	 * kmem_ranges[KMEM_RANGE_ID_NONE]
	 */
	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};

#if DEBUG || DEVELOPMENT
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];

		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
		    (void *)sp.kc_range->min_address,
		    (void *)sp.kc_range->max_address,
		    mach_vm_size_pretty(sp.kc_size),
		    mach_vm_size_unit(sp.kc_size));
	}
#endif /* DEBUG || DEVELOPMENT */
}

__startup_func
static void
kmem_range_init(void)
{
	vm_size_t range_adjustment;

	kmem_scramble_ranges();

	range_adjustment = sprayqtn_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;

	range_adjustment = data_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;

	pmap_init();
	kmem_metadata_init();
	kmem_sizeclass_init();

#if DEBUG || DEVELOPMENT
	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
		printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
		    (void *)kmem_large_ranges[i].min_address,
		    (void *)kmem_large_ranges[i].max_address,
		    mach_vm_size_pretty(range_size),
		    mach_vm_size_unit(range_size));
	}
#endif
}
STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
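
/*
 * Worked example (illustrative only, the size is hypothetical): with a 1 GiB
 * KMEM_RANGE_ID_DATA range, range_adjustment is 1 GiB >> 3 = 128 MiB, so
 * kmem_large_ranges[KMEM_RANGE_ID_DATA] covers only the top 896 MiB of the
 * range. Large allocations are thereby kept out of the bottom eighth, while
 * the full kmem_ranges[KMEM_RANGE_ID_DATA] window remains available to
 * regular allocations.
 */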

#if DEBUG || DEVELOPMENT
__startup_func
static void
kmem_log_init(void)
{
	/*
	 * The log can only be created after the kmem subsystem is initialized,
	 * as btlog creation itself uses kmem.
	 */
	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
}
STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);

kmem_gobj_stats
kmem_get_gobj_stats(void)
{
	kmem_gobj_stats stats = {};

	vm_map_lock(kernel_map);
	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
		struct mach_vm_range range = kmem_ranges[range_id];
		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		struct kmem_page_meta *meta_end;
		uint64_t meta_idx = meta - kmem_meta_base[range_id];
		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
		vm_map_offset_t addr;
		vm_map_entry_t entry;

		/*
		 * Left front
		 */
		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));

		/*
		 * Right front
		 */
		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
		    &meta_idx);
		meta_idx = meta_end - meta;
		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);

		/*
		 * Compute VA allocated in the entire range.
		 */
		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
			/* on failure, entry is the entry preceding the address */
			entry = entry->vme_next;
		}
		while (entry != vm_map_to_entry(kernel_map) &&
		    entry->vme_start < range.max_address) {
			used += (entry->vme_end - entry->vme_start);
			entry = entry->vme_next;
		}

		/* estimate page table overhead at 8 bytes per unused page of VA */
		pte_sz = round_page(atop(va - used) * 8);

		stats.total_used += used;
		stats.total_va += va;
		stats.pte_sz += pte_sz;
		stats.meta_sz += meta_sz;
	}
	vm_map_unlock(kernel_map);

	return stats;
}
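
/*
 * Sizing note (illustrative arithmetic, not from the original source): the
 * pte_sz term above appears to charge 8 bytes of page-table entry per page of
 * guard-object VA that has no map entry backing it. For example, 1 GiB of
 * such VA is atop(1 GiB) = 262144 pages, i.e. roughly 2 MiB of estimated
 * page-table overhead.
 */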

#endif /* DEBUG || DEVELOPMENT */

/*
 * kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
__startup_func
void
kmem_init(
	vm_offset_t     start,
	vm_offset_t     end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(kernel_map));

	vm_map_will_allocate_early_map(&kernel_map);
#if defined(__arm64__)
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
	    VM_MAX_KERNEL_ADDRESS,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 * Reserve virtual memory allocated up to this time.
	 */
	{
		unsigned int    region_select = 0;
		vm_map_offset_t region_start;
		vm_map_size_t   region_size;
		vm_map_offset_t map_addr;
		kern_return_t   kr;

		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
			map_addr = region_start;
			kr = vm_map_enter(kernel_map, &map_addr,
			    vm_map_round_page(region_size,
			    VM_MAP_PAGE_MASK(kernel_map)),
			    (vm_map_offset_t) 0,
			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
			    VM_OBJECT_NULL,
			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
			    VM_INHERIT_DEFAULT);

			if (kr != KERN_SUCCESS) {
				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
				    (uint64_t) region_size, kr);
			}

			region_select++;
		}
	}
#else
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 * Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
		    &map_addr,
		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
		    (vm_map_offset_t) 0,
		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0, FALSE,
		    VM_PROT_NONE, VM_PROT_NONE,
		    VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
			    (uint64_t) start, (uint64_t) end,
			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			    kr);
		}
	}
#endif

	kmem_set_user_wire_limits();
}


#pragma mark map copyio

/*
 * Routine:	copyinmap
 * Purpose:
 *	Like copyin, except that fromaddr is an address
 *	in the specified VM map.  This implementation
 *	is incomplete; it handles the current user map
 *	and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t        map,
	vm_map_offset_t fromaddr,
	void            *todata,
	vm_size_t       length)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	} else if (current_map() == map) {
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
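
/*
 * Illustrative sketch (not from the original source; `target_map`, `uaddr`
 * and `struct foo` are hypothetical): reading a small structure out of a
 * specific map. The caller must keep `target_map` valid across the call.
 *
 *	struct foo f;
 *
 *	if (copyinmap(target_map, uaddr, &f, sizeof(f)) != KERN_SUCCESS) {
 *		return KERN_INVALID_ADDRESS;
 *	}
 */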

/*
 * Routine:	copyoutmap
 * Purpose:
 *	Like copyout, except that toaddr is an address
 *	in the specified VM map.
 */
kern_return_t
copyoutmap(
	vm_map_t                map,
	void                    *fromdata,
	vm_map_address_t        toaddr,
	vm_size_t               length)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
	} else if (current_map() == map) {
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
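
/*
 * Illustrative sketch (hypothetical names): the write direction mirrors the
 * read case above; on failure the triage record identifies which path faulted.
 *
 *	uint64_t stamp = mach_absolute_time();
 *
 *	if (copyoutmap(target_map, &stamp, uaddr, sizeof(stamp)) != KERN_SUCCESS) {
 *		return KERN_INVALID_ADDRESS;
 *	}
 */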

/*
 * Routine:	copyoutmap_atomic{32, 64}
 * Purpose:
 *	Like copyoutmap, except that the operation is atomic.
 *	Takes a value rather than a pointer to source data.
 */
kern_return_t
copyoutmap_atomic32(
	vm_map_t                map,
	uint32_t                value,
	vm_map_address_t        toaddr)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint32_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

kern_return_t
copyoutmap_atomic64(
	vm_map_t                map,
	uint64_t                value,
	vm_map_address_t        toaddr)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint64_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
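
/*
 * Illustrative sketch (hypothetical names): the atomic variants take the value
 * directly, e.g. for publishing a single word to userspace where a torn store
 * must never be observed.
 *
 *	if (copyoutmap_atomic32(target_map, 1u, flag_uaddr) != KERN_SUCCESS) {
 *		return KERN_INVALID_ADDRESS;
 *	}
 */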


#pragma mark pointer obfuscation / packing

/*
 *
 * The following two functions are to be used when exposing kernel
 * addresses to userspace via any of the various debug or info
 * facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
 * and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
 * are exported to KEXTs.
 *
 * NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

vm_offset_t
vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
{
	assert(salt != 0);

	if (addr == 0) {
		return 0ul;
	}

	if (VM_KERNEL_IS_SLID(addr)) {
		return VM_KERNEL_UNSLIDE(addr);
	}

	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
	SHA256_CTX sha_ctx;

	SHA256_Init(&sha_ctx);
	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
	SHA256_Final(sha_digest, &sha_ctx);

	return sha_digest[0];
}

__exported vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr);
vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr)
{
	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
}
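
/*
 * Illustrative sketch: as the note above says, in-kernel callers should go
 * through the macro wrappers in vm_param.h (e.g. VM_KERNEL_ADDRHASH()) rather
 * than calling the *_internal/_external functions directly. `obj` is a
 * hypothetical kernel pointer being logged.
 *
 *	printf("object %p\n", (void *)VM_KERNEL_ADDRHASH((vm_offset_t)obj));
 */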

void
vm_kernel_addrhide(
	vm_offset_t addr,
	vm_offset_t *hide_addr)
{
	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
}

/*
 * vm_kernel_addrperm_external:
 * vm_kernel_unslide_or_perm_external:
 *
 * Use these functions when exposing an address to userspace that could come
 * from either kernel text/data *or* the heap.
 */
void
vm_kernel_addrperm_external(
	vm_offset_t addr,
	vm_offset_t *perm_addr)
{
	if (VM_KERNEL_IS_SLID(addr)) {
		*perm_addr = VM_KERNEL_UNSLIDE(addr);
	} else if (VM_KERNEL_ADDRESS(addr)) {
		*perm_addr = addr + vm_kernel_addrperm_ext;
	} else {
		*perm_addr = addr;
	}
}
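
/*
 * Illustrative sketch (hypothetical `info` and `port` names): a typical caller
 * is a debug or info interface that must report a kernel pointer to userspace
 * without revealing its real value.
 *
 *	vm_offset_t perm;
 *
 *	vm_kernel_addrperm_external((vm_offset_t)port, &perm);
 *	info->object_id = (uint64_t)perm;
 */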

void
vm_kernel_unslide_or_perm_external(
	vm_offset_t addr,
	vm_offset_t *up_addr)
{
	vm_kernel_addrperm_external(addr, up_addr);
}

void
vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
{
	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
		panic("pointer %p can't be packed: low %d bits aren't 0",
		    (void *)ptr, params.vmpp_shift);
	} else if (ptr <= params.vmpp_base) {
		panic("pointer %p can't be packed: below base %p",
		    (void *)ptr, (void *)params.vmpp_base);
	} else {
		panic("pointer %p can't be packed: maximum encodable pointer is %p",
		    (void *)ptr, (void *)vm_packing_max_packable(params));
	}
}

void
vm_packing_verify_range(
	const char *subsystem,
	vm_offset_t min_address,
	vm_offset_t max_address,
	vm_packing_params_t params)
{
	if (min_address > max_address) {
		panic("%s: %s range invalid min:%p > max:%p",
		    __func__, subsystem, (void *)min_address, (void *)max_address);
	}

	if (!params.vmpp_base_relative) {
		return;
	}

	if (min_address <= params.vmpp_base) {
		panic("%s: %s range invalid min:%p <= base:%p",
		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
	}

	if (max_address > vm_packing_max_packable(params)) {
		panic("%s: %s range invalid max:%p >= max packable:%p",
		    __func__, subsystem, (void *)max_address,
		    (void *)vm_packing_max_packable(params));
	}
}
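
/*
 * Worked example (illustrative, using only the vm_packing_params_t fields
 * referenced above): with base-relative packing, a pointer is stored as a
 * small integer relative to vmpp_base in units of (1 << vmpp_shift):
 *
 *	packed   = (uint32_t)((ptr - params.vmpp_base) >> params.vmpp_shift);
 *	unpacked = params.vmpp_base + ((vm_offset_t)packed << params.vmpp_shift);
 *
 * This is why vm_packing_pointer_invalid() reports a pointer whose low
 * vmpp_shift bits aren't zero, that sits at or below vmpp_base, or that
 * exceeds vm_packing_max_packable(params).
 */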

#pragma mark tests
#if DEBUG || DEVELOPMENT
#include <sys/errno.h>

static void
kmem_test_for_entry(
	vm_map_t        map,
	vm_offset_t     addr,
	void          (^block)(vm_map_entry_t))
{
	vm_map_entry_t entry;

	vm_map_lock(map);
	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
	vm_map_unlock(map);
}

#define kmem_test_assert_map(map, pg, entries) ({ \
	assert3u((map)->size, ==, ptoa(pg)); \
	assert3u((map)->hdr.nentries, ==, entries); \
})

static bool
can_write_at(vm_offset_t offs, uint32_t page)
{
	static const int zero;

	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
}
#define assert_writeable(offs, page) \
	assertf(can_write_at(offs, page), \
	    "can write at %p + ptoa(%d)", (void *)offs, page)

#define assert_faults(offs, page) \
	assertf(!can_write_at(offs, page), \
	    "can write at %p + ptoa(%d)", (void *)offs, page)

#define peek(offs, page) \
	(*(uint32_t *)((offs) + ptoa(page)))

#define poke(offs, page, v) \
	(*(uint32_t *)((offs) + ptoa(page)) = (v))

__attribute__((noinline))
static void
kmem_alloc_basic_test(vm_map_t map)
{
	kmem_guard_t guard = {
		.kmg_tag = VM_KERN_MEMORY_DIAG,
	};
	vm_offset_t addr;

	/*
	 * Test wired basics:
	 * - KMA_KOBJECT
	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
	 * - allocation alignment
	 */
	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
	kmem_test_assert_map(map, 10, 1);

	kmem_test_for_entry(map, addr, ^(vm_map_entry_t e){
		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
		assert(e->vme_kernel_object);
		assert(!e->vme_atomic);
		assert3u(e->vme_start, <=, addr);
		assert3u(addr + ptoa(10), <=, e->vme_end);
	});

	assert_faults(addr, 0);
	for (int i = 1; i < 9; i++) {
		assert_writeable(addr, i);
	}
	assert_faults(addr, 9);

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);

	/*
	 * Test pageable basics.
	 */
	addr = kmem_alloc_guard(map, ptoa(10), 0,
	    KMA_PAGEABLE, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 0, PG)", map);
	kmem_test_assert_map(map, 10, 1);

	for (int i = 0; i < 9; i++) {
		assert_faults(addr, i);
		poke(addr, i, 42);
		assert_writeable(addr, i);
	}

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);
}

__attribute__((noinline))
static void
kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
{
	kmem_guard_t guard = {
		.kmg_atomic  = !(kind & KMR_DATA),
		.kmg_tag     = VM_KERN_MEMORY_DIAG,
		.kmg_context = 0xefface,
	};
	vm_offset_t addr, newaddr;
	const int N = 10;

	/*
	 * This isn't something kmem_realloc_guard() _needs_ to do,
	 * we could conceive an implementation where it grows in place
	 * if there's space after it.
	 *
	 * However, this is what the implementation does today.
	 */
	bool realloc_growth_changes_address = true;
	bool GL = (kind & KMR_GUARD_LAST);

	/*
	 * Initial N page allocation
	 */
	addr = kmem_alloc_guard(map, ptoa(N), 0,
	    (kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO,
	    guard).kmr_address;
	assert3u(addr, !=, 0);
	kmem_test_assert_map(map, N, 1);
	for (int pg = 0; pg < N - GL; pg++) {
		poke(addr, pg, 42 + pg);
	}
	for (int pg = N - GL; pg < N; pg++) {
		assert_faults(addr, pg);
	}


	/*
	 * Grow to N + 3 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(newaddr, !=, 0);
	if (realloc_growth_changes_address) {
		assert3u(addr, !=, newaddr);
	}
	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
		kmem_test_assert_map(map, N + 3, 1);
	} else {
		kmem_test_assert_map(map, 2 * N + 3, 2);
	}
	for (int pg = 0; pg < N - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 42 + pg);
	}
	if ((kind & KMR_FREEOLD) == 0) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert3u(peek(addr, pg), ==, 42 + pg);
		}
		/* check that the old and new ranges truly share their pages */
		poke(addr + 16, 0, 1234);
		assert3u(peek(newaddr + 16, 0), ==, 1234);
		kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
		kmem_test_assert_map(map, N + 3, 1);
	}
	if (addr != newaddr) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert_faults(addr, pg);
		}
	}
	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 0);
	}
	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
		assert_faults(newaddr, pg);
	}
	addr = newaddr;


	/*
	 * Shrink to N - 2 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(map->size, ==, ptoa(N - 2));
	assert3u(newaddr, ==, addr);
	kmem_test_assert_map(map, N - 2, 1);

	for (int pg = 0; pg < N - 2 - GL; pg++) {
		assert3u(peek(addr, pg), ==, 42 + pg);
	}
	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
		assert_faults(addr, pg);
	}

	kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
	kmem_test_assert_map(map, 0, 0);
}

static int
kmem_basic_test(__unused int64_t in, int64_t *out)
{
	mach_vm_offset_t addr;
	vm_map_t map;

	printf("%s: test running\n", __func__);

	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
	    VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
	    KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;

	printf("%s: kmem_alloc ...\n", __func__);
	kmem_alloc_basic_test(map);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s: PASS\n", __func__);

	/* using KMR_DATA exercises the non-atomic realloc path */
	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
	printf("%s: PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA);
	printf("%s: PASS\n", __func__);

	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
	vm_map_deallocate(map);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);

static void
kmem_test_get_size_idx_for_chunks(uint32_t chunks)
{
	uint32_t idx = kmem_get_size_idx_for_chunks(chunks);

	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
}

__attribute__((noinline))
static void
kmem_test_get_size_idx_for_all_chunks(void)
{
	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		uint32_t chunks = kmem_size_array[i].ks_num_chunk;

		if (chunks != 1) {
			kmem_test_get_size_idx_for_chunks(chunks - 1);
		}
		kmem_test_get_size_idx_for_chunks(chunks);
		kmem_test_get_size_idx_for_chunks(chunks + 1);
	}
}

static int
kmem_guard_obj_test(__unused int64_t in, int64_t *out)
{
	printf("%s: test running\n", __func__);

	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
	kmem_test_get_size_idx_for_all_chunks();
	printf("%s: PASS\n", __func__);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
#endif /* DEBUG || DEVELOPMENT */