1/*
2 * Copyright (c) 2000-2021 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: kern/kalloc.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1985
62 *
63 * General kernel memory allocator. This allocator is designed
64 * to be used by the kernel to manage dynamic memory fast.
65 */
66
67#include <mach/vm_types.h>
68#include <mach/boolean.h>
69#include <mach/sdt.h>
70#include <mach/machine/vm_types.h>
71#include <mach/vm_param.h>
72#include <kern/misc_protos.h>
73#include <kern/counter.h>
74#include <kern/zalloc_internal.h>
75#include <kern/kalloc.h>
76#include <kern/ledger.h>
77#include <kern/backtrace.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_object.h>
80#include <vm/vm_map.h>
81#include <vm/vm_memtag.h>
82#include <sys/kdebug.h>
83
84#include <os/hash.h>
85#include <san/kasan.h>
86#include <libkern/section_keywords.h>
87#include <libkern/prelink.h>
88
89SCALABLE_COUNTER_DEFINE(kalloc_large_count);
90SCALABLE_COUNTER_DEFINE(kalloc_large_total);
91
92#pragma mark initialization
93
94/*
95 * All allocations of size less than KHEAP_MAX_SIZE are rounded to the next nearest
96 * sized zone. This allocator is built on top of the zone allocator. A zone
97 * is created for each potential size that we are willing to get in small
98 * blocks.
99 *
100 * Allocations of size greater than KHEAP_MAX_SIZE are allocated from the VM.
101 */
102
103/*
104 * The kt_zone_cfg table defines the configuration of zones on various
105 * platforms for kalloc_type fixed size allocations.
106 */
107
108#if KASAN_CLASSIC
109#define K_SIZE_CLASS(size) \
110 (((size) & PAGE_MASK) == 0 ? (size) : \
111 ((size) <= 1024 ? (size) : (size) - KASAN_GUARD_SIZE))
112#else
113#define K_SIZE_CLASS(size) (size)
114#endif
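/*
 * Worked example of K_SIZE_CLASS() (illustrative values only: assumes 16K
 * pages and a 16-byte KASAN_GUARD_SIZE). Under KASAN_CLASSIC:
 *   K_SIZE_CLASS(512)   == 512          (<= 1024, left alone)
 *   K_SIZE_CLASS(2048)  == 2048 - 16    (redzone carved out of the class)
 *   K_SIZE_CLASS(16384) == 16384        (page multiple, left alone)
 * Without KASAN_CLASSIC the macro is the identity.
 */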
115static_assert(K_SIZE_CLASS(KHEAP_MAX_SIZE) == KHEAP_MAX_SIZE);
116
117static const uint16_t kt_zone_cfg[] = {
118 K_SIZE_CLASS(16),
119 K_SIZE_CLASS(32),
120 K_SIZE_CLASS(48),
121 K_SIZE_CLASS(64),
122 K_SIZE_CLASS(80),
123 K_SIZE_CLASS(96),
124 K_SIZE_CLASS(128),
125 K_SIZE_CLASS(160),
126 K_SIZE_CLASS(192),
127 K_SIZE_CLASS(224),
128 K_SIZE_CLASS(256),
129 K_SIZE_CLASS(288),
130 K_SIZE_CLASS(368),
131 K_SIZE_CLASS(400),
132 K_SIZE_CLASS(512),
133 K_SIZE_CLASS(576),
134 K_SIZE_CLASS(768),
135 K_SIZE_CLASS(1024),
136 K_SIZE_CLASS(1152),
137 K_SIZE_CLASS(1280),
138 K_SIZE_CLASS(1664),
139 K_SIZE_CLASS(2048),
140 K_SIZE_CLASS(4096),
141 K_SIZE_CLASS(6144),
142 K_SIZE_CLASS(8192),
143 K_SIZE_CLASS(12288),
144 K_SIZE_CLASS(16384),
145#if __arm64__
146 K_SIZE_CLASS(24576),
147 K_SIZE_CLASS(32768),
148#endif /* __arm64__ */
149};
150
151#define MAX_K_ZONE(kzc) (uint32_t)(sizeof(kzc) / sizeof(kzc[0]))
152
153/*
154 * kalloc_type callsites are assigned a zone during early boot. They
155 * use the dlut[] (direct lookup table), indexed by size normalized
156 * to the minimum alignment to find the right zone index quickly.
157 */
158#define INDEX_ZDLUT(size) (((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
159#define KALLOC_DLUT_SIZE (KHEAP_MAX_SIZE / KALLOC_MINALIGN)
160#define MAX_SIZE_ZDLUT ((KALLOC_DLUT_SIZE - 1) * KALLOC_MINALIGN)
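/*
 * Illustrative sizing, assuming KALLOC_MINALIGN == 16 and
 * KHEAP_MAX_SIZE == 32768: KALLOC_DLUT_SIZE is 2048 entries, and
 * INDEX_ZDLUT(100) == (100 + 15) / 16 == 7.
 */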
161static __startup_data uint8_t kalloc_type_dlut[KALLOC_DLUT_SIZE];
162static __startup_data uint32_t kheap_zsize[KHEAP_NUM_ZONES];
163
164#if VM_TAG_SIZECLASSES
165static_assert(VM_TAG_SIZECLASSES >= MAX_K_ZONE(kt_zone_cfg));
166#endif
167
168const char * const kalloc_heap_names[] = {
169 [KHEAP_ID_NONE] = "",
170 [KHEAP_ID_SHARED] = "shared.",
171 [KHEAP_ID_DATA_BUFFERS] = "data.",
172 [KHEAP_ID_KT_VAR] = "",
173};
174
175/*
176 * Shared heap configuration
177 */
178SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_SHARED[1] = {
179 {
180 .kh_name = "shared.kalloc",
181 .kh_heap_id = KHEAP_ID_SHARED,
182 .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE,
183 }
184};
185
186/*
187 * Bag of bytes heap configuration
188 */
189SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_DATA_BUFFERS[1] = {
190 {
191 .kh_name = "data.kalloc",
192 .kh_heap_id = KHEAP_ID_DATA_BUFFERS,
193 .kh_tag = VM_KERN_MEMORY_KALLOC_DATA,
194 }
195};
196
197/*
198 * Configuration of variable kalloc type heaps
199 */
200SECURITY_READ_ONLY_LATE(struct kheap_info)
201kalloc_type_heap_array[KT_VAR_MAX_HEAPS] = {};
202SECURITY_READ_ONLY_LATE(struct kalloc_heap) KHEAP_KT_VAR[1] = {
203 {
204 .kh_name = "kalloc.type.var",
205 .kh_heap_id = KHEAP_ID_KT_VAR,
206 .kh_tag = VM_KERN_MEMORY_KALLOC_TYPE
207 }
208};
209
210KALLOC_HEAP_DEFINE(KHEAP_DEFAULT, "KHEAP_DEFAULT", KHEAP_ID_KT_VAR);
211
212__startup_func
213static void
214kalloc_zsize_compute(void)
215{
216 uint32_t step = KHEAP_STEP_START;
217 uint32_t size = KHEAP_START_SIZE;
218
219 /*
220 * Manually initialize extra initial zones
221 */
222 kheap_zsize[0] = size / 2;
223 kheap_zsize[1] = size;
224 static_assert(KHEAP_EXTRA_ZONES == 2);
225
226 /*
227 * Compute sizes for remaining zones
228 */
229 for (uint32_t i = 0; i < KHEAP_NUM_STEPS; i++) {
230 uint32_t step_idx = (i * 2) + KHEAP_EXTRA_ZONES;
231
232 kheap_zsize[step_idx] = K_SIZE_CLASS(size + step);
233 kheap_zsize[step_idx + 1] = K_SIZE_CLASS(size + 2 * step);
234
235 step *= 2;
236 size += step;
237 }
238}
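/*
 * Illustrative result, assuming KHEAP_START_SIZE == 32 and
 * KHEAP_STEP_START == 16: kheap_zsize[] becomes
 *   16, 32,        (the two extra initial zones)
 *   48, 64,        (step = 16)
 *   96, 128,       (step = 32)
 *   192, 256,      (step = 64)
 *   ...
 * i.e. every power of two and its 1.5x midpoint up to KHEAP_MAX_SIZE
 * (modulo the K_SIZE_CLASS() adjustment under KASAN_CLASSIC).
 */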
239
240static zone_t
241kalloc_zone_for_size_with_flags(
242 zone_id_t zid,
243 vm_size_t size,
244 zalloc_flags_t flags)
245{
246 vm_size_t max_size = KHEAP_MAX_SIZE;
247 bool forcopyin = flags & Z_MAY_COPYINMAP;
248 zone_t zone;
249
250 if (flags & Z_KALLOC_ARRAY) {
251 size = roundup(size, KALLOC_ARRAY_GRANULE);
252 }
253
254 if (forcopyin) {
255#if __x86_64__
256 /*
257 * On Intel, the OSData() ABI used to allocate
258 * from the kernel map starting at PAGE_SIZE.
259 *
260 * If only vm_map_copyin() or a wrapper is used,
261 * then everything will work fine because vm_map_copy_t
262 * will perform an actual copy if the data is smaller
263 * than msg_ool_size_small (== KHEAP_MAX_SIZE).
264 *
265 * However, if anyone is trying to call mach_vm_remap(),
266 * then bad things (TM) happen.
267 *
268 * Avoid this by preserving the ABI and moving
269 * to kalloc_large() earlier.
270 *
271 * Any recent code really ought to use IOMemoryDescriptor
272 * for this purpose however.
273 */
274 max_size = PAGE_SIZE - 1;
275#endif
276 }
277
278 if (size <= max_size) {
279 uint32_t idx;
280
281 if (size <= KHEAP_START_SIZE) {
282 zid += (size > 16);
283 } else {
284 /*
285 * . log2down(size - 1) is log2up(size) - 1
286 * . (size - 1) >> (log2down(size - 1) - 1)
287 * is either 0x2 or 0x3
288 */
289 idx = kalloc_log2down((uint32_t)(size - 1));
290 zid += KHEAP_EXTRA_ZONES +
291 2 * (idx - KHEAP_START_IDX) +
292 ((uint32_t)(size - 1) >> (idx - 1)) - 2;
293 }
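 /*
 * Worked example of the index math above (assuming KHEAP_START_SIZE == 32,
 * so KHEAP_START_IDX == 5): for size == 300, log2down(299) == 8, hence
 *   zid += KHEAP_EXTRA_ZONES + 2 * (8 - 5) + ((299 >> 7) - 2)
 *       == 2 + 6 + 0 == 8,
 * the 384-byte bucket; size == 400 gives ((399 >> 7) - 2) == 1 and lands
 * one zone higher, in the 512-byte bucket.
 */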
294
295 zone = zone_by_id(zid);
296#if KASAN_CLASSIC
297 /*
298 * Under kasan classic, certain size classes are a redzone
299 * away from the mathematical formula above, and we need
300 * to "go to the next zone".
301 *
302 * Because the KHEAP_MAX_SIZE bucket _does_ exist however,
303 * this will never go to an "invalid" zone that doesn't
304 * belong to the kheap.
305 */
306 if (size > zone_elem_inner_size(zone)) {
307 zone++;
308 }
309#endif
310 return zone;
311 }
312
313 return ZONE_NULL;
314}
315
316zone_t
317kalloc_zone_for_size(zone_id_t zid, size_t size)
318{
319 return kalloc_zone_for_size_with_flags(zid, size, Z_WAITOK);
320}
321
322static inline bool
323kheap_size_from_zone(
324 void *addr,
325 vm_size_t size,
326 zalloc_flags_t flags)
327{
328 vm_size_t max_size = KHEAP_MAX_SIZE;
329 bool forcopyin = flags & Z_MAY_COPYINMAP;
330
331#if __x86_64__
332 /*
333 * If Z_FULLSIZE is used, then due to kalloc_zone_for_size_with_flags()
334 * behavior, the element could have a PAGE_SIZE reported size,
335 * yet still come from a zone when Z_MAY_COPYINMAP is set.
336 */
337 if (forcopyin) {
338 if (size == PAGE_SIZE &&
339 zone_id_for_element(addr, size) != ZONE_ID_INVALID) {
340 return true;
341 }
342
343 max_size = PAGE_SIZE - 1;
344 }
345#else
346#pragma unused(addr, forcopyin)
347#endif
348
349 return size <= max_size;
350}
351
352/*
353 * Data zones should never use the shared zone, so set the no-share
354 * bit right after creation.
355 */
356__startup_func
357static void
358kalloc_set_no_share_for_data(
359 zone_kheap_id_t kheap_id,
360 zone_stats_t zstats)
361{
362 if (kheap_id == KHEAP_ID_DATA_BUFFERS) {
363 zpercpu_foreach(zs, zstats) {
364 os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
365 }
366 }
367}
368
369__startup_func
370static void
371kalloc_zone_init(
372 const char *kheap_name,
373 zone_kheap_id_t kheap_id,
374 zone_id_t *kheap_zstart,
375 zone_create_flags_t zc_flags)
376{
377 zc_flags |= ZC_PGZ_USE_GUARDS;
378
379 for (uint32_t i = 0; i < KHEAP_NUM_ZONES; i++) {
380 uint32_t size = kheap_zsize[i];
381 char buf[MAX_ZONE_NAME], *z_name;
382 int len;
383
384 len = scnprintf(buf, MAX_ZONE_NAME, "%s.%u", kheap_name, size);
385 z_name = zalloc_permanent(len + 1, ZALIGN_NONE);
386 strlcpy(z_name, buf, len + 1);
387
388 (void)zone_create_ext(z_name, size, zc_flags, ZONE_ID_ANY, ^(zone_t z){
389#if __arm64e__ || CONFIG_KERNEL_TAGGING
390 uint32_t scale = kalloc_log2down(size / 32);
391
392 if (size == 32 << scale) {
393 z->z_array_size_class = scale;
394 } else {
395 z->z_array_size_class = scale | 0x10;
396 }
397#endif
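 /*
 * Example of the encoding above: a 64-byte zone gets scale
 * log2down(64 / 32) == 1 and, since 32 << 1 == 64, an exact array size
 * class of 1; a 48-byte zone gets scale 0 with the 0x10 bit set to mark
 * that the class only approximates the element size.
 */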
398 zone_security_array[zone_index(z)].z_kheap_id = kheap_id;
399 if (i == 0) {
400 *kheap_zstart = zone_index(z);
401 }
402 kalloc_set_no_share_for_data(kheap_id, z->z_stats);
403 });
404 }
405}
406
407__startup_func
408static void
409kalloc_heap_init(struct kalloc_heap *kheap)
410{
411 kalloc_zone_init("kalloc", kheap->kh_heap_id, &kheap->kh_zstart,
412 ZC_NONE);
413 /*
414 * Count all the "raw" views for zones in the heap.
415 */
416 zone_view_count += KHEAP_NUM_ZONES;
417}
418
419#define KEXT_ALIGN_SHIFT 6
420#define KEXT_ALIGN_BYTES (1<< KEXT_ALIGN_SHIFT)
421#define KEXT_ALIGN_MASK (KEXT_ALIGN_BYTES-1)
422#define kt_scratch_size (256ul << 10)
423#define KALLOC_TYPE_SECTION(type) \
424 (type == KTV_FIXED? "__kalloc_type": "__kalloc_var")
425
426/*
427 * Enum to specify the kalloc_type variant being used.
428 */
429__options_decl(kalloc_type_variant_t, uint16_t, {
430 KTV_FIXED = 0x0001,
431 KTV_VAR = 0x0002,
432});
433
434/*
435 * Macros that generate the appropriate kalloc_type variant (i.e. fixed or
436 * variable) of the desired variable/function.
437 */
438#define kalloc_type_var(type, var) \
439 ((type) == KTV_FIXED? \
440 (vm_offset_t) kalloc_type_##var##_fixed: \
441 (vm_offset_t) kalloc_type_##var##_var)
442#define kalloc_type_func(type, func, ...) \
443 ((type) == KTV_FIXED? \
444 kalloc_type_##func##_fixed(__VA_ARGS__): \
445 kalloc_type_##func##_var(__VA_ARGS__))
446
447TUNABLE(kalloc_type_options_t, kt_options, "kt", 0);
448TUNABLE(uint16_t, kt_var_heaps, "kt_var_heaps",
449 ZSECURITY_CONFIG_KT_VAR_BUDGET);
450TUNABLE(uint16_t, kt_fixed_zones, "kt_fixed_zones",
451 ZSECURITY_CONFIG_KT_BUDGET);
452TUNABLE(uint16_t, kt_var_ptr_heaps, "kt_var_ptr_heaps", 2);
453static TUNABLE(bool, kt_shared_fixed, "-kt-shared", true);
454
455/*
456 * Section start/end for fixed kalloc_type views
457 */
458extern struct kalloc_type_view kalloc_type_sec_start_fixed[]
459__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
460
461extern struct kalloc_type_view kalloc_type_sec_end_fixed[]
462__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_type");
463
464/*
465 * Section start/end for variable kalloc_type views
466 */
467extern struct kalloc_type_var_view kalloc_type_sec_start_var[]
468__SECTION_START_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
469
470extern struct kalloc_type_var_view kalloc_type_sec_end_var[]
471__SECTION_END_SYM(KALLOC_TYPE_SEGMENT, "__kalloc_var");
472
473__startup_data
474static kalloc_type_views_t *kt_buffer = NULL;
475__startup_data
476static uint64_t kt_count;
477__startup_data
478uint32_t kalloc_type_hash_seed;
479
480__startup_data
481static uint16_t kt_freq_list[MAX_K_ZONE(kt_zone_cfg)];
482__startup_data
483static uint16_t kt_freq_list_total[MAX_K_ZONE(kt_zone_cfg)];
484
485struct nzones_with_idx {
486 uint16_t nzones;
487 uint16_t idx;
488};
489int16_t zone_carry = 0;
490
491_Static_assert(__builtin_popcount(KT_SUMMARY_MASK_TYPE_BITS) == (KT_GRANULE_MAX + 1),
492 "KT_SUMMARY_MASK_TYPE_BITS doesn't match KT_GRANULE_MAX");
493
494/*
495 * For use by lldb to iterate over kalloc types
496 */
497SECURITY_READ_ONLY_LATE(uint64_t) num_kt_sizeclass = MAX_K_ZONE(kt_zone_cfg);
498SECURITY_READ_ONLY_LATE(zone_t) kalloc_type_zarray[MAX_K_ZONE(kt_zone_cfg)];
499SECURITY_READ_ONLY_LATE(zone_t) kt_singleton_array[MAX_K_ZONE(kt_zone_cfg)];
500
501#define KT_GET_HASH(flags) (uint16_t)((flags & KT_HASH) >> 16)
502static_assert(KT_HASH >> 16 == (KMEM_RANGE_MASK | KMEM_HASH_SET |
503 KMEM_DIRECTION_MASK),
504 "Insufficient bits to represent range and dir for VM allocations");
505static_assert(MAX_K_ZONE(kt_zone_cfg) < KALLOC_TYPE_IDX_MASK,
506 "validate idx mask");
507/* qsort routines */
508typedef int (*cmpfunc_t)(const void *a, const void *b);
509extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
510
511static inline uint16_t
512kalloc_type_get_idx(uint32_t kt_size)
513{
514 return (uint16_t) (kt_size >> KALLOC_TYPE_IDX_SHIFT);
515}
516
517static inline uint32_t
518kalloc_type_set_idx(uint32_t kt_size, uint16_t idx)
519{
520 return kt_size | ((uint32_t) idx << KALLOC_TYPE_IDX_SHIFT);
521}
522
523static void
524kalloc_type_build_dlut(void)
525{
526 vm_size_t size = 0;
527 for (int i = 0; i < KALLOC_DLUT_SIZE; i++, size += KALLOC_MINALIGN) {
528 uint8_t zindex = 0;
529 while (kt_zone_cfg[zindex] < size) {
530 zindex++;
531 }
532 kalloc_type_dlut[i] = zindex;
533 }
534}
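/*
 * Example lookup, assuming KALLOC_MINALIGN == 16: a 100-byte type computes
 * INDEX_ZDLUT(100) == 7, and kalloc_type_dlut[7] holds the index of the
 * first kt_zone_cfg entry >= 112 bytes, i.e. the 128-byte size class.
 */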
535
536static uint32_t
537kalloc_type_idx_for_size(uint32_t size)
538{
539 assert(size <= KHEAP_MAX_SIZE);
540 uint16_t idx = kalloc_type_dlut[INDEX_ZDLUT(size)];
541 return kalloc_type_set_idx(size, idx);
542}
543
544static void
545kalloc_type_assign_zone_fixed(
546 kalloc_type_view_t *cur,
547 kalloc_type_view_t *end,
548 zone_t z,
549 zone_t sig_zone,
550 zone_t shared_zone)
551{
552 /*
553 * Assign the zone created for every kalloc_type_view
554 * of the same unique signature
555 */
556 bool need_raw_view = false;
557
558 while (cur < end) {
559 kalloc_type_view_t kt = *cur;
560 struct zone_view *zv = &kt->kt_zv;
561 zv->zv_zone = z;
562 kalloc_type_flags_t kt_flags = kt->kt_flags;
563 zone_security_flags_t zsflags = zone_security_config(z);
564
565 assert(kalloc_type_get_size(kt->kt_size) <= z->z_elem_size);
566 if (!shared_zone) {
567 assert(zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS);
568 }
569
570 if (kt_flags & KT_SLID) {
571 kt->kt_signature -= vm_kernel_slide;
572 kt->kt_zv.zv_name -= vm_kernel_slide;
573 }
574
575 if ((kt_flags & KT_PRIV_ACCT) ||
576 ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
577 zv->zv_stats = zalloc_percpu_permanent_type(
578 struct zone_stats);
579 need_raw_view = true;
580 zone_view_count += 1;
581 } else {
582 zv->zv_stats = z->z_stats;
583 }
584
585 if ((kt_flags & KT_NOSHARED) || !shared_zone) {
586 if ((kt_flags & KT_NOSHARED) && !(kt_flags & KT_PRIV_ACCT)) {
587 panic("KT_NOSHARED used w/o private accounting for view %s",
588 zv->zv_name);
589 }
590
591 zpercpu_foreach(zs, zv->zv_stats) {
592 os_atomic_store(&zs->zs_alloc_not_shared, 1, relaxed);
593 }
594 }
595
596 if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
597 kt->kt_zshared = shared_zone;
598 kt->kt_zsig = sig_zone;
599 /*
600 * If we haven't yet set the signature equivalence then set it,
601 * otherwise validate that the zone has the same signature equivalence
602 * as the sig_zone provided
603 */
604 if (!zone_get_sig_eq(z)) {
605 zone_set_sig_eq(z, zone_index(sig_zone));
606 } else {
607 assert(zone_get_sig_eq(z) == zone_get_sig_eq(sig_zone));
608 }
609 }
610 zv->zv_next = (zone_view_t) z->z_views;
611 zv->zv_zone->z_views = (zone_view_t) kt;
612 cur++;
613 }
614 if (need_raw_view) {
615 zone_view_count += 1;
616 }
617}
618
619__startup_func
620static void
621kalloc_type_assign_zone_var(kalloc_type_var_view_t *cur,
622 kalloc_type_var_view_t *end, uint32_t heap_idx)
623{
624 struct kheap_info *cfg = &kalloc_type_heap_array[heap_idx];
625 while (cur < end) {
626 kalloc_type_var_view_t kt = *cur;
627 kt->kt_heap_start = cfg->kh_zstart;
628 kalloc_type_flags_t kt_flags = kt->kt_flags;
629
630 if (kt_flags & KT_SLID) {
631 if (kt->kt_sig_hdr) {
632 kt->kt_sig_hdr -= vm_kernel_slide;
633 }
634 kt->kt_sig_type -= vm_kernel_slide;
635 kt->kt_name -= vm_kernel_slide;
636 }
637
638 if ((kt_flags & KT_PRIV_ACCT) ||
639 ((kt_options & KT_OPTIONS_ACCT) && (kt_flags & KT_DEFAULT))) {
640 kt->kt_stats = zalloc_percpu_permanent_type(struct zone_stats);
641 zone_view_count += 1;
642 }
643
644 kt->kt_next = (zone_view_t) cfg->kt_views;
645 cfg->kt_views = kt;
646 cur++;
647 }
648}
649
650__startup_func
651static inline void
652kalloc_type_slide_fixed(vm_offset_t addr)
653{
654 kalloc_type_view_t ktv = (struct kalloc_type_view *) addr;
655 ktv->kt_signature += vm_kernel_slide;
656 ktv->kt_zv.zv_name += vm_kernel_slide;
657 ktv->kt_flags |= KT_SLID;
658}
659
660__startup_func
661static inline void
662kalloc_type_slide_var(vm_offset_t addr)
663{
664 kalloc_type_var_view_t ktv = (struct kalloc_type_var_view *) addr;
665 if (ktv->kt_sig_hdr) {
666 ktv->kt_sig_hdr += vm_kernel_slide;
667 }
668 ktv->kt_sig_type += vm_kernel_slide;
669 ktv->kt_name += vm_kernel_slide;
670 ktv->kt_flags |= KT_SLID;
671}
672
673__startup_func
674static void
675kalloc_type_validate_flags(
676 kalloc_type_flags_t kt_flags,
677 const char *kt_name,
678 uuid_string_t kext_uuid)
679{
680 if (!(kt_flags & KT_CHANGED) || !(kt_flags & KT_CHANGED2)) {
681 panic("kalloc_type_view(%s) from kext(%s) hasn't been rebuilt with "
682 "required xnu headers", kt_name, kext_uuid);
683 }
684}
685
686static kalloc_type_flags_t
687kalloc_type_get_flags_fixed(vm_offset_t addr, uuid_string_t kext_uuid)
688{
689 kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
690 kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_zv.zv_name, kext_uuid);
691 return ktv->kt_flags;
692}
693
694static kalloc_type_flags_t
695kalloc_type_get_flags_var(vm_offset_t addr, uuid_string_t kext_uuid)
696{
697 kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
698 kalloc_type_validate_flags(ktv->kt_flags, ktv->kt_name, kext_uuid);
699 return ktv->kt_flags;
700}
701
702/*
703 * Check if signature of type is made up of only data and padding
704 */
705static bool
706kalloc_type_is_data(kalloc_type_flags_t kt_flags)
707{
708 assert(kt_flags & KT_CHANGED);
709 return kt_flags & KT_DATA_ONLY;
710}
711
712/*
713 * Check if signature of type is made up of only pointers
714 */
715static bool
716kalloc_type_is_ptr_array(kalloc_type_flags_t kt_flags)
717{
718 assert(kt_flags & KT_CHANGED2);
719 return kt_flags & KT_PTR_ARRAY;
720}
721
722static bool
723kalloc_type_from_vm(kalloc_type_flags_t kt_flags)
724{
725 assert(kt_flags & KT_CHANGED);
726 return kt_flags & KT_VM;
727}
728
729__startup_func
730static inline vm_size_t
731kalloc_type_view_sz_fixed(void)
732{
733 return sizeof(struct kalloc_type_view);
734}
735
736__startup_func
737static inline vm_size_t
738kalloc_type_view_sz_var(void)
739{
740 return sizeof(struct kalloc_type_var_view);
741}
742
743__startup_func
744static inline uint64_t
745kalloc_type_view_count(kalloc_type_variant_t type, vm_offset_t start,
746 vm_offset_t end)
747{
748 return (end - start) / kalloc_type_func(type, view_sz);
749}
750
751__startup_func
752static inline void
753kalloc_type_buffer_copy_fixed(kalloc_type_views_t *buffer, vm_offset_t ktv)
754{
755 buffer->ktv_fixed = (kalloc_type_view_t) ktv;
756}
757
758__startup_func
759static inline void
760kalloc_type_buffer_copy_var(kalloc_type_views_t *buffer, vm_offset_t ktv)
761{
762 buffer->ktv_var = (kalloc_type_var_view_t) ktv;
763}
764
765__startup_func
766static void
767kalloc_type_handle_data_view_fixed(vm_offset_t addr)
768{
769 kalloc_type_view_t cur_data_view = (kalloc_type_view_t) addr;
770 zone_t z = kalloc_zone_for_size(KHEAP_DATA_BUFFERS->kh_zstart,
771 cur_data_view->kt_size);
772 kalloc_type_assign_zone_fixed(&cur_data_view, &cur_data_view + 1, z, NULL,
773 NULL);
774}
775
776__startup_func
777static void
778kalloc_type_handle_data_view_var(vm_offset_t addr)
779{
780 kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
781 kalloc_type_assign_zone_var(&ktv, &ktv + 1, KT_VAR_DATA_HEAP);
782}
783
784__startup_func
785static uint32_t
786kalloc_type_handle_parray_var(void)
787{
788 uint32_t i = 0;
789 kalloc_type_var_view_t kt = kt_buffer[0].ktv_var;
790 const char *p_name = kt->kt_name;
791
792 /*
793 * The sorted list of variable kalloc_type_view has pointer arrays at the
794 * beginning. Walk through them and assign a random pointer heap to each
795 * type detected by typename.
796 */
797 while (kalloc_type_is_ptr_array(kt->kt_flags)) {
798 uint32_t heap_id = kmem_get_random16(1) + KT_VAR_PTR_HEAP0;
799 const char *c_name = kt->kt_name;
800 uint32_t p_i = i;
801
802 while (strcmp(c_name, p_name) == 0) {
803 i++;
804 kt = kt_buffer[i].ktv_var;
805 c_name = kt->kt_name;
806 }
807 p_name = c_name;
808 kalloc_type_assign_zone_var(&kt_buffer[p_i].ktv_var,
809 &kt_buffer[i].ktv_var, heap_id);
810 }
811
812 /*
813 * Returns the index of the first view that isn't a pointer array
814 */
815 return i;
816}
817
818__startup_func
819static uint32_t
820kalloc_hash_adjust(uint32_t hash, uint32_t shift)
821{
822 /*
823 * Limit range_id to ptr ranges
824 */
825 uint32_t range_id = kmem_adjust_range_id(hash);
826 uint32_t direction = hash & 0x8000;
827 return (range_id | KMEM_HASH_SET | direction) << shift;
828}
829
830__startup_func
831static void
832kalloc_type_set_type_hash(const char *sig_ty, const char *sig_hdr,
833 kalloc_type_flags_t *kt_flags)
834{
835 uint32_t hash = 0;
836
837 assert(sig_ty != NULL);
838 hash = os_hash_jenkins_update(sig_ty, strlen(sig_ty),
839 kalloc_type_hash_seed);
840 if (sig_hdr) {
841 hash = os_hash_jenkins_update(sig_hdr, strlen(sig_hdr), hash);
842 }
843 os_hash_jenkins_finish(hash);
844 hash &= (KMEM_RANGE_MASK | KMEM_DIRECTION_MASK);
845
846 *kt_flags = *kt_flags | kalloc_hash_adjust(hash, 16);
847}
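/*
 * The Jenkins hash above is reduced to the low bits that pick a kmem
 * pointer range and an allocation direction; kalloc_hash_adjust() shifts
 * those bits (together with KMEM_HASH_SET) into the upper 16 bits of
 * kt_flags, which is where KT_GET_HASH() expects to find them when the
 * hash is later handed to the kmem_*_guard machinery.
 */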
848
849__startup_func
850static void
851kalloc_type_set_type_hash_fixed(vm_offset_t addr)
852{
853 /*
854 * Use backtraces on fixed as we don't have signatures for types that go
855 * to the VM due to rdar://85182551.
856 */
857 (void) addr;
858}
859
860__startup_func
861static void
862kalloc_type_set_type_hash_var(vm_offset_t addr)
863{
864 kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
865 kalloc_type_set_type_hash(ktv->kt_sig_type, ktv->kt_sig_hdr,
866 &ktv->kt_flags);
867}
868
869__startup_func
870static void
871kalloc_type_mark_processed_fixed(vm_offset_t addr)
872{
873 kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
874 ktv->kt_flags |= KT_PROCESSED;
875}
876
877__startup_func
878static void
879kalloc_type_mark_processed_var(vm_offset_t addr)
880{
881 kalloc_type_var_view_t ktv = (kalloc_type_var_view_t) addr;
882 ktv->kt_flags |= KT_PROCESSED;
883}
884
885__startup_func
886static void
887kalloc_type_update_view_fixed(vm_offset_t addr)
888{
889 kalloc_type_view_t ktv = (kalloc_type_view_t) addr;
890 ktv->kt_size = kalloc_type_idx_for_size(ktv->kt_size);
891}
892
893__startup_func
894static void
895kalloc_type_update_view_var(vm_offset_t addr)
896{
897 (void) addr;
898}
899
900__startup_func
901static void
902kalloc_type_view_copy(
903 const kalloc_type_variant_t type,
904 vm_offset_t start,
905 vm_offset_t end,
906 uint64_t *cur_count,
907 bool slide,
908 uuid_string_t kext_uuid)
909{
910 uint64_t count = kalloc_type_view_count(type, start, end);
911 if (count + *cur_count >= kt_count) {
912 panic("kalloc_type_view_copy: Insufficient space in scratch buffer");
913 }
914 vm_offset_t cur = start;
915 while (cur < end) {
916 if (slide) {
917 kalloc_type_func(type, slide, cur);
918 }
919 kalloc_type_flags_t kt_flags = kalloc_type_func(type, get_flags, cur,
920 kext_uuid);
921 kalloc_type_func(type, mark_processed, cur);
922 /*
923 * Skip views that go to the VM
924 */
925 if (kalloc_type_from_vm(kt_flags)) {
926 cur += kalloc_type_func(type, view_sz);
927 continue;
928 }
929
930 /*
931 * If signature indicates that the entire allocation is data move it to
932 * KHEAP_DATA_BUFFERS. Note that KT_VAR_DATA_HEAP is a fake "data" heap,
933 * variable kalloc_type handles the actual redirection in the entry points
934 * kalloc/kfree_type_var_impl.
935 */
936 if (kalloc_type_is_data(kt_flags)) {
937 kalloc_type_func(type, handle_data_view, cur);
938 cur += kalloc_type_func(type, view_sz);
939 continue;
940 }
941
942 /*
943 * Set type hash that is used by kmem_*_guard
944 */
945 kalloc_type_func(type, set_type_hash, cur);
946 kalloc_type_func(type, update_view, cur);
947 kalloc_type_func(type, buffer_copy, &kt_buffer[*cur_count], cur);
948 cur += kalloc_type_func(type, view_sz);
949 *cur_count = *cur_count + 1;
950 }
951}
952
953__startup_func
954static uint64_t
955kalloc_type_view_parse(const kalloc_type_variant_t type)
956{
957 kc_format_t kc_format;
958 uint64_t cur_count = 0;
959
960 if (!PE_get_primary_kc_format(&kc_format)) {
961 panic("kalloc_type_view_parse: wasn't able to determine kc format");
962 }
963
964 if (kc_format == KCFormatStatic) {
965 /*
966 * If kc is static or KCGEN, __kalloc_type sections from kexts and
967 * xnu are coalesced.
968 */
969 kalloc_type_view_copy(type,
970 kalloc_type_var(type, sec_start),
971 kalloc_type_var(type, sec_end),
972 &cur_count, false, NULL);
973 } else if (kc_format == KCFormatFileset) {
974 /*
975 * If kc uses filesets, traverse __kalloc_type section for each
976 * macho in the BootKC.
977 */
978 kernel_mach_header_t *kc_mh = NULL;
979 kernel_mach_header_t *kext_mh = NULL;
980
981 kc_mh = (kernel_mach_header_t *)PE_get_kc_header(KCKindPrimary);
982 struct load_command *lc =
983 (struct load_command *)((vm_offset_t)kc_mh + sizeof(*kc_mh));
984 for (uint32_t i = 0; i < kc_mh->ncmds;
985 i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
986 if (lc->cmd != LC_FILESET_ENTRY) {
987 continue;
988 }
989 struct fileset_entry_command *fse =
990 (struct fileset_entry_command *)(vm_offset_t)lc;
991 kext_mh = (kernel_mach_header_t *)fse->vmaddr;
992 kernel_section_t *sect = (kernel_section_t *)getsectbynamefromheader(
993 kext_mh, KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
994 if (sect != NULL) {
995 unsigned long uuidlen = 0;
996 void *kext_uuid = getuuidfromheader(kext_mh, &uuidlen);
997 uuid_string_t kext_uuid_str;
998 if ((kext_uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
999 uuid_unparse_upper(*(uuid_t *)kext_uuid, kext_uuid_str);
1000 }
1001 kalloc_type_view_copy(type, sect->addr, sect->addr + sect->size,
1002 &cur_count, false, kext_uuid_str);
1003 }
1004 }
1005 } else if (kc_format == KCFormatKCGEN) {
1006 /*
1007 * Parse __kalloc_type section from xnu
1008 */
1009 kalloc_type_view_copy(type,
1010 kalloc_type_var(type, sec_start),
1011 kalloc_type_var(type, sec_end), &cur_count, false, NULL);
1012
1013 /*
1014 * Parse __kalloc_type section for kexts
1015 *
1016 * Note: We don't process the kalloc_type_views for kexts on armv7
1017 * as this platform has insufficient memory for type based
1018 * segregation. kalloc_type_impl_external will direct callsites
1019 * based on their size.
1020 */
1021 kernel_mach_header_t *xnu_mh = &_mh_execute_header;
1022 vm_offset_t cur = 0;
1023 vm_offset_t end = 0;
1024
1025 /*
1026 * Kext machos are in the __PRELINK_TEXT segment. Extract the segment
1027 * and traverse it.
1028 */
1029 kernel_section_t *prelink_sect = getsectbynamefromheader(
1030 xnu_mh, kPrelinkTextSegment, kPrelinkTextSection);
1031 assert(prelink_sect);
1032 cur = prelink_sect->addr;
1033 end = prelink_sect->addr + prelink_sect->size;
1034
1035 while (cur < end) {
1036 uint64_t kext_text_sz = 0;
1037 kernel_mach_header_t *kext_mh = (kernel_mach_header_t *) cur;
1038
1039 if (kext_mh->magic == 0) {
1040 /*
1041 * Assert that we have processed all kexts and all that is left
1042 * is padding
1043 */
1044 assert(memcmp_zero_ptr_aligned((void *)kext_mh, end - cur) == 0);
1045 break;
1046 } else if (kext_mh->magic != MH_MAGIC_64 &&
1047 kext_mh->magic != MH_CIGAM_64) {
1048 panic("kalloc_type_view_parse: couldn't find kext @ offset:%lx",
1049 cur);
1050 }
1051
1052 /*
1053 * Kext macho found, iterate through its segments
1054 */
1055 struct load_command *lc =
1056 (struct load_command *)(cur + sizeof(kernel_mach_header_t));
1057 bool isSplitKext = false;
1058
1059 for (uint32_t i = 0; i < kext_mh->ncmds && (vm_offset_t)lc < end;
1060 i++, lc = (struct load_command *)((vm_offset_t)lc + lc->cmdsize)) {
1061 if (lc->cmd == LC_SEGMENT_SPLIT_INFO) {
1062 isSplitKext = true;
1063 continue;
1064 } else if (lc->cmd != LC_SEGMENT_64) {
1065 continue;
1066 }
1067
1068 kernel_segment_command_t *seg_cmd =
1069 (struct segment_command_64 *)(vm_offset_t)lc;
1070 /*
1071 * Parse kalloc_type section
1072 */
1073 if (strcmp(seg_cmd->segname, KALLOC_TYPE_SEGMENT) == 0) {
1074 kernel_section_t *kt_sect = getsectbynamefromseg(seg_cmd,
1075 KALLOC_TYPE_SEGMENT, KALLOC_TYPE_SECTION(type));
1076 if (kt_sect) {
1077 kalloc_type_view_copy(type, kt_sect->addr + vm_kernel_slide,
1078 kt_sect->addr + kt_sect->size + vm_kernel_slide, &cur_count,
1079 true, NULL);
1080 }
1081 }
1082 /*
1083 * If the kext has a __TEXT segment, that is the only thing that
1084 * will be in the special __PRELINK_TEXT KC segment, so the next
1085 * macho is right after.
1086 */
1087 if (strcmp(seg_cmd->segname, "__TEXT") == 0) {
1088 kext_text_sz = seg_cmd->filesize;
1089 }
1090 }
1091 /*
1092 * If the kext did not have a __TEXT segment (special xnu kexts with
1093 * only a __LINKEDIT segment) then the next macho will be after all the
1094 * header commands.
1095 */
1096 if (!kext_text_sz) {
1097 kext_text_sz = kext_mh->sizeofcmds;
1098 } else if (!isSplitKext) {
1099 panic("kalloc_type_view_parse: No support for non-split seg KCs");
1100 break;
1101 }
1102
1103 cur += ((kext_text_sz + (KEXT_ALIGN_BYTES - 1)) & (~KEXT_ALIGN_MASK));
1104 }
1105 } else {
1106 /*
1107 * When kc_format is KCFormatDynamic or KCFormatUnknown, we don't handle
1108 * parsing kalloc_type_view structs during startup.
1109 */
1110 panic("kalloc_type_view_parse: couldn't parse kalloc_type_view structs"
1111 " for kc_format = %d\n", kc_format);
1112 }
1113 return cur_count;
1114}
1115
1116__startup_func
1117static int
1118kalloc_type_cmp_fixed(const void *a, const void *b)
1119{
1120 const kalloc_type_view_t ktA = *(const kalloc_type_view_t *)a;
1121 const kalloc_type_view_t ktB = *(const kalloc_type_view_t *)b;
1122
1123 const uint16_t idxA = kalloc_type_get_idx(ktA->kt_size);
1124 const uint16_t idxB = kalloc_type_get_idx(ktB->kt_size);
1125 /*
1126 * If the kalloc_type_views are in the same kalloc bucket, sort by
1127 * signature else sort by size
1128 */
1129 if (idxA == idxB) {
1130 int result = strcmp(ktA->kt_signature, ktB->kt_signature);
1131 /*
1132 * If the kalloc_type_views have the same signature sort by site
1133 * name
1134 */
1135 if (result == 0) {
1136 return strcmp(ktA->kt_zv.zv_name, ktB->kt_zv.zv_name);
1137 }
1138 return result;
1139 }
1140 const uint32_t sizeA = kalloc_type_get_size(ktA->kt_size);
1141 const uint32_t sizeB = kalloc_type_get_size(ktB->kt_size);
1142 return (int)(sizeA - sizeB);
1143}
1144
1145__startup_func
1146static int
1147kalloc_type_cmp_var(const void *a, const void *b)
1148{
1149 const kalloc_type_var_view_t ktA = *(const kalloc_type_var_view_t *)a;
1150 const kalloc_type_var_view_t ktB = *(const kalloc_type_var_view_t *)b;
1151 const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1152 const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1153 bool ktA_ptrArray = kalloc_type_is_ptr_array(ktA->kt_flags);
1154 bool ktB_ptrArray = kalloc_type_is_ptr_array(ktB->kt_flags);
1155 int result = 0;
1156
1157 /*
1158 * Switched around (B - A) because we want the pointer arrays to be at the
1159 * top
1160 */
1161 result = ktB_ptrArray - ktA_ptrArray;
1162 if (result == 0) {
1163 result = strcmp(ktA_hdr, ktB_hdr);
1164 if (result == 0) {
1165 result = strcmp(ktA->kt_sig_type, ktB->kt_sig_type);
1166 if (result == 0) {
1167 result = strcmp(ktA->kt_name, ktB->kt_name);
1168 }
1169 }
1170 }
1171 return result;
1172}
1173
1174__startup_func
1175static uint16_t *
1176kalloc_type_create_iterators_fixed(
1177 uint16_t *kt_skip_list_start,
1178 uint64_t count)
1179{
1180 uint16_t *kt_skip_list = kt_skip_list_start;
1181 uint16_t p_idx = UINT16_MAX; /* previous size idx */
1182 uint16_t c_idx = 0; /* current size idx */
1183 uint16_t unique_sig = 0;
1184 uint16_t total_sig = 0;
1185 const char *p_sig = NULL;
1186 const char *p_name = "";
1187 const char *c_sig = NULL;
1188 const char *c_name = NULL;
1189
1190 /*
1191 * Walk over each kalloc_type_view
1192 */
1193 for (uint16_t i = 0; i < count; i++) {
1194 kalloc_type_view_t kt = kt_buffer[i].ktv_fixed;
1195
1196 c_idx = kalloc_type_get_idx(kt->kt_size);
1197 c_sig = kt->kt_signature;
1198 c_name = kt->kt_zv.zv_name;
1199 /*
1200 * When current kalloc_type_view is in a different kalloc size
1201 * bucket than the previous, it means we have processed all in
1202 * the previous size bucket, so store the accumulated values
1203 * and advance the indices.
1204 */
1205 if (p_idx == UINT16_MAX || c_idx != p_idx) {
1206 /*
1207 * Updates for frequency lists
1208 */
1209 if (p_idx != UINT16_MAX) {
1210 kt_freq_list[p_idx] = unique_sig;
1211 kt_freq_list_total[p_idx] = total_sig - unique_sig;
1212 }
1213 unique_sig = 1;
1214 total_sig = 1;
1215
1216 p_idx = c_idx;
1217 p_sig = c_sig;
1218 p_name = c_name;
1219
1220 /*
1221 * Updates to signature skip list
1222 */
1223 *kt_skip_list = i;
1224 kt_skip_list++;
1225
1226 continue;
1227 }
1228
1229 /*
1230 * When the current kalloc_type_view is in the same kalloc size bucket
1231 * as the previous one, analyze the signature to see if it is unique.
1232 *
1233 * Signatures are collapsible if one is a substring of the next.
1234 */
1235 if (strncmp(c_sig, p_sig, strlen(p_sig)) != 0) {
1236 /*
1237 * Unique signature detected. Update counts and advance index
1238 */
1239 unique_sig++;
1240 total_sig++;
1241
1242 *kt_skip_list = i;
1243 kt_skip_list++;
1244 p_sig = c_sig;
1245 p_name = c_name;
1246 continue;
1247 }
1248 /*
1249 * Need this here as we do substring matching for signatures, so we
1250 * want to track the longest signature seen rather than a substring
1251 */
1252 p_sig = c_sig;
1253
1254 /*
1255 * Check if current kalloc_type_view corresponds to a new type
1256 */
1257 if (strlen(p_name) != strlen(c_name) || strcmp(p_name, c_name) != 0) {
1258 total_sig++;
1259 p_name = c_name;
1260 }
1261 }
1262 /*
1263 * Final update
1264 */
1265 assert(c_idx == p_idx);
1266 assert(kt_freq_list[c_idx] == 0);
1267 kt_freq_list[c_idx] = unique_sig;
1268 kt_freq_list_total[c_idx] = total_sig - unique_sig;
1269 *kt_skip_list = (uint16_t) count;
1270
1271 return ++kt_skip_list;
1272}
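/*
 * Small worked example of the bookkeeping above: if the sorted buffer for
 * the 32-byte class holds views with (signature, site name)
 *   (sigA, t1) (sigA, t2) (sigB, t3)
 * then kt_skip_list records the offsets of (sigA, t1) and (sigB, t3),
 * kt_freq_list[] for that class is 2 (unique signatures), and
 * kt_freq_list_total[] is 1 (the extra type t2 that shares a signature
 * with t1).
 */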
1273
1274__startup_func
1275static uint32_t
1276kalloc_type_create_iterators_var(
1277 uint32_t *kt_skip_list_start,
1278 uint32_t buf_start)
1279{
1280 uint32_t *kt_skip_list = kt_skip_list_start;
1281 uint32_t n = 0;
1282
1283 kt_skip_list[n] = buf_start;
1284 assert(kt_count > buf_start + 1);
1285 for (uint32_t i = buf_start + 1; i < kt_count; i++) {
1286 kalloc_type_var_view_t ktA = kt_buffer[i - 1].ktv_var;
1287 kalloc_type_var_view_t ktB = kt_buffer[i].ktv_var;
1288 const char *ktA_hdr = ktA->kt_sig_hdr ?: "";
1289 const char *ktB_hdr = ktB->kt_sig_hdr ?: "";
1290 assert(ktA->kt_sig_type != NULL);
1291 assert(ktB->kt_sig_type != NULL);
1292 if (strcmp(ktA_hdr, ktB_hdr) != 0 ||
1293 strcmp(ktA->kt_sig_type, ktB->kt_sig_type) != 0) {
1294 n++;
1295 kt_skip_list[n] = i;
1296 }
1297 }
1298 /*
1299 * Final update
1300 */
1301 n++;
1302 kt_skip_list[n] = (uint32_t) kt_count;
1303 return n;
1304}
1305
1306__startup_func
1307static uint16_t
1308kalloc_type_distribute_budget(
1309 uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
1310 uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)],
1311 uint16_t zone_budget,
1312 uint16_t min_zones_per_size)
1313{
1314 uint16_t total_sig = 0;
1315 uint16_t min_sig = 0;
1316 uint16_t assigned_zones = 0;
1317 uint16_t remaining_zones = zone_budget;
1318 uint16_t modulo = 0;
1319
1320 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1321 uint16_t sig_freq = freq_list[i];
1322 uint16_t min_zones = min_zones_per_size;
1323
1324 if (sig_freq < min_zones_per_size) {
1325 min_zones = sig_freq;
1326 }
1327 total_sig += sig_freq;
1328 kt_zones[i] = min_zones;
1329 min_sig += min_zones;
1330 }
1331 if (remaining_zones > total_sig) {
1332 remaining_zones = total_sig;
1333 }
1334 assert(remaining_zones >= min_sig);
1335 remaining_zones -= min_sig;
1336 total_sig -= min_sig;
1337 assigned_zones += min_sig;
1338
1339 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1340 uint16_t freq = freq_list[i];
1341
1342 if (freq < min_zones_per_size) {
1343 continue;
1344 }
1345 uint32_t numer = (freq - min_zones_per_size) * remaining_zones;
1346 uint16_t n_zones = (uint16_t)(numer / total_sig);
1347
1348 /*
1349 * Accumulate remainder and increment n_zones when it goes above
1350 * denominator
1351 */
1352 modulo += numer % total_sig;
1353 if (modulo >= total_sig) {
1354 n_zones++;
1355 modulo -= total_sig;
1356 }
1357
1358 /*
1359 * Cap the total number of zones to the unique signatures
1360 */
1361 if ((n_zones + min_zones_per_size) > freq) {
1362 uint16_t extra_zones = n_zones + min_zones_per_size - freq;
1363 modulo += (extra_zones * total_sig);
1364 n_zones -= extra_zones;
1365 }
1366 kt_zones[i] += n_zones;
1367 assigned_zones += n_zones;
1368 }
1369
1370 if (kt_options & KT_OPTIONS_DEBUG) {
1371 printf("kalloc_type_apply_policy: assigned %u zones wasted %u zones\n",
1372 assigned_zones, remaining_zones + min_sig - assigned_zones);
1373 }
1374 return remaining_zones + min_sig - assigned_zones;
1375}
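/*
 * Worked example of the proportional split above: with a budget of 10
 * zones, min_zones_per_size == 0 and three size classes holding 5, 3 and
 * 2 unique signatures, the classes receive 10*5/10 == 5, 10*3/10 == 3 and
 * 10*2/10 == 2 zones; when the division is not exact, the modulo
 * accumulator hands out the leftover zones one at a time as the
 * remainders add up to a whole share.
 */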
1376
1377__startup_func
1378static int
1379kalloc_type_cmp_type_zones(const void *a, const void *b)
1380{
1381 const struct nzones_with_idx A = *(const struct nzones_with_idx *)a;
1382 const struct nzones_with_idx B = *(const struct nzones_with_idx *)b;
1383
1384 return (int)(B.nzones - A.nzones);
1385}
1386
1387__startup_func
1388static void
1389kalloc_type_redistribute_budget(
1390 uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1391 uint16_t kt_zones[MAX_K_ZONE(kt_zone_cfg)])
1392{
1393 uint16_t count = 0, cur_count = 0;
1394 struct nzones_with_idx sorted_zones[MAX_K_ZONE(kt_zone_cfg)] = {};
1395 uint16_t top_zone_total = 0;
1396
1397 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1398 uint16_t zones = kt_zones[i];
1399
1400 /*
1401 * If a sizeclass got no zones but has types to divide, make a note
1402 * of it
1403 */
1404 if (zones == 0 && (freq_total_list[i] != 0)) {
1405 count++;
1406 }
1407
1408 sorted_zones[i].nzones = kt_zones[i];
1409 sorted_zones[i].idx = i;
1410 }
1411
1412 qsort(&sorted_zones[0], (size_t) MAX_K_ZONE(kt_zone_cfg),
1413 sizeof(struct nzones_with_idx), kalloc_type_cmp_type_zones);
1414
1415 for (uint16_t i = 0; i < 3; i++) {
1416 top_zone_total += sorted_zones[i].nzones;
1417 }
1418
1419 /*
1420 * Borrow zones from the top 3 sizeclasses and redistribute to those
1421 * that didn't get a zone but have types to divide
1422 */
1423 cur_count = count;
1424 for (uint16_t i = 0; i < 3; i++) {
1425 uint16_t zone_borrow = (sorted_zones[i].nzones * count) / top_zone_total;
1426 uint16_t zone_available = kt_zones[sorted_zones[i].idx];
1427
1428 if (zone_borrow > (zone_available / 2)) {
1429 zone_borrow = zone_available / 2;
1430 }
1431 kt_zones[sorted_zones[i].idx] -= zone_borrow;
1432 cur_count -= zone_borrow;
1433 }
1434
1435 for (uint16_t i = 0; i < 3; i++) {
1436 if (cur_count == 0) {
1437 break;
1438 }
1439 kt_zones[sorted_zones[i].idx]--;
1440 cur_count--;
1441 }
1442
1443 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1444 if (kt_zones[i] == 0 && (freq_total_list[i] != 0) &&
1445 (count > cur_count)) {
1446 kt_zones[i]++;
1447 count--;
1448 }
1449 }
1450}
1451
1452static uint16_t
1453kalloc_type_apply_policy(
1454 uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)],
1455 uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)],
1456 uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)],
1457 uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)],
1458 uint16_t zone_budget)
1459{
1460 uint16_t zbudget_sig = (uint16_t) ((7 * zone_budget) / 10);
1461 uint16_t zbudget_type = zone_budget - zbudget_sig;
1462 uint16_t wasted_zones = 0;
1463
1464#if DEBUG || DEVELOPMENT
1465 if (startup_phase < STARTUP_SUB_LOCKDOWN) {
1466 uint16_t current_zones = os_atomic_load(&num_zones, relaxed);
1467
1468 assert(zone_budget + current_zones <= MAX_ZONES);
1469 }
1470#endif
1471
1472 wasted_zones += kalloc_type_distribute_budget(freq_list, kt_zones_sig,
1473 zbudget_sig, 2);
1474 wasted_zones += kalloc_type_distribute_budget(freq_total_list,
1475 kt_zones_type, zbudget_type, 0);
1476 kalloc_type_redistribute_budget(freq_total_list, kt_zones_type);
1477
1478 /*
1479 * Print stats when KT_OPTIONS_DEBUG boot-arg present
1480 */
1481 if (kt_options & KT_OPTIONS_DEBUG) {
1482 printf("Size\ttotal_sig\tunique_signatures\tzones\tzones_sig\t"
1483 "zones_type\n");
1484 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1485 printf("%u\t%u\t%u\t%u\t%u\t%u\n", kt_zone_cfg[i],
1486 freq_total_list[i] + freq_list[i], freq_list[i],
1487 kt_zones_sig[i] + kt_zones_type[i],
1488 kt_zones_sig[i], kt_zones_type[i]);
1489 }
1490 }
1491
1492 return wasted_zones;
1493}
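/*
 * Note on the split above: roughly 70% of the fixed zone budget is
 * distributed by unique signature count and the remaining 30% by the
 * count of additional types sharing a signature, before the
 * redistribution pass steals a few zones back for size classes that
 * would otherwise get none.
 */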
1494
1495
1496__startup_func
1497static void
1498kalloc_type_create_zone_for_size(
1499 zone_t *kt_zones_for_size,
1500 uint16_t kt_zones,
1501 vm_size_t z_size)
1502{
1503 zone_t p_zone = NULL;
1504 char *z_name = NULL;
1505 zone_t shared_z = NULL;
1506
1507 for (uint16_t i = 0; i < kt_zones; i++) {
1508 z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1509 snprintf(z_name, MAX_ZONE_NAME, "kalloc.type%u.%zu", i,
1510 (size_t) z_size);
1511 zone_t z = zone_create(z_name, z_size, ZC_KALLOC_TYPE);
1512 if (i != 0) {
1513 p_zone->z_kt_next = z;
1514 }
1515 p_zone = z;
1516 kt_zones_for_size[i] = z;
1517 }
1518 /*
1519 * Create shared zone for sizeclass if it doesn't already exist
1520 */
1521 if (kt_shared_fixed) {
1522 shared_z = kalloc_zone_for_size(KHEAP_SHARED->kh_zstart, z_size);
1523 if (zone_elem_inner_size(shared_z) != z_size) {
1524 z_name = zalloc_permanent(MAX_ZONE_NAME, ZALIGN_NONE);
1525 snprintf(z_name, MAX_ZONE_NAME, "kalloc.%zu",
1526 (size_t) z_size);
1527 shared_z = zone_create_ext(z_name, z_size, ZC_NONE, ZONE_ID_ANY,
1528 ^(zone_t zone){
1529 zone_security_array[zone_index(zone)].z_kheap_id = KHEAP_ID_SHARED;
1530 });
1531 }
1532 }
1533 kt_zones_for_size[kt_zones] = shared_z;
1534}
1535
1536__startup_func
1537static uint16_t
1538kalloc_type_zones_for_type(
1539 uint16_t zones_total_type,
1540 uint16_t unique_types,
1541 uint16_t total_types,
1542 bool last_sig)
1543{
1544 uint16_t zones_for_type = 0, n_mod = 0;
1545
1546 if (zones_total_type == 0) {
1547 return 0;
1548 }
1549
1550 zones_for_type = (zones_total_type * unique_types) / total_types;
1551 n_mod = (zones_total_type * unique_types) % total_types;
1552 zone_carry += n_mod;
1553
1554 /*
1555 * Drain carry opportunistically
1556 */
1557 if (((unique_types > 3) && (zone_carry > 0)) ||
1558 (zone_carry >= (int) total_types) ||
1559 (last_sig && (zone_carry > 0))) {
1560 zone_carry -= total_types;
1561 zones_for_type++;
1562 }
1563
1564 if (last_sig) {
1565 assert(zone_carry == 0);
1566 }
1567
1568 return zones_for_type;
1569}
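/*
 * Example of the carry above: with 4 type zones to spread over 10 types,
 * a signature group with 3 types is owed 1.2 zones; it gets 1, and the
 * remainder (n_mod == 2 out of 10) is parked in zone_carry so that a
 * later group in the same size class (one with more than 3 types, one
 * that pushes the carry past total_types, or the final one) picks up the
 * extra zone; the last signature must drain the carry to zero.
 */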
1570
1571__startup_func
1572static uint16_t
1573kalloc_type_build_skip_list(
1574 kalloc_type_view_t *start,
1575 kalloc_type_view_t *end,
1576 uint16_t *kt_skip_list)
1577{
1578 kalloc_type_view_t *cur = start;
1579 kalloc_type_view_t prev = *start;
1580 uint16_t i = 0, idx = 0;
1581
1582 kt_skip_list[idx] = i;
1583 idx++;
1584
1585 while (cur < end) {
1586 kalloc_type_view_t kt_cur = *cur;
1587
1588 if (strcmp(prev->kt_zv.zv_name, kt_cur->kt_zv.zv_name) != 0) {
1589 kt_skip_list[idx] = i;
1590
1591 prev = kt_cur;
1592 idx++;
1593 }
1594 i++;
1595 cur++;
1596 }
1597
1598 /*
1599 * Final update
1600 */
1601 kt_skip_list[idx] = i;
1602 return idx;
1603}
1604
1605__startup_func
1606static void
1607kalloc_type_init_sig_eq(
1608 zone_t *zones,
1609 uint16_t n_zones,
1610 zone_t sig_zone)
1611{
1612 for (uint16_t i = 0; i < n_zones; i++) {
1613 zone_t z = zones[i];
1614
1615 assert(!zone_get_sig_eq(z));
1616 zone_set_sig_eq(z, zone_index(sig_zone));
1617 }
1618}
1619
1620__startup_func
1621static uint16_t
1622kalloc_type_distribute_zone_for_type(
1623 kalloc_type_view_t *start,
1624 kalloc_type_view_t *end,
1625 bool last_sig,
1626 uint16_t zones_total_type,
1627 uint16_t total_types,
1628 uint16_t *kt_skip_list,
1629 zone_t kt_zones_for_size[32],
1630 uint16_t type_zones_start,
1631 zone_t sig_zone,
1632 zone_t shared_zone)
1633{
1634 uint16_t count = 0, n_zones = 0;
1635 uint16_t *shuffle_buf = NULL;
1636 zone_t *type_zones = &kt_zones_for_size[type_zones_start];
1637
1638 /*
1639 * Assert there is space in buffer
1640 */
1641 count = kalloc_type_build_skip_list(start, end, kt_skip_list);
1642 n_zones = kalloc_type_zones_for_type(zones_total_type, count, total_types,
1643 last_sig);
1644 shuffle_buf = &kt_skip_list[count + 1];
1645
1646 /*
1647 * Initialize signature equivalence zone for type zones
1648 */
1649 kalloc_type_init_sig_eq(type_zones, n_zones, sig_zone);
1650
1651 if (n_zones == 0) {
1652 kalloc_type_assign_zone_fixed(start, end, sig_zone, sig_zone,
1653 shared_zone);
1654 return n_zones;
1655 }
1656
1657 /*
1658 * Don't shuffle in the sig_zone if there is only 1 type in the zone
1659 */
1660 if (count == 1) {
1661 kalloc_type_assign_zone_fixed(start, end, type_zones[0], sig_zone,
1662 shared_zone);
1663 return n_zones;
1664 }
1665
1666 /*
1667 * Add the signature based zone to n_zones
1668 */
1669 n_zones++;
1670
1671 for (uint16_t i = 0; i < count; i++) {
1672 uint16_t zidx = i % n_zones, shuffled_zidx = 0;
1673 uint16_t type_start = kt_skip_list[i];
1674 kalloc_type_view_t *kt_type_start = &start[type_start];
1675 uint16_t type_end = kt_skip_list[i + 1];
1676 kalloc_type_view_t *kt_type_end = &start[type_end];
1677 zone_t zone;
1678
1679 if (zidx == 0) {
1680 kmem_shuffle(shuffle_buf, n_zones);
1681 }
1682
1683 shuffled_zidx = shuffle_buf[zidx];
1684 zone = shuffled_zidx == 0 ? sig_zone : type_zones[shuffled_zidx - 1];
1685 kalloc_type_assign_zone_fixed(kt_type_start, kt_type_end, zone, sig_zone,
1686 shared_zone);
1687 }
1688
1689 return n_zones - 1;
1690}
1691
1692__startup_func
1693static void
1694kalloc_type_create_zones_fixed(
1695 uint16_t *kt_skip_list_start,
1696 uint16_t *kt_shuffle_buf)
1697{
1698 uint16_t *kt_skip_list = kt_skip_list_start;
1699 uint16_t p_j = 0;
1700 uint16_t kt_zones_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
1701 uint16_t kt_zones_type[MAX_K_ZONE(kt_zone_cfg)] = {};
1702#if DEBUG || DEVELOPMENT
1703 uint64_t kt_shuffle_count = ((vm_address_t) kt_shuffle_buf -
1704 (vm_address_t) kt_buffer) / sizeof(uint16_t);
1705#endif
1706 /*
1707 * Apply policy to determine how many zones to create for each size
1708 * class.
1709 */
1710 kalloc_type_apply_policy(kt_freq_list, kt_freq_list_total,
1711 kt_zones_sig, kt_zones_type, kt_fixed_zones);
1712
1713 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
1714 uint16_t n_unique_sig = kt_freq_list[i];
1715 vm_size_t z_size = kt_zone_cfg[i];
1716 uint16_t n_zones_sig = kt_zones_sig[i];
1717 uint16_t n_zones_type = kt_zones_type[i];
1718 uint16_t total_types = kt_freq_list_total[i];
1719 uint16_t type_zones_used = 0;
1720
1721 if (n_unique_sig == 0) {
1722 continue;
1723 }
1724
1725 zone_carry = 0;
1726 assert(n_zones_sig + n_zones_type + 1 <= 32);
1727 zone_t kt_zones_for_size[32] = {};
1728 kalloc_type_create_zone_for_size(kt_zones_for_size,
1729 n_zones_sig + n_zones_type, z_size);
1730
1731 kalloc_type_zarray[i] = kt_zones_for_size[0];
1732 /*
1733 * Ensure that there is enough space to shuffle n_unique_sig
1734 * indices
1735 */
1736 assert(n_unique_sig < kt_shuffle_count);
1737
1738 /*
1739 * Get a shuffled set of signature indices
1740 */
1741 *kt_shuffle_buf = 0;
1742 if (n_unique_sig > 1) {
1743 kmem_shuffle(kt_shuffle_buf, n_unique_sig);
1744 }
1745
1746 for (uint16_t j = 0; j < n_zones_sig; j++) {
1747 zone_t *z_ptr = &kt_zones_for_size[j];
1748
1749 kalloc_type_init_sig_eq(z_ptr, 1, *z_ptr);
1750 }
1751
1752 for (uint16_t j = 0; j < n_unique_sig; j++) {
1753 /*
1754 * For every size that has unique types
1755 */
1756 uint16_t shuffle_idx = kt_shuffle_buf[j];
1757 uint16_t cur = kt_skip_list[shuffle_idx + p_j];
1758 uint16_t end = kt_skip_list[shuffle_idx + p_j + 1];
1759 zone_t zone = kt_zones_for_size[j % n_zones_sig];
1760 zone_t shared_zone = kt_zones_for_size[n_zones_sig + n_zones_type];
1761 bool last_sig;
1762
1763 last_sig = (j == (n_unique_sig - 1)) ? true : false;
1764 type_zones_used += kalloc_type_distribute_zone_for_type(
1765 &kt_buffer[cur].ktv_fixed,
1766 &kt_buffer[end].ktv_fixed, last_sig,
1767 n_zones_type, total_types + n_unique_sig,
1768 &kt_shuffle_buf[n_unique_sig], kt_zones_for_size,
1769 n_zones_sig + type_zones_used, zone, shared_zone);
1770 }
1771 assert(type_zones_used <= n_zones_type);
1772 p_j += n_unique_sig;
1773 }
1774}
1775
1776__startup_func
1777static void
1778kalloc_type_view_init_fixed(void)
1779{
1780 kalloc_type_hash_seed = (uint32_t) early_random();
1781 kalloc_type_build_dlut();
1782 /*
1783 * Parse __kalloc_type sections and build array of pointers to
1784 * all kalloc type views in kt_buffer.
1785 */
1786 kt_count = kalloc_type_view_parse(KTV_FIXED);
1787 assert(kt_count < KALLOC_TYPE_SIZE_MASK);
1788
1789#if DEBUG || DEVELOPMENT
1790 vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint16_t);
1791 vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_view_t);
1792 assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1793#endif
1794
1795 /*
1796 * Sort based on size class and signature
1797 */
1798 qsort(kt_buffer, (size_t) kt_count, sizeof(kalloc_type_view_t),
1799 kalloc_type_cmp_fixed);
1800
1801 /*
1802 * Build a skip list that holds starts of unique signatures and a
1803 * frequency list of number of unique and total signatures per kalloc
1804 * size class
1805 */
1806 uint16_t *kt_skip_list_start = (uint16_t *)(kt_buffer + kt_count);
1807 uint16_t *kt_shuffle_buf = kalloc_type_create_iterators_fixed(
1808 kt_skip_list_start, kt_count);
1809
1810 /*
1811 * Create zones based on signatures
1812 */
1813 kalloc_type_create_zones_fixed(kt_skip_list_start, kt_shuffle_buf);
1814}
1815
1816__startup_func
1817static void
1818kalloc_type_heap_init(void)
1819{
1820 assert(kt_var_heaps + 1 <= KT_VAR_MAX_HEAPS);
1821 char kh_name[MAX_ZONE_NAME];
1822 uint32_t last_heap = KT_VAR_PTR_HEAP0 + kt_var_heaps;
1823
1824 for (uint32_t i = KT_VAR_PTR_HEAP0; i < last_heap; i++) {
1825 snprintf(&kh_name[0], MAX_ZONE_NAME, "%s%u", KHEAP_KT_VAR->kh_name, i);
1826 kalloc_zone_init((const char *)&kh_name[0], KHEAP_ID_KT_VAR,
1827 &kalloc_type_heap_array[i].kh_zstart, ZC_KALLOC_TYPE);
1828 }
1829 /*
1830 * All variable kalloc type allocations are collapsed into a single
1831 * stat. Individual accounting can be requested via KT_PRIV_ACCT
1832 */
1833 KHEAP_KT_VAR->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
1834 zone_view_count += 1;
1835}
1836
1837__startup_func
1838static void
1839kalloc_type_assign_heap(
1840 uint32_t start,
1841 uint32_t end,
1842 uint32_t heap_id)
1843{
1844 bool use_split = kmem_get_random16(1);
1845
1846 if (use_split) {
1847 heap_id = kt_var_heaps;
1848 }
1849 kalloc_type_assign_zone_var(&kt_buffer[start].ktv_var,
1850 &kt_buffer[end].ktv_var, heap_id);
1851}
1852
1853__startup_func
1854static void
1855kalloc_type_split_heap(
1856 uint32_t start,
1857 uint32_t end,
1858 uint32_t heap_id)
1859{
1860 uint32_t count = start;
1861 const char *p_name = NULL;
1862
1863 while (count < end) {
1864 kalloc_type_var_view_t cur = kt_buffer[count].ktv_var;
1865 const char *c_name = cur->kt_name;
1866
1867 if (!p_name) {
1868 assert(count == start);
1869 p_name = c_name;
1870 }
1871 if (strcmp(c_name, p_name) != 0) {
1872 kalloc_type_assign_heap(start, count, heap_id);
1873 start = count;
1874 p_name = c_name;
1875 }
1876 count++;
1877 }
1878 kalloc_type_assign_heap(start, end, heap_id);
1879}
1880
1881__startup_func
1882static void
1883kalloc_type_view_init_var(void)
1884{
1885 uint32_t buf_start = 0, unique_sig = 0;
1886 uint32_t *kt_skip_list_start;
1887 uint16_t *shuffle_buf;
1888 uint16_t fixed_heaps = KT_VAR__FIRST_FLEXIBLE_HEAP - 1;
1889 uint16_t flex_heap_count = kt_var_heaps - fixed_heaps - 1;
1890 /*
1891 * Pick a random heap to split
1892 */
1893 uint16_t split_heap = kmem_get_random16(flex_heap_count - 1);
1894
1895 /*
1896 * Zones are created prior to parsing the views as zone budget is fixed
1897 * per sizeclass and special types identified while parsing are redirected
1898 * as they are discovered.
1899 */
1900 kalloc_type_heap_init();
1901
1902 /*
1903 * Parse __kalloc_var sections and build array of pointers to views that
1904 * aren't redirected in kt_buffer.
1905 */
1906 kt_count = kalloc_type_view_parse(KTV_VAR);
1907 assert(kt_count < UINT32_MAX);
1908
1909#if DEBUG || DEVELOPMENT
1910 vm_size_t sig_slist_size = (size_t) kt_count * sizeof(uint32_t);
1911 vm_size_t kt_buffer_size = (size_t) kt_count * sizeof(kalloc_type_views_t);
1912 assert(kt_scratch_size >= kt_buffer_size + sig_slist_size);
1913#endif
1914
1915 /*
1916 * Sort based on size class and signature
1917 */
1918 qsort(a: kt_buffer, n: (size_t) kt_count, es: sizeof(kalloc_type_var_view_t),
1919 cmp: kalloc_type_cmp_var);
1920
1921 buf_start = kalloc_type_handle_parray_var();
1922
1923 /*
1924 * Build a skip list that holds starts of unique signatures
1925 */
1926 kt_skip_list_start = (uint32_t *)(kt_buffer + kt_count);
1927 unique_sig = kalloc_type_create_iterators_var(kt_skip_list_start,
1928 buf_start);
1929 shuffle_buf = (uint16_t *)(kt_skip_list_start + unique_sig + 1);
1930 /*
1931	 * If we have only one heap, the remaining elements share it with the
1932	 * pointer arrays.
1933 */
1934 if (kt_var_heaps < KT_VAR__FIRST_FLEXIBLE_HEAP) {
1935 panic("kt_var_heaps is too small");
1936 }
1937
1938 kmem_shuffle(shuffle_buf, count: flex_heap_count);
1939 /*
1940	 * The index of the heap we decide to split is placed twice in the shuffle
1941	 * buffer, so it receives roughly twice as many signatures; those are then
1942	 * further split, per type name, by kalloc_type_split_heap().
1943 */
1944 shuffle_buf[flex_heap_count] = split_heap;
1945 split_heap += (fixed_heaps + 1);
1946
1947 for (uint32_t i = 1; i <= unique_sig; i++) {
1948 uint32_t heap_id = shuffle_buf[i % (flex_heap_count + 1)] +
1949 fixed_heaps + 1;
1950 uint32_t start = kt_skip_list_start[i - 1];
1951 uint32_t end = kt_skip_list_start[i];
1952
1953 assert(heap_id <= kt_var_heaps);
1954 if (heap_id == split_heap) {
1955 kalloc_type_split_heap(start, end, heap_id);
1956 continue;
1957 }
1958 kalloc_type_assign_zone_var(cur: &kt_buffer[start].ktv_var,
1959 end: &kt_buffer[end].ktv_var, heap_idx: heap_id);
1960 }
1961}
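
/*
 * Illustrative sketch (hypothetical values, not part of the build): with
 * fixed_heaps = 2 and flex_heap_count = 4, kmem_shuffle() might yield
 * shuffle_buf = {2, 0, 3, 1}; if split_heap = 1 is picked, the extra slot
 * makes the rotation {2, 0, 3, 1, 1}, e.g.:
 *
 *	i % (flex_heap_count + 1)  -> 2, 0, 3, 1, 1, 2, 0, ...
 *	heap_id = shuffle_buf[...] + fixed_heaps + 1
 *
 * so flexible heap 1 is chosen for roughly twice as many unique signatures,
 * and those ranges are further distributed per type name by
 * kalloc_type_split_heap().
 */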
1962
1963__startup_func
1964static void
1965kalloc_init(void)
1966{
1967 /*
1968 * Allocate scratch space to parse kalloc_type_views and create
1969 * other structures necessary to process them.
1970 */
1971 uint64_t max_count = kt_count = kt_scratch_size / sizeof(kalloc_type_views_t);
1972
1973 static_assert(KHEAP_MAX_SIZE >= KALLOC_SAFE_ALLOC_SIZE);
1974 kalloc_zsize_compute();
1975
1976 /* Initialize kalloc data buffers heap */
1977 kalloc_heap_init(kheap: KHEAP_DATA_BUFFERS);
1978
1979 /* Initialize kalloc shared buffers heap */
1980 kalloc_heap_init(kheap: KHEAP_SHARED);
1981
1982 kmem_alloc(map: kernel_map, addrp: (vm_offset_t *)&kt_buffer, kt_scratch_size,
1983 flags: KMA_NOFAIL | KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_KALLOC);
1984
1985 /*
1986 * Handle fixed size views
1987 */
1988 kalloc_type_view_init_fixed();
1989
1990 /*
1991 * Reset
1992 */
1993 bzero(s: kt_buffer, kt_scratch_size);
1994 kt_count = max_count;
1995
1996 /*
1997 * Handle variable size views
1998 */
1999 kalloc_type_view_init_var();
2000
2001 /*
2002 * Free resources used
2003 */
2004 kmem_free(map: kernel_map, addr: (vm_offset_t) kt_buffer, kt_scratch_size);
2005}
2006STARTUP(ZALLOC, STARTUP_RANK_THIRD, kalloc_init);
2007
2008#pragma mark accessors
2009
2010#define KFREE_ABSURD_SIZE \
2011 ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_AND_KEXT_ADDRESS) / 2)
2012
2013static void
2014KALLOC_ZINFO_SALLOC(vm_size_t bytes)
2015{
2016 thread_t thr = current_thread();
2017 ledger_debit_thread(thread: thr, ledger: thr->t_ledger, entry: task_ledgers.tkm_shared, amount: bytes);
2018}
2019
2020static void
2021KALLOC_ZINFO_SFREE(vm_size_t bytes)
2022{
2023 thread_t thr = current_thread();
2024 ledger_credit_thread(thread: thr, ledger: thr->t_ledger, entry: task_ledgers.tkm_shared, amount: bytes);
2025}
2026
2027static kmem_guard_t
2028kalloc_guard(vm_tag_t tag, uint16_t type_hash, const void *owner)
2029{
2030 kmem_guard_t guard = {
2031 .kmg_atomic = true,
2032 .kmg_tag = tag,
2033 .kmg_type_hash = type_hash,
2034 .kmg_context = os_hash_kernel_pointer(pointer: owner),
2035 };
2036
2037 /*
2038 * TODO: this use is really not sufficiently smart.
2039 */
2040
2041 return guard;
2042}
2043
2044#if __arm64e__ || CONFIG_KERNEL_TAGGING
2045
2046#if __arm64e__
2047#define KALLOC_ARRAY_TYPE_SHIFT (64 - T1SZ_BOOT - 1)
2048
2049/*
2050 * Zone encoding is:
2051 *
2052 * <PAC SIG><1><1><PTR value><5 bits of size class>
2053 *
2054 * VM encoding is:
2055 *
2056 * <PAC SIG><1><0><PTR value><14 bits of page count>
2057 *
2058 * The <1> is precisely placed so that <PAC SIG><1> is T1SZ worth of bits,
2059 * so that PAC authentication extends the proper sign bit.
2060 */
2061
2062static_assert(T1SZ_BOOT + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2063#else
2064#define KALLOC_ARRAY_TYPE_SHIFT (64 - 8 - 1)
2065
2066/*
2067 * Zone encoding is:
2068 *
2069 * <TBI><1><PTR value><5 bits of size class>
2070 *
2071 * VM encoding is:
2072 *
2073 * <TBI><0><PTR value><14 bits of page count>
2074 */
2075
2076static_assert(8 + 1 + 1 + VM_KERNEL_POINTER_SIGNIFICANT_BITS <= 64);
2077#endif
2078
2079SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = KALLOC_ARRAY_TYPE_SHIFT;
2080
2081__attribute__((always_inline))
2082struct kalloc_result
2083__kalloc_array_decode(vm_address_t ptr)
2084{
2085 struct kalloc_result kr;
2086 vm_address_t zone_mask = 1ul << KALLOC_ARRAY_TYPE_SHIFT;
2087
2088 if (ptr & zone_mask) {
2089 kr.size = (32 + (ptr & 0x10)) << (ptr & 0xf);
2090 ptr &= ~0x1full;
2091 } else if (__probable(ptr)) {
2092 kr.size = (ptr & PAGE_MASK) << PAGE_SHIFT;
2093 ptr &= ~PAGE_MASK;
2094 ptr |= zone_mask;
2095 } else {
2096 kr.size = 0;
2097 }
2098
2099 kr.addr = (void *)ptr;
2100 return kr;
2101}
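
/*
 * Worked example for the decode above (illustrative only; values are
 * hypothetical and assume 16K pages):
 *
 *	zone-backed: low bits 0x03 -> (32 + 0x00) << 0x3 = 256 bytes
 *	             low bits 0x13 -> (32 + 0x10) << 0x3 = 384 bytes
 *	VM-backed:   low bits hold a page count, e.g. 3 -> 3 << PAGE_SHIFT
 *	             = 49152 bytes; the type bit is set back so the decoded
 *	             value is a canonical kernel pointer again.
 */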
2102
2103static inline void *
2104__kalloc_array_encode_zone(zone_t z, void *ptr, vm_size_t size __unused)
2105{
2106 return (void *)((vm_address_t)ptr | z->z_array_size_class);
2107}
2108
2109static inline vm_address_t
2110__kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2111{
2112 addr &= ~(0x1ull << KALLOC_ARRAY_TYPE_SHIFT);
2113
2114 return addr | atop(size);
2115}
2116
2117#else
2118
2119SECURITY_READ_ONLY_LATE(uint32_t) kalloc_array_type_shift = 0;
2120
2121/*
2122 * Encoding is:
2123 * bits 0..46: pointer value
2124 * bits 47..47: 0: zones, 1: VM
2125 * bits 48..63: zones: elem size, VM: number of pages
2126 */
2127
2128#define KALLOC_ARRAY_TYPE_BIT 47
2129static_assert(KALLOC_ARRAY_TYPE_BIT > VM_KERNEL_POINTER_SIGNIFICANT_BITS + 1);
2130static_assert(__builtin_clzll(KHEAP_MAX_SIZE) > KALLOC_ARRAY_TYPE_BIT);
2131
2132__attribute__((always_inline))
2133struct kalloc_result
2134__kalloc_array_decode(vm_address_t ptr)
2135{
2136 struct kalloc_result kr;
2137 uint32_t shift = 64 - KALLOC_ARRAY_TYPE_BIT;
2138
2139 kr.size = ptr >> (KALLOC_ARRAY_TYPE_BIT + 1);
2140 if (ptr & (1ull << KALLOC_ARRAY_TYPE_BIT)) {
2141 kr.size <<= PAGE_SHIFT;
2142 }
2143 /* sign extend, so that it also works with NULL */
2144 kr.addr = (void *)((long)(ptr << shift) >> shift);
2145
2146 return kr;
2147}
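
/*
 * Worked example for this fallback layout (illustrative only):
 *
 *	zone-backed: bit 47 clear, bits 48..63 = 192 -> kr.size = 192 bytes
 *	VM-backed:   bit 47 set,   bits 48..63 = 2   -> kr.size = 2 << PAGE_SHIFT
 *
 * The shift pair sign-extends the stored pointer bits, so a NULL input
 * decodes to { .addr = NULL, .size = 0 } without a special case.
 */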
2148
2149static inline void *
2150__kalloc_array_encode_zone(zone_t z __unused, void *ptr, vm_size_t size)
2151{
2152 vm_address_t addr = (vm_address_t)ptr;
2153
2154 addr &= (1ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* clear bit */
2155 addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1);
2156
2157 return (void *)addr;
2158}
2159
2160static inline vm_address_t
2161__kalloc_array_encode_vm(vm_address_t addr, vm_size_t size)
2162{
2163 addr &= (2ull << KALLOC_ARRAY_TYPE_BIT) - 1; /* keep bit */
2164 addr |= size << (KALLOC_ARRAY_TYPE_BIT + 1 - PAGE_SHIFT);
2165
2166 return addr;
2167}
2168
2169#endif
2170
2171vm_size_t
2172kalloc_next_good_size(vm_size_t size, uint32_t period)
2173{
2174 uint32_t scale = kalloc_log2down((uint32_t)size);
2175 vm_size_t step, size_class;
2176
2177 if (size < KHEAP_STEP_START) {
2178 return KHEAP_STEP_START;
2179 }
2180 if (size < 2 * KHEAP_STEP_START) {
2181 return 2 * KHEAP_STEP_START;
2182 }
2183
2184 if (size < KHEAP_MAX_SIZE) {
2185 step = 1ul << (scale - 1);
2186 } else {
2187 step = round_page(x: 1ul << (scale - kalloc_log2down(period)));
2188 }
2189
2190 size_class = (size + step) & -step;
2191#if KASAN_CLASSIC
2192 if (size > K_SIZE_CLASS(size_class)) {
2193 return kalloc_next_good_size(size_class, period);
2194 }
2195 size_class = K_SIZE_CLASS(size_class);
2196#endif
2197 return size_class;
2198}
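
/*
 * Usage sketch (illustrative only): callers growing a buffer geometrically
 * can ask for the next "good" size instead of doubling blindly, e.g.:
 *
 *	new_size = kalloc_next_good_size(old_size, 4);
 *
 * Below KHEAP_MAX_SIZE this walks the kalloc size classes in half-power-of-two
 * steps; above it, sizes grow in page-rounded steps of roughly 1/period
 * (1/4 here) of the current size.
 */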
2199
2200
2201#pragma mark kalloc
2202
2203static inline kalloc_heap_t
2204kalloc_type_get_heap(kalloc_type_var_view_t kt_view, bool kt_free __unused)
2205{
2206 /*
2207 * Redirect data-only views
2208 */
2209 if (kalloc_type_is_data(kt_flags: kt_view->kt_flags)) {
2210 return KHEAP_DATA_BUFFERS;
2211 }
2212
2213 if (kt_view->kt_flags & KT_PROCESSED) {
2214 return KHEAP_KT_VAR;
2215 }
2216
2217 return KHEAP_DEFAULT;
2218}
2219
2220__attribute__((noinline))
2221static struct kalloc_result
2222kalloc_large(
2223 kalloc_heap_t kheap,
2224 vm_size_t req_size,
2225 zalloc_flags_t flags,
2226 uint16_t kt_hash,
2227 void *owner __unused)
2228{
2229 kma_flags_t kma_flags = KMA_KASAN_GUARD | KMA_TAG;
2230 vm_tag_t tag;
2231 vm_offset_t addr, size;
2232
2233 if (flags & Z_NOFAIL) {
2234 panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2235 (size_t)req_size);
2236 }
2237
2238 /*
2239	 * kmem_alloc() can block, so return right away when Z_NOWAIT is set.
2240	 *
2241	 * Also, quickly reject sizes larger than our address space,
2242	 * as kt_size or IOMallocArraySize() expect this.
2243 */
2244 if ((flags & Z_NOWAIT) ||
2245 (req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2246 return (struct kalloc_result){ };
2247 }
2248
2249 if ((flags & Z_KALLOC_ARRAY) && req_size > KALLOC_ARRAY_SIZE_MAX) {
2250 return (struct kalloc_result){ };
2251 }
2252
2253 /*
2254 * (73465472) on Intel we didn't use to pass this flag,
2255	 * which in turn allowed kalloc_large() memory to be shared
2256	 * with user space directly.
2257 *
2258 * We're bound by this unfortunate ABI.
2259 */
2260 if ((flags & Z_MAY_COPYINMAP) == 0) {
2261#ifndef __x86_64__
2262 kma_flags |= KMA_KOBJECT;
2263#endif
2264 } else {
2265 assert(kheap == KHEAP_DATA_BUFFERS);
2266 kma_flags &= ~KMA_TAG;
2267 }
2268 if (flags & Z_NOPAGEWAIT) {
2269 kma_flags |= KMA_NOPAGEWAIT;
2270 }
2271 if (flags & Z_ZERO) {
2272 kma_flags |= KMA_ZERO;
2273 }
2274 if (kheap == KHEAP_DATA_BUFFERS) {
2275 kma_flags |= KMA_DATA;
2276 } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2277 kma_flags |= KMA_SPRAYQTN;
2278 }
2279
2280
2281 tag = zalloc_flags_get_tag(flags);
2282 if (flags & Z_VM_TAG_BT_BIT) {
2283 tag = vm_tag_bt() ?: tag;
2284 }
2285 if (tag == VM_KERN_MEMORY_NONE) {
2286 tag = kheap->kh_tag;
2287 }
2288
2289 size = round_page(x: req_size);
2290 if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2291		req_size = size;
2292 }
2293
2294 addr = kmem_alloc_guard(map: kernel_map, size: req_size, mask: 0,
2295 flags: kma_flags, guard: kalloc_guard(tag, type_hash: kt_hash, owner)).kmr_address;
2296
2297 if (addr != 0) {
2298 counter_inc(&kalloc_large_count);
2299 counter_add(&kalloc_large_total, amount: size);
2300 KALLOC_ZINFO_SALLOC(bytes: size);
2301 if (flags & Z_KALLOC_ARRAY) {
2302 addr = __kalloc_array_encode_vm(addr, size: req_size);
2303 }
2304 } else {
2305 addr = 0;
2306 }
2307
2308 DTRACE_VM3(kalloc, vm_size_t, size, vm_size_t, req_size, void*, addr);
2309 return (struct kalloc_result){ .addr = (void *)addr, .size = req_size };
2310}
2311
2312#if KASAN
2313
2314static inline void
2315kalloc_mark_unused_space(void *addr, vm_size_t size, vm_size_t used)
2316{
2317#if KASAN_CLASSIC
2318 /*
2319 * On KASAN_CLASSIC, Z_SKIP_KASAN is defined and the entire sanitizer
2320 * tagging of the memory region is performed here.
2321 */
2322 kasan_alloc((vm_offset_t)addr, size, used, KASAN_GUARD_SIZE, false,
2323 __builtin_frame_address(0));
2324#endif /* KASAN_CLASSIC */
2325
2326#if KASAN_TBI
2327	kasan_tbi_retag_unused_space((vm_offset_t)addr, size, used ?: 1);
2328#endif /* KASAN_TBI */
2329}
2330#endif /* KASAN */
2331
2332static inline struct kalloc_result
2333kalloc_zone(
2334 zone_t z,
2335 zone_stats_t zstats,
2336 zalloc_flags_t flags,
2337 vm_size_t req_size)
2338{
2339 struct kalloc_result kr;
2340 vm_size_t esize;
2341
2342 kr = zalloc_ext(zone: z, zstats: zstats ?: z->z_stats, flags: flags | Z_SKIP_KASAN);
2343 esize = kr.size;
2344
2345 if (__probable(kr.addr)) {
2346 if (flags & (Z_FULLSIZE | Z_KALLOC_ARRAY)) {
2347 req_size = esize;
2348 } else {
2349 kr.size = req_size;
2350 }
2351#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
2352 kr.addr = zone_element_pgz_oob_adjust(addr: kr.addr, req_size, elem_size: esize);
2353#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
2354
2355#if KASAN
2356 kalloc_mark_unused_space(kr.addr, esize, kr.size);
2357#endif /* KASAN */
2358
2359 if (flags & Z_KALLOC_ARRAY) {
2360 kr.addr = __kalloc_array_encode_zone(z, ptr: kr.addr, size: kr.size);
2361 }
2362 }
2363
2364 DTRACE_VM3(kalloc, vm_size_t, req_size, vm_size_t, kr.size, void*, kr.addr);
2365 return kr;
2366}
2367
2368static zone_id_t
2369kalloc_use_shared_heap(
2370 kalloc_heap_t kheap,
2371 zone_stats_t zstats,
2372 zone_id_t zstart,
2373 zalloc_flags_t *flags)
2374{
2375 if (kheap->kh_heap_id != KHEAP_ID_DATA_BUFFERS) {
2376 zone_stats_t zstats_cpu = zpercpu_get(zstats);
2377
2378 if (os_atomic_load(&zstats_cpu->zs_alloc_not_shared, relaxed) == 0) {
2379 *flags |= Z_SET_NOTSHARED;
2380 return KHEAP_SHARED->kh_zstart;
2381 }
2382 }
2383
2384 return zstart;
2385}
2386
2387#undef kalloc_ext
2388
2389struct kalloc_result
2390kalloc_ext(
2391 void *kheap_or_kt_view,
2392 vm_size_t size,
2393 zalloc_flags_t flags,
2394 void *owner)
2395{
2396 kalloc_type_var_view_t kt_view;
2397 kalloc_heap_t kheap;
2398 zone_stats_t zstats = NULL;
2399 zone_t z;
2400 uint16_t kt_hash;
2401 zone_id_t zstart;
2402
2403 if (kt_is_var_view(kheap_or_kt_view)) {
2404 kt_view = kt_demangle_var_view(ptr: kheap_or_kt_view);
2405 kheap = kalloc_type_get_heap(kt_view, false);
2406 /*
2407 * Use stats from view if present, else use stats from kheap.
2408 * KHEAP_KT_VAR accumulates stats for all allocations going to
2409 * kalloc.type.var zones, while KHEAP_DEFAULT and KHEAP_DATA_BUFFERS
2410 * use stats from the respective zones.
2411 */
2412 zstats = kt_view->kt_stats;
2413 kt_hash = (uint16_t) KT_GET_HASH(kt_view->kt_flags);
2414 zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
2415 } else {
2416 kt_view = NULL;
2417 kheap = kheap_or_kt_view;
2418 kt_hash = kheap->kh_type_hash;
2419 zstart = kheap->kh_zstart;
2420 }
2421
2422 if (!zstats) {
2423 zstats = kheap->kh_stats;
2424 }
2425
2426 zstart = kalloc_use_shared_heap(kheap, zstats, zstart, flags: &flags);
2427 z = kalloc_zone_for_size_with_flags(zid: zstart, size, flags);
2428 if (z) {
2429 return kalloc_zone(z, zstats, flags, req_size: size);
2430 } else {
2431 return kalloc_large(kheap, req_size: size, flags, kt_hash, owner);
2432 }
2433}
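
/*
 * Usage sketch (illustrative, not part of the build): most kernel callers
 * reach this path through the kheap_alloc()/kheap_free() wrappers rather
 * than calling kalloc_ext() directly, e.g. for an untyped data buffer:
 *
 *	void *buf = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK | Z_ZERO);
 *	if (buf) {
 *		...
 *		kheap_free(KHEAP_DATA_BUFFERS, buf, len);
 *	}
 *
 * Typed allocations instead go through the kalloc_type view machinery.
 */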
2434
2435#if XNU_PLATFORM_MacOSX
2436void *
2437kalloc_external(vm_size_t size);
2438void *
2439kalloc_external(vm_size_t size)
2440{
2441 zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK, VM_KERN_MEMORY_KALLOC);
2442 return kheap_alloc(KHEAP_DEFAULT, size, flags);
2443}
2444#endif /* XNU_PLATFORM_MacOSX */
2445
2446void *
2447kalloc_data_external(vm_size_t size, zalloc_flags_t flags);
2448void *
2449kalloc_data_external(vm_size_t size, zalloc_flags_t flags)
2450{
2451 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
2452 return kheap_alloc(KHEAP_DATA_BUFFERS, size, flags);
2453}
2454
2455__abortlike
2456static void
2457kalloc_data_require_panic(void *addr, vm_size_t size)
2458{
2459 zone_id_t zid = zone_id_for_element(addr, esize: size);
2460
2461 if (zid != ZONE_ID_INVALID) {
2462 zone_t z = &zone_array[zid];
2463 zone_security_flags_t zsflags = zone_security_array[zid];
2464
2465 if (zsflags.z_kheap_id != KHEAP_ID_DATA_BUFFERS) {
2466 panic("kalloc_data_require failed: address %p in [%s%s]",
2467 addr, zone_heap_name(z), zone_name(z));
2468 }
2469
2470 panic("kalloc_data_require failed: address %p in [%s%s], "
2471 "size too large %zd > %zd", addr,
2472 zone_heap_name(z), zone_name(z),
2473 (size_t)size, (size_t)zone_elem_inner_size(z));
2474 } else {
2475 panic("kalloc_data_require failed: address %p not in zone native map",
2476 addr);
2477 }
2478}
2479
2480__abortlike
2481static void
2482kalloc_non_data_require_panic(void *addr, vm_size_t size)
2483{
2484 zone_id_t zid = zone_id_for_element(addr, esize: size);
2485
2486 if (zid != ZONE_ID_INVALID) {
2487 zone_t z = &zone_array[zid];
2488 zone_security_flags_t zsflags = zone_security_array[zid];
2489
2490 switch (zsflags.z_kheap_id) {
2491 case KHEAP_ID_NONE:
2492 case KHEAP_ID_DATA_BUFFERS:
2493 case KHEAP_ID_KT_VAR:
2494 panic("kalloc_non_data_require failed: address %p in [%s%s]",
2495 addr, zone_heap_name(z), zone_name(z));
2496 default:
2497 break;
2498 }
2499
2500 panic("kalloc_non_data_require failed: address %p in [%s%s], "
2501 "size too large %zd > %zd", addr,
2502 zone_heap_name(z), zone_name(z),
2503 (size_t)size, (size_t)zone_elem_inner_size(z));
2504 } else {
2505 panic("kalloc_non_data_require failed: address %p not in zone native map",
2506 addr);
2507 }
2508}
2509
2510void
2511kalloc_data_require(void *addr, vm_size_t size)
2512{
2513 zone_id_t zid = zone_id_for_element(addr, esize: size);
2514
2515 if (zid != ZONE_ID_INVALID) {
2516 zone_t z = &zone_array[zid];
2517 zone_security_flags_t zsflags = zone_security_array[zid];
2518 if (zsflags.z_kheap_id == KHEAP_ID_DATA_BUFFERS &&
2519 size <= zone_elem_inner_size(zone: z)) {
2520 return;
2521 }
2522 } else if (kmem_range_id_contains(range_id: KMEM_RANGE_ID_DATA,
2523 addr: (vm_address_t)pgz_decode(addr, size), size)) {
2524 return;
2525 }
2526
2527 kalloc_data_require_panic(addr, size);
2528}
2529
2530void
2531kalloc_non_data_require(void *addr, vm_size_t size)
2532{
2533 zone_id_t zid = zone_id_for_element(addr, esize: size);
2534
2535 if (zid != ZONE_ID_INVALID) {
2536 zone_t z = &zone_array[zid];
2537 zone_security_flags_t zsflags = zone_security_array[zid];
2538 switch (zsflags.z_kheap_id) {
2539 case KHEAP_ID_NONE:
2540 if (!zsflags.z_kalloc_type) {
2541 break;
2542 }
2543 OS_FALLTHROUGH;
2544 case KHEAP_ID_KT_VAR:
2545 if (size < zone_elem_inner_size(zone: z)) {
2546 return;
2547 }
2548 break;
2549 default:
2550 break;
2551 }
2552 } else if (!kmem_range_id_contains(range_id: KMEM_RANGE_ID_DATA,
2553 addr: (vm_address_t)pgz_decode(addr, size), size)) {
2554 return;
2555 }
2556
2557 kalloc_non_data_require_panic(addr, size);
2558}
2559
2560void *
2561kalloc_type_impl_external(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2562{
2563 /*
2564	 * Callsites from a kext that isn't in the BootKC on macOS, or any
2565	 * callsite on armv7, are not processed during startup; default to
2566	 * using kheap_alloc.
2567	 *
2568	 * Additionally, when the size is greater than KHEAP_MAX_SIZE the zone
2569	 * is left NULL, as we need to use the VM for the allocation.
2570	 *
2571 */
2572 if (__improbable(kt_view->kt_zv.zv_zone == ZONE_NULL)) {
2573 vm_size_t size = kalloc_type_get_size(kt_size: kt_view->kt_size);
2574 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2575 return kalloc_ext(kheap_or_kt_view: KHEAP_DEFAULT, size, flags, NULL).addr;
2576 }
2577
2578 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2579 return kalloc_type_impl(kt_view, flags);
2580}
2581
2582void *
2583kalloc_type_var_impl_external(
2584 kalloc_type_var_view_t kt_view,
2585 vm_size_t size,
2586 zalloc_flags_t flags,
2587 void *owner);
2588void *
2589kalloc_type_var_impl_external(
2590 kalloc_type_var_view_t kt_view,
2591 vm_size_t size,
2592 zalloc_flags_t flags,
2593 void *owner)
2594{
2595 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC);
2596 return kalloc_type_var_impl(kt_view, size, flags, owner);
2597}
2598
2599#pragma mark kfree
2600
2601__abortlike
2602static void
2603kfree_heap_confusion_panic(kalloc_heap_t kheap, void *data, size_t size, zone_t z)
2604{
2605 zone_security_flags_t zsflags = zone_security_config(z);
2606 const char *kheap_name = kalloc_heap_names[kheap->kh_heap_id];
2607
2608 if (zsflags.z_kalloc_type) {
2609 panic_include_kalloc_types = true;
2610 kalloc_type_src_zone = z;
2611 panic("kfree: addr %p found in kalloc type zone '%s'"
2612	    " but being freed to %s heap", data, z->z_name, kheap_name);
2613 }
2614
2615 if (zsflags.z_kheap_id == KHEAP_ID_NONE) {
2616 panic("kfree: addr %p, size %zd found in regular zone '%s%s'",
2617 data, size, zone_heap_name(z), z->z_name);
2618 } else {
2619 panic("kfree: addr %p, size %zd found in heap %s* instead of %s*",
2620 data, size, zone_heap_name(z), kheap_name);
2621 }
2622}
2623
2624__abortlike
2625static void
2626kfree_size_confusion_panic(zone_t z, void *data,
2627 size_t oob_offs, size_t size, size_t zsize)
2628{
2629 if (z) {
2630 panic("kfree: addr %p, size %zd (offs:%zd) found in zone '%s%s' "
2631 "with elem_size %zd",
2632 data, size, oob_offs, zone_heap_name(z), z->z_name, zsize);
2633 } else {
2634 panic("kfree: addr %p, size %zd (offs:%zd) not found in any zone",
2635 data, size, oob_offs);
2636 }
2637}
2638
2639__abortlike
2640static void
2641kfree_size_invalid_panic(void *data, size_t size)
2642{
2643 panic("kfree: addr %p trying to free with nonsensical size %zd",
2644 data, size);
2645}
2646
2647__abortlike
2648static void
2649kfree_size_require_panic(void *data, size_t size, size_t min_size,
2650 size_t max_size)
2651{
2652 panic("kfree: addr %p has size %zd, not in specified bounds [%zd - %zd]",
2653 data, size, min_size, max_size);
2654}
2655
2656static void
2657kfree_size_require(
2658 kalloc_heap_t kheap,
2659 void *addr,
2660 vm_size_t min_size,
2661 vm_size_t max_size)
2662{
2663 assert3u(min_size, <=, max_size);
2664 zone_t max_zone = kalloc_zone_for_size(zid: kheap->kh_zstart, size: max_size);
2665 vm_size_t max_zone_size = zone_elem_inner_size(zone: max_zone);
2666 vm_size_t elem_size = zone_element_size(addr, NULL, false, NULL);
2667 if (elem_size > max_zone_size || elem_size < min_size) {
2668 kfree_size_require_panic(data: addr, size: elem_size, min_size, max_size: max_zone_size);
2669 }
2670}
2671
2672static void
2673kfree_large(
2674 vm_offset_t addr,
2675 vm_size_t size,
2676 kmf_flags_t flags,
2677 void *owner)
2678{
2679 size = kmem_free_guard(map: kernel_map, addr, size,
2680 flags: flags | KMF_TAG | KMF_KASAN_GUARD,
2681 guard: kalloc_guard(VM_KERN_MEMORY_NONE, type_hash: 0, owner));
2682
2683 counter_dec(&kalloc_large_count);
2684 counter_add(&kalloc_large_total, amount: -(uint64_t)size);
2685 KALLOC_ZINFO_SFREE(bytes: size);
2686 DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, size, void*, addr);
2687}
2688
2689static void
2690kfree_zone(
2691 void *kheap_or_kt_view __unsafe_indexable,
2692 void *data,
2693 vm_size_t size,
2694 zone_t z,
2695 vm_size_t zsize)
2696{
2697 zone_security_flags_t zsflags = zone_security_config(z);
2698 kalloc_type_var_view_t kt_view;
2699 kalloc_heap_t kheap;
2700 zone_stats_t zstats = NULL;
2701
2702 if (kt_is_var_view(kheap_or_kt_view)) {
2703 kt_view = kt_demangle_var_view(ptr: kheap_or_kt_view);
2704 kheap = kalloc_type_get_heap(kt_view, true);
2705 /*
2706 * Note: If we have cross frees between KHEAP_KT_VAR and KHEAP_DEFAULT
2707 * we will end up having incorrect stats. Cross frees may happen on
2708 * macOS due to allocation from an unprocessed view and free from
2709 * a processed view or vice versa.
2710 */
2711 zstats = kt_view->kt_stats;
2712 } else {
2713 kt_view = NULL;
2714 kheap = kheap_or_kt_view;
2715 }
2716
2717 if (!zstats) {
2718 zstats = kheap->kh_stats;
2719 }
2720
2721 zsflags = zone_security_config(z);
2722 if (kheap == KHEAP_DATA_BUFFERS) {
2723 if (kheap->kh_heap_id != zsflags.z_kheap_id) {
2724 kfree_heap_confusion_panic(kheap, data, size, z);
2725 }
2726 } else {
2727 if ((kheap->kh_heap_id != zsflags.z_kheap_id) &&
2728 (zsflags.z_kheap_id != KHEAP_ID_SHARED)) {
2729 kfree_heap_confusion_panic(kheap, data, size, z);
2730 }
2731 }
2732
2733 DTRACE_VM3(kfree, vm_size_t, size, vm_size_t, zsize, void*, data);
2734
2735 /* needs to be __nosan because the user size might be partial */
2736 __nosan_bzero(dst: data, sz: zsize);
2737 zfree_ext(zone: z, zstats: zstats ?: z->z_stats, addr: data, ZFREE_PACK_SIZE(zsize, size));
2738}
2739
2740void
2741kfree_ext(void *kheap_or_kt_view, void *data, vm_size_t size)
2742{
2743 vm_size_t bucket_size;
2744 zone_t z;
2745
2746 if (data == NULL) {
2747 return;
2748 }
2749
2750 if (size > KFREE_ABSURD_SIZE) {
2751 kfree_size_invalid_panic(data, size);
2752 }
2753
2754 if (size <= KHEAP_MAX_SIZE) {
2755 vm_size_t oob_offs;
2756
2757 bucket_size = zone_element_size(addr: data, z: &z, true, oob_offs: &oob_offs);
2758 if (size + oob_offs > bucket_size || bucket_size == 0) {
2759 kfree_size_confusion_panic(z, data,
2760 oob_offs, size, zsize: bucket_size);
2761 }
2762
2763 data = (char *)data - oob_offs;
2764 kfree_zone(kheap_or_kt_view, data, size, z, zsize: bucket_size);
2765 } else {
2766 kfree_large(addr: (vm_offset_t)data, size, flags: KMF_NONE, NULL);
2767 }
2768}
2769
2770void
2771kfree_addr_ext(kalloc_heap_t kheap, void *data)
2772{
2773 vm_offset_t oob_offs;
2774 vm_size_t size, usize = 0;
2775 zone_t z;
2776
2777 if (data == NULL) {
2778 return;
2779 }
2780
2781 size = zone_element_size(addr: data, z: &z, true, oob_offs: &oob_offs);
2782 if (size) {
2783#if KASAN_CLASSIC
2784 usize = kasan_user_size((vm_offset_t)data);
2785#endif
2786 data = (char *)data - oob_offs;
2787 kfree_zone(kheap_or_kt_view: kheap, data, size: usize, z, zsize: size);
2788 } else {
2789 kfree_large(addr: (vm_offset_t)data, size: 0, flags: KMF_GUESS_SIZE, NULL);
2790 }
2791}
2792
2793#if XNU_PLATFORM_MacOSX
2794void
2795kfree_external(void *addr, vm_size_t size);
2796void
2797kfree_external(void *addr, vm_size_t size)
2798{
2799 kalloc_heap_t kheap = KHEAP_DEFAULT;
2800
2801 kfree_ext(kheap_or_kt_view: kheap, data: addr, size);
2802}
2803#endif /* XNU_PLATFORM_MacOSX */
2804
2805void
2806(kheap_free_bounded)(kalloc_heap_t kheap, void *addr,
2807 vm_size_t min_sz, vm_size_t max_sz)
2808{
2809 if (__improbable(addr == NULL)) {
2810 return;
2811 }
2812 kfree_size_require(kheap, addr, min_size: min_sz, max_size: max_sz);
2813 kfree_addr_ext(kheap, data: addr);
2814}
2815
2816void *
2817kalloc_type_impl_internal(kalloc_type_view_t kt_view, zalloc_flags_t flags)
2818{
2819 zone_stats_t zs = kt_view->kt_zv.zv_stats;
2820 zone_t z = kt_view->kt_zv.zv_zone;
2821 zone_stats_t zs_cpu = zpercpu_get(zs);
2822
2823 if ((flags & Z_SET_NOTSHARED) ||
2824 os_atomic_load(&zs_cpu->zs_alloc_not_shared, relaxed)) {
2825 return zalloc_ext(zone: z, zstats: zs, flags).addr;
2826 }
2827
2828 assert(zone_security_config(z).z_kheap_id != KHEAP_ID_DATA_BUFFERS);
2829 return zalloc_ext(zone: kt_view->kt_zshared, zstats: zs, flags: flags | Z_SET_NOTSHARED).addr;
2830}
2831
2832void
2833kfree_type_impl_external(kalloc_type_view_t kt_view, void *ptr)
2834{
2835 /*
2836	 * If the callsite is from a kext that isn't in the BootKC, it wasn't
2837	 * processed during startup, so default to using kheap_free.
2838	 *
2839	 * Additionally, when the size is greater than KHEAP_MAX_SIZE the zone
2840	 * is left NULL, as we need to use the VM for the allocation/free.
2841 */
2842 if (kt_view->kt_zv.zv_zone == ZONE_NULL) {
2843 return kheap_free(KHEAP_DEFAULT, ptr,
2844 kalloc_type_get_size(kt_view->kt_size));
2845 }
2846 return kfree_type_impl(kt_view, ptr);
2847}
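
/*
 * Usage sketch (illustrative; assumes the kalloc_type()/kfree_type() macros
 * from kalloc.h, and "struct foo" is a hypothetical type): in-kernel callers
 * allocate typed memory through the macro layer, which emits the
 * kalloc_type_view consumed above, e.g.:
 *
 *	struct foo *p = kalloc_type(struct foo, Z_WAITOK | Z_ZERO);
 *	if (p) {
 *		...
 *		kfree_type(struct foo, p);
 *	}
 *
 * Views that weren't processed at startup reach the fallback paths above
 * with zv_zone == ZONE_NULL and use the default heap instead.
 */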
2848
2849void
2850kfree_type_var_impl_external(
2851 kalloc_type_var_view_t kt_view,
2852 void *ptr,
2853 vm_size_t size);
2854void
2855kfree_type_var_impl_external(
2856 kalloc_type_var_view_t kt_view,
2857 void *ptr,
2858 vm_size_t size)
2859{
2860 return kfree_type_var_impl(kt_view, ptr, size);
2861}
2862
2863void
2864kfree_data_external(void *ptr, vm_size_t size);
2865void
2866kfree_data_external(void *ptr, vm_size_t size)
2867{
2868 return kheap_free(KHEAP_DATA_BUFFERS, ptr, size);
2869}
2870
2871void
2872kfree_data_addr_external(void *ptr);
2873void
2874kfree_data_addr_external(void *ptr)
2875{
2876 return kheap_free_addr(KHEAP_DATA_BUFFERS, ptr);
2877}
2878
2879#pragma mark krealloc
2880
2881__abortlike
2882static void
2883krealloc_size_invalid_panic(void *data, size_t size)
2884{
2885 panic("krealloc: addr %p trying to free with nonsensical size %zd",
2886 data, size);
2887}
2888
2889__attribute__((noinline))
2890static struct kalloc_result
2891krealloc_large(
2892 kalloc_heap_t kheap,
2893 vm_offset_t addr,
2894 vm_size_t old_size,
2895 vm_size_t new_size,
2896 zalloc_flags_t flags,
2897 uint16_t kt_hash,
2898 void *owner __unused)
2899{
2900 kmr_flags_t kmr_flags = KMR_FREEOLD | KMR_TAG | KMR_KASAN_GUARD;
2901 vm_size_t new_req_size = new_size;
2902 vm_size_t old_req_size = old_size;
2903 uint64_t delta;
2904 kmem_return_t kmr;
2905 vm_tag_t tag;
2906
2907 if (flags & Z_NOFAIL) {
2908 panic("trying to kalloc(Z_NOFAIL) with a large size (%zd)",
2909 (size_t)new_req_size);
2910 }
2911
2912 /*
2913	 * kmem_alloc() can block, so return right away when Z_NOWAIT is set.
2914	 *
2915	 * Also, quickly reject sizes larger than our address space,
2916	 * as kt_size or IOMallocArraySize() expect this.
2917 */
2918 if ((flags & Z_NOWAIT) ||
2919 (new_req_size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS)) {
2920 return (struct kalloc_result){ };
2921 }
2922
2923 /*
2924 * (73465472) on Intel we didn't use to pass this flag,
2925	 * which in turn allowed kalloc_large() memory to be shared
2926	 * with user space directly.
2927 *
2928 * We're bound by this unfortunate ABI.
2929 */
2930 if ((flags & Z_MAY_COPYINMAP) == 0) {
2931#ifndef __x86_64__
2932 kmr_flags |= KMR_KOBJECT;
2933#endif
2934 } else {
2935 assert(kheap == KHEAP_DATA_BUFFERS);
2936 kmr_flags &= ~KMR_TAG;
2937 }
2938 if (flags & Z_NOPAGEWAIT) {
2939 kmr_flags |= KMR_NOPAGEWAIT;
2940 }
2941 if (flags & Z_ZERO) {
2942 kmr_flags |= KMR_ZERO;
2943 }
2944 if (kheap == KHEAP_DATA_BUFFERS) {
2945 kmr_flags |= KMR_DATA;
2946 } else if (flags & (Z_KALLOC_ARRAY | Z_SPRAYQTN)) {
2947 kmr_flags |= KMR_SPRAYQTN;
2948 }
2949 if (flags & Z_REALLOCF) {
2950 kmr_flags |= KMR_REALLOCF;
2951 }
2952
2953
2954 tag = zalloc_flags_get_tag(flags);
2955 if (flags & Z_VM_TAG_BT_BIT) {
2956 tag = vm_tag_bt() ?: tag;
2957 }
2958 if (tag == VM_KERN_MEMORY_NONE) {
2959 tag = kheap->kh_tag;
2960 }
2961
2962 kmr = kmem_realloc_guard(map: kernel_map, oldaddr: addr, oldsize: old_req_size, newsize: new_req_size,
2963 flags: kmr_flags, guard: kalloc_guard(tag, type_hash: kt_hash, owner));
2964
2965 new_size = round_page(x: new_req_size);
2966 old_size = round_page(x: old_req_size);
2967
2968 if (kmr.kmr_address != 0) {
2969 delta = (uint64_t)(new_size - old_size);
2970 } else if (flags & Z_REALLOCF) {
2971 counter_dec(&kalloc_large_count);
2972 delta = (uint64_t)(-old_size);
2973 } else {
2974 delta = 0;
2975 }
2976
2977 counter_add(&kalloc_large_total, amount: delta);
2978 KALLOC_ZINFO_SALLOC(bytes: delta);
2979
2980 if (addr != 0 || (flags & Z_REALLOCF)) {
2981 DTRACE_VM3(kfree, vm_size_t, old_size, vm_size_t, old_req_size,
2982 void*, addr);
2983 }
2984 if (__improbable(kmr.kmr_address == 0)) {
2985 return (struct kalloc_result){ };
2986 }
2987
2988 DTRACE_VM3(kalloc, vm_size_t, new_size, vm_size_t, new_req_size,
2989 void*, kmr.kmr_address);
2990
2991 if (flags & Z_KALLOC_ARRAY) {
2992 kmr.kmr_address = __kalloc_array_encode_vm(addr: kmr.kmr_address,
2993 size: new_req_size);
2994 }
2995 return (struct kalloc_result){ .addr = kmr.kmr_ptr, .size = new_req_size };
2996}
2997
2998#undef krealloc_ext
2999
3000struct kalloc_result
3001krealloc_ext(
3002 void *kheap_or_kt_view __unsafe_indexable,
3003 void *addr,
3004 vm_size_t old_size,
3005 vm_size_t new_size,
3006 zalloc_flags_t flags,
3007 void *owner)
3008{
3009 vm_size_t old_bucket_size, new_bucket_size, min_size;
3010 kalloc_type_var_view_t kt_view;
3011 kalloc_heap_t kheap;
3012 zone_stats_t zstats = NULL;
3013 struct kalloc_result kr;
3014 vm_offset_t oob_offs = 0;
3015 zone_t old_z, new_z;
3016 uint16_t kt_hash = 0;
3017 zone_id_t zstart;
3018
3019 if (old_size > KFREE_ABSURD_SIZE) {
3020 krealloc_size_invalid_panic(data: addr, size: old_size);
3021 }
3022
3023 if (addr == NULL && new_size == 0) {
3024 return (struct kalloc_result){ };
3025 }
3026
3027 if (kt_is_var_view(kheap_or_kt_view)) {
3028 kt_view = kt_demangle_var_view(ptr: kheap_or_kt_view);
3029 kheap = kalloc_type_get_heap(kt_view, false);
3030 /*
3031 * Similar to kalloc_ext: Use stats from view if present,
3032 * else use stats from kheap.
3033 *
3034 * krealloc_type isn't exposed to kexts, so we don't need to
3035 * handle cross frees and can rely on stats from view or kheap.
3036 */
3037 zstats = kt_view->kt_stats;
3038 kt_hash = KT_GET_HASH(kt_view->kt_flags);
3039 zstart = kt_view->kt_heap_start ?: kheap->kh_zstart;
3040 } else {
3041 kt_view = NULL;
3042 kheap = kheap_or_kt_view;
3043 kt_hash = kheap->kh_type_hash;
3044 zstart = kheap->kh_zstart;
3045 }
3046
3047 if (!zstats) {
3048 zstats = kheap->kh_stats;
3049 }
3050 /*
3051	 * Find out the size of the bucket in which the newly sized allocation
3052 * would land. If it matches the bucket of the original allocation,
3053 * simply return the same address.
3054 */
3055 if (new_size == 0) {
3056 new_z = ZONE_NULL;
3057 new_bucket_size = new_size = 0;
3058 } else {
3059 zstart = kalloc_use_shared_heap(kheap, zstats, zstart, flags: &flags);
3060 new_z = kalloc_zone_for_size_with_flags(zid: zstart, size: new_size, flags);
3061 new_bucket_size = new_z ? zone_elem_inner_size(zone: new_z) : round_page(x: new_size);
3062 }
3063#if !KASAN_CLASSIC
3064 if (flags & Z_FULLSIZE) {
3065 new_size = new_bucket_size;
3066 }
3067#endif /* !KASAN_CLASSIC */
3068
3069 if (addr == NULL) {
3070 old_z = ZONE_NULL;
3071 old_size = old_bucket_size = 0;
3072 } else if (kheap_size_from_zone(addr, size: old_size, flags)) {
3073 old_bucket_size = zone_element_size(addr, z: &old_z, true, oob_offs: &oob_offs);
3074 if (old_size + oob_offs > old_bucket_size || old_bucket_size == 0) {
3075 kfree_size_confusion_panic(z: old_z, data: addr,
3076 oob_offs, size: old_size, zsize: old_bucket_size);
3077 }
3078 __builtin_assume(old_z != ZONE_NULL);
3079 } else {
3080 old_z = ZONE_NULL;
3081 old_bucket_size = round_page(x: old_size);
3082 }
3083 min_size = MIN(old_size, new_size);
3084
3085 if (old_bucket_size == new_bucket_size && old_z) {
3086 kr.addr = (char *)addr - oob_offs;
3087 kr.size = new_size;
3088#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
3089 kr.addr = zone_element_pgz_oob_adjust(addr: kr.addr,
3090 req_size: new_size, elem_size: new_bucket_size);
3091 if (kr.addr != addr) {
3092 memmove(dst: kr.addr, src: addr, n: min_size);
3093 bzero(s: (char *)kr.addr + min_size,
3094 n: kr.size - min_size);
3095 }
3096#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */
3097#if KASAN
3098 /*
3099 * On KASAN kernels, treat a reallocation effectively as a new
3100 * allocation and add a sanity check around the existing one
3101	 * w.r.t. the old requested size. On KASAN_CLASSIC this doesn't amount
3102	 * to much extra work; on KASAN_TBI, assign a new tag both to the
3103	 * buffer and to the potential free space.
3104 */
3105#if KASAN_CLASSIC
3106 kasan_check_alloc((vm_offset_t)addr, old_bucket_size, old_size);
3107 kasan_alloc((vm_offset_t)addr, new_bucket_size, kr.size,
3108 KASAN_GUARD_SIZE, false, __builtin_frame_address(0));
3109#endif /* KASAN_CLASSIC */
3110#if KASAN_TBI
3111 /*
3112 * Validate the current buffer, then generate a new tag,
3113 * even if the address is stable, it's a "new" allocation.
3114 */
3115 __asan_loadN((vm_offset_t)addr, old_size);
3116 kr.addr = (void *)vm_memtag_assign_tag((vm_offset_t)kr.addr, kr.size);
3117 vm_memtag_set_tag((vm_offset_t)kr.addr, kr.size);
3118 kasan_tbi_retag_unused_space((vm_offset_t)kr.addr, new_bucket_size, kr.size);
3119#endif /* KASAN_TBI */
3120#endif /* KASAN */
3121 goto out_success;
3122 }
3123
3124#if !KASAN
3125 /*
3126	 * Fall through to krealloc_large() for KASAN,
3127	 * because we can't use kasan_check_alloc()
3128	 * on kalloc_large() memory.
3129	 *
3130	 * kmem_realloc_guard() will perform all the validation
3131	 * and re-tagging.
3132 */
3133 if (old_bucket_size == new_bucket_size) {
3134 kr.addr = (char *)addr - oob_offs;
3135 kr.size = new_size;
3136 goto out_success;
3137 }
3138#endif
3139
3140 if (addr && !old_z && new_size && !new_z) {
3141 return krealloc_large(kheap, addr: (vm_offset_t)addr,
3142 old_size, new_size, flags, kt_hash, owner);
3143 }
3144
3145 if (!new_size) {
3146 kr.addr = NULL;
3147 kr.size = 0;
3148 } else if (new_z) {
3149 kr = kalloc_zone(z: new_z, zstats,
3150 flags: flags & ~Z_KALLOC_ARRAY, req_size: new_size);
3151 } else if (old_z || addr == NULL) {
3152 kr = kalloc_large(kheap, req_size: new_size,
3153 flags: flags & ~Z_KALLOC_ARRAY, kt_hash, owner);
3154 }
3155
3156 if (addr && kr.addr) {
3157 __nosan_memcpy(dst: kr.addr, src: addr, sz: min_size);
3158 }
3159
3160 if (addr && (kr.addr || (flags & Z_REALLOCF) || !new_size)) {
3161 if (old_z) {
3162 kfree_zone(kheap_or_kt_view,
3163 data: (char *)addr - oob_offs, size: old_size,
3164 z: old_z, zsize: old_bucket_size);
3165 } else {
3166 kfree_large(addr: (vm_offset_t)addr, size: old_size, flags: KMF_NONE, owner);
3167 }
3168 }
3169
3170 if (__improbable(kr.addr == NULL)) {
3171 return kr;
3172 }
3173
3174out_success:
3175 if ((flags & Z_KALLOC_ARRAY) == 0) {
3176 return kr;
3177 }
3178
3179 if (new_z) {
3180 kr.addr = __kalloc_array_encode_zone(z: new_z,
3181 ptr: kr.addr, size: kr.size);
3182 } else {
3183 kr.addr = (void *)__kalloc_array_encode_vm(addr: (vm_offset_t)kr.addr,
3184 size: kr.size);
3185 }
3186 return kr;
3187}
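
/*
 * Usage sketch (illustrative only): krealloc_ext() returns both the new
 * address and the granted size. On success the old allocation has been
 * freed (or reused in place when the bucket matches); with Z_REALLOCF it
 * is freed even on failure, e.g.:
 *
 *	struct kalloc_result kr;
 *
 *	kr = krealloc_ext(KHEAP_DATA_BUFFERS, buf, old_len, new_len,
 *	    Z_WAITOK | Z_ZERO, NULL);
 *	if (kr.addr == NULL) {
 *		// without Z_REALLOCF, "buf" is still valid here
 *	} else {
 *		buf = kr.addr;
 *		old_len = kr.size;
 *	}
 */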
3188
3189void *
3190krealloc_data_external(
3191 void *ptr,
3192 vm_size_t old_size,
3193 vm_size_t new_size,
3194 zalloc_flags_t flags);
3195void *
3196krealloc_data_external(
3197 void *ptr,
3198 vm_size_t old_size,
3199 vm_size_t new_size,
3200 zalloc_flags_t flags)
3201{
3202 flags = Z_VM_TAG_BT(flags & Z_KPI_MASK, VM_KERN_MEMORY_KALLOC_DATA);
3203 return krealloc_ext(kheap_or_kt_view: KHEAP_DATA_BUFFERS, addr: ptr, old_size, new_size, flags, NULL).addr;
3204}
3205
3206__startup_func
3207static void
3208kheap_init(kalloc_heap_t parent_heap, kalloc_heap_t kheap)
3209{
3210 kheap->kh_zstart = parent_heap->kh_zstart;
3211 kheap->kh_heap_id = parent_heap->kh_heap_id;
3212 kheap->kh_tag = parent_heap->kh_tag;
3213 kheap->kh_stats = zalloc_percpu_permanent_type(struct zone_stats);
3214 zone_view_count += 1;
3215}
3216
3217__startup_func
3218static void
3219kheap_init_data(kalloc_heap_t kheap)
3220{
3221 kheap_init(parent_heap: KHEAP_DATA_BUFFERS, kheap);
3222 kheap->kh_views = KHEAP_DATA_BUFFERS->kh_views;
3223 KHEAP_DATA_BUFFERS->kh_views = kheap;
3224}
3225
3226__startup_func
3227static void
3228kheap_init_var(kalloc_heap_t kheap)
3229{
3230 uint16_t idx;
3231 struct kheap_info *parent_heap;
3232
3233 kheap_init(parent_heap: KHEAP_KT_VAR, kheap);
3234 idx = kmem_get_random16(upper_limit: kt_var_heaps - kt_var_ptr_heaps - 1) +
3235 KT_VAR__FIRST_FLEXIBLE_HEAP;
3236 parent_heap = &kalloc_type_heap_array[idx];
3237 kheap->kh_zstart = parent_heap->kh_zstart;
3238 kheap->kh_type_hash = (uint16_t) kalloc_hash_adjust(
3239 hash: (uint32_t) early_random(), shift: 0);
3240 kheap->kh_views = parent_heap->kh_views;
3241 parent_heap->kh_views = kheap;
3242}
3243
3244__startup_func
3245void
3246kheap_startup_init(kalloc_heap_t kheap)
3247{
3248 switch (kheap->kh_heap_id) {
3249 case KHEAP_ID_DATA_BUFFERS:
3250 kheap_init_data(kheap);
3251 break;
3252 case KHEAP_ID_KT_VAR:
3253 kheap_init_var(kheap);
3254 break;
3255 default:
3256 panic("kalloc_heap_startup_init: invalid KHEAP_ID: %d",
3257 kheap->kh_heap_id);
3258 }
3259}
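
/*
 * Usage sketch (illustrative; assumes the KALLOC_HEAP_DEFINE macro from
 * kalloc.h, and the names below are hypothetical): subsystems declare a
 * named heap view which kheap_startup_init() links at boot either to
 * KHEAP_DATA_BUFFERS or to a randomly chosen KHEAP_KT_VAR backing heap:
 *
 *	KALLOC_HEAP_DEFINE(KHEAP_MY_SUBSYS, "my_subsys", KHEAP_ID_KT_VAR);
 *
 *	void *p = kheap_alloc(KHEAP_MY_SUBSYS, len, Z_WAITOK);
 *	...
 *	kheap_free(KHEAP_MY_SUBSYS, p, len);
 */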
3260
3261#pragma mark IOKit/libkern helpers
3262
3263#if XNU_PLATFORM_MacOSX
3264
3265void *
3266kern_os_malloc_external(size_t size);
3267void *
3268kern_os_malloc_external(size_t size)
3269{
3270 if (size == 0) {
3271 return NULL;
3272 }
3273
3274 return kheap_alloc(KERN_OS_MALLOC, size,
3275 Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN));
3276}
3277
3278void
3279kern_os_free_external(void *addr);
3280void
3281kern_os_free_external(void *addr)
3282{
3283 kheap_free_addr(KERN_OS_MALLOC, addr);
3284}
3285
3286void *
3287kern_os_realloc_external(void *addr, size_t nsize);
3288void *
3289kern_os_realloc_external(void *addr, size_t nsize)
3290{
3291 zalloc_flags_t flags = Z_VM_TAG_BT(Z_WAITOK_ZERO, VM_KERN_MEMORY_LIBKERN);
3292 vm_size_t osize, oob_offs = 0;
3293
3294 if (addr == NULL) {
3295 return kern_os_malloc_external(size: nsize);
3296 }
3297
3298 osize = zone_element_size(addr, NULL, false, oob_offs: &oob_offs);
3299 if (osize == 0) {
3300 osize = kmem_size_guard(map: kernel_map, addr: (vm_offset_t)addr,
3301 guard: kalloc_guard(VM_KERN_MEMORY_LIBKERN, type_hash: 0, NULL));
3302#if KASAN_CLASSIC
3303 } else {
3304 osize = kasan_user_size((vm_offset_t)addr);
3305#endif
3306 }
3307 return __kheap_realloc(kheap: KERN_OS_MALLOC, addr, old_size: osize - oob_offs, new_size: nsize, flags, NULL);
3308}
3309
3310#endif /* XNU_PLATFORM_MacOSX */
3311
3312void
3313kern_os_zfree(zone_t zone, void *addr, vm_size_t size)
3314{
3315#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3316#pragma unused(size)
3317 zfree(zone, addr);
3318#else
3319 if (zone_owns(zone, addr)) {
3320 zfree(zone, addr);
3321 } else {
3322 /*
3323		 * Third party kexts might not know about the operator new
3324		 * overrides and may have allocated from the default heap.
3325 */
3326 printf("kern_os_zfree: kheap_free called for object from zone %s\n",
3327 zone->z_name);
3328 kheap_free(KHEAP_DEFAULT, addr, size);
3329 }
3330#endif
3331}
3332
3333bool
3334IOMallocType_from_vm(kalloc_type_view_t ktv)
3335{
3336 return kalloc_type_from_vm(kt_flags: ktv->kt_flags);
3337}
3338
3339void
3340kern_os_typed_free(kalloc_type_view_t ktv, void *addr, vm_size_t esize)
3341{
3342#if ZSECURITY_CONFIG(STRICT_IOKIT_FREE)
3343#pragma unused(esize)
3344#else
3345 /*
3346	 * For third party kexts compiled with an SDK prior to macOS 11,
3347	 * directly calling new on an OSObject that is defined in xnu or in
3348	 * first party kexts will allocate from the default heap, as it goes
3349	 * through OSObject_operator_new_external. If such an object is freed
3350	 * by xnu, it panics, as xnu uses the typed free which requires the
3351	 * object to have been allocated in a kalloc.type zone.
3352	 * To work around this issue, detect if the allocation being freed is
3353	 * from the default heap and allow freeing to it.
3354 */
3355 zone_id_t zid = zone_id_for_element(addr, esize);
3356 if (__probable(zid < MAX_ZONES)) {
3357 zone_security_flags_t zsflags = zone_security_array[zid];
3358 if (zsflags.z_kheap_id == KHEAP_ID_KT_VAR) {
3359 return kheap_free(KHEAP_DEFAULT, addr, esize);
3360 }
3361 }
3362#endif
3363 kfree_type_impl_external(kt_view: ktv, ptr: addr);
3364}
3365
3366#pragma mark tests
3367#if DEBUG || DEVELOPMENT
3368
3369#include <sys/random.h>
3370
3371/*
3372 * Ensure that the feature is on when the ZSECURITY_CONFIG is present.
3373 *
3374 * Note: Presence of zones with name kalloc.type* is used to
3375 * determine if the feature is on.
3376 */
3377static int
3378kalloc_type_feature_on(void)
3379{
3380 boolean_t zone_found = false;
3381 const char kalloc_type_str[] = "kalloc.type";
3382 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3383 zone_t z = kalloc_type_zarray[i];
3384 while (z != NULL) {
3385 zone_found = true;
3386 if (strncmp(z->z_name, kalloc_type_str,
3387 strlen(kalloc_type_str)) != 0) {
3388 return 0;
3389 }
3390 z = z->z_kt_next;
3391 }
3392 }
3393
3394 if (!zone_found) {
3395 return 0;
3396 }
3397
3398 return 1;
3399}
3400
3401/*
3402 * Ensure that the policy uses the zone budget completely
3403 */
3404static int
3405kalloc_type_test_policy(int64_t in)
3406{
3407 uint16_t zone_budget = (uint16_t) in;
3408 uint16_t max_bucket_freq = 25;
3409 uint16_t freq_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3410 uint16_t freq_total_list[MAX_K_ZONE(kt_zone_cfg)] = {};
3411 uint16_t zones_per_sig[MAX_K_ZONE(kt_zone_cfg)] = {};
3412 uint16_t zones_per_type[MAX_K_ZONE(kt_zone_cfg)] = {};
3413 uint16_t random[MAX_K_ZONE(kt_zone_cfg) * 2];
3414 uint16_t wasted_zone_budget = 0, total_types = 0;
3415 uint16_t n_zones = 0, n_zones_cal = 0;
3416 int ret = 0;
3417
3418 /*
3419 * Need a minimum of 2 zones per size class
3420 */
3421 if (zone_budget < MAX_K_ZONE(kt_zone_cfg) * 2) {
3422 return ret;
3423 }
3424 read_random((void *)&random[0], sizeof(random));
3425 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3426 uint16_t r1 = (random[2 * i] % max_bucket_freq) + 1;
3427 uint16_t r2 = (random[2 * i + 1] % max_bucket_freq) + 1;
3428
3429 freq_list[i] = r1 > r2 ? r2 : r1;
3430 freq_total_list[i] = r1 > r2 ? r1 : r2;
3431 }
3432 wasted_zone_budget = kalloc_type_apply_policy(
3433 freq_list, freq_total_list,
3434 zones_per_sig, zones_per_type, zone_budget);
3435
3436 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3437 total_types += freq_total_list[i];
3438 }
3439
3440 n_zones = kmem_get_random16(total_types);
3441 printf("Dividing %u zones amongst %u types\n", n_zones, total_types);
3442 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3443 uint16_t n_zones_for_type = kalloc_type_zones_for_type(n_zones,
3444 freq_total_list[i], total_types,
3445 (i == MAX_K_ZONE(kt_zone_cfg) - 1) ? true : false);
3446
3447 n_zones_cal += n_zones_for_type;
3448
3449 printf("%u\t%u\n", freq_total_list[i], n_zones_for_type);
3450 }
3451 printf("-----------------------\n%u\t%u\n", total_types,
3452 n_zones_cal);
3453
3454 if ((wasted_zone_budget == 0) && (n_zones == n_zones_cal)) {
3455 ret = 1;
3456 }
3457 return ret;
3458}
3459
3460/*
3461 * Ensure that the sizes of kalloc_type adopters fit in the zones
3462 * they have been assigned.
3463 */
3464static int
3465kalloc_type_check_size(zone_t z)
3466{
3467 kalloc_type_view_t kt_cur = (kalloc_type_view_t) z->z_views;
3468
3469 while (kt_cur != NULL) {
3470 if (kalloc_type_get_size(kt_cur->kt_size) > z->z_elem_size) {
3471 return 0;
3472 }
3473 kt_cur = (kalloc_type_view_t) kt_cur->kt_zv.zv_next;
3474 }
3475
3476 return 1;
3477}
3478
3479struct test_kt_data {
3480 int a;
3481};
3482
3483static int
3484kalloc_type_test_data_redirect(void)
3485{
3486 struct kalloc_type_view ktv_data = {
3487 .kt_flags = KALLOC_TYPE_ADJUST_FLAGS(KT_SHARED_ACCT, struct test_kt_data),
3488 .kt_signature = KALLOC_TYPE_EMIT_SIG(struct test_kt_data),
3489 };
3490 if (!kalloc_type_is_data(ktv_data.kt_flags)) {
3491 printf("%s: data redirect failed\n", __func__);
3492 return 0;
3493 }
3494 return 1;
3495}
3496
3497static int
3498run_kalloc_type_test(int64_t in, int64_t *out)
3499{
3500 *out = 0;
3501 for (uint16_t i = 0; i < MAX_K_ZONE(kt_zone_cfg); i++) {
3502 zone_t z = kalloc_type_zarray[i];
3503 while (z != NULL) {
3504 if (!kalloc_type_check_size(z)) {
3505 printf("%s: size check failed\n", __func__);
3506 return 0;
3507 }
3508 z = z->z_kt_next;
3509 }
3510 }
3511
3512 if (!kalloc_type_test_policy(in)) {
3513 printf("%s: policy check failed\n", __func__);
3514 return 0;
3515 }
3516
3517 if (!kalloc_type_feature_on()) {
3518 printf("%s: boot-arg is on but feature isn't\n", __func__);
3519 return 0;
3520 }
3521
3522 if (!kalloc_type_test_data_redirect()) {
3523 printf("%s: kalloc_type redirect for all data signature failed\n",
3524 __func__);
3525 return 0;
3526 }
3527
3528 printf("%s: test passed\n", __func__);
3529
3530 *out = 1;
3531 return 0;
3532}
3533SYSCTL_TEST_REGISTER(kalloc_type, run_kalloc_type_test);
3534
3535static vm_size_t
3536test_bucket_size(kalloc_heap_t kheap, vm_size_t size)
3537{
3538 zone_t z = kalloc_zone_for_size(kheap->kh_zstart, size);
3539
3540 return z ? zone_elem_inner_size(z) : round_page(size);
3541}
3542
3543static int
3544run_kalloc_test(int64_t in __unused, int64_t *out)
3545{
3546 *out = 0;
3547 uint64_t *data_ptr;
3548 void *strippedp_old, *strippedp_new;
3549 size_t alloc_size = 0, old_alloc_size = 0;
3550 struct kalloc_result kr = {};
3551
3552 printf("%s: test running\n", __func__);
3553
3554 /*
3555 * Test size 0: alloc, free, realloc
3556 */
3557 data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3558 NULL).addr;
3559 if (!data_ptr) {
3560 printf("%s: kalloc 0 returned null\n", __func__);
3561 return 0;
3562 }
3563 kheap_free(KHEAP_DATA_BUFFERS, data_ptr, alloc_size);
3564
3565 data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size, Z_WAITOK | Z_NOFAIL,
3566 NULL).addr;
3567 alloc_size = sizeof(uint64_t) + 1;
3568 data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, kr.addr, old_alloc_size,
3569 alloc_size, Z_WAITOK | Z_NOFAIL, NULL).addr;
3570 if (!data_ptr) {
3571 printf("%s: krealloc -> old size 0 failed\n", __func__);
3572 return 0;
3573 }
3574 *data_ptr = 0;
3575
3576 /*
3577 * Test krealloc: same sizeclass, different size classes, 2pgs,
3578 * VM (with owner)
3579 */
3580 old_alloc_size = alloc_size;
3581 alloc_size++;
3582 kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3583 Z_WAITOK | Z_NOFAIL, NULL);
3584
3585 strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
3586 strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);
3587
3588 if (!kr.addr || (strippedp_old != strippedp_new) ||
3589 (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) !=
3590 test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3591 printf("%s: krealloc -> same size class failed\n", __func__);
3592 return 0;
3593 }
3594 data_ptr = kr.addr;
3595 *data_ptr = 0;
3596
3597 old_alloc_size = alloc_size;
3598 alloc_size *= 2;
3599 kr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, old_alloc_size, alloc_size,
3600 Z_WAITOK | Z_NOFAIL, NULL);
3601
3602 strippedp_old = (void *)vm_memtag_canonicalize_address((vm_offset_t)data_ptr);
3603 strippedp_new = (void *)vm_memtag_canonicalize_address((vm_offset_t)kr.addr);
3604
3605 if (!kr.addr || (strippedp_old == strippedp_new) ||
3606 (test_bucket_size(KHEAP_DATA_BUFFERS, kr.size) ==
3607 test_bucket_size(KHEAP_DATA_BUFFERS, old_alloc_size))) {
3608 printf("%s: krealloc -> different size class failed\n", __func__);
3609 return 0;
3610 }
3611 data_ptr = kr.addr;
3612 *data_ptr = 0;
3613
3614 kheap_free(KHEAP_DATA_BUFFERS, kr.addr, alloc_size);
3615
3616 alloc_size = 3544;
3617 data_ptr = kalloc_ext(KHEAP_DATA_BUFFERS, alloc_size,
3618 Z_WAITOK | Z_FULLSIZE, &data_ptr).addr;
3619 if (!data_ptr) {
3620		printf("%s: kalloc 3544 with owner and Z_FULLSIZE returned null\n",
3621 __func__);
3622 return 0;
3623 }
3624 *data_ptr = 0;
3625
3626 data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, alloc_size,
3627 PAGE_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3628 if (!data_ptr) {
3629		printf("%s: krealloc -> 2pgs returned null\n", __func__);
3630 return 0;
3631 }
3632 *data_ptr = 0;
3633
3634 data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, PAGE_SIZE * 2,
3635 KHEAP_MAX_SIZE * 2, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3636 if (!data_ptr) {
3637		printf("%s: krealloc -> VM1 returned null\n", __func__);
3638 return 0;
3639 }
3640 *data_ptr = 0;
3641
3642 data_ptr = krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 2,
3643 KHEAP_MAX_SIZE * 4, Z_REALLOCF | Z_WAITOK, &data_ptr).addr;
3644	if (!data_ptr) {
3645		printf("%s: krealloc -> VM2 returned null\n", __func__);
3646		return 0;
3647	}
3648	*data_ptr = 0;
3649
3650 krealloc_ext(KHEAP_DATA_BUFFERS, data_ptr, KHEAP_MAX_SIZE * 4,
3651 0, Z_REALLOCF | Z_WAITOK, &data_ptr);
3652
3653 printf("%s: test passed\n", __func__);
3654 *out = 1;
3655 return 0;
3656}
3657SYSCTL_TEST_REGISTER(kalloc, run_kalloc_test);
3658
3659#endif
3660