/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#ifndef _KERN_ZALLOC_INTERNAL_H_
#define _KERN_ZALLOC_INTERNAL_H_

#include <kern/zalloc.h>
#include <kern/locks.h>
#include <kern/simple_lock.h>

#include <os/atomic_private.h>
#include <sys/queue.h>
#include <vm/vm_map_internal.h>

#if KASAN
#include <san/kasan.h>
#include <kern/spl.h>
#endif /* KASAN */

/*
 * Disable zalloc zero validation under KASAN, as it would be
 * redundant with the checking KASAN already performs.
 */
#if KASAN
#define ZALLOC_ENABLE_ZERO_CHECK 0
#else
#define ZALLOC_ENABLE_ZERO_CHECK 1
#endif

#if KASAN
#define ZALLOC_ENABLE_LOGGING 0
#elif DEBUG || DEVELOPMENT
#define ZALLOC_ENABLE_LOGGING 1
#else
#define ZALLOC_ENABLE_LOGGING 0
#endif

/*!
 * @file <kern/zalloc_internal.h>
 *
 * @abstract
 * Exposes some guts of zalloc to interact with the VM, debugging, copyio and
 * kalloc subsystems.
 */

__BEGIN_DECLS

#pragma GCC visibility push(hidden)

/*
 * A zone is a collection of fixed size blocks for which there
 * is fast allocation/deallocation access. Kernel routines can
 * use zones to manage data structures dynamically, creating a zone
 * for each type of data structure to be managed.
 *
 */

/*!
 * @typedef zone_pva_t
 *
 * @brief
 * Type used to point to a page virtual address in the zone allocator.
 *
 * @description
 * - Valid pages have the top bit set.
 * - 0 represents the "NULL" page
 * - non 0 values with the top bit cleared represent queue heads,
 *   indexed from the beginning of the __DATA section of the kernel.
 *   (see zone_pageq_base).
 */
typedef struct zone_packed_virtual_address {
        uint32_t packed_address;
} zone_pva_t;
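
/*
 * Illustrative sketch only (not part of this header): the encoding above
 * implies predicates along these lines, where the helper names are
 * hypothetical and the real accessors live in the zalloc implementation:
 *
 *     pva_is_null(pva)   <=>  pva.packed_address == 0
 *     pva_is_queue(pva)  <=>  pva.packed_address != 0 &&
 *                             (pva.packed_address & 0x80000000) == 0
 *     pva_is_page(pva)   <=>  (pva.packed_address & 0x80000000) != 0
 */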

/*!
 * @struct zone_stats
 *
 * @abstract
 * Per-cpu structure used for basic zone stats.
 *
 * @discussion
 * The values aren't scaled for per-cpu zones.
 */
struct zone_stats {
        uint64_t zs_mem_allocated;
        uint64_t zs_mem_freed;
        uint64_t zs_alloc_fail;
        uint32_t zs_alloc_rr; /* allocation rr bias */
        uint32_t _Atomic zs_alloc_not_shared;
};

typedef struct zone_magazine *zone_magazine_t;

/*!
 * @struct zone_depot
 *
 * @abstract
 * Holds a list of full and empty magazines.
 *
 * @discussion
 * The data structure is a "STAILQ" and an "SLIST" combined with counters
 * to know their lengths in O(1). Here is a graphical example:
 *   zd_full  = 3
 *   zd_empty = 1
 *   ╭─── zd_head
 *   │            ╭─ zd_tail
 *   │            ╰────────────────────────╮
 *   │   ╭───────╮   ╭───────╮   ╭───────╮ v ╭───────╮
 *   ╰──>│███████┼──>│███████┼──>│███████┼──>│       ┼──> X
 *       ╰───────╯   ╰───────╯   ╰───────╯   ╰───────╯
 */
struct zone_depot {
        uint32_t zd_full;
        uint32_t zd_empty;
        zone_magazine_t zd_head;
        zone_magazine_t *zd_tail;
};
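
/*
 * For illustration only: appending a freshly filled magazine at the tail of
 * the full list follows the usual STAILQ pattern sketched below. The link
 * field name (zm_next) is an assumption, since struct zone_magazine is not
 * defined in this header.
 *
 *     mag->zm_next = *zd->zd_tail;   // keep the empty list attached
 *     *zd->zd_tail = mag;            // old tail's link now points to mag
 *     zd->zd_tail  = &mag->zm_next;  // tail points at mag's link field
 *     zd->zd_full++;
 */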

/* see https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/ */
#define Z_MAGIC_QUO(s) (((1ull << 32) - 1) / (uint64_t)(s) + 1)
#define Z_MAGIC_ALIGNED(s) (~0u / (uint32_t)(s) + 1)

/*
 * Returns (offs / size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 */
static inline uint32_t
Z_FAST_QUO(uint64_t offs, uint64_t magic)
{
        return (offs * magic) >> 32;
}

/*
 * Returns (offs % size) if offs is small enough
 * and magic = Z_MAGIC_QUO(size)
 */
static inline uint32_t
Z_FAST_MOD(uint64_t offs, uint64_t magic, uint64_t size)
{
        uint32_t lowbits = (uint32_t)(offs * magic);

        return (lowbits * size) >> 32;
}

/*
 * Returns whether (offs % size) == 0 if offs is small enough
 * and magic = Z_MAGIC_ALIGNED(size)
 */
static inline bool
Z_FAST_ALIGNED(uint64_t offs, uint32_t magic)
{
        return (uint32_t)(offs * magic) < magic;
}
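
/*
 * Worked example of the fast-divide trick above (illustrative only),
 * using an element size of 48 bytes:
 *
 *     Z_MAGIC_QUO(48)     == (2^32 - 1) / 48 + 1 == 89478486
 *     Z_FAST_QUO(150, 89478486)     == (150 * 89478486) >> 32 == 3  == 150 / 48
 *     Z_FAST_MOD(150, 89478486, 48) == 6                            == 150 % 48
 *
 *     Z_MAGIC_ALIGNED(48) == ~0u / 48 + 1 == 89478486
 *     Z_FAST_ALIGNED(144, 89478486) == true   (144 is a multiple of 48)
 *     Z_FAST_ALIGNED(150, 89478486) == false
 *
 * "offs is small enough" refers to offsets within a zone chunk (a few pages
 * at most), far below the range where this 32-bit fixed-point approximation
 * of 1/size stops being exact.
 */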

struct zone_size_params {
        uint32_t z_align_magic; /* magic to use with Z_FAST_ALIGNED() */
        uint32_t z_elem_size; /* size of an element */
};

struct zone_expand {
        struct zone_expand *ze_next;
        thread_t ze_thread;
        bool ze_pg_wait;
        bool ze_vm_priv;
        bool ze_clear_priv;
};

#define Z_WMA_UNIT (1u << 8)
#define Z_WMA_MIX(base, e) ((3 * (base) + (e) * Z_WMA_UNIT) / 4)
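
/*
 * Z_WMA_* implement a fixed-point exponential moving average with a 3/4
 * decay: each new integer sample e is scaled by Z_WMA_UNIT (256) and mixed
 * with the previous average. For example, with a current average of 512
 * (2.0 in 24.8 fixed point) and a new sample of 4:
 *
 *     Z_WMA_MIX(512, 4) == (3 * 512 + 4 * 256) / 4 == 640   (i.e. 2.5)
 */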

struct zone {
        /*
         * Readonly / rarely written fields
         */

        /*
         * The first 4 fields match a zone_view.
         *
         * z_self points back to the zone when the zone is initialized,
         * or is NULL otherwise.
         */
        struct zone *z_self;
        zone_stats_t z_stats;
        const char *z_name;
        struct zone_view *z_views;
        struct zone_expand *z_expander;

        uint64_t z_quo_magic;
        uint32_t z_align_magic;
        uint16_t z_elem_size;
        uint16_t z_elem_offs;
        uint16_t z_chunk_pages;
        uint16_t z_chunk_elems;

        uint32_t /* 32 bits */
        /*
         * Lifecycle state (Mutable after creation)
         */
            z_destroyed :1, /* zone is (being) destroyed */
            z_async_refilling :1, /* asynchronous allocation pending? */
            z_depot_cleanup :1, /* per cpu depots need cleaning */
            z_expanding_wait :1, /* is thread waiting for expansion? */
            z_exhausted_wait :1, /* are threads waiting for exhaustion end */
            z_exhausts :1, /* whether the zone exhausts by design */

        /*
         * Behavior configuration bits
         */
            z_percpu :1, /* the zone is percpu */
            z_smr :1, /* the zone uses SMR */
            z_permanent :1, /* the zone allocations are permanent */
            z_nocaching :1, /* disallow zone caching for this zone */
            collectable :1, /* garbage collect empty pages */
            no_callout :1,
            z_destructible :1, /* zone can be zdestroy()ed */

            _reserved :6,

        /*
         * Debugging features
         */
            z_pgz_tracked :1, /* this zone is tracked by pgzalloc */
            z_pgz_use_guards :1, /* this zone uses guards with PGZ */
            z_kasan_fakestacks :1,
            z_kasan_quarantine :1, /* whether to use the kasan quarantine */
            z_tags_sizeclass :6, /* idx into zone_tags_sizeclasses to associate
                                  * sizeclass for a particular kalloc tag */
            z_uses_tags :1,
            z_log_on :1, /* zone logging was enabled by boot-arg */
            z_tbi_tag :1; /* Zone supports tbi tagging */

        uint8_t z_cacheline1[0] __attribute__((aligned(64)));

        /*
         * Zone caching / recirculation cacheline
         *
         * z_recirc* fields are protected by the recirculation lock.
         *
         * z_recirc_cont_wma:
         *   weighted moving average of the number of contentions per second,
         *   in Z_WMA_UNIT units (fixed point decimal).
         *
         * z_recirc_cont_cur:
         *   count of recorded contentions that will be fused
         *   in z_recirc_cont_wma at the next period.
         *
         *   Note: if caching is disabled,
         *   this field is used under the zone lock.
         *
         * z_elems_free_{min,wma} (overloaded on z_recirc_empty*):
         *   tracks the history of the minimum values of z_elems_free over time
         *   with "min" being the minimum it hit for the current period,
         *   and "wma" the weighted moving average of those values.
         *
         *   This field is used if z_pcpu_cache is NULL,
         *   otherwise it aliases with z_recirc_empty_{min,wma}
         *
         * z_recirc_{full,empty}_{min,wma}:
         *   tracks the history of the minimum number of full/empty
         *   magazines in the depot over time, with "min" being the minimum
         *   it hit for the current period, and "wma" the weighted moving
         *   average of those values.
         */
        struct zone_cache *__zpercpu z_pcpu_cache;
        struct zone_depot z_recirc;

        hw_lck_ticket_t z_recirc_lock;
        uint32_t z_recirc_full_min;
        uint32_t z_recirc_full_wma;
        union {
                uint32_t z_recirc_empty_min;
                uint32_t z_elems_free_min;
        };
        union {
                uint32_t z_recirc_empty_wma;
                uint32_t z_elems_free_wma;
        };
        uint32_t z_recirc_cont_cur;
        uint32_t z_recirc_cont_wma;

        uint16_t z_depot_size;
        uint16_t z_depot_limit;

        uint8_t z_cacheline2[0] __attribute__((aligned(64)));

        /*
         * often mutated fields
         */

        hw_lck_ticket_t z_lock;

        /*
         * Page accounting (wired / VA)
         *
         * Those numbers are unscaled for z_percpu zones
         * (zone_scale_for_percpu() needs to be used to find the true value).
         */
        uint32_t z_wired_max; /* how large can this zone grow */
        uint32_t z_wired_hwm; /* z_wired_cur high watermark */
        uint32_t z_wired_cur; /* number of pages used by this zone */
        uint32_t z_wired_empty; /* pages collectable by GC */
        uint32_t z_va_cur; /* amount of VA used by this zone */

        /*
         * list of metadata structs, which maintain per-page free element lists
         */
        zone_pva_t z_pageq_empty; /* populated, completely empty pages */
        zone_pva_t z_pageq_partial; /* populated, partially filled pages */
        zone_pva_t z_pageq_full; /* populated, completely full pages */
        zone_pva_t z_pageq_va; /* non-populated VA pages */

        /*
         * Zone statistics
         *
         * z_elems_avail:
         *   number of elements in the zone (at all).
         */
        uint32_t z_elems_free; /* Number of free elements */
        uint32_t z_elems_avail; /* Number of elements available */
        uint32_t z_elems_rsv;
        uint32_t z_array_size_class;

        struct zone *z_kt_next;

        uint8_t z_cacheline3[0] __attribute__((aligned(64)));

#if KASAN_CLASSIC
        uint16_t z_kasan_redzone;
        spl_t z_kasan_spl;
#endif

#if ZONE_ENABLE_LOGGING || CONFIG_ZLEAKS || KASAN_TBI
        /*
         * the allocation logs are used when:
         *
         * - zlog<n>= boot-args are used (and then z_log_on is set)
         *
         * - the leak detection was triggered for the zone.
         *   In that case, the log can't ever be freed,
         *   but it can be enabled/disabled dynamically.
         */
        struct btlog *z_btlog;
        struct btlog *z_btlog_disabled;
#endif
} __attribute__((aligned(64)));

/*!
 * @typedef zone_security_flags_t
 *
 * @brief
 * Type used to store the immutable security properties of a zone.
 *
 * @description
 * These properties influence the security nature of a zone and can't be
 * modified after lockdown.
 */
typedef struct zone_security_flags {
        uint16_t
        /*
         * Security sensitive configuration bits
         */
            z_submap_idx :8, /* a Z_SUBMAP_IDX_* value */
            z_kheap_id :2, /* zone_kheap_id_t when part of a kalloc heap */
            z_kalloc_type :1, /* zones that do type-based segregation */
            z_lifo :1, /* depot and recirculation layer are LIFO */
            z_pgz_use_guards :1, /* this zone uses guards with PGZ */
            z_submap_from_end :1, /* allocate from the left or the right ? */
            z_noencrypt :1, /* do not encrypt pages when hibernating */
            z_unused :1;
        /*
         * Signature equivalence zone
         */
        zone_id_t z_sig_eq;
} zone_security_flags_t;


/*
 * Zsecurity config to enable strict free of iokit objects to the zone
 * or heap they were allocated from.
 *
 * Turn ZSECURITY_OPTIONS_STRICT_IOKIT_FREE off on x86 so as not to
 * break third party kexts that haven't yet been recompiled
 * to use the new iokit macros.
 */
#if XNU_PLATFORM_MacOSX && __x86_64__
# define ZSECURITY_CONFIG_STRICT_IOKIT_FREE OFF
#else
# define ZSECURITY_CONFIG_STRICT_IOKIT_FREE ON
#endif

/*
 * Zsecurity config to enable the read-only allocator
 */
#if KASAN_CLASSIC
# define ZSECURITY_CONFIG_READ_ONLY OFF
#else
# define ZSECURITY_CONFIG_READ_ONLY ON
#endif

/*
 * Zsecurity config to enable making heap feng-shui
 * less reliable.
 */
#if KASAN_CLASSIC
# define ZSECURITY_CONFIG_SAD_FENG_SHUI OFF
# define ZSECURITY_CONFIG_GENERAL_SUBMAPS 1
#else
# define ZSECURITY_CONFIG_SAD_FENG_SHUI ON
# define ZSECURITY_CONFIG_GENERAL_SUBMAPS 4
#endif

/*
 * Zsecurity config to enable adjusting of elements
 * with PGZ-OOB to right-align them in their space.
 */
#if KASAN || defined(__x86_64__) || CONFIG_KERNEL_TAGGING
# define ZSECURITY_CONFIG_PGZ_OOB_ADJUST OFF
#else
# define ZSECURITY_CONFIG_PGZ_OOB_ADJUST ON
#endif

/*
 * Zsecurity config to enable kalloc type segregation
 */
#if XNU_TARGET_OS_WATCH || KASAN_CLASSIC
# define ZSECURITY_CONFIG_KT_BUDGET 120
# define ZSECURITY_CONFIG_KT_VAR_BUDGET 6
#else
# define ZSECURITY_CONFIG_KT_BUDGET 260
# define ZSECURITY_CONFIG_KT_VAR_BUDGET 6
#endif


__options_decl(kalloc_type_options_t, uint64_t, {
        /*
         * kalloc type option to switch default accounting to private.
         */
        KT_OPTIONS_ACCT = 0x00000001,
        /*
         * kalloc type option to print additional stats regarding zone
         * budget distribution and signatures.
         */
        KT_OPTIONS_DEBUG = 0x00000002,
        /*
         * kalloc type option to allow loose freeing between heaps
         */
        KT_OPTIONS_LOOSE_FREE = 0x00000004,
});

__enum_decl(kt_var_heap_id_t, uint32_t, {
        /*
         * Fake "data" heap used to link views of data-only allocations that
         * have been redirected to KHEAP_DATA_BUFFERS
         */
        KT_VAR_DATA_HEAP,
        /*
         * Heaps for pointer arrays
         */
        KT_VAR_PTR_HEAP0,
        KT_VAR_PTR_HEAP1,
        /*
         * Indicates the first additional heap added
         */
        KT_VAR__FIRST_FLEXIBLE_HEAP,
});

/*
 * Zone submap indices
 *
 * Z_SUBMAP_IDX_VM
 *   this map has the special property that its allocations
 *   can be done without ever locking the submap, and doesn't use
 *   VM entries in the map (which limits certain VM map operations on it).
 *
 *   On ILP32 a single zone lives here (the vm_map_entry_reserved_zone).
 *
 *   On LP64 it is also used to restrict VM allocations to the lower part
 *   of the kernel VA space, for pointer packing purposes.
 *
 * Z_SUBMAP_IDX_GENERAL_{0,1,2,3}
 *   used for unrestricted allocations
 *
 * Z_SUBMAP_IDX_DATA
 *   used to sequester bags of bytes from all other allocations and allow
 *   VA reuse within the map
 *
 * Z_SUBMAP_IDX_READ_ONLY
 *   used for the read-only allocator
 */
__enum_decl(zone_submap_idx_t, uint32_t, {
        Z_SUBMAP_IDX_VM,
        Z_SUBMAP_IDX_READ_ONLY,
        Z_SUBMAP_IDX_GENERAL_0,
#if ZSECURITY_CONFIG(SAD_FENG_SHUI)
        Z_SUBMAP_IDX_GENERAL_1,
        Z_SUBMAP_IDX_GENERAL_2,
        Z_SUBMAP_IDX_GENERAL_3,
#endif /* ZSECURITY_CONFIG(SAD_FENG_SHUI) */
        Z_SUBMAP_IDX_DATA,

        Z_SUBMAP_IDX_COUNT,
});

#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)

/*
 * Variable kalloc_type heap config
 */
struct kheap_info {
        zone_id_t kh_zstart;
        kalloc_heap_t kh_views;
        kalloc_type_var_view_t kt_views;
};
typedef union kalloc_type_views {
        struct kalloc_type_view *ktv_fixed;
        struct kalloc_type_var_view *ktv_var;
} kalloc_type_views_t;

#define KT_VAR_MAX_HEAPS 8
#define MAX_ZONES 690
extern struct kheap_info kalloc_type_heap_array[KT_VAR_MAX_HEAPS];
extern zone_id_t _Atomic num_zones;
extern uint32_t zone_view_count;
extern struct zone zone_array[MAX_ZONES];
extern struct zone_size_params zone_ro_size_params[ZONE_ID__LAST_RO + 1];
extern zone_security_flags_t zone_security_array[];
extern const char * const kalloc_heap_names[KHEAP_ID_COUNT];
extern mach_memory_info_t *panic_kext_memory_info;
extern vm_size_t panic_kext_memory_size;
extern vm_offset_t panic_fault_address;
extern uint16_t _zc_mag_size;

#define zone_index_foreach(i) \
        for (zone_id_t i = 1, num_zones_##i = os_atomic_load(&num_zones, acquire); \
            i < num_zones_##i; i++)

#define zone_foreach(z) \
        for (zone_t z = &zone_array[1], \
            last_zone_##z = &zone_array[os_atomic_load(&num_zones, acquire)]; \
            z < last_zone_##z; z++)
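
/*
 * Illustrative use of the iterators above (sketch only): callers typically
 * skip entries that are not fully initialized, per the z_self convention
 * documented in struct zone:
 *
 *     zone_foreach(z) {
 *         if (z->z_self == NULL) {
 *             continue;
 *         }
 *         // inspect *z
 *     }
 */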

__abortlike
extern void zone_invalid_panic(zone_t zone);

__pure2
static inline zone_id_t
zone_index(zone_t z)
{
        unsigned long delta;
        uint64_t quo;

        delta = (unsigned long)z - (unsigned long)zone_array;
        if (delta >= MAX_ZONES * sizeof(*z)) {
                zone_invalid_panic(z);
        }
        quo = Z_FAST_QUO(delta, Z_MAGIC_QUO(sizeof(*z)));
        __builtin_assume(quo < MAX_ZONES);
        return (zone_id_t)quo;
}

__pure2
static inline bool
zone_is_ro(zone_t zone)
{
        return zone >= &zone_array[ZONE_ID__FIRST_RO] &&
            zone <= &zone_array[ZONE_ID__LAST_RO];
}

static inline bool
zone_addr_size_crosses_page(mach_vm_address_t addr, mach_vm_size_t size)
{
        return atop(addr ^ (addr + size - 1)) != 0;
}
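
/*
 * Example (illustrative, assuming 16K kernel pages / PAGE_SHIFT 14): an
 * allocation at 0xffffff8000003fe0 of size 0x40 has its last byte at
 * 0xffffff800000401f, which lives on the next page. atop() of the XOR of
 * the two addresses is then non zero, so the range "crosses" a page.
 */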

__pure2
static inline uint16_t
zone_elem_redzone(zone_t zone)
{
#if KASAN_CLASSIC
        return zone->z_kasan_redzone;
#else
        (void)zone;
        return 0;
#endif
}

__pure2
static inline uint16_t
zone_elem_inner_offs(zone_t zone)
{
        return zone->z_elem_offs;
}

__pure2
static inline uint16_t
zone_elem_outer_offs(zone_t zone)
{
        return zone_elem_inner_offs(zone) - zone_elem_redzone(zone);
}

__pure2
static inline vm_offset_t
zone_elem_inner_size(zone_t zone)
{
        return zone->z_elem_size;
}

__pure2
static inline vm_offset_t
zone_elem_outer_size(zone_t zone)
{
        return zone_elem_inner_size(zone) + zone_elem_redzone(zone);
}

__pure2
static inline zone_security_flags_t
zone_security_config(zone_t z)
{
        zone_id_t zid = zone_index(z);
        return zone_security_array[zid];
}

static inline uint32_t
zone_count_free(zone_t zone)
{
        return zone->z_elems_free + zone->z_recirc.zd_full * _zc_mag_size;
}

static inline uint32_t
zone_count_allocated(zone_t zone)
{
        return zone->z_elems_avail - zone_count_free(zone);
}

static inline vm_size_t
zone_scale_for_percpu(zone_t zone, vm_size_t size)
{
        if (zone->z_percpu) {
                size *= zpercpu_count();
        }
        return size;
}

static inline vm_size_t
zone_size_wired(zone_t zone)
{
        /*
         * this either requires the zone lock,
         * or is only to be used for statistics purposes.
         */
        vm_size_t size = ptoa(os_atomic_load(&zone->z_wired_cur, relaxed));
        return zone_scale_for_percpu(zone, size);
}

static inline vm_size_t
zone_size_free(zone_t zone)
{
        return zone_scale_for_percpu(zone,
            zone_elem_inner_size(zone) * zone_count_free(zone));
}

/* Under KASAN builds, this also accounts for quarantined elements. */
static inline vm_size_t
zone_size_allocated(zone_t zone)
{
        return zone_scale_for_percpu(zone,
            zone_elem_inner_size(zone) * zone_count_allocated(zone));
}

static inline vm_size_t
zone_size_wasted(zone_t zone)
{
        return zone_size_wired(zone) - zone_scale_for_percpu(zone,
            zone_elem_outer_size(zone) * zone->z_elems_avail);
}

__pure2
static inline bool
zone_exhaustible(zone_t zone)
{
        return zone->z_wired_max != ~0u;
}

__pure2
static inline bool
zone_exhausted(zone_t zone)
{
        return zone->z_wired_cur >= zone->z_wired_max;
}

/*
 * Set and get the signature equivalence for the given zone
 */
extern void zone_set_sig_eq(zone_t zone, zone_id_t sig_eq);
extern zone_id_t zone_get_sig_eq(zone_t zone);
/*
 * Return the accumulated allocated memory for the given zone stats
 */
static inline vm_size_t
zone_stats_get_mem_allocated(zone_stats_t stats)
{
        return stats->zs_mem_allocated;
}

/*
 * For sysctl kern.zones_collectable_bytes used by memory_maintenance to check if a
 * userspace reboot is needed. The only other way to query for this information
 * is via mach_memory_info() which is unavailable on release kernels.
 */
extern uint64_t get_zones_collectable_bytes(void);

/*!
 * @enum zone_gc_level_t
 *
 * @const ZONE_GC_TRIM
 * Request a trimming GC: it will trim allocations in excess
 * of the working set size estimate only.
 *
 * @const ZONE_GC_DRAIN
 * Request a draining GC: this is an aggressive mode that will
 * cause all caches to be drained and all free pages returned to the system.
 *
 * @const ZONE_GC_JETSAM
 * Request to consider a jetsam, and then fall back to @c ZONE_GC_TRIM or
 * @c ZONE_GC_DRAIN depending on the state of the zone map.
 * To avoid deadlocks, only @c vm_pageout_garbage_collect() should ever
 * request a @c ZONE_GC_JETSAM level.
 */
__enum_closed_decl(zone_gc_level_t, uint32_t, {
        ZONE_GC_TRIM,
        ZONE_GC_DRAIN,
        ZONE_GC_JETSAM,
});

/*!
 * @function zone_gc
 *
 * @brief
 * Reduces memory used by zones by trimming caches and freelists.
 *
 * @discussion
 * @c zone_gc() is called:
 * - by the pageout daemon when the system needs more free pages.
 * - by the VM when contiguous page allocation requests get stuck
 *   (see vm_page_find_contiguous()).
 *
 * @param level The zone GC level requested.
 */
extern void zone_gc(zone_gc_level_t level);

extern void zone_gc_trim(void);
extern void zone_gc_drain(void);

#define ZONE_WSS_UPDATE_PERIOD 15
/*!
 * @function compute_zone_working_set_size
 *
 * @brief
 * Recomputes the working set size for every zone
 *
 * @discussion
 * This runs about every @c ZONE_WSS_UPDATE_PERIOD seconds (15),
 * computing an exponential moving average with a weight of 75%,
 * so that the history of the last minute is the dominating factor.
 */
extern void compute_zone_working_set_size(void *);
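
/*
 * Arithmetic behind the "last minute dominates" claim above: with a 15s
 * period, four updates elapse per minute, so samples older than a minute
 * retain a weight of 0.75^4 ~= 0.32, i.e. roughly two thirds of the
 * average comes from the last minute. The 75% weight matches the 3/4
 * factor in Z_WMA_MIX().
 */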

/* Debug logging for zone-map-exhaustion jetsams. */
extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);
extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/* Bootstrap zone module (create zone zone) */
extern void zone_bootstrap(void);

/* Force-enable caching on a zone, generally unsafe to call directly */
extern void zone_enable_caching(zone_t zone);

/*!
 * @function zone_early_mem_init
 *
 * @brief
 * Steal memory from pmap (prior to initialization of zalloc)
 * for the special vm zones that allow bootstrap memory and store
 * the range so as to facilitate range checking in zfree.
 *
 * @param size the size to steal (must be a page multiple)
 */
__startup_func
extern vm_offset_t zone_early_mem_init(
        vm_size_t size);

/*!
 * @function zone_get_early_alloc_size
 *
 * @brief
 * Compute the correct size (greater than @c ptoa(min_pages)) that is a multiple
 * of the allocation granule for the zone with the given creation flags and
 * element size.
 */
__startup_func
extern vm_size_t zone_get_early_alloc_size(
        const char *name __unused,
        vm_size_t elem_size,
        zone_create_flags_t flags,
        vm_size_t min_elems);

/*!
 * @function zone_cram_early
 *
 * @brief
 * Cram memory allocated with @c zone_early_mem_init() into a zone.
 *
 * @param zone The zone to cram memory into.
 * @param newmem The base address for the memory to cram.
 * @param size The size of the memory to cram into the zone.
 */
__startup_func
extern void zone_cram_early(
        zone_t zone,
        vm_offset_t newmem,
        vm_size_t size);

extern bool zone_maps_owned(
        vm_address_t addr,
        vm_size_t size);

#if KASAN_LIGHT
extern bool kasan_zone_maps_owned(
        vm_address_t addr,
        vm_size_t size);
#endif /* KASAN_LIGHT */

extern void zone_map_sizes(
        vm_map_size_t *psize,
        vm_map_size_t *pfree,
        vm_map_size_t *plargest_free);

extern bool
zone_map_nearing_exhaustion(void);

static inline vm_tag_t
zalloc_flags_get_tag(zalloc_flags_t flags)
{
        return (vm_tag_t)((flags & Z_VM_TAG_MASK) >> Z_VM_TAG_SHIFT);
}

extern struct kalloc_result zalloc_ext(
        zone_t zone,
        zone_stats_t zstats,
        zalloc_flags_t flags);

#if KASAN
#define ZFREE_PACK_SIZE(esize, usize) (((uint64_t)(usize) << 32) | (esize))
#define ZFREE_ELEM_SIZE(combined) ((uint32_t)(combined))
#define ZFREE_USER_SIZE(combined) ((combined) >> 32)
#else
#define ZFREE_PACK_SIZE(esize, usize) (esize)
#define ZFREE_ELEM_SIZE(combined) (combined)
#endif
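
/*
 * Example of the packing above on a KASAN build: for an element size of
 * 128 bytes where the caller requested 100 bytes,
 *
 *     combined = ZFREE_PACK_SIZE(128, 100)   // (100ull << 32) | 128
 *     ZFREE_ELEM_SIZE(combined) == 128       // low 32 bits
 *     ZFREE_USER_SIZE(combined) == 100       // high 32 bits
 *
 * On non-KASAN builds only the element size is carried and
 * ZFREE_USER_SIZE() is not defined.
 */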

extern void zfree_ext(
        zone_t zone,
        zone_stats_t zstats,
        void *addr,
        uint64_t combined_size);

extern zone_id_t zone_id_for_element(
        void *addr,
        vm_size_t esize);

#if ZSECURITY_CONFIG(PGZ_OOB_ADJUST)
extern void *zone_element_pgz_oob_adjust(
        void *addr,
        vm_size_t req_size,
        vm_size_t elem_size);
#endif /* ZSECURITY_CONFIG(PGZ_OOB_ADJUST) */

extern void zone_element_bounds_check(
        vm_address_t addr,
        vm_size_t len);

extern vm_size_t zone_element_size(
        void *addr,
        zone_t *z,
        bool clear_oob,
        vm_offset_t *oob_offs);

/*!
 * @function zone_spans_ro_va
 *
 * @abstract
 * This function is used to check whether the specified address range
 * spans through the read-only zone range.
 *
 * @discussion
 * This only checks for the range specified within ZONE_ADDR_READONLY.
 * The parameters addr_start and addr_end have their PAC bits stripped
 * before the check is made.
 */
extern bool zone_spans_ro_va(
        vm_offset_t addr_start,
        vm_offset_t addr_end);

/*!
 * @function __zalloc_ro_mut_atomic
 *
 * @abstract
 * This function is called from the pmap to perform the specified atomic
 * operation on memory from the read-only allocator.
 *
 * @discussion
 * This function is for internal use only and should not be called directly.
 */
static inline uint64_t
__zalloc_ro_mut_atomic(vm_offset_t dst, zro_atomic_op_t op, uint64_t value)
{
#define __ZALLOC_RO_MUT_OP(op, op2) \
        case ZRO_ATOMIC_##op##_8: \
                return os_atomic_##op2((uint8_t *)dst, (uint8_t)value, seq_cst); \
        case ZRO_ATOMIC_##op##_16: \
                return os_atomic_##op2((uint16_t *)dst, (uint16_t)value, seq_cst); \
        case ZRO_ATOMIC_##op##_32: \
                return os_atomic_##op2((uint32_t *)dst, (uint32_t)value, seq_cst); \
        case ZRO_ATOMIC_##op##_64: \
                return os_atomic_##op2((uint64_t *)dst, (uint64_t)value, seq_cst)

        switch (op) {
        __ZALLOC_RO_MUT_OP(OR, or_orig);
        __ZALLOC_RO_MUT_OP(XOR, xor_orig);
        __ZALLOC_RO_MUT_OP(AND, and_orig);
        __ZALLOC_RO_MUT_OP(ADD, add_orig);
        __ZALLOC_RO_MUT_OP(XCHG, xchg);
        default:
                panic("%s: Invalid atomic operation: %d", __func__, op);
        }

#undef __ZALLOC_RO_MUT_OP
}
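
/*
 * Illustrative call (sketch): atomically OR a bit into a 32-bit field that
 * lives in read-only zone memory, getting the previous value back:
 *
 *     uint32_t old = (uint32_t)__zalloc_ro_mut_atomic(
 *         (vm_offset_t)&ro_obj->flags, ZRO_ATOMIC_OR_32, 0x1);
 *
 * (ro_obj is a placeholder for some object allocated from the read-only
 * allocator; as noted above, this path is meant for pmap internals only.)
 */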

/*!
 * @function zone_owns
 *
 * @abstract
 * This function is a soft version of zone_require: it checks whether a given
 * pointer belongs to the specified zone. It should not be used outside
 * allocator code.
 *
 * @discussion
 * Note that zone_owns() can only work with:
 * - zones not allowing foreign memory
 * - zones in the general submap.
 *
 * @param zone the zone the address needs to belong to.
 * @param addr the element address to check.
 */
extern bool zone_owns(
        zone_t zone,
        void *addr);

/*!
 * @function zone_submap
 *
 * @param zsflags the security flags of a specified zone.
 * @returns the zone (sub)map this zone allocates from.
 */
__pure2
extern vm_map_t zone_submap(
        zone_security_flags_t zsflags);

#ifndef VM_TAG_SIZECLASSES
#error MAX_TAG_ZONES
#endif
#if VM_TAG_SIZECLASSES

extern uint16_t zone_index_from_tag_index(
        uint32_t tag_zone_index);

#endif /* VM_TAG_SIZECLASSES */

extern lck_grp_t zone_locks_grp;

static inline void
zone_lock(zone_t zone)
{
#if KASAN_FAKESTACK
        spl_t s = 0;
        if (zone->z_kasan_fakestacks) {
                s = splsched();
        }
#endif /* KASAN_FAKESTACK */
        hw_lck_ticket_lock(&zone->z_lock, &zone_locks_grp);
#if KASAN_FAKESTACK
        zone->z_kasan_spl = s;
#endif /* KASAN_FAKESTACK */
}

static inline void
zone_unlock(zone_t zone)
{
#if KASAN_FAKESTACK
        spl_t s = zone->z_kasan_spl;
        zone->z_kasan_spl = 0;
#endif /* KASAN_FAKESTACK */
        hw_lck_ticket_unlock(&zone->z_lock);
#if KASAN_FAKESTACK
        if (zone->z_kasan_fakestacks) {
                splx(s);
        }
#endif /* KASAN_FAKESTACK */
}

#define MAX_ZONE_NAME 32 /* max length of a zone name we can take from the boot-args */

int track_this_zone(const char *zonename, const char *logname);
extern bool panic_include_kalloc_types;
extern zone_t kalloc_type_src_zone;
extern zone_t kalloc_type_dst_zone;

#if DEBUG || DEVELOPMENT
extern vm_size_t zone_element_info(void *addr, vm_tag_t * ptag);
#endif /* DEBUG || DEVELOPMENT */

#pragma GCC visibility pop

__END_DECLS

#endif /* _KERN_ZALLOC_INTERNAL_H_ */