/*
 * Copyright (c) 2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Below is a diagram of the caching system. This design is based on the
 * paper "Magazines and Vmem: Extending the Slab Allocator to Many CPUs and
 * Arbitrary Resources" by Jeff Bonwick and Jonathan Adams. It is divided into
 * three layers: the per-CPU layer, the depot layer, and the zone allocator.
 * The per-CPU and depot layers store elements using arrays we call magazines.
 *
 * Magazines function like a stack (we push and pop elements) and can be
 * moved around for bulk operations.
 *    _________       _________       _________
 *   |  CPU 1  |     |  CPU 2  |     |  CPU 3  |
 *   |  _   _  |     |  _   _  |     |  _   _  |
 *   | |#| | | |     | | | |#| |     | |#| |#| |      Per-CPU Layer
 *   | |#| |_| |     | |_| |#| |     | |#| |#| |
 *   |_________|     |_________|     |_________|
 *
 *    ___________________________________________
 *   |   _      _      _      _      _      _    |
 *   |  |#|    |#|    |#|    | |    | |    | |   |      Depot Layer
 *   |  |#|    |#|    |#|    |_|    |_|    |_|   |
 *   |___________________________________________|
 *
 *    _______________________________________________
 *   | # | # | # | # | # | # | # | # | # | # | # | # |      Zone Allocator
 *   |_______________________________________________|
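 *
 * As a minimal sketch of the magazine idea (the type, field, and helper
 * names here are illustrative, not the actual implementation), a magazine
 * is a fixed-size array of element pointers used as a stack:
 *
 *	struct zcc_magazine {
 *		uint32_t index;		// number of elements currently held
 *		uint32_t capacity;	// e.g. zcc_magazine_element_count
 *		void    *elements[];	// top of stack is elements[index - 1]
 *	};
 *
 *	// Push an element; the caller has checked the magazine is not full.
 *	static void
 *	magazine_push(struct zcc_magazine *mag, void *elem)
 *	{
 *		mag->elements[mag->index++] = elem;
 *	}
 *
 *	// Pop an element; the caller has checked the magazine is not empty.
 *	static void *
 *	magazine_pop(struct zcc_magazine *mag)
 *	{
 *		return mag->elements[--mag->index];
 *	}
 *
 *	static bool
 *	magazine_is_empty(struct zcc_magazine *mag)
 *	{
 *		return mag->index == 0;
 *	}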
 *
 * The top layer is the per-CPU cache and consists of a current and
 * previous magazine for each CPU. The current magazine is the one we always
 * try to allocate from and free to first. Only if we are unable do we check
 * the previous magazine. If the previous magazine can satisfy the allocation
 * or free, we switch the two and use the new current magazine. This layer
 * requires no locking, so we can access multiple CPUs' caches concurrently.
 * This is the main source of the speedup.
 *
 * We keep two magazines here to prevent thrashing when swapping magazines
 * with the depot layer. If a certain pattern of allocations and frees occurs,
 * we can waste a lot of time swapping magazines to and from the depot layer.
 * We prevent this by dividing the per-CPU cache into two separate magazines.
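 *
 * A minimal sketch of that per-CPU allocation path, using the magazine
 * helpers sketched above (the structure and function names are illustrative,
 * not the actual implementation):
 *
 *	struct zcc_per_cpu_cache {
 *		struct zcc_magazine *current;	// magazine we always try first
 *		struct zcc_magazine *previous;	// fallback before the depot
 *	};
 *
 *	void *
 *	cpu_cache_alloc(struct zcc_per_cpu_cache *cache)
 *	{
 *		if (!magazine_is_empty(cache->current)) {
 *			return magazine_pop(cache->current);
 *		}
 *		// Current is empty; if previous can satisfy the allocation,
 *		// swap the two and allocate from the new current magazine.
 *		if (!magazine_is_empty(cache->previous)) {
 *			struct zcc_magazine *tmp = cache->current;
 *			cache->current = cache->previous;
 *			cache->previous = tmp;
 *			return magazine_pop(cache->current);
 *		}
 *		return NULL;	// neither magazine can help; go to the depot layer
 *	}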
 *
 * The middle layer is the magazine depot. This layer consists of a
 * collection of full and empty magazines, which are used to reload the
 * per-CPU caches when needed. It is implemented as an array of magazines
 * that are initially all empty; as we fill up magazines we increment the
 * index to point at the first empty magazine. Since this layer is per-zone,
 * it allows us to balance the cache between CPUs, but it does require taking
 * a lock.
 *
 * When neither the current nor previous magazine for a given CPU can
 * satisfy the free or allocation, we look to the depot layer. If the depot
 * holds a magazine that can satisfy the free or allocation, we swap that
 * magazine into the current position. In the example below, to allocate on
 * the given CPU we must lock the depot layer, swap magazine A with magazine
 * B, and decrement the depot index.
 *
 *    _____________________         _______________________________________
 *   |    Per-CPU Cache    |       |              Depot Layer              |
 *   |                     |       |                                       |
 *   |   A___      ____    |       |   ____      B___      ____      ____  |
 *   |  |    |    |    |   |       |  | ## |    | ## |    |    |    |    | |
 *   |  |    |    |    |   |       |  | ## |    | ## |    |    |    |    | |
 *   |  |    |    |    |   |       |  | ## |    | ## |    |    |    |    | |
 *   |  |    |    |    |   |       |  | ## |    | ## |    |    |    |    | |
 *   |  |____|    |____|   |       |  |_##_|    |_##_|    |____|    |____| |
 *   |  Current  Previous  |       |                                       |
 *   |_____________________|       |_______________________________________|
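 *
 * A minimal sketch of that depot swap on the allocation path (the depot
 * structure, lock, and helper names are illustrative, not the actual
 * implementation):
 *
 *	struct zcc_depot {
 *		lck_mtx_t		lock;		// protects the whole depot
 *		int			index;		// number of full magazines
 *		struct zcc_magazine	*magazines[];	// full first, then empty
 *	};
 *
 *	bool
 *	depot_swap_full_for_empty(struct zcc_depot *depot,
 *	                          struct zcc_per_cpu_cache *cache)
 *	{
 *		lck_mtx_lock(&depot->lock);
 *		if (depot->index == 0) {		// no full magazines left
 *			lck_mtx_unlock(&depot->lock);
 *			return false;
 *		}
 *		// Hand the depot our empty current magazine (A), take a full
 *		// one (B) in its place, and decrement the depot index.
 *		struct zcc_magazine *empty = cache->current;
 *		cache->current = depot->magazines[--depot->index];
 *		depot->magazines[depot->index] = empty;
 *		lck_mtx_unlock(&depot->lock);
 *		return true;
 *	}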
 *
 * The bottom layer is the Zone Allocator. This is already implemented in
 * XNU and will remain mostly unchanged. Implementation for this can be found
 * in zalloc.c and zalloc.h. We will only use the zone if all other layers are
 * unable to satisfy the allocation or free. When we do use the zone, we will
 * try to allocate an entire magazine of elements or free an entire magazine
 * of elements at once.
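 *
 * A minimal sketch of pushing one full magazine back to the zone (purely
 * illustrative; it assumes the zfree() interface and the magazine helpers
 * sketched above):
 *
 *	static void
 *	magazine_drain_to_zone(zone_t zone, struct zcc_magazine *mag)
 *	{
 *		// Free every element in one pass so the cost of reaching the
 *		// zone is amortized over the whole magazine.
 *		while (!magazine_is_empty(mag)) {
 *			zfree(zone, magazine_pop(mag));
 *		}
 *	}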
 *
 * Caching must be enabled explicitly, by calling zone_change() with the
 * Z_CACHING_ENABLED flag, for every zone you want to cache elements for.
 * Good candidates for this are zones with highly contended zone locks.
 *
 * Some promising candidates are kalloc.16, kalloc.48, VM objects, VM map
 * entries, IPC vouchers, and IPC ports.
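 *
 * For example, a zone created at init time could opt in like this (the zone
 * and its parameters are illustrative; this assumes the usual zinit() and
 * zone_change() interfaces):
 *
 *	zone_t my_zone = zinit(sizeof(struct my_elem),
 *	    max_elems * sizeof(struct my_elem),
 *	    alloc_chunk, "example zone");
 *	zone_change(my_zone, Z_CACHING_ENABLED, TRUE);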
 *
 * Some factors can be tuned by boot-arg:
 *	zcc_enable_for_zone_name	name of a single zone to enable caching
 *					for (replace space characters with '.')
 *
 *	zcc_magazine_element_count	integer value for the magazine size used
 *					for all zones (default 8 if not specified)
 *
 *	zcc_depot_element_count		integer value for how many full and empty
 *					magazines to store in the depot; if N is
 *					specified, the depot will have N full and
 *					N empty magazines (default 16 if not
 *					specified)
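 *
 * For example, to cache a single zone named "vm objects" with 16-element
 * magazines and 32 full plus 32 empty depot magazines, the boot-args might
 * include (the zone name and values shown are only an example):
 *
 *	zcc_enable_for_zone_name=vm.objects zcc_magazine_element_count=16 zcc_depot_element_count=32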
 */
#include <kern/kern_types.h>
#include <vm/vm_kern.h>


/*
 * zcache_ready
 *
 * Description: returns whether or not the zone caches are ready to use
 *
 */
bool zcache_ready(void);


/*
 * zcache_bootstrap
 *
 * Description: initializes the zone from which magazines are allocated
 *
 */
void zcache_bootstrap(void);


/*
 * zcache_init
 *
 * Description: Initializes all parts of the per-cpu caches for a given zone
 *
 * Parameters:	zone	pointer to zone on which to initialize caching
 *
 */
void zcache_init(zone_t zone);
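
/*
 * A minimal sketch of the expected startup ordering (the call sites shown
 * are hypothetical; the real ones live in the zone allocator startup path):
 *
 *	zcache_bootstrap();			// once, early: set up the magazine zone
 *	...
 *	if (zcache_ready()) {
 *		zcache_init(some_cached_zone);	// for each zone with caching enabled
 *	}
 */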


/*
 * zcache_free_to_cpu_cache
 *
 * Description: Tries to free an element to the per-cpu caches
 *
 * Parameters:	zone	pointer to the zone the element was allocated from
 *		addr	pointer to the element to free
 *
 * Returns: TRUE if successful, FALSE otherwise
 *
 * Precondition: check that caching is enabled for zone
 */
bool zcache_free_to_cpu_cache(zone_t zone, void *addr);


/*
 * zcache_alloc_from_cpu_cache
 *
 * Description: Tries to allocate an element from the per-cpu caches
 *
 * Parameters:	zone	pointer to the zone the element will come from
 *
 * Returns: pointer to a usable element
 *
 * Precondition: check that caching is enabled for zone
 */
vm_offset_t zcache_alloc_from_cpu_cache(zone_t zone);
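
/*
 * A sketch of how a zone's fast paths might dispatch into these routines
 * (illustrative only: zone_caching_enabled() stands in for whatever check
 * the allocator performs, and a return value of 0 from
 * zcache_alloc_from_cpu_cache() is assumed to mean the caches could not
 * satisfy the allocation):
 *
 *	// Free fast path
 *	if (zone_caching_enabled(zone) && zcache_free_to_cpu_cache(zone, addr)) {
 *		return;				// element absorbed by the caches
 *	}
 *	// ... otherwise fall through to the normal zone free path ...
 *
 *	// Allocation fast path
 *	if (zone_caching_enabled(zone)) {
 *		vm_offset_t elem = zcache_alloc_from_cpu_cache(zone);
 *		if (elem != 0) {
 *			return (void *)elem;
 *		}
 *	}
 *	// ... otherwise fall through to the normal zone allocation path ...
 */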

/*
 * zcache_drain_depot
 *
 * Description: Frees all the full magazines from the depot layer to the zone
 *		allocator; invoked by zone_gc()
 *
 * Parameters:	zone	pointer to the zone whose depot layer should be drained
 *
 * Returns: None
 *
 */
void zcache_drain_depot(zone_t zone);
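
/*
 * A minimal sketch of how zone_gc() might drain the depots (illustrative;
 * the real iteration over zones lives in zalloc.c):
 *
 *	for (unsigned int i = 0; i < num_zones; i++) {
 *		zone_t z = &zone_array[i];
 *		if (zone_caching_enabled(z)) {
 *			zcache_drain_depot(z);	// full magazines go back to the zone
 *		}
 *	}
 */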