/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
#define _SKYWALK_MEM_SKMEMCACHEVAR_H

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/os_channel_private.h>
#include <kern/cpu_number.h>

/*
 * Buffer control.
 */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *bc_addr;       /* buffer obj address */
	void                    *bc_addrm;      /* mirrored buffer obj addr */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_lim;         /* buffer obj limit */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	volatile uint32_t       bc_usecnt;      /* outstanding use */
};

#define SKMEM_BUFCTL_SHAREOK    0x1     /* supports sharing */

#define SKMEM_STACK_DEPTH       16      /* maximum audit stack depth */

#define SKMEM_CACHE_ALIGN       8       /* min guaranteed alignment */

/*
 * Alternative buffer control, used when SKM_MODE_AUDIT is set.
 */
struct skmem_bufctl_audit {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *bc_addr;       /* buffer address */
	void                    *bc_addrm;      /* mirrored buffer address */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	volatile uint32_t       bc_usecnt;      /* outstanding use */
	struct thread           *bc_thread;     /* thread doing transaction */
	uint32_t                bc_timestamp;   /* transaction time */
	uint32_t                bc_depth;       /* stack depth */
	void                    *bc_stack[SKMEM_STACK_DEPTH]; /* stack */
};

/*
 * Buffer control hash bucket.
 */
struct skmem_bufctl_bkt {
	SLIST_HEAD(, skmem_bufctl) bcb_head;    /* bufctl allocated list */
};

/*
 * Slab.
 */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;        /* slab freelist linkage */
	struct skmem_cache      *sl_cache;      /* controlling cache */
	void                    *sl_base;       /* base of allocated memory */
	void                    *sl_basem;      /* base of mirrored memory */
	struct sksegment        *sl_seg;        /* backing segment */
	struct sksegment        *sl_segm;       /* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;     /* bufctl free list */
	uint32_t                sl_refcnt;      /* outstanding allocations */
	uint32_t                sl_chunks;      /* # of buffers in slab */
};

#define SKMEM_SLAB_IS_PARTIAL(sl)       \
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

#define SKMEM_SLAB_MEMBER(sl, buf)      \
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
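
/*
 * Note that SKMEM_SLAB_MEMBER relies on unsigned arithmetic: if buf lies
 * below sl_base, the subtraction wraps around to a very large value and
 * the comparison against skm_slabsize fails, so a single test covers
 * both the lower and upper bounds of the slab.
 */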

/*
 * Magazine type.
 */
struct skmem_magtype {
	int                     mt_magsize;     /* magazine size (# of objs) */
	int                     mt_align;       /* magazine alignment */
	size_t                  mt_minbuf;      /* all smaller bufs qualify */
	size_t                  mt_maxbuf;      /* no larger bufs qualify */
	struct skmem_cache      *mt_cache;      /* magazine cache */
	char                    mt_cname[64];   /* magazine cache name */
};

/*
 * Magazine.
 */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag)  mg_link;        /* magazine linkage */
	struct skmem_magtype    *mg_magtype;    /* magazine type */
	void                    *mg_round[1];   /* one or more objs */
};

#define SKMEM_MAG_SIZE(n)       \
	offsetof(struct skmem_mag, mg_round[n])
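
/*
 * A magazine is a variable-length object: mg_round[] grows with the
 * magazine type, so SKMEM_MAG_SIZE(n) (via offsetof) yields the number
 * of bytes needed for a magazine holding n rounds.  A minimal,
 * illustrative sketch ("mt" is a placeholder handle; the actual
 * allocation path lives in the cache implementation):
 *
 *	struct skmem_magtype *mt = ...;
 *	size_t magsz = SKMEM_MAG_SIZE(mt->mt_magsize);
 */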

/*
 * Magazine depot.
 */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;        /* magazine list */
	uint32_t                ml_total;       /* number of magazines */
	uint32_t                ml_min;         /* min since last update */
	uint32_t                ml_reaplimit;   /* max reapable magazines */
	uint64_t                ml_alloc;       /* allocations from this list */
};

/*
 * Per-CPU cache structure, aligned to the maximum cache line size so
 * that each CPU's state occupies its own cache line(s) and false
 * sharing between CPUs is avoided.
 */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);
	struct skmem_mag        *cp_loaded;     /* currently filled magazine */
	struct skmem_mag        *cp_ploaded;    /* previously filled magazine */
	uint64_t                cp_alloc;       /* allocations from this cpu */
	uint64_t                cp_free;        /* frees to this cpu */
	int                     cp_rounds;      /* # of objs in filled mag */
	int                     cp_prounds;     /* # of objs in previous mag */
	int                     cp_magsize;     /* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));

/*
 * Object's region information.
 *
 * This info is provided to skmem_ctor_fn_t() to assist in constructing
 * master and slave objects.  It is also provided separately via
 * skmem_cache_get_obj_info() when called on an object that has been
 * allocated from a skmem_cache.  Information about the slave object is
 * available only at constructor time.
 */
struct skmem_obj_info {
	void                    *oi_addr;       /* object address */
	struct skmem_bufctl     *oi_bc;         /* buffer control (master) */
	uint32_t                oi_size;        /* actual object size */
	obj_idx_t               oi_idx_reg;     /* object idx within region */
	obj_idx_t               oi_idx_seg;     /* object idx within segment */
} __attribute__((__packed__));

/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used when freeing a batch of packets by the caller of
 * skmem_cache_batch_free().  Note that this requires the region's
 * object to be at least the size of struct skmem_obj, as we store
 * this information at the beginning of each object in the chain.
 */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj        *mo_next;       /* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * otherwise untouched by skmem_cache while the object resides
	 * in a magazine.  By utilizing this space, we avoid having to
	 * allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info   mo_info;        /* object's info */
	struct skmem_obj_info   mo_minfo;       /* mirrored object's info */
};
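
/*
 * A minimal sketch of consuming a batch allocation through mo_next
 * (illustrative only; "skm" is a placeholder cache handle, the request
 * count of 32 is arbitrary, the third and fourth arguments are assumed
 * to be the request count and the SKMEM_* flag, and the cache is
 * assumed to have been created with SKMEM_CR_BATCH):
 *
 *	struct skmem_obj *list = NULL, *obj;
 *	uint32_t n;
 *
 *	n = skmem_cache_batch_alloc(skm, &list, 32, SKMEM_NOSLEEP);
 *	for (obj = list; obj != NULL; obj = obj->mo_next)
 *		;	(inspect or initialize each of the n objects)
 *	skmem_cache_batch_free(skm, list);
 */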

#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi)     \
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))

typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
typedef void (*skmem_dtor_fn_t)(void *, void *);
typedef void (*skmem_reclaim_fn_t)(void *);
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);

/*
 * Cache.
 */
struct skmem_cache {
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t                skm_mode;       /* cache mode flags */
	skmem_ctor_fn_t         skm_ctor;       /* object constructor */
	skmem_dtor_fn_t         skm_dtor;       /* object destructor */
	skmem_reclaim_fn_t      skm_reclaim;    /* cache reclaim */
	void                    *skm_private;   /* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);       /* protects depot layer */
	struct skmem_magtype    *skm_magtype;   /* magazine type */
	struct skmem_maglist    skm_full;       /* full magazines */
	struct skmem_maglist    skm_empty;      /* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);       /* protects slab layer */
	skmem_slab_alloc_fn_t   skm_slab_alloc; /* slab allocate */
	skmem_slab_free_fn_t    skm_slab_free;  /* slab free */
	size_t                  skm_chunksize;  /* bufsize + alignment */
	size_t                  skm_objsize;    /* actual obj size in slab */
	size_t                  skm_slabsize;   /* size of a slab */
	size_t                  skm_hash_initial; /* initial hash table size */
	size_t                  skm_hash_limit; /* hash table size limit */
	size_t                  skm_hash_shift; /* get to interesting bits */
	size_t                  skm_hash_mask;  /* hash table mask */
	struct skmem_bufctl_bkt *skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list; /* fully-allocated (no free objs) */
	struct skmem_region     *skm_region;    /* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t                skm_cpu_mag_size;   /* current magazine size */
	uint32_t                skm_cpu_mag_resize; /* # of magazine resizes */
	uint32_t                skm_cpu_mag_purge;  /* # of magazine purges */
	uint32_t                skm_cpu_mag_reap;   /* # of magazine reaps */
	uint64_t                skm_depot_contention; /* mutex contention count */
	uint64_t                skm_depot_contention_prev; /* previous snapshot */
	uint32_t                skm_depot_full;     /* # of full magazines */
	uint32_t                skm_depot_empty;    /* # of empty magazines */
	uint32_t                skm_depot_ws_zero;  /* # of working set flushes */
	uint32_t                skm_sl_rescale;     /* # of hash table rescales */
	uint32_t                skm_sl_create;      /* slab creates */
	uint32_t                skm_sl_destroy;     /* slab destroys */
	uint32_t                skm_sl_alloc;       /* slab layer allocations */
	uint32_t                skm_sl_free;        /* slab layer frees */
	uint32_t                skm_sl_partial;     /* # of partial slabs */
	uint32_t                skm_sl_empty;       /* # of empty slabs */
	uint64_t                skm_sl_alloc_fail;  /* total failed allocations */
	uint64_t                skm_sl_bufinuse;    /* total unfreed buffers */
	uint64_t                skm_sl_bufmax;      /* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;      /* cache linkage */
	char                    skm_name[64];   /* cache name */
	uuid_t                  skm_uuid;       /* cache uuid */
	size_t                  skm_bufsize;    /* buffer size */
	size_t                  skm_bufalign;   /* buffer alignment */
	size_t                  skm_objalign;   /* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);       /* protects resizing */
	struct thread           *skm_rs_owner;  /* resize owner */
	uint32_t                skm_rs_busy;    /* prevent resizing */
	uint32_t                skm_rs_want;    /* # of threads blocked */
	struct skmem_cpu_cache  skm_cpu_cache[1]
	__attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};

#define SKMEM_CACHE_SIZE(n)     \
	offsetof(struct skmem_cache, skm_cpu_cache[n])

#define SKMEM_CPU_CACHE(c)      \
	((struct skmem_cpu_cache *)((void *)((char *)(c) + \
	SKMEM_CACHE_SIZE(cpu_number()))))
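
/*
 * skm_cpu_cache[] is declared with a single element; the cache itself
 * is presumably allocated with SKMEM_CACHE_SIZE(ncpus) bytes, so the
 * per-CPU caches form a variable-length array at the tail of struct
 * skmem_cache.  Because SKMEM_CACHE_SIZE(n) is an offsetof() of the
 * n-th element, adding it to the cache's base address (as
 * SKMEM_CPU_CACHE does) points at the calling CPU's private
 * skmem_cpu_cache.
 */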

/* valid values for skm_mode, set only by skmem_cache_create() */
#define SKM_MODE_NOMAGAZINES    0x00000001 /* disable magazines layer */
#define SKM_MODE_AUDIT          0x00000002 /* audit transactions */
#define SKM_MODE_NOREDIRECT     0x00000004 /* unaffected by defunct */
#define SKM_MODE_BATCH          0x00000008 /* supports batch alloc/free */
#define SKM_MODE_DYNAMIC        0x00000010 /* enable magazine resizing */
#define SKM_MODE_CLEARONFREE    0x00000020 /* zero-out upon slab free */
#define SKM_MODE_PSEUDO         0x00000040 /* external backing store */
#define SKM_MODE_RECLAIM        0x00000080 /* aggressive memory reclaim */

#define SKM_MODE_BITS           \
	"\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC" \
	"\06CLEARONFREE\07PSEUDO\10RECLAIM"

/*
 * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK applies only to
 * blocking (SKMEM_SLEEP) allocations, i.e. SKMEM_{NOSLEEP,FAILOK} are
 * mutually exclusive.  If set, SKMEM_FAILOK indicates that the segment
 * allocation may fail, and that the cache layer will handle the retries
 * rather than blocking inside the region allocator.
 */
#define SKMEM_SLEEP             0x0     /* can block for memory; won't fail */
#define SKMEM_NOSLEEP           0x1     /* cannot block for memory; may fail */
#define SKMEM_PANIC             0x2     /* panic upon allocation failure */
#define SKMEM_FAILOK            0x4     /* can fail for blocking alloc */
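
/*
 * A minimal sketch of the two allocation disciplines (illustrative
 * only; "skm" is a placeholder cache handle):
 *
 *	void *o;
 *
 *	o = skmem_cache_alloc(skm, SKMEM_SLEEP);    (may block; won't fail)
 *
 *	o = skmem_cache_alloc(skm, SKMEM_NOSLEEP);  (never blocks)
 *	if (o == NULL)
 *		(handle the allocation failure)
 */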

/* valid flag values for skmem_cache_create() */
#define SKMEM_CR_NOMAGAZINES    0x1     /* disable magazines layer */
#define SKMEM_CR_BATCH          0x2     /* support batch alloc/free */
#define SKMEM_CR_DYNAMIC        0x4     /* enable magazine resizing */
#define SKMEM_CR_CLEARONFREE    0x8     /* zero-out upon slab free */
#define SKMEM_CR_RECLAIM        0x10    /* aggressive memory reclaim */
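
/*
 * A hedged sketch of creating and tearing down a cache with these
 * flags (all parameter values are placeholders; the argument order
 * follows the skmem_cache_create() declaration below, where the two
 * size_t arguments are taken to be the buffer size and alignment, and
 * a NULL region is assumed to be acceptable for illustration):
 *
 *	struct skmem_cache *skm;
 *
 *	skm = skmem_cache_create("example", 256, 64, NULL, NULL, NULL,
 *	    NULL, NULL, SKMEM_CR_BATCH | SKMEM_CR_DYNAMIC);
 *	...
 *	skmem_cache_destroy(skm);
 */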

__BEGIN_DECLS
/*
 * Given a buffer control, increment its use count.
 */
__attribute__((always_inline))
static inline void
skmem_bufctl_use(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old + 1;
		VERIFY(new != 0);
		ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});
}

/*
 * Given a buffer control, decrement its use count and return the new value.
 */
__attribute__((always_inline))
static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old - 1;
		VERIFY(old != 0);
		ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});

	return new;
}
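
/*
 * A typical (illustrative) pattern: the caller that drops the last use
 * count learns so from the return value and can then release the
 * underlying buffer.
 *
 *	if (skmem_bufctl_unuse(bc) == 0)
 *		(no outstanding uses remain; the buffer may be freed)
 */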

extern void skmem_cache_pre_init(void);
extern void skmem_cache_init(void);
extern void skmem_cache_fini(void);
extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
    skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
    struct skmem_region *, uint32_t);
extern void skmem_cache_destroy(struct skmem_cache *);
extern void *skmem_cache_alloc(struct skmem_cache *, uint32_t);
extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
    struct skmem_obj **list, uint32_t, uint32_t);
extern void skmem_cache_free(struct skmem_cache *, void *);
extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
extern void skmem_cache_reap(void);
extern void skmem_reap_caches(boolean_t);
extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
    struct skmem_obj_info *, struct skmem_obj_info *);
extern uint32_t skmem_cache_magazine_max(uint32_t);
extern boolean_t skmem_allow_magazines(void);
#if (DEVELOPMENT || DEBUG)
extern void skmem_cache_test_start(uint32_t);
extern void skmem_cache_test_stop(void);
#endif /* (DEVELOPMENT || DEBUG) */
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */