/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
#define _SKYWALK_MEM_SKMEMCACHEVAR_H

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/os_channel_private.h>
#include <kern/cpu_number.h>

/*
 * Buffer control.
 */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;	/* bufctl linkage */
	void		*bc_addr;	/* buffer obj address */
	void		*bc_addrm;	/* mirrored buffer obj addr */
	struct skmem_slab *bc_slab;	/* controlling slab */
	uint32_t	bc_lim;		/* buffer obj limit */
	uint32_t	bc_flags;	/* SKMEM_BUFCTL_* flags */
	uint32_t	bc_idx;		/* buffer index within slab */
	volatile uint32_t bc_usecnt;	/* outstanding use */
};

#define SKMEM_BUFCTL_SHAREOK	0x1	/* supports sharing */

#define SKMEM_STACK_DEPTH	16	/* maximum audit stack depth */

#define SKMEM_CACHE_ALIGN	8	/* min guaranteed alignment */

/*
 * Alternative buffer control if SKM_MODE_AUDIT is set.
 */
struct skmem_bufctl_audit {
	SLIST_ENTRY(skmem_bufctl) bc_link;	/* bufctl linkage */
	void		*bc_addr;	/* buffer address */
	void		*bc_addrm;	/* mirrored buffer address */
	struct skmem_slab *bc_slab;	/* controlling slab */
	uint32_t	bc_flags;	/* SKMEM_BUFCTL_* flags */
	uint32_t	bc_idx;		/* buffer index within slab */
	volatile uint32_t bc_usecnt;	/* outstanding use */
	struct thread	*bc_thread;	/* thread doing transaction */
	uint32_t	bc_timestamp;	/* transaction time */
	uint32_t	bc_depth;	/* stack depth */
	void		*bc_stack[SKMEM_STACK_DEPTH];	/* stack */
};

/*
 * Buffer control hash bucket.
 */
struct skmem_bufctl_bkt {
	SLIST_HEAD(, skmem_bufctl) bcb_head;	/* bufctl allocated list */
};

/*
 * Slab.
 */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;	/* slab freelist linkage */
	struct skmem_cache *sl_cache;	/* controlling cache */
	void		*sl_base;	/* base of allocated memory */
	void		*sl_basem;	/* base of mirrored memory */
	struct sksegment *sl_seg;	/* backing segment */
	struct sksegment *sl_segm;	/* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;	/* bufctl free list */
	uint32_t	sl_refcnt;	/* outstanding allocations */
	uint32_t	sl_chunks;	/* # of buffers in slab */
};

#define SKMEM_SLAB_IS_PARTIAL(sl)					\
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

#define SKMEM_SLAB_MEMBER(sl, buf)					\
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
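
/*
 * Note that SKMEM_SLAB_MEMBER relies on unsigned wraparound: if buf
 * lies below sl_base, the subtraction wraps to a huge value that fails
 * the bound check, so a single comparison covers both ends of the slab.
 */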

/*
 * Magazine type.
 */
struct skmem_magtype {
	int		mt_magsize;	/* magazine size (# of objs) */
	int		mt_align;	/* magazine alignment */
	size_t		mt_minbuf;	/* all smaller bufs qualify */
	size_t		mt_maxbuf;	/* no larger bufs qualify */
	struct skmem_cache *mt_cache;	/* magazine cache */
	char		mt_cname[64];	/* magazine cache name */
};

/*
 * Magazine.
 */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag) mg_link;	/* magazine linkage */
	struct skmem_magtype *mg_magtype;	/* magazine type */
	void		*mg_round[1];	/* one or more objs */
};

#define SKMEM_MAG_SIZE(n)						\
	offsetof(struct skmem_mag, mg_round[n])
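
/*
 * A magazine is allocated with a variable number of trailing rounds,
 * so SKMEM_MAG_SIZE(n) yields the byte size of a magazine holding n
 * object pointers.  For example, on LP64 the header is two pointers
 * (mg_link and mg_magtype), so SKMEM_MAG_SIZE(8) == 16 + 8 * 8 == 80.
 */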

/*
 * Magazine depot.
 */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;	/* magazine list */
	uint32_t	ml_total;	/* number of magazines */
	uint32_t	ml_min;		/* min since last update */
	uint32_t	ml_reaplimit;	/* max reapable magazines */
	uint64_t	ml_alloc;	/* allocations from this list */
};

/*
 * Per-CPU cache structure.
 */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);
	struct skmem_mag *cp_loaded;	/* currently filled magazine */
	struct skmem_mag *cp_ploaded;	/* previously filled magazine */
	uint64_t	cp_alloc;	/* allocations from this cpu */
	uint64_t	cp_free;	/* frees to this cpu */
	int		cp_rounds;	/* # of objs in filled mag */
	int		cp_prounds;	/* # of objs in previous mag */
	int		cp_magsize;	/* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
/*
 * Object's region information.
 *
 * This info is provided to skmem_ctor_fn_t() to assist in the
 * construction of master and slave objects.  It is also provided
 * separately via skmem_cache_get_obj_info() when called on an object
 * that has been allocated from skmem_cache.  Information about the
 * slave object is available only at constructor time.
 */
struct skmem_obj_info {
	void		*oi_addr;	/* object address */
	struct skmem_bufctl *oi_bc;	/* buffer control (master) */
	uint32_t	oi_size;	/* actual object size */
	obj_idx_t	oi_idx_reg;	/* object idx within region */
	obj_idx_t	oi_idx_seg;	/* object idx within segment */
} __attribute__((__packed__));

/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used when freeing a batch of packets by the caller of
 * skmem_cache_batch_free().  Note that this requires the region's
 * object to be at least the size of struct skmem_obj, as we store
 * this information at the beginning of each object in the chain.
 */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj *mo_next;	/* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * not touched otherwise by skmem_cache when the object resides
	 * in the magazine.  By utilizing this space, we avoid having
	 * to allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info mo_info;	/* object's info */
	struct skmem_obj_info mo_minfo;	/* mirrored object's info */
};
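
/*
 * A minimal sketch of consuming a batch allocation (illustrative only;
 * it assumes a cache created with SKMEM_CR_BATCH, and "consume_obj" is
 * a hypothetical per-object handler).  The linkage overlays the head of
 * each object, so it must be read before the object is overwritten:
 *
 *	struct skmem_obj *list = NULL, *obj, *next;
 *	uint32_t n;
 *
 *	// n may be less than the 32 requested under SKMEM_NOSLEEP
 *	n = skmem_cache_batch_alloc(skm, &list, 32, SKMEM_NOSLEEP);
 *	for (obj = list; obj != NULL; obj = next) {
 *		next = obj->mo_next;
 *		consume_obj(obj);
 *	}
 */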

#define SKMEM_OBJ_ADDR(_oi)	(_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)	(_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)	(_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)	(_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)	(_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)	(_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi)						\
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))
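
/*
 * SKMEM_OBJ_ROFF assumes fixed-size objects laid out back to back in
 * the region: e.g. with 2048-byte objects, the object at region index 3
 * sits at region offset 2048 * 3 == 6144 bytes.
 */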

typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
typedef void (*skmem_dtor_fn_t)(void *, void *);
typedef void (*skmem_reclaim_fn_t)(void *);
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);

/*
 * Cache.
 */
struct skmem_cache {
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t	skm_mode;	/* cache mode flags */
	skmem_ctor_fn_t skm_ctor;	/* object constructor */
	skmem_dtor_fn_t skm_dtor;	/* object destructor */
	skmem_reclaim_fn_t skm_reclaim;	/* cache reclaim */
	void		*skm_private;	/* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);	/* protects depot layer */
	struct skmem_magtype *skm_magtype;	/* magazine type */
	struct skmem_maglist skm_full;	/* full magazines */
	struct skmem_maglist skm_empty;	/* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);	/* protects slab layer */
	skmem_slab_alloc_fn_t skm_slab_alloc;	/* slab allocate */
	skmem_slab_free_fn_t skm_slab_free;	/* slab free */
	size_t		skm_chunksize;	/* bufsize + alignment */
	size_t		skm_objsize;	/* actual obj size in slab */
	size_t		skm_slabsize;	/* size of a slab */
	size_t		skm_hash_initial;	/* initial hash table size */
	size_t		skm_hash_limit;	/* hash table size limit */
	size_t		skm_hash_shift;	/* get to interesting bits */
	size_t		skm_hash_mask;	/* hash table mask */
	struct skmem_bufctl_bkt *skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated slabs */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list; /* fully-allocated slabs ("empty" free list) */
	struct skmem_region *skm_region;	/* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t	skm_cpu_mag_size;	/* current magazine size */
	uint32_t	skm_cpu_mag_resize;	/* # of magazine resizes */
	uint32_t	skm_cpu_mag_purge;	/* # of magazine purges */
	uint32_t	skm_cpu_mag_reap;	/* # of magazine reaps */
	uint64_t	skm_depot_contention;	/* mutex contention count */
	uint64_t	skm_depot_contention_prev; /* previous snapshot */
	uint32_t	skm_depot_full;		/* # of full magazines */
	uint32_t	skm_depot_empty;	/* # of empty magazines */
	uint32_t	skm_depot_ws_zero;	/* # of working set flushes */
	uint32_t	skm_sl_rescale;		/* # of hash table rescales */
	uint32_t	skm_sl_create;		/* slab creates */
	uint32_t	skm_sl_destroy;		/* slab destroys */
	uint32_t	skm_sl_alloc;		/* slab layer allocations */
	uint32_t	skm_sl_free;		/* slab layer frees */
	uint32_t	skm_sl_partial;		/* # of partial slabs */
	uint32_t	skm_sl_empty;		/* # of empty slabs */
	uint64_t	skm_sl_alloc_fail;	/* total failed allocations */
	uint64_t	skm_sl_bufinuse;	/* total unfreed buffers */
	uint64_t	skm_sl_bufmax;		/* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;	/* cache linkage */
	char		skm_name[64];	/* cache name */
	uuid_t		skm_uuid;	/* cache uuid */
	size_t		skm_bufsize;	/* buffer size */
	size_t		skm_bufalign;	/* buffer alignment */
	size_t		skm_objalign;	/* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);	/* protects resizing */
	struct thread	*skm_rs_owner;	/* resize owner */
	uint32_t	skm_rs_busy;	/* prevent resizing */
	uint32_t	skm_rs_want;	/* # of threads blocked */
	struct skmem_cpu_cache skm_cpu_cache[1]
	    __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};

#define SKMEM_CACHE_SIZE(n)						\
	offsetof(struct skmem_cache, skm_cpu_cache[n])

#define SKMEM_CPU_CACHE(c)						\
	((struct skmem_cpu_cache *)((void *)((char *)(c) +		\
	SKMEM_CACHE_SIZE(cpu_number()))))
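
/*
 * The skm_cpu_cache[] array trails the cache structure with one slot
 * per CPU: SKMEM_CACHE_SIZE(n) is the byte size of a cache with n
 * per-CPU slots, and SKMEM_CPU_CACHE(c) indexes the slot of the CPU
 * the caller is currently running on.  Illustrative sketch only:
 *
 *	struct skmem_cpu_cache *ccp = SKMEM_CPU_CACHE(skm);
 *
 *	lck_mtx_lock(&ccp->cp_lock);
 *	// ... operate on this CPU's loaded magazine ...
 *	lck_mtx_unlock(&ccp->cp_lock);
 */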

/* valid values for skm_mode, set only by skmem_cache_create() */
#define SKM_MODE_NOMAGAZINES	0x00000001	/* disable magazines layer */
#define SKM_MODE_AUDIT		0x00000002	/* audit transactions */
#define SKM_MODE_NOREDIRECT	0x00000004	/* unaffected by defunct */
#define SKM_MODE_BATCH		0x00000008	/* supports batch alloc/free */
#define SKM_MODE_DYNAMIC	0x00000010	/* enable magazine resizing */
#define SKM_MODE_CLEARONFREE	0x00000020	/* zero-out upon slab free */
#define SKM_MODE_PSEUDO		0x00000040	/* external backing store */
#define SKM_MODE_RECLAIM	0x00000080	/* aggressive memory reclaim */

#define SKM_MODE_BITS							\
	"\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC"	\
	"\06CLEARONFREE\07PSEUDO\10RECLAIM"
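
/*
 * SKM_MODE_BITS is a BSD-style "%b" format string: the leading \020
 * selects hexadecimal output, and each subsequent \NN (octal, 1-based
 * bit position) is followed by that bit's name.  For example, a
 * skm_mode of 0x9 would render as "0x9<NOMAGAZINES,BATCH>".
 */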

/*
 * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK is valid only if
 * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
 * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
 * and that the cache layer would handle the retries rather than blocking
 * inside the region allocator.
 */
#define SKMEM_SLEEP	0x0	/* can block for memory; won't fail */
#define SKMEM_NOSLEEP	0x1	/* cannot block for memory; may fail */
#define SKMEM_PANIC	0x2	/* panic upon allocation failure */
#define SKMEM_FAILOK	0x4	/* can fail for blocking alloc */

/* valid flag values for skmem_cache_create() */
#define SKMEM_CR_NOMAGAZINES	0x1	/* disable magazines layer */
#define SKMEM_CR_BATCH		0x2	/* support batch alloc/free */
#define SKMEM_CR_DYNAMIC	0x4	/* enable magazine resizing */
#define SKMEM_CR_CLEARONFREE	0x8	/* zero-out upon slab free */
#define SKMEM_CR_RECLAIM	0x10	/* aggressive memory reclaim */

__BEGIN_DECLS
/*
 * Given a buffer control, add a use count to it.
 */
__attribute__((always_inline))
static inline void
skmem_bufctl_use(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old + 1;
		VERIFY(new != 0);
		ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});
}

/*
 * Given a buffer control, remove a use count from it (returns new value).
 */
__attribute__((always_inline))
static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old - 1;
		VERIFY(old != 0);
		ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});

	return new;
}
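
/*
 * Every skmem_bufctl_use() must be balanced by a skmem_bufctl_unuse();
 * counts above one are permitted only with SKMEM_BUFCTL_SHAREOK.  A
 * minimal sketch of the pairing (illustrative only):
 *
 *	skmem_bufctl_use(bc);		// take a reference
 *	// ... buffer is in use ...
 *	if (skmem_bufctl_unuse(bc) == 0) {
 *		// last user dropped; buffer may go back to its slab
 *	}
 */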

extern void skmem_cache_pre_init(void);
extern void skmem_cache_init(void);
extern void skmem_cache_fini(void);
extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
    skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
    struct skmem_region *, uint32_t);
extern void skmem_cache_destroy(struct skmem_cache *);
extern void *skmem_cache_alloc(struct skmem_cache *, uint32_t);
extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
    struct skmem_obj **list, uint32_t, uint32_t);
extern void skmem_cache_free(struct skmem_cache *, void *);
extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
extern void skmem_cache_reap(void);
extern void skmem_reap_caches(boolean_t);
extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
    struct skmem_obj_info *, struct skmem_obj_info *);
extern uint32_t skmem_cache_magazine_max(uint32_t);
extern boolean_t skmem_allow_magazines(void);
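
/*
 * Typical cache lifecycle, as a minimal sketch (illustrative only:
 * "my_obj" is a hypothetical type, the NULL ctor/dtor/reclaim and
 * region arguments are placeholders, and actual callers pass values
 * appropriate to their backing region and flags):
 *
 *	struct skmem_cache *skm;
 *	void *obj;
 *
 *	skm = skmem_cache_create("my_obj", sizeof(struct my_obj), 0,
 *	    NULL, NULL, NULL, NULL, NULL, 0);
 *	obj = skmem_cache_alloc(skm, SKMEM_SLEEP);
 *	// ... use obj ...
 *	skmem_cache_free(skm, obj);
 *	skmem_cache_destroy(skm);
 */
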
#if (DEVELOPMENT || DEBUG)
extern void skmem_cache_test_start(uint32_t);
extern void skmem_cache_test_stop(void);
#endif /* (DEVELOPMENT || DEBUG) */
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */