/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
#define _SKYWALK_MEM_SKMEMCACHEVAR_H

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/os_channel_private.h>
#include <kern/cpu_number.h>

/*
 * Buffer control.
 */
struct skmem_bufctl {
        SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
        void *bc_addr;                          /* buffer obj address */
        void *bc_addrm;                         /* mirrored buffer obj addr */
        struct skmem_slab *bc_slab;             /* controlling slab */
        uint32_t bc_lim;                        /* buffer obj limit */
        uint32_t bc_flags;                      /* SKMEM_BUFCTL_* flags */
        uint32_t bc_idx;                        /* buffer index within slab */
        volatile uint32_t bc_usecnt;            /* outstanding use */
};

#define SKMEM_BUFCTL_SHAREOK    0x1     /* supports sharing */

#define SKMEM_STACK_DEPTH       16      /* maximum audit stack depth */

#define SKMEM_CACHE_ALIGN       8       /* min guaranteed alignment */

/*
 * Alternative buffer control if SKM_MODE_AUDIT is set.
 */
struct skmem_bufctl_audit {
        SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
        void *bc_addr;                          /* buffer address */
        void *bc_addrm;                         /* mirrored buffer address */
        struct skmem_slab *bc_slab;             /* controlling slab */
        uint32_t bc_flags;                      /* SKMEM_BUFCTL_* flags */
        uint32_t bc_idx;                        /* buffer index within slab */
        volatile uint32_t bc_usecnt;            /* outstanding use */
        struct thread *bc_thread;               /* thread doing transaction */
        uint32_t bc_timestamp;                  /* transaction time */
        uint32_t bc_depth;                      /* stack depth */
        void *bc_stack[SKMEM_STACK_DEPTH];      /* stack */
};

/*
 * Buffer control hash bucket.
 */
struct skmem_bufctl_bkt {
        SLIST_HEAD(, skmem_bufctl) bcb_head;    /* bufctl allocated list */
};

/*
 * Slab.
 */
struct skmem_slab {
        TAILQ_ENTRY(skmem_slab) sl_link;        /* slab freelist linkage */
        struct skmem_cache *sl_cache;           /* controlling cache */
        void *sl_base;                          /* base of allocated memory */
        void *sl_basem;                         /* base of mirrored memory */
        struct sksegment *sl_seg;               /* backing segment */
        struct sksegment *sl_segm;              /* backing mirrored segment */
        SLIST_HEAD(, skmem_bufctl) sl_head;     /* bufctl free list */
        uint32_t sl_refcnt;                     /* outstanding allocations */
        uint32_t sl_chunks;                     /* # of buffers in slab */
};

#define SKMEM_SLAB_IS_PARTIAL(sl) \
        ((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

#define SKMEM_SLAB_MEMBER(sl, buf) \
        (((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
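
/*
 * Note: the subtraction above is performed on unsigned (size_t) values,
 * so a buffer address below sl_base wraps around to a large value and
 * fails the size comparison; no separate lower-bound check is needed.
 */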

/*
 * Magazine type.
 */
struct skmem_magtype {
        int mt_magsize;                         /* magazine size (# of objs) */
        int mt_align;                           /* magazine alignment */
        size_t mt_minbuf;                       /* all smaller bufs qualify */
        size_t mt_maxbuf;                       /* no larger bufs qualify */
        struct skmem_cache *mt_cache;           /* magazine cache */
        char mt_cname[64];                      /* magazine cache name */
};

/*
 * Magazine.
 */
struct skmem_mag {
        SLIST_ENTRY(skmem_mag) mg_link;         /* magazine linkage */
        struct skmem_magtype *mg_magtype;       /* magazine type */
        void *mg_round[1];                      /* one or more objs */
};

#define SKMEM_MAG_SIZE(n) \
        offsetof(struct skmem_mag, mg_round[n])
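
/*
 * Note: mg_round[] is declared with a single element but is sized at
 * allocation time; SKMEM_MAG_SIZE(n) therefore uses offsetof() rather
 * than sizeof() to compute the space needed for a magazine holding n
 * objects, e.g. SKMEM_MAG_SIZE(16) covers the header plus 16 pointers.
 */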

/*
 * Magazine depot.
 */
struct skmem_maglist {
        SLIST_HEAD(, skmem_mag) ml_list;        /* magazine list */
        uint32_t ml_total;                      /* number of magazines */
        uint32_t ml_min;                        /* min since last update */
        uint32_t ml_reaplimit;                  /* max reapable magazines */
        uint64_t ml_alloc;                      /* allocations from this list */
};

/*
 * Per-CPU cache structure.
 */
struct skmem_cpu_cache {
        decl_lck_mtx_data(, cp_lock);
        struct skmem_mag *cp_loaded;            /* currently filled magazine */
        struct skmem_mag *cp_ploaded;           /* previously filled magazine */
        uint64_t cp_alloc;                      /* allocations from this cpu */
        uint64_t cp_free;                       /* frees to this cpu */
        int cp_rounds;                          /* # of objs in filled mag */
        int cp_prounds;                         /* # of objs in previous mag */
        int cp_magsize;                         /* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));

/*
 * Object's region information.
 *
 * This info is provided to skmem_ctor_fn_t() to assist in constructing
 * the master and slave objects.  It is also provided separately via
 * skmem_cache_get_obj_info() when called on an object that has been
 * allocated from skmem_cache.  Information about the slave object is
 * available only at constructor time.
 */
struct skmem_obj_info {
        void *oi_addr;                          /* object address */
        struct skmem_bufctl *oi_bc;             /* buffer control (master) */
        uint32_t oi_size;                       /* actual object size */
        obj_idx_t oi_idx_reg;                   /* object idx within region */
        obj_idx_t oi_idx_seg;                   /* object idx within segment */
} __attribute__((__packed__));

/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used by callers of skmem_cache_batch_free() when freeing
 * a batch of objects.  Note that this requires the region's object to
 * be at least the size of struct skmem_obj, as we store this
 * information at the beginning of each object in the chain.
 */
struct skmem_obj {
        /*
         * Given that we overlay this structure on top of whatever
         * structure the object represents, the constructor must
         * ensure that it reserves at least the size of a pointer
         * at the top for the linkage.
         */
        struct skmem_obj *mo_next;              /* next object in the list */
        /*
         * The following are used only for raw (unconstructed) objects
         * coming out of the slab layer during allocations.  They are
         * not touched otherwise by skmem_cache when the object resides
         * in the magazine.  By utilizing this space, we avoid having
         * to allocate temporary storage elsewhere.
         */
        struct skmem_obj_info mo_info;          /* object's info */
        struct skmem_obj_info mo_minfo;         /* mirrored object's info */
};
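
/*
 * Illustrative sketch (hypothetical caller; not part of this header):
 * with a cache created using SKMEM_CR_BATCH, objects returned by
 * skmem_cache_batch_alloc() come back chained through mo_next and may
 * be handed back as a chain to skmem_cache_batch_free(), e.g.
 *
 *      struct skmem_obj *list = NULL, *obj;
 *      uint32_t n;
 *
 *      n = skmem_cache_batch_alloc(skm, &list, 32, SKMEM_NOSLEEP);
 *      for (obj = list; obj != NULL; obj = obj->mo_next)
 *              consume(obj);
 *      skmem_cache_batch_free(skm, list);
 *
 * where "skm", "consume" and the request size are hypothetical, and n
 * is the number of objects actually allocated (which may be less than
 * the amount requested).
 */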

#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi) \
        ((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))

typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
typedef void (*skmem_dtor_fn_t)(void *, void *);
typedef void (*skmem_reclaim_fn_t)(void *);
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);
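
/*
 * Illustrative sketch (hypothetical object type and names; not part of
 * this header): a constructor handed to skmem_cache_create() receives
 * the object's info, the mirrored (slave) object's info when one
 * exists, the opaque argument registered at cache creation time, and
 * the SKMEM_* allocation flags; a nonzero return value is assumed here
 * to fail the allocation, e.g.
 *
 *      static int
 *      foo_ctor(struct skmem_obj_info *oi, struct skmem_obj_info *oim,
 *          void *arg, uint32_t skmflag)
 *      {
 *              struct foo *f = SKMEM_OBJ_ADDR(oi);
 *
 *              bzero(f, SKMEM_OBJ_SIZE(oi));
 *              return 0;
 *      }
 */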

/*
 * Cache.
 */
struct skmem_cache {
        /*
         * Commonly-accessed elements during alloc and free.
         */
        uint32_t skm_mode;                      /* cache mode flags */
        skmem_ctor_fn_t skm_ctor;               /* object constructor */
        skmem_dtor_fn_t skm_dtor;               /* object destructor */
        skmem_reclaim_fn_t skm_reclaim;         /* cache reclaim */
        void *skm_private;                      /* opaque arg to callbacks */

        /*
         * Depot.
         */
        decl_lck_mtx_data(, skm_dp_lock);       /* protects depot layer */
        struct skmem_magtype *skm_magtype;      /* magazine type */
        struct skmem_maglist skm_full;          /* full magazines */
        struct skmem_maglist skm_empty;         /* empty magazines */

        /*
         * Slab.
         */
        decl_lck_mtx_data(, skm_sl_lock);       /* protects slab layer */
        skmem_slab_alloc_fn_t skm_slab_alloc;   /* slab allocate */
        skmem_slab_free_fn_t skm_slab_free;     /* slab free */
        size_t skm_chunksize;                   /* bufsize + alignment */
        size_t skm_objsize;                     /* actual obj size in slab */
        size_t skm_slabsize;                    /* size of a slab */
        size_t skm_hash_initial;                /* initial hash table size */
        size_t skm_hash_limit;                  /* hash table size limit */
        size_t skm_hash_shift;                  /* get to interesting bits */
        size_t skm_hash_mask;                   /* hash table mask */
        struct skmem_bufctl_bkt *skm_hash_table; /* alloc'd buffer htable */
        TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated slabs */
        TAILQ_HEAD(, skmem_slab) skm_sl_empty_list; /* fully-allocated (no free bufs) */
        struct skmem_region *skm_region;        /* region source for slabs */

        /*
         * Statistics.
         */
        uint32_t skm_cpu_mag_size;              /* current magazine size */
        uint32_t skm_cpu_mag_resize;            /* # of magazine resizes */
        uint32_t skm_cpu_mag_purge;             /* # of magazine purges */
        uint32_t skm_cpu_mag_reap;              /* # of magazine reaps */
        uint64_t skm_depot_contention;          /* mutex contention count */
        uint64_t skm_depot_contention_prev;     /* previous snapshot */
        uint32_t skm_depot_full;                /* # of full magazines */
        uint32_t skm_depot_empty;               /* # of empty magazines */
        uint32_t skm_depot_ws_zero;             /* # of working set flushes */
        uint32_t skm_sl_rescale;                /* # of hash table rescales */
        uint32_t skm_sl_create;                 /* slab creates */
        uint32_t skm_sl_destroy;                /* slab destroys */
        uint32_t skm_sl_alloc;                  /* slab layer allocations */
        uint32_t skm_sl_free;                   /* slab layer frees */
        uint32_t skm_sl_partial;                /* # of partial slabs */
        uint32_t skm_sl_empty;                  /* # of empty slabs */
        uint64_t skm_sl_alloc_fail;             /* total failed allocations */
        uint64_t skm_sl_bufinuse;               /* total unfreed buffers */
        uint64_t skm_sl_bufmax;                 /* max buffers ever */

        /*
         * Cache properties.
         */
        TAILQ_ENTRY(skmem_cache) skm_link;      /* cache linkage */
        char skm_name[64];                      /* cache name */
        uuid_t skm_uuid;                        /* cache uuid */
        size_t skm_bufsize;                     /* buffer size */
        size_t skm_bufalign;                    /* buffer alignment */
        size_t skm_objalign;                    /* object alignment */

        /*
         * CPU layer, aligned at (maximum) cache line boundary.
         */
        decl_lck_mtx_data(, skm_rs_lock);       /* protects resizing */
        struct thread *skm_rs_owner;            /* resize owner */
        uint32_t skm_rs_busy;                   /* prevent resizing */
        uint32_t skm_rs_want;                   /* # of threads blocked */
        struct skmem_cpu_cache skm_cpu_cache[1]
            __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};

#define SKMEM_CACHE_SIZE(n) \
        offsetof(struct skmem_cache, skm_cpu_cache[n])

#define SKMEM_CPU_CACHE(c) \
        ((struct skmem_cpu_cache *)((void *)((char *)(c) + \
        SKMEM_CACHE_SIZE(cpu_number()))))
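
/*
 * SKMEM_CPU_CACHE(c) resolves to the per-CPU cache of the CPU the caller
 * is currently running on: since skm_cpu_cache[] is the last member of
 * struct skmem_cache, SKMEM_CACHE_SIZE(cpu_number()) is exactly the byte
 * offset of that CPU's slot from the start of the cache.
 */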

/* valid values for skm_mode, set only by skmem_cache_create() */
#define SKM_MODE_NOMAGAZINES    0x00000001 /* disable magazines layer */
#define SKM_MODE_AUDIT          0x00000002 /* audit transactions */
#define SKM_MODE_NOREDIRECT     0x00000004 /* unaffected by defunct */
#define SKM_MODE_BATCH          0x00000008 /* supports batch alloc/free */
#define SKM_MODE_DYNAMIC        0x00000010 /* enable magazine resizing */
#define SKM_MODE_CLEARONFREE    0x00000020 /* zero-out upon slab free */
#define SKM_MODE_PSEUDO         0x00000040 /* external backing store */
#define SKM_MODE_RECLAIM        0x00000080 /* aggressive memory reclaim */

#define SKM_MODE_BITS \
        "\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC" \
        "\06CLEARONFREE\07PSEUDO\10RECLAIM"

/*
 * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK is valid only if
 * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
 * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
 * and that the cache layer would handle the retries rather than blocking
 * inside the region allocator.
 */
#define SKMEM_SLEEP     0x0     /* can block for memory; won't fail */
#define SKMEM_NOSLEEP   0x1     /* cannot block for memory; may fail */
#define SKMEM_PANIC     0x2     /* panic upon allocation failure */
#define SKMEM_FAILOK    0x4     /* can fail for blocking alloc */

/* valid flag values for skmem_cache_create() */
#define SKMEM_CR_NOMAGAZINES    0x1     /* disable magazines layer */
#define SKMEM_CR_BATCH          0x2     /* support batch alloc/free */
#define SKMEM_CR_DYNAMIC        0x4     /* enable magazine resizing */
#define SKMEM_CR_CLEARONFREE    0x8     /* zero-out upon slab free */
#define SKMEM_CR_RECLAIM        0x10    /* aggressive memory reclaim */

__BEGIN_DECLS
/*
 * Given a buffer control, add a use count to it.
 */
__attribute__((always_inline))
static inline void
skmem_bufctl_use(struct skmem_bufctl *bc)
{
        uint32_t old, new;

        os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
                new = old + 1;
                VERIFY(new != 0);
                ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
        });
}

/*
 * Given a buffer control, remove a use count from it (returns new value).
 */
__attribute__((always_inline))
static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl *bc)
{
        uint32_t old, new;

        os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
                new = old - 1;
                VERIFY(old != 0);
                ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
        });

        return new;
}
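
/*
 * A return value of 0 from skmem_bufctl_unuse() means the caller has
 * just dropped the last outstanding use of the buffer.
 */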

extern void skmem_cache_pre_init(void);
extern void skmem_cache_init(void);
extern void skmem_cache_fini(void);
extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
    skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
    struct skmem_region *, uint32_t);
extern void skmem_cache_destroy(struct skmem_cache *);
extern void *skmem_cache_alloc(struct skmem_cache *, uint32_t);
extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
    struct skmem_obj **list, uint32_t, uint32_t);
extern void skmem_cache_free(struct skmem_cache *, void *);
extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
extern void skmem_cache_reap(void);
extern void skmem_reap_caches(boolean_t);
extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
    struct skmem_obj_info *, struct skmem_obj_info *);
extern uint32_t skmem_cache_magazine_max(uint32_t);
extern boolean_t skmem_allow_magazines(void);
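
/*
 * Illustrative usage sketch (hypothetical names; the two size_t arguments
 * to skmem_cache_create() are taken to be the buffer size and alignment,
 * matching skm_bufsize and skm_bufalign above):
 *
 *      skm = skmem_cache_create("foo", sizeof (struct foo),
 *          sizeof (uint64_t), foo_ctor, foo_dtor, NULL, NULL, region,
 *          SKMEM_CR_BATCH);
 *      obj = skmem_cache_alloc(skm, SKMEM_NOSLEEP);
 *      if (obj != NULL)
 *              skmem_cache_free(skm, obj);
 *      skmem_cache_destroy(skm);
 */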
#if (DEVELOPMENT || DEBUG)
extern void skmem_cache_test_start(uint32_t);
extern void skmem_cache_test_stop(void);
#endif /* (DEVELOPMENT || DEBUG) */
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */