| 1 | /* |
| 2 | * Copyright (c) 2016-2022 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #ifndef _SKYWALK_MEM_SKMEMREGIONVAR_H |
| 30 | #define _SKYWALK_MEM_SKMEMREGIONVAR_H |
| 31 | |
| 32 | #ifdef BSD_KERNEL_PRIVATE |
| 33 | #include <skywalk/core/skywalk_var.h> |
| 34 | #include <skywalk/os_nexus_private.h> |
| 35 | |
| 36 | /* |
| 37 | * Segment types. |
| 38 | */ |
| 39 | typedef enum { |
| 40 | SKSEG_TYPE_INVALID = 0, |
| 41 | SKSEG_TYPE_ALLOC, /* allocated; segment is in skr_hash_table */ |
| 42 | SKSEG_TYPE_FREE, /* free; segment is in skr_seg_free */ |
| 43 | SKSEG_TYPE_DESTROYED /* in process of being destroyed */ |
| 44 | } sksegment_type_t; |
| 45 | |
| 46 | /* |
| 47 | * Segment memory states. |
| 48 | */ |
| 49 | typedef enum { |
| 50 | SKSEG_STATE_INVALID = 0, |
| 51 | SKSEG_STATE_DETACHED, /* not backed by an IOBMD */ |
| 52 | SKSEG_STATE_MAPPED, /* mapped (IOBMD non-volatile) */ |
| 53 | SKSEG_STATE_MAPPED_WIRED, /* mapped (IOBMD non-volatile+wired) */ |
| 54 | } sksegment_state_t; |
| 55 | |
| 56 | struct skmem_region; |
| 57 | |
| 58 | /* |
| 59 | * Segment. |
| 60 | * |
| 61 | * Segments that are available for use can be found in the doubly-linked |
| 62 | * list (skr_seg_free) as well as the red-black tree (skr_seg_tfree). |
| 63 | * The latter is used to facilitate finding a segment by its index, which |
| 64 | * is required when allocating a segment from a mirrored region. |
| 65 | * |
| 66 | * Allocated segments are inserted into the allocated-address hash chain; |
| 67 | * they don't exist in any tree at that point. |
| 68 | */ |
| 69 | struct sksegment { |
| 70 | TAILQ_ENTRY(sksegment) sg_link; /* list linkage; tested by SKSEGMENT_IN_FREELIST() */ |
| 71 | RB_ENTRY(sksegment) sg_node; /* node in the free tree (skr_seg_tfree) */ |
| 72 | struct skmem_region *sg_region; /* controlling region */ |
| 73 | |
| 74 | /* |
| 75 | * If attached to an IOBMD, sg_{start,end} will be valid. |
| 76 | */ |
| 77 | IOSKMemoryBufferRef sg_md; /* backing IOBMD */ |
| 78 | mach_vm_address_t sg_start; /* start address (inclusive) */ |
| 79 | mach_vm_address_t sg_end; /* end address (exclusive) */ |
| 80 | |
| 81 | uint32_t sg_index; /* index in skr_seg[] */ |
| 82 | sksegment_type_t sg_type; /* segment type */ |
| 83 | sksegment_state_t sg_state; /* segment state */ |
| 84 | }; |
| 85 | |
| 86 | #define SKSEGMENT_IN_FREELIST(_sg) /* nonzero if either sg_link pointer is non-NULL */ \ |
| 87 | ((_sg)->sg_link.tqe_next != NULL || \ |
| 88 | (_sg)->sg_link.tqe_prev != NULL) |
| 89 | |
| 90 | /* |
| 91 | * Segment hash bucket. |
| 92 | */ |
| 93 | struct sksegment_bkt { |
| 94 | TAILQ_HEAD(, sksegment) sgb_head; /* chain of allocated sksegments in this bucket */ |
| 95 | }; |
| 96 | |
| 97 | /* |
| 98 | * Region IDs. |
| 99 | * |
| 100 | * When adding or removing regions, adjust the templates in skmem.c |
| 101 | * accordingly. Do not reorder regions without making the appropriate |
| 102 | * changes in the code that relies on the existing arena layout. |
| 103 | */ |
| 104 | typedef enum { |
| 105 | /* |
| 106 | * The following are user task mappable. |
| 107 | * |
| 108 | * XXX: When adding new ones, ensure that they get added before |
| 109 | * SKMEM_REGION_GUARD_TAIL, and make the appropriate changes in |
| 110 | * skmem_region_init(). |
| 111 | */ |
| 112 | SKMEM_REGION_GUARD_HEAD = 0, /* leading guard page(s) */ |
| 113 | SKMEM_REGION_SCHEMA, /* channel layout */ |
| 114 | SKMEM_REGION_RING, /* rings */ |
| 115 | SKMEM_REGION_BUF_DEF, /* Default rx/tx buffer */ |
| 116 | SKMEM_REGION_BUF_LARGE, /* Large rx/tx buffer */ |
| 117 | SKMEM_REGION_RXBUF_DEF, /* Default rx only buffers */ |
| 118 | SKMEM_REGION_RXBUF_LARGE, /* Large rx only buffers */ |
| 119 | SKMEM_REGION_TXBUF_DEF, /* Default tx only buffers */ |
| 120 | SKMEM_REGION_TXBUF_LARGE, /* Large tx only buffers */ |
| 121 | SKMEM_REGION_UMD, /* userland metadata */ |
| 122 | SKMEM_REGION_TXAUSD, /* tx/alloc/event user slot descriptors */ |
| 123 | SKMEM_REGION_RXFUSD, /* rx/free user slot descriptors */ |
| 124 | SKMEM_REGION_UBFT, /* userland buflet metadata */ |
| 125 | SKMEM_REGION_USTATS, /* user-mappable statistics */ |
| 126 | SKMEM_REGION_FLOWADV, /* flow advisories */ |
| 127 | SKMEM_REGION_NEXUSADV, /* nexus advisories */ |
| 128 | SKMEM_REGION_SYSCTLS, /* sysctls */ |
| 129 | SKMEM_REGION_GUARD_TAIL, /* trailing guard page(s) */ |
| 130 | |
| 131 | /* |
| 132 | * The following are NOT user task mappable. |
| 133 | */ |
| 134 | SKMEM_REGION_KMD, /* rx/tx kernel metadata */ |
| 135 | SKMEM_REGION_RXKMD, /* rx only kernel metadata */ |
| 136 | SKMEM_REGION_TXKMD, /* tx only kernel metadata */ |
| 137 | SKMEM_REGION_KBFT, /* rx/tx kernel buflet metadata */ |
| 138 | SKMEM_REGION_RXKBFT, /* rx only kernel buflet metadata */ |
| 139 | SKMEM_REGION_TXKBFT, /* tx only kernel buflet metadata */ |
| 140 | SKMEM_REGION_TXAKSD, /* tx/alloc/event kernel slot descriptors */ |
| 141 | SKMEM_REGION_RXFKSD, /* rx/free kernel slot descriptors */ |
| 142 | SKMEM_REGION_KSTATS, /* kernel statistics snapshot */ |
| 143 | SKMEM_REGION_INTRINSIC, /* intrinsic objects */ |
| 144 | |
| 145 | SKMEM_REGIONS /* number of region IDs above (max); not itself a region */ |
| 146 | } skmem_region_id_t; |
| 147 | |
| 148 | #define SKMEM_PP_REGIONS 14 /* # of entries in skmem_pp_region_ids[] */ |
| 149 | extern const skmem_region_id_t skmem_pp_region_ids[SKMEM_PP_REGIONS]; |
| 150 | |
| 151 | /* |
| 152 | * Region parameters structure. Based on requested object parameters, |
| 153 | * skmem_region_params_config() will compute the segment parameters as |
| 154 | * well as the configured object parameters. |
| 155 | */ |
| 156 | struct skmem_region_params { |
| 157 | /* |
| 158 | * Region parameters. Fields tagged (i) are requested/input values; fields tagged (o) are configured/output values. |
| 159 | */ |
| 160 | const char *srp_name; /* (i) region name */ |
| 161 | skmem_region_id_t srp_id; /* (i) region identifier */ |
| 162 | uint32_t srp_cflags; /* (i) region creation flags */ |
| 163 | uint32_t srp_r_seg_size; /* (i) requested seg size */ |
| 164 | uint32_t srp_c_seg_size; /* (o) configured seg size */ |
| 165 | uint32_t srp_seg_cnt; /* (o) number of segments */ |
| 166 | |
| 167 | /* |
| 168 | * Object parameters. |
| 169 | */ |
| 170 | uint32_t srp_r_obj_size; /* (i) requested obj size */ |
| 171 | uint32_t srp_r_obj_cnt; /* (i) requested obj count */ |
| 172 | uint32_t srp_c_obj_size; /* (o) configured obj size */ |
| 173 | uint32_t srp_c_obj_cnt; /* (o) configured obj count */ |
| 174 | size_t srp_align; /* (i) object alignment */ |
| 175 | |
| 176 | /* |
| 177 | * SKMEM_REGION_{UMD,KMD} specific parameters. |
| 178 | */ |
| 179 | nexus_meta_type_t srp_md_type; /* (i) metadata type */ |
| 180 | nexus_meta_subtype_t srp_md_subtype; /* (i) metadata subtype */ |
| 181 | uint16_t srp_max_frags; /* (i) max frags per packet */ |
| 182 | }; |
| 183 | |
| 184 | typedef void (*sksegment_ctor_fn_t)(struct sksegment *, |
| 185 | IOSKMemoryBufferRef, void *); /* type of skr_seg_ctor (segment constructor) */ |
| 186 | typedef void (*sksegment_dtor_fn_t)(struct sksegment *, |
| 187 | IOSKMemoryBufferRef, void *); /* type of skr_seg_dtor (segment destructor) */ |
| 188 | |
| 189 | /* |
| 190 | * Region. |
| 191 | */ |
| 192 | #define SKR_MAX_CACHES 2 /* max # of caches allowed on a region */ |
| 193 | |
| 194 | struct skmem_region { |
| 195 | decl_lck_mtx_data(, skr_lock); /* region lock (see SKR_LOCK/SKR_UNLOCK) */ |
| 196 | |
| 197 | /* |
| 198 | * Statistics. |
| 199 | */ |
| 200 | uint64_t skr_meminuse; /* memory in use */ |
| 201 | uint64_t skr_w_meminuse; /* wired memory in use */ |
| 202 | uint64_t skr_memtotal; /* total memory in region */ |
| 203 | uint64_t skr_alloc; /* number of allocations */ |
| 204 | uint64_t skr_free; /* number of frees */ |
| 205 | uint32_t skr_seginuse; /* total unfreed segments */ |
| 206 | uint32_t skr_rescale; /* # of hash table rescales */ |
| 207 | |
| 208 | /* |
| 209 | * Region properties. |
| 210 | */ |
| 211 | struct skmem_region_params skr_params; /* region parameters */ |
| 212 | #define skr_id skr_params.srp_id /* region ID */ |
| 213 | #define skr_cflags skr_params.srp_cflags /* creation flags */ |
| 214 | TAILQ_ENTRY(skmem_region) skr_link; /* skmem_region linkage */ |
| 215 | char skr_name[64]; /* region name */ |
| 216 | uuid_t skr_uuid; /* region uuid */ |
| 217 | uint32_t skr_mode; /* skmem_region mode flags (SKR_MODE_*) */ |
| 218 | uint32_t skr_size; /* total region size */ |
| 219 | IOSKMemoryBufferSpec skr_bufspec; /* IOSKMemoryBuffer spec */ |
| 220 | IOSKRegionSpec skr_regspec; /* IOSKRegion spec */ |
| 221 | IOSKRegionRef skr_reg; /* backing IOSKRegion */ |
| 222 | struct zone *skr_zreg; /* backing zone (pseudo mode) */ |
| 223 | void *skr_private; /* opaque arg to callbacks */ |
| 224 | struct skmem_cache *skr_cache[SKR_MAX_CACHES]; /* client slab/cache layers */ |
| 225 | |
| 226 | /* |
| 227 | * Objects. |
| 228 | */ |
| 229 | #define skr_r_obj_size skr_params.srp_r_obj_size /* requested obj size */ |
| 230 | #define skr_r_obj_cnt skr_params.srp_r_obj_cnt /* requested obj count */ |
| 231 | #define skr_c_obj_size skr_params.srp_c_obj_size /* configured obj size */ |
| 232 | #define skr_c_obj_cnt skr_params.srp_c_obj_cnt /* configured obj count */ |
| 233 | #define skr_align skr_params.srp_align /* object alignment */ |
| 234 | #define skr_md_type skr_params.srp_md_type /* metadata type */ |
| 235 | #define skr_md_subtype skr_params.srp_md_subtype /* metadata subtype */ |
| 236 | #define skr_max_frags skr_params.srp_max_frags /* max number of buflets */ |
| 237 | |
| 238 | /* |
| 239 | * Segment. |
| 240 | */ |
| 241 | sksegment_ctor_fn_t skr_seg_ctor; /* segment constructor */ |
| 242 | sksegment_dtor_fn_t skr_seg_dtor; /* segment destructor */ |
| 243 | uint32_t skr_seg_objs; /* # of objects per segment */ |
| 244 | #define skr_seg_size skr_params.srp_c_seg_size /* configured segment size */ |
| 245 | #define skr_seg_max_cnt skr_params.srp_seg_cnt /* max # of segments */ |
| 246 | uint32_t skr_seg_bmap_len; /* # of skr_seg_bmap */ |
| 247 | bitmap_t *skr_seg_bmap; /* segment bitmaps */ |
| 248 | uint32_t skr_seg_free_cnt; /* # of free segments */ |
| 249 | uint32_t skr_hash_initial; /* initial hash table size */ |
| 250 | uint32_t skr_hash_limit; /* hash table size limit */ |
| 251 | uint32_t skr_hash_shift; /* get to interesting bits */ |
| 252 | uint32_t skr_hash_mask; /* hash table mask */ |
| 253 | struct sksegment_bkt *skr_hash_table; /* alloc'd segment htable */ |
| 254 | TAILQ_HEAD(segfreehead, sksegment) skr_seg_free; /* free segment list */ |
| 255 | RB_HEAD(segtfreehead, sksegment) skr_seg_tfree; /* free segment tree */ |
| 256 | uint32_t skr_seg_waiters; /* # of waiter threads */ |
| 257 | |
| 258 | /* |
| 259 | * Region. |
| 260 | */ |
| 261 | uint32_t skr_refcnt; /* reference count */ |
| 262 | |
| 263 | /* |
| 264 | * Mirror. |
| 265 | */ |
| 266 | struct skmem_region *skr_mirror; /* associated mirror region (see skmem_region_mirror()) */ |
| 267 | }; |
| 268 | |
| 269 | #define SKR_LOCK(_skr) /* lock the region mutex (skr_lock) */ \ |
| 270 | lck_mtx_lock(&(_skr)->skr_lock) |
| 271 | #define SKR_LOCK_ASSERT_HELD(_skr) /* assert skr_lock is owned */ \ |
| 272 | LCK_MTX_ASSERT(&(_skr)->skr_lock, LCK_MTX_ASSERT_OWNED) |
| 273 | #define SKR_LOCK_ASSERT_NOTHELD(_skr) /* assert skr_lock is not owned */ \ |
| 274 | LCK_MTX_ASSERT(&(_skr)->skr_lock, LCK_MTX_ASSERT_NOTOWNED) |
| 275 | #define SKR_UNLOCK(_skr) /* unlock the region mutex (skr_lock) */ \ |
| 276 | lck_mtx_unlock(&(_skr)->skr_lock) |
| 277 | |
| 278 | /* valid values for skr_mode */ |
| 279 | #define SKR_MODE_NOREDIRECT 0x1 /* unaffected by defunct */ |
| 280 | #define SKR_MODE_MMAPOK 0x2 /* can be mapped to user task */ |
| 281 | #define SKR_MODE_KREADONLY 0x4 /* kernel read only */ |
| 282 | #define SKR_MODE_UREADONLY 0x8 /* if user map, map it read-only */ |
| 283 | #define SKR_MODE_PERSISTENT 0x10 /* memory stays non-volatile */ |
| 284 | #define SKR_MODE_MONOLITHIC 0x20 /* monolithic region */ |
| 285 | #define SKR_MODE_NOMAGAZINES 0x40 /* disable magazines layer */ |
| 286 | #define SKR_MODE_NOCACHE 0x80 /* caching-inhibited */ |
| 287 | #define SKR_MODE_SEGPHYSCONTIG 0x100 /* phys. contiguous segment */ |
| 288 | #define SKR_MODE_SHAREOK 0x200 /* allow object sharing */ |
| 289 | #define SKR_MODE_IODIR_IN 0x400 /* I/O direction In */ |
| 290 | #define SKR_MODE_IODIR_OUT 0x800 /* I/O direction Out */ |
| 291 | #define SKR_MODE_GUARD 0x1000 /* guard pages region */ |
| 292 | #define SKR_MODE_PUREDATA 0x2000 /* purely data; no pointers */ |
| 293 | #define SKR_MODE_PSEUDO 0x4000 /* external backing store */ |
| 294 | #define SKR_MODE_THREADSAFE 0x8000 /* thread safe */ |
| 295 | #define SKR_MODE_MEMTAG 0x10000 /* enable memory tagging in this region */ |
| 296 | #define SKR_MODE_SLAB (1U << 30) /* backend for slab layer */ |
| 297 | #define SKR_MODE_MIRRORED (1U << 31) /* controlled by another region */ |
| 298 | |
| 299 | #define SKR_MODE_BITS /* names for skr_mode bits: leading byte is octal 020 (16), then each name is prefixed by its 1-based bit number; presumably for a %b-style formatter -- confirm at call sites */ \ |
| 300 | "\020\01NOREDIRECT\02MMAPOK\03KREADONLY\04UREADONLY" \ |
| 301 | "\05PERSISTENT\06MONOLITHIC\07NOMAGAZINES\10NOCACHE" \ |
| 302 | "\11SEGPHYSCONTIG\012SHAREOK\013IODIR_IN\014IODIR_OUT" \ |
| 303 | "\015GUARD\016PUREDATA\017PSEUDO\020THREADSAFE\021MEMTAG\037SLAB" \ |
| 304 | "\040MIRRORED" |
| 305 | |
| 306 | /* valid creation flags for skmem_region_create(); presumably carried in srp_cflags -- confirm */ |
| 307 | #define SKMEM_REGION_CR_NOREDIRECT 0x1 /* unaffected by defunct */ |
| 308 | #define SKMEM_REGION_CR_MMAPOK 0x2 /* can be mapped to user task */ |
| 309 | #define SKMEM_REGION_CR_KREADONLY 0x4 /* kernel space read-only */ |
| 310 | #define SKMEM_REGION_CR_UREADONLY 0x8 /* if user map, map it RO */ |
| 311 | #define SKMEM_REGION_CR_PERSISTENT 0x10 /* memory stays non-volatile */ |
| 312 | #define SKMEM_REGION_CR_MONOLITHIC 0x20 /* monolithic region */ |
| 313 | #define SKMEM_REGION_CR_NOMAGAZINES 0x40 /* disable magazines layer */ |
| 314 | #define SKMEM_REGION_CR_NOCACHE 0x80 /* caching-inhibited */ |
| 315 | #define SKMEM_REGION_CR_SEGPHYSCONTIG 0x100 /* phys. contiguous segment */ |
| 316 | #define SKMEM_REGION_CR_SHAREOK 0x200 /* allow object sharing */ |
| 317 | #define SKMEM_REGION_CR_IODIR_IN 0x400 /* I/O direction in */ |
| 318 | #define SKMEM_REGION_CR_IODIR_OUT 0x800 /* I/O direction out */ |
| 319 | #define SKMEM_REGION_CR_GUARD 0x1000 /* guard pages region */ |
| 320 | #define SKMEM_REGION_CR_PUREDATA 0x2000 /* purely data; no pointers */ |
| 321 | #define SKMEM_REGION_CR_PSEUDO 0x4000 /* external backing store */ |
| 322 | #define SKMEM_REGION_CR_THREADSAFE 0x8000 /* thread safe */ |
| 323 | #define SKMEM_REGION_CR_MEMTAG 0x10000 /* enable memory tagging in this region */ |
| 324 | |
/*
 * Bit-name string for the SKMEM_REGION_CR_* creation flags.
 *
 * Layout: the first byte is the numeric output base; every flag name
 * that follows is prefixed by a byte holding its 1-based bit position
 * ("\01NOREDIRECT" names 0x1, "\021MEMTAG" names 0x10000).
 *
 * The base byte must be \020 (16, hexadecimal), the same value that
 * SKR_MODE_BITS uses for the identical bit assignments.  It previously
 * read \021 (17), which is not a usable radix.
 *
 * NOTE(review): presumably consumed by a "%b"-style kernel formatter;
 * confirm at the call sites.
 */
#define SKMEM_REGION_CR_BITS                                            \
	"\020\01NOREDIRECT\02MMAPOK\03KREADONLY\04UREADONLY"            \
	"\05PERSISTENT\06MONOLITHIC\07NOMAGAZINES\10NOCACHE"            \
	"\11SEGPHYSCONTIG\012SHAREOK\013IODIR_IN\014IODIR_OUT"          \
	"\015GUARD\016PUREDATA\017PSEUDO\020THREADSAFE\021MEMTAG"
| 330 | |
| 331 | __BEGIN_DECLS |
| 332 | extern void skmem_region_init(void); |
| 333 | extern void skmem_region_fini(void); |
| 334 | extern void skmem_region_reap_caches(boolean_t); |
| 335 | extern void skmem_region_params_config(struct skmem_region_params *); |
| 336 | extern struct skmem_region *skmem_region_create(const char *, |
| 337 | struct skmem_region_params *, sksegment_ctor_fn_t, sksegment_dtor_fn_t, |
| 338 | void *); |
| 339 | extern void skmem_region_mirror(struct skmem_region *, struct skmem_region *); |
| 340 | extern void skmem_region_slab_config(struct skmem_region *, |
| 341 | struct skmem_cache *, bool); |
| 342 | extern void *skmem_region_alloc(struct skmem_region *, void **, |
| 343 | struct sksegment **, struct sksegment **, uint32_t); |
| 344 | extern void skmem_region_free(struct skmem_region *, void *, void *); |
| 345 | extern void skmem_region_retain(struct skmem_region *); |
| 346 | extern boolean_t skmem_region_release(struct skmem_region *); |
| 347 | extern mach_vm_address_t skmem_region_obj_lookup(struct skmem_region *, |
| 348 | uint32_t); |
| 349 | extern int skmem_region_get_info(struct skmem_region *, uint32_t *, |
| 350 | struct sksegment **); |
| 351 | extern boolean_t skmem_region_for_pp(skmem_region_id_t); |
| 352 | extern void skmem_region_get_stats(struct skmem_region *, |
| 353 | struct sk_stats_region *); |
| 354 | #if (DEVELOPMENT || DEBUG) |
| 355 | extern uint64_t skmem_region_get_mtbf(void); |
| 356 | /* |
| 357 | * Reasonable boundaries for MTBF that would make sense for testing, |
| 358 | * in milliseconds; why not pick a couple of Mersenne p numbers? |
| 359 | */ |
| 360 | #define SKMEM_REGION_MTBF_MIN 2 /* 2 msec */ |
| 361 | #define SKMEM_REGION_MTBF_MAX 3021377 /* ~50.4 minutes; almost 1 hour */ |
| 362 | extern void skmem_region_set_mtbf(uint64_t); |
| 363 | #endif /* (DEVELOPMENT || DEBUG) */ |
| 364 | #if SK_LOG |
| 365 | extern const char *skmem_region_id2name(skmem_region_id_t); |
| 366 | #endif /* SK_LOG */ |
| 367 | __END_DECLS |
| 368 | #endif /* BSD_KERNEL_PRIVATE */ |
| 369 | #endif /* _SKYWALK_MEM_SKMEMREGIONVAR_H */ |
| 370 | |