/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1992,7 NeXT Computer, Inc.
 *
 * Unix data structure initialization.
 *
 */

#include <mach/mach_types.h>

#include <kern/startup.h>
#include <vm/vm_kern.h>
#include <mach/vm_prot.h>

#include <sys/param.h>
#include <sys/buf_internal.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/mcache.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/tty.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>
#include <machine/cons.h>
#include <pexpert/pexpert.h>
#include <sys/socketvar.h>
#include <pexpert/pexpert.h>
#include <netinet/tcp_var.h>

extern uint32_t kern_maxvnodes;
#if CONFIG_MBUF_MCACHE
extern vm_map_t mb_map;
#endif /* CONFIG_MBUF_MCACHE */

#if INET
extern uint32_t tcp_sendspace;
extern uint32_t tcp_recvspace;
#endif

void bsd_bufferinit(void);

unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
void bsd_scale_setup(int);
void bsd_exec_setup(int);

/*
 * Declare these as initialized data so we can patch them.
 */

#ifdef NBUF
int max_nbuf_headers = NBUF;
int niobuf_headers = (NBUF / 2) + 2048;
int nbuf_hashelements = NBUF;
int nbuf_headers = NBUF;
#else
int max_nbuf_headers = 0;
int niobuf_headers = 0;
int nbuf_hashelements = 0;
int nbuf_headers = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "");

__private_extern__ int customnbuf = 0;

/* Indicates a server boot when set */
TUNABLE(int, serverperfmode, "serverperfmode", 0);

#if SOCKETS
static unsigned int mbuf_poolsz;
#endif

vm_map_t buffer_map;
vm_map_t bufferhdr_map;
static int vnodes_sized = 0;

extern void bsd_startupearly(void);

static vm_map_size_t bufferhdr_map_size;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) bufferhdr_range = {};

static vm_map_size_t
bsd_get_bufferhdr_map_size(void)
{
	vm_size_t size;

	/* clip the number of buf headers up to 16k */
	if (max_nbuf_headers == 0) {
		max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */
	}
	if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) {
		max_nbuf_headers = 16384;
	}
	if (max_nbuf_headers < CONFIG_MIN_NBUF) {
		max_nbuf_headers = CONFIG_MIN_NBUF;
	}

	if (niobuf_headers == 0) {
		if (max_nbuf_headers < 4096) {
			niobuf_headers = max_nbuf_headers;
		} else {
			niobuf_headers = (max_nbuf_headers / 2) + 2048;
		}
	}
	if (niobuf_headers < CONFIG_MIN_NIOBUF) {
		niobuf_headers = CONFIG_MIN_NIOBUF;
	}

	size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf);
	size = round_page(size);

	return size;
}
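
/*
 * Illustrative sizing (hypothetical numbers, not taken from the code above):
 * on a machine with sane_size = 8 GB, 4 KB pages, and no custom nbuf setting,
 * atop_kernel(8 GB / 50) is roughly 41,900 headers, which the customnbuf == 0
 * path clips to 16,384; niobuf_headers then becomes 16,384 / 2 + 2,048 = 10,240,
 * and the suballocation size is round_page((16,384 + 10,240) * sizeof(struct buf)).
 */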

KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
	return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
});

void
bsd_startupearly(void)
{
	vm_size_t size = bufferhdr_map_size;

	assert(size);

	/* clip the number of hash elements to 200000 */
	if ((customnbuf == 0) && nbuf_hashelements == 0) {
		nbuf_hashelements = (int)atop_kernel(sane_size / 50);
		if ((unsigned int)nbuf_hashelements > 200000) {
			nbuf_hashelements = 200000;
		}
	} else {
		nbuf_hashelements = max_nbuf_headers;
	}

	bufferhdr_map = kmem_suballoc(kernel_map,
	    &bufferhdr_range.min_address,
	    size,
	    VM_MAP_CREATE_NEVER_FAULTS,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_FILE).kmr_submap;

	kmem_alloc(bufferhdr_map,
	    &(vm_offset_t){ bufferhdr_range.min_address },
	    size,
	    KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT,
	    VM_KERN_MEMORY_FILE);

	buf_headers = (struct buf *)bufferhdr_range.min_address;

#if SOCKETS
	{
		static const unsigned int maxspace = 128 * 1024;
		int scale;

#if INET
		if ((scale = nmbclusters / NMBCLUSTERS) > 1) {
			tcp_sendspace *= scale;
			tcp_recvspace *= scale;

			if (tcp_sendspace > maxspace) {
				tcp_sendspace = maxspace;
			}
			if (tcp_recvspace > maxspace) {
				tcp_recvspace = maxspace;
			}
		}
#endif /* INET */
	}
#endif /* SOCKETS */
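
/*
 * Hypothetical example of the scaling above (the values are illustrative,
 * not from any particular configuration): if nmbclusters ends up at
 * 4 * NMBCLUSTERS, scale is 4, so the default tcp_sendspace/tcp_recvspace
 * are quadrupled and each is then capped at maxspace (128 KB).
 */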

	if (vnodes_sized == 0) {
		if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
			/*
			 * Size vnodes based on memory
			 * Number vnodes is (memsize/64k) + 1024
			 * This is the calculation that is used by launchd in tiger
			 * we are clipping the max based on 16G
			 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
			 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
			 * but can be smaller or larger.
			 */
			desiredvnodes = (int)(sane_size / 65536) + 1024;
#ifdef CONFIG_VNODES
			if (desiredvnodes > CONFIG_VNODES) {
				desiredvnodes = CONFIG_VNODES;
			}
#endif
		}
		vnodes_sized = 1;
	}
}

#if SOCKETS
SECURITY_READ_ONLY_LATE(struct mach_vm_range) mb_range = {};
KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
	nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
	return (vm_map_size_t)(nmbclusters * MCLBYTES);
});
#endif /* SOCKETS */

void
bsd_bufferinit(void)
{
	/*
	 * Note: Console device initialized in kminit() from bsd_autoconf()
	 * prior to call to us in bsd_init().
	 */

	bsd_startupearly();

#if CONFIG_MBUF_MCACHE
	mb_map = kmem_suballoc(kernel_map,
	    &mb_range.min_address,
	    (vm_size_t) (nmbclusters * MCLBYTES),
	    FALSE,
	    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
	    KMS_PERMANENT | KMS_NOFAIL,
	    VM_KERN_MEMORY_MBUF).kmr_submap;
	mbutl = (unsigned char *)mb_range.min_address;
#endif /* CONFIG_MBUF_MCACHE */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}

/* 512 MB (K32) or 2 GB (K64) hard limit on size of the mbuf pool */
#if !defined(__LP64__)
#define MAX_MBUF_POOL (512 << MBSHIFT)
#else
#define MAX_MBUF_POOL (2ULL << GBSHIFT)
#endif /* !__LP64__ */
#define MAX_NCL (MAX_MBUF_POOL >> MCLSHIFT)
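/*
 * For reference (assuming the usual 2 KB mbuf clusters, i.e. MCLSHIFT == 11):
 * the 512 MB K32 limit works out to 262,144 clusters and the 2 GB K64 limit
 * to 1,048,576 clusters.
 */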

#if SOCKETS
/*
 * this has been broken out into a separate routine that
 * can be called from the x86 early vm initialization to
 * determine how much lo memory to reserve on systems with
 * DMA hardware that can't fully address all of the physical
 * memory that is present.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool. It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses a different unit.
	 */
	if (mbuf_pool != 0) {
		ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT;
	}
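	/*
	 * Illustrative conversion (hypothetical boot-arg value): mbuf_pool=64
	 * means 64 MB, i.e. (64 << 20) >> 11 = 32,768 two-kilobyte clusters,
	 * the same pool a boot-arg of ncl=32768 would request.
	 */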

	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round it down to a multiple of NCLPG (clusters per page) so the pool stays page-aligned */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}
	mbuf_poolsz = nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif

#if defined(__LP64__)
extern int tcp_tcbhashsize;
extern int max_cached_sock_count;
#endif

#define SERVER_PERF_MODE_VALIDATION_DISABLES 0x5dee
extern unsigned int kern_feature_overrides;

void
bsd_scale_setup(int scale)
{
#if defined(__LP64__)
	if ((scale > 0) && (serverperfmode == 0)) {
		maxproc *= scale;
		maxprocperuid = (maxproc * 2) / 3;
		if (scale > 2) {
			maxfiles *= scale;
			maxfilesperproc = maxfiles / 2;
		}
	}
	/* Apply server scaling rules */
	if ((scale > 0) && (serverperfmode != 0)) {
		maxproc = 2500 * scale;
		hard_maxproc = maxproc;
		/* no fp usage */
		maxprocperuid = (maxproc * 3) / 4;
		maxfiles = (150000 * scale);
		maxfilesperproc = maxfiles / 2;
		desiredvnodes = maxfiles;
		vnodes_sized = 1;
		tcp_tfo_backlog = 100 * scale;
		if (scale > 4) {
			/* clip somaxconn at 32G level */
			somaxconn = 2048;
			/*
			 * For scale > 4 (> 32G), clip
			 * tcp_tcbhashsize to 32K
			 */
			tcp_tcbhashsize = 32 * 1024;

			if (scale > 7) {
				/* clip at 64G level */
				max_cached_sock_count = 165000;
			} else {
				max_cached_sock_count = 60000 + ((scale - 1) * 15000);
			}
		} else {
			somaxconn = 512 * scale;
			tcp_tcbhashsize = 4 * 1024 * scale;
			max_cached_sock_count = 60000 + ((scale - 1) * 15000);
		}
	}

	if (maxproc > hard_maxproc) {
		hard_maxproc = maxproc;
	}
#endif
	if (serverperfmode) {
		/* If running in serverperfmode disable some internal only diagnostics. */
		kern_feature_overrides |= SERVER_PERF_MODE_VALIDATION_DISABLES;
	}
	bsd_exec_setup(scale);
}
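
/*
 * Illustrative numbers for the server path above (the scale value is
 * hypothetical, chosen only to show the arithmetic): with serverperfmode
 * set and scale = 8, maxproc becomes 20,000, maxprocperuid 15,000,
 * maxfiles 1,200,000 (600,000 per process), desiredvnodes 1,200,000,
 * tcp_tfo_backlog 800, somaxconn 2,048, tcp_tcbhashsize 32,768, and
 * max_cached_sock_count 165,000 (the scale > 7 clamp).
 */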