1 | /* |
2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * Copyright (c) 1992,7 NeXT Computer, Inc. |
30 | * |
31 | * Unix data structure initialization. |
32 | * |
33 | */ |
34 | |
35 | #include <mach/mach_types.h> |
36 | |
37 | #include <kern/startup.h> |
38 | #include <vm/vm_kern.h> |
39 | #include <mach/vm_prot.h> |
40 | |
41 | #include <sys/param.h> |
42 | #include <sys/buf_internal.h> |
43 | #include <sys/file_internal.h> |
44 | #include <sys/proc_internal.h> |
45 | #include <sys/mcache.h> |
46 | #include <sys/mbuf.h> |
47 | #include <sys/systm.h> |
48 | #include <sys/tty.h> |
49 | #include <sys/vnode.h> |
50 | #include <sys/sysctl.h> |
51 | #include <machine/cons.h> |
52 | #include <pexpert/pexpert.h> |
53 | #include <sys/socketvar.h> |
54 | #include <pexpert/pexpert.h> |
55 | #include <netinet/tcp_var.h> |
56 | |
/* Defined outside this file. */
extern uint32_t kern_maxvnodes;
#if CONFIG_MBUF_MCACHE
/* Submap handle assigned by bsd_bufferinit() when CONFIG_MBUF_MCACHE is set. */
extern vm_map_t mb_map;
#endif /* CONFIG_MBUF_MCACHE */

#if INET
/* TCP buffer space values; bsd_startupearly() scales and caps them. */
extern uint32_t tcp_sendspace;
extern uint32_t tcp_recvspace;
#endif

/* Buffer-cache bring-up entry point; defined below. */
void bsd_bufferinit(void);

/* Mbuf pool sizing and limit scaling; both are defined below. */
unsigned int bsd_mbuf_cluster_reserve(boolean_t *);
void bsd_scale_setup(int);
/* Called at the end of bsd_scale_setup(); defined outside this file. */
void bsd_exec_setup(int);
72 | |
/*
 * Declare these as initialized data so we can patch them.
 *
 * With NBUF defined at build time, all four values are seeded from it.
 * Otherwise they start at zero.  A zero max_nbuf_headers or niobuf_headers
 * is sized by bsd_get_bufferhdr_map_size(), and a zero nbuf_hashelements
 * is sized by bsd_startupearly().
 */

#ifdef NBUF
int             max_nbuf_headers = NBUF;
int             niobuf_headers = (NBUF / 2) + 2048;
int             nbuf_hashelements = NBUF;
int             nbuf_headers = NBUF;
#else
int             max_nbuf_headers = 0;
int             niobuf_headers = 0;
int             nbuf_hashelements = 0;
int             nbuf_headers = 0;
#endif
88 | |
/* kern.nbuf: read-only view of nbuf_headers. */
SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "" );
/* kern.maxnbuf: read-write view of max_nbuf_headers. */
SYSCTL_INT(_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "" );

/*
 * When non-zero, bsd_get_bufferhdr_map_size() skips the 16384 cap on
 * max_nbuf_headers and bsd_startupearly() makes nbuf_hashelements follow
 * max_nbuf_headers.  Nothing in this file sets it (presumably a boot-arg
 * parser does -- confirm).
 */
__private_extern__ int customnbuf = 0;

/* Indicates a server boot when set */
TUNABLE(int, serverperfmode, "serverperfmode" , 0);

#if SOCKETS
/*
 * Value computed and cached by bsd_mbuf_cluster_reserve(); non-zero means
 * the computation has already been done.
 */
static unsigned int mbuf_poolsz;
#endif

vm_map_t        buffer_map;
/* Submap created in bsd_startupearly() to hold the buf headers. */
vm_map_t        bufferhdr_map;
/* Set to 1 once desiredvnodes has been chosen, so it is not re-sized. */
static int vnodes_sized = 0;

extern void     bsd_startupearly(void);

/*
 * Size of the buffer-header range.  Written by the block registered with
 * KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, ...) and read by bsd_startupearly().
 */
static vm_map_size_t    bufferhdr_map_size;
/* Address range backing bufferhdr_map. */
SECURITY_READ_ONLY_LATE(struct mach_vm_range) bufferhdr_range = {};
109 | |
110 | static vm_map_size_t |
111 | bsd_get_bufferhdr_map_size(void) |
112 | { |
113 | vm_size_t size; |
114 | |
115 | /* clip the number of buf headers upto 16k */ |
116 | if (max_nbuf_headers == 0) { |
117 | max_nbuf_headers = (int)atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */ |
118 | } |
119 | if ((customnbuf == 0) && ((unsigned int)max_nbuf_headers > 16384)) { |
120 | max_nbuf_headers = 16384; |
121 | } |
122 | if (max_nbuf_headers < CONFIG_MIN_NBUF) { |
123 | max_nbuf_headers = CONFIG_MIN_NBUF; |
124 | } |
125 | |
126 | if (niobuf_headers == 0) { |
127 | if (max_nbuf_headers < 4096) { |
128 | niobuf_headers = max_nbuf_headers; |
129 | } else { |
130 | niobuf_headers = (max_nbuf_headers / 2) + 2048; |
131 | } |
132 | } |
133 | if (niobuf_headers < CONFIG_MIN_NIOBUF) { |
134 | niobuf_headers = CONFIG_MIN_NIOBUF; |
135 | } |
136 | |
137 | size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf); |
138 | size = round_page(x: size); |
139 | |
140 | return size; |
141 | } |
142 | |
/*
 * Register the `bufferhdr` range against bufferhdr_range.  The block
 * computes the range size and also caches it in bufferhdr_map_size, which
 * bsd_startupearly() reads.
 * NOTE(review): when the block is invoked is decided by the
 * KMEM_RANGE_REGISTER_DYNAMIC machinery, not by this file -- confirm.
 */
KMEM_RANGE_REGISTER_DYNAMIC(bufferhdr, &bufferhdr_range, ^() {
	return bufferhdr_map_size = bsd_get_bufferhdr_map_size();
});
146 | |
147 | void |
148 | bsd_startupearly(void) |
149 | { |
150 | vm_size_t size = bufferhdr_map_size; |
151 | |
152 | assert(size); |
153 | |
154 | /* clip the number of hash elements to 200000 */ |
155 | if ((customnbuf == 0) && nbuf_hashelements == 0) { |
156 | nbuf_hashelements = (int)atop_kernel(sane_size / 50); |
157 | if ((unsigned int)nbuf_hashelements > 200000) { |
158 | nbuf_hashelements = 200000; |
159 | } |
160 | } else { |
161 | nbuf_hashelements = max_nbuf_headers; |
162 | } |
163 | |
164 | bufferhdr_map = kmem_suballoc(parent: kernel_map, |
165 | addr: &bufferhdr_range.min_address, |
166 | size, |
167 | vmc_options: VM_MAP_CREATE_NEVER_FAULTS, |
168 | VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, |
169 | flags: KMS_PERMANENT | KMS_NOFAIL, |
170 | VM_KERN_MEMORY_FILE).kmr_submap; |
171 | |
172 | kmem_alloc(map: bufferhdr_map, |
173 | addrp: &(vm_offset_t){ bufferhdr_range.min_address }, |
174 | size, |
175 | flags: KMA_NOFAIL | KMA_PERMANENT | KMA_ZERO | KMA_KOBJECT, |
176 | VM_KERN_MEMORY_FILE); |
177 | |
178 | buf_headers = (struct buf *)bufferhdr_range.min_address; |
179 | |
180 | #if SOCKETS |
181 | { |
182 | static const unsigned int maxspace = 128 * 1024; |
183 | int scale; |
184 | |
185 | #if INET |
186 | if ((scale = nmbclusters / NMBCLUSTERS) > 1) { |
187 | tcp_sendspace *= scale; |
188 | tcp_recvspace *= scale; |
189 | |
190 | if (tcp_sendspace > maxspace) { |
191 | tcp_sendspace = maxspace; |
192 | } |
193 | if (tcp_recvspace > maxspace) { |
194 | tcp_recvspace = maxspace; |
195 | } |
196 | } |
197 | #endif /* INET */ |
198 | } |
199 | #endif /* SOCKETS */ |
200 | |
201 | if (vnodes_sized == 0) { |
202 | if (!PE_get_default(property_name: "kern.maxvnodes" , property_ptr: &desiredvnodes, max_property: sizeof(desiredvnodes))) { |
203 | /* |
204 | * Size vnodes based on memory |
205 | * Number vnodes is (memsize/64k) + 1024 |
206 | * This is the calculation that is used by launchd in tiger |
207 | * we are clipping the max based on 16G |
208 | * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168; |
209 | * CONFIG_VNODES is set to 263168 for "medium" configurations (the default) |
210 | * but can be smaller or larger. |
211 | */ |
212 | desiredvnodes = (int)(sane_size / 65536) + 1024; |
213 | #ifdef CONFIG_VNODES |
214 | if (desiredvnodes > CONFIG_VNODES) { |
215 | desiredvnodes = CONFIG_VNODES; |
216 | } |
217 | #endif |
218 | } |
219 | vnodes_sized = 1; |
220 | } |
221 | } |
222 | |
#if SOCKETS
/* Address range used for the mbuf pool; see bsd_bufferinit(). */
SECURITY_READ_ONLY_LATE(struct mach_vm_range) mb_range = {};
/*
 * Register the `mb` range.  The block sets nmbclusters from
 * bsd_mbuf_cluster_reserve() (its result divided by MCLBYTES) and returns
 * the range size as nmbclusters * MCLBYTES.
 */
KMEM_RANGE_REGISTER_DYNAMIC(mb, &mb_range, ^() {
	nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES;
	return (vm_map_size_t)(nmbclusters * MCLBYTES);
});
#endif /* SOCKETS */
230 | |
/*
 * Bring up the BSD buffer layer: run the early sizing pass, carve out the
 * mbuf submap when CONFIG_MBUF_MCACHE is configured, then initialize the
 * buffer cache.
 */
void
bsd_bufferinit(void)
{
	/*
	 * The console device has already been initialized by the time we get
	 * here: kminit() runs from bsd_autoconf(), before bsd_init() calls us.
	 */
	bsd_startupearly();

#if CONFIG_MBUF_MCACHE
	{
		/* The mbuf submap spans nmbclusters clusters of MCLBYTES each. */
		const vm_size_t mb_map_bytes = (vm_size_t) (nmbclusters * MCLBYTES);

		mb_map = kmem_suballoc(kernel_map, &mb_range.min_address,
		    mb_map_bytes, FALSE,
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
		    KMS_PERMANENT | KMS_NOFAIL,
		    VM_KERN_MEMORY_MBUF).kmr_submap;
	}
	mbutl = (unsigned char *)mb_range.min_address;
#endif /* CONFIG_MBUF_MCACHE */

	/* Buffers must be ready before disk labels can be read. */
	bufinit();
}
257 | |
/* 512 MB (K32) or 2 GB (K64) hard limit on size of the mbuf pool */
#if !defined(__LP64__)
#define MAX_MBUF_POOL   (512 << MBSHIFT)
#else
#define MAX_MBUF_POOL   (2ULL << GBSHIFT)
#endif /* !__LP64__ */
/* The same limit expressed as a cluster count; caps a manually set "ncl". */
#define MAX_NCL         (MAX_MBUF_POOL >> MCLSHIFT)
265 | |
#if SOCKETS
/*
 * this has been broken out into a separate routine that
 * can be called from the x86 early vm initialization to
 * determine how much lo memory to reserve on systems with
 * DMA hardware that can't fully address all of the physical
 * memory that is present.
 *
 * Computes nmbclusters (from the "ncl" / "mbuf_pool" boot-args, or from
 * mbuf_default_ncl() when neither is given) and returns
 * nmbclusters << MCLSHIFT.  The result is cached in mbuf_poolsz, so later
 * calls return the same value without recomputing it.
 *
 * overridden: optional out-parameter (may be NULL).  Receives TRUE when
 *             the size was set through a boot-arg or serverperfmode is
 *             on, FALSE otherwise.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0, ncl = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof(ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof(mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses different unit.
	 *
	 * NOTE(review): the intermediate (mbuf_pool << MBSHIFT) is computed
	 * in int, so a large enough "mbuf_pool" boot-arg would overflow
	 * before the MAX_NCL clamp below -- confirm whether boot-arg values
	 * are bounded elsewhere.
	 */
	if (mbuf_pool != 0) {
		ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT;
	}

	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(mem_actual);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/*
		 * Round it down to a multiple of NCLPG (presumably the
		 * number of clusters per page -- confirm).
		 */
		nmbclusters = (unsigned int)P2ROUNDDOWN(nmbclusters, NCLPG);
	}
	mbuf_poolsz = nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return mbuf_poolsz;
}
#endif
334 | |
#if defined(__LP64__)
/* Socket/TCP tunables that bsd_scale_setup() sets in serverperfmode. */
extern int tcp_tcbhashsize;
extern int max_cached_sock_count;
#endif

/* Bits OR-ed into kern_feature_overrides when serverperfmode is on. */
#define SERVER_PERF_MODE_VALIDATION_DISABLES 0x5dee
extern unsigned int kern_feature_overrides;
342 | void |
343 | bsd_scale_setup(int scale) |
344 | { |
345 | #if defined(__LP64__) |
346 | if ((scale > 0) && (serverperfmode == 0)) { |
347 | maxproc *= scale; |
348 | maxprocperuid = (maxproc * 2) / 3; |
349 | if (scale > 2) { |
350 | maxfiles *= scale; |
351 | maxfilesperproc = maxfiles / 2; |
352 | } |
353 | } |
354 | /* Apply server scaling rules */ |
355 | if ((scale > 0) && (serverperfmode != 0)) { |
356 | maxproc = 2500 * scale; |
357 | hard_maxproc = maxproc; |
358 | /* no fp usage */ |
359 | maxprocperuid = (maxproc * 3) / 4; |
360 | maxfiles = (150000 * scale); |
361 | maxfilesperproc = maxfiles / 2; |
362 | desiredvnodes = maxfiles; |
363 | vnodes_sized = 1; |
364 | tcp_tfo_backlog = 100 * scale; |
365 | if (scale > 4) { |
366 | /* clip somaxconn at 32G level */ |
367 | somaxconn = 2048; |
368 | /* |
369 | * For scale > 4 (> 32G), clip |
370 | * tcp_tcbhashsize to 32K |
371 | */ |
372 | tcp_tcbhashsize = 32 * 1024; |
373 | |
374 | if (scale > 7) { |
375 | /* clip at 64G level */ |
376 | max_cached_sock_count = 165000; |
377 | } else { |
378 | max_cached_sock_count = 60000 + ((scale - 1) * 15000); |
379 | } |
380 | } else { |
381 | somaxconn = 512 * scale; |
382 | tcp_tcbhashsize = 4 * 1024 * scale; |
383 | max_cached_sock_count = 60000 + ((scale - 1) * 15000); |
384 | } |
385 | } |
386 | |
387 | if (maxproc > hard_maxproc) { |
388 | hard_maxproc = maxproc; |
389 | } |
390 | #endif |
391 | if (serverperfmode) { |
392 | /* If running in serverperfmode disable some internal only diagnostics. */ |
393 | kern_feature_overrides |= SERVER_PERF_MODE_VALIDATION_DISABLES; |
394 | } |
395 | bsd_exec_setup(scale); |
396 | } |
397 | |