1 | /* |
2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * Copyright (c) 1992,7 NeXT Computer, Inc. |
30 | * |
31 | * Unix data structure initialization. |
32 | * |
33 | */ |
34 | |
35 | #include <mach/mach_types.h> |
36 | |
37 | #include <vm/vm_kern.h> |
38 | #include <mach/vm_prot.h> |
39 | |
40 | #include <sys/param.h> |
41 | #include <sys/buf_internal.h> |
42 | #include <sys/file_internal.h> |
43 | #include <sys/proc_internal.h> |
44 | #include <sys/clist.h> |
45 | #include <sys/mcache.h> |
46 | #include <sys/mbuf.h> |
47 | #include <sys/systm.h> |
48 | #include <sys/tty.h> |
49 | #include <sys/vnode.h> |
50 | #include <sys/sysctl.h> |
51 | #include <machine/cons.h> |
52 | #include <pexpert/pexpert.h> |
53 | #include <sys/socketvar.h> |
54 | #include <pexpert/pexpert.h> |
55 | #include <netinet/tcp_var.h> |
56 | |
57 | extern uint32_t kern_maxvnodes; |
58 | extern vm_map_t mb_map; |
59 | |
60 | #if INET || INET6 |
61 | extern uint32_t tcp_sendspace; |
62 | extern uint32_t tcp_recvspace; |
63 | #endif |
64 | |
65 | void bsd_bufferinit(void); |
66 | |
67 | unsigned int bsd_mbuf_cluster_reserve(boolean_t *); |
68 | void bsd_scale_setup(int); |
69 | void bsd_exec_setup(int); |
70 | |
/*
 * Buffer-header tunables.
 *
 * Declare these as initialized data so we can patch them.  A build that
 * defines NBUF seeds all four from it; otherwise they start out as zero
 * and bsd_startupearly() computes defaults for max_nbuf_headers,
 * nbuf_hashelements and niobuf_headers when it finds them still at zero.
 */

#ifdef NBUF
int max_nbuf_headers = NBUF;
int niobuf_headers = (NBUF / 2) + 2048;
int nbuf_hashelements = NBUF;
int nbuf_headers = NBUF;
#else
int max_nbuf_headers = 0;
int niobuf_headers = 0;
int nbuf_hashelements = 0;
int nbuf_headers = 0;
#endif
86 | |
87 | SYSCTL_INT (_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "" ); |
88 | SYSCTL_INT (_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_KERN, &max_nbuf_headers, 0, "" ); |
89 | |
90 | __private_extern__ int customnbuf = 0; |
91 | int serverperfmode = 0; /* Flag indicates a server boot when set */ |
92 | int ncl = 0; |
93 | |
94 | #if SOCKETS |
95 | static unsigned int mbuf_poolsz; |
96 | #endif |
97 | |
98 | vm_map_t buffer_map; |
99 | vm_map_t bufferhdr_map; |
100 | static int vnodes_sized = 0; |
101 | |
102 | extern void bsd_startupearly(void); |
103 | |
104 | void |
105 | bsd_startupearly(void) |
106 | { |
107 | vm_offset_t firstaddr; |
108 | vm_size_t size; |
109 | kern_return_t ret; |
110 | |
111 | /* clip the number of buf headers upto 16k */ |
112 | if (max_nbuf_headers == 0) |
113 | max_nbuf_headers = atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */ |
114 | if ((customnbuf == 0) && (max_nbuf_headers > 16384)) |
115 | max_nbuf_headers = 16384; |
116 | if (max_nbuf_headers < CONFIG_MIN_NBUF) |
117 | max_nbuf_headers = CONFIG_MIN_NBUF; |
118 | |
119 | /* clip the number of hash elements to 200000 */ |
120 | if ( (customnbuf == 0 ) && nbuf_hashelements == 0) { |
121 | nbuf_hashelements = atop_kernel(sane_size / 50); |
122 | if (nbuf_hashelements > 200000) |
123 | nbuf_hashelements = 200000; |
124 | } else |
125 | nbuf_hashelements = max_nbuf_headers; |
126 | |
127 | if (niobuf_headers == 0) { |
128 | if (max_nbuf_headers < 4096) |
129 | niobuf_headers = max_nbuf_headers; |
130 | else |
131 | niobuf_headers = (max_nbuf_headers / 2) + 2048; |
132 | } |
133 | if (niobuf_headers < CONFIG_MIN_NIOBUF) |
134 | niobuf_headers = CONFIG_MIN_NIOBUF; |
135 | |
136 | size = (max_nbuf_headers + niobuf_headers) * sizeof(struct buf); |
137 | size = round_page(size); |
138 | |
139 | ret = kmem_suballoc(kernel_map, |
140 | &firstaddr, |
141 | size, |
142 | FALSE, |
143 | VM_FLAGS_ANYWHERE, |
144 | VM_MAP_KERNEL_FLAGS_NONE, |
145 | VM_KERN_MEMORY_FILE, |
146 | &bufferhdr_map); |
147 | |
148 | if (ret != KERN_SUCCESS) |
149 | panic("Failed to create bufferhdr_map" ); |
150 | |
151 | ret = kernel_memory_allocate(bufferhdr_map, |
152 | &firstaddr, |
153 | size, |
154 | 0, |
155 | KMA_HERE | KMA_KOBJECT, |
156 | VM_KERN_MEMORY_FILE); |
157 | |
158 | if (ret != KERN_SUCCESS) |
159 | panic("Failed to allocate bufferhdr_map" ); |
160 | |
161 | buf_headers = (struct buf *) firstaddr; |
162 | bzero(buf_headers, size); |
163 | |
164 | #if SOCKETS |
165 | { |
166 | static const unsigned int maxspace = 128 * 1024; |
167 | int scale; |
168 | |
169 | nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES; |
170 | |
171 | #if INET || INET6 |
172 | if ((scale = nmbclusters / NMBCLUSTERS) > 1) { |
173 | tcp_sendspace *= scale; |
174 | tcp_recvspace *= scale; |
175 | |
176 | if (tcp_sendspace > maxspace) |
177 | tcp_sendspace = maxspace; |
178 | if (tcp_recvspace > maxspace) |
179 | tcp_recvspace = maxspace; |
180 | } |
181 | #endif /* INET || INET6 */ |
182 | } |
183 | #endif /* SOCKETS */ |
184 | |
185 | if (vnodes_sized == 0) { |
186 | if (!PE_get_default("kern.maxvnodes" , &desiredvnodes, sizeof(desiredvnodes))) { |
187 | /* |
188 | * Size vnodes based on memory |
189 | * Number vnodes is (memsize/64k) + 1024 |
190 | * This is the calculation that is used by launchd in tiger |
191 | * we are clipping the max based on 16G |
192 | * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168; |
193 | * CONFIG_VNODES is set to 263168 for "medium" configurations (the default) |
194 | * but can be smaller or larger. |
195 | */ |
196 | desiredvnodes = (sane_size/65536) + 1024; |
197 | #ifdef CONFIG_VNODES |
198 | if (desiredvnodes > CONFIG_VNODES) |
199 | desiredvnodes = CONFIG_VNODES; |
200 | #endif |
201 | } |
202 | vnodes_sized = 1; |
203 | } |
204 | } |
205 | |
/*
 * bsd_bufferinit
 *
 * Runs the early sizing pass, then (with SOCKETS) creates mb_map from
 * kernel_map, sized for nmbclusters clusters of MCLBYTES each, and finally
 * initializes the buffers via bufinit().  Panics if mb_map cannot be
 * created.
 *
 * Note: Console device initialized in kminit() from bsd_autoconf()
 * prior to call to us in bsd_init().
 */
void
bsd_bufferinit(void)
{
	bsd_startupearly();

#if SOCKETS
	{
		kern_return_t kr;

		kr = kmem_suballoc(kernel_map,
		    (vm_offset_t *) &mbutl,
		    (vm_size_t) (nmbclusters * MCLBYTES),
		    FALSE,
		    VM_FLAGS_ANYWHERE,
		    VM_MAP_KERNEL_FLAGS_NONE,
		    VM_KERN_MEMORY_MBUF,
		    &mb_map);
		if (kr != KERN_SUCCESS) {
			panic("Failed to allocate mb_map\n");
		}
	}
#endif /* SOCKETS */

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
}
238 | |
/*
 * Hard limit on the size of the mbuf pool:
 * 2 GB on 64-bit kernels (K64), 512 MB on 32-bit kernels (K32).
 */
#if defined(__LP64__)
#define MAX_MBUF_POOL	(2ULL << GBSHIFT)
#else
#define MAX_MBUF_POOL	(512 << MBSHIFT)
#endif /* __LP64__ */

/* The same limit expressed as a number of clusters. */
#define MAX_NCL		(MAX_MBUF_POOL >> MCLSHIFT)
246 | |
#if SOCKETS
/*
 * bsd_mbuf_cluster_reserve
 *
 * Compute, once, the size of the mbuf pool in bytes and return it.
 *
 * This has been broken out into a separate routine that can be called
 * from the x86 early vm initialization to determine how much low memory
 * to reserve on systems with DMA hardware that can't fully address all
 * of the physical memory that is present.
 *
 * Parameters:
 *	overridden	Optional (may be NULL).  When supplied it receives
 *			the recorded override state: TRUE once the pool
 *			has been sized with a nonzero cluster request
 *			("ncl" or "mbuf_pool") or with serverperfmode set.
 *
 * Returns:	mbuf_poolsz, i.e. nmbclusters << MCLSHIFT.
 *
 * Side effects: may update the globals ncl and nmbclusters, and caches
 * the result in mbuf_poolsz so that later calls return the same value.
 */
unsigned int
bsd_mbuf_cluster_reserve(boolean_t *overridden)
{
	int mbuf_pool = 0;
	static boolean_t was_overridden = FALSE;

	/* If called more than once, return the previously calculated size */
	if (mbuf_poolsz != 0) {
		goto done;
	}

	/*
	 * Some of these are parsed in parse_bsd_args(), but for x86 we get
	 * here early from i386_vm_init() and so we parse them now, in order
	 * to correctly compute the size of the low-memory VM pool.  It is
	 * redundant but rather harmless.
	 */
	(void) PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof (mbuf_pool));

	/*
	 * Convert "mbuf_pool" from MB to # of 2KB clusters; it is
	 * equivalent to "ncl", except that it uses different unit.
	 *
	 * The conversion is done in 64-bit unsigned arithmetic: shifting
	 * the int directly overflows (undefined behavior) as soon as the
	 * request reaches 2048 MB.  Requests beyond the hard limit, which
	 * includes negative values reinterpreted as unsigned, are clamped
	 * to MAX_NCL.
	 */
	if (mbuf_pool != 0) {
		uint64_t requested;

		requested = ((uint64_t)(uint32_t)mbuf_pool << MBSHIFT) >> MCLSHIFT;
		ncl = (requested > MAX_NCL) ? (int)MAX_NCL : (int)requested;
	}

	if (sane_size > (64 * 1024 * 1024) || ncl != 0) {

		if (ncl || serverperfmode) {
			was_overridden = TRUE;
		}

		if ((nmbclusters = ncl) == 0) {
			/* Auto-configure the mbuf pool size */
			nmbclusters = mbuf_default_ncl(serverperfmode, sane_size);
		} else {
			/* Make sure it's not odd in case ncl is manually set */
			if (nmbclusters & 0x1) {
				--nmbclusters;
			}

			/* And obey the upper limit */
			if (nmbclusters > MAX_NCL) {
				nmbclusters = MAX_NCL;
			}
		}

		/* Round it down to nearest multiple of PAGE_SIZE */
		nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPG);
	}

	/*
	 * Shift as unsigned: at the 2 GB limit the byte count no longer
	 * fits in a signed int.
	 */
	mbuf_poolsz = (unsigned int)nmbclusters << MCLSHIFT;
done:
	if (overridden) {
		*overridden = was_overridden;
	}

	return (mbuf_poolsz);
}
#endif /* SOCKETS */
310 | |
#if defined(__LP64__)
/* Tunables set by the server scaling rules in bsd_scale_setup(). */
extern int max_cached_sock_count;
extern int tcp_tcbhashsize;
#endif /* __LP64__ */
315 | |
316 | |
317 | void |
318 | bsd_scale_setup(int scale) |
319 | { |
320 | #if defined(__LP64__) |
321 | if ((scale > 0) && (serverperfmode == 0)) { |
322 | maxproc *= scale; |
323 | maxprocperuid = (maxproc * 2) / 3; |
324 | if (scale > 2) { |
325 | maxfiles *= scale; |
326 | maxfilesperproc = maxfiles/2; |
327 | } |
328 | } |
329 | /* Apply server scaling rules */ |
330 | if ((scale > 0) && (serverperfmode !=0)) { |
331 | maxproc = 2500 * scale; |
332 | hard_maxproc = maxproc; |
333 | /* no fp usage */ |
334 | maxprocperuid = (maxproc*3)/4; |
335 | maxfiles = (150000 * scale); |
336 | maxfilesperproc = maxfiles/2; |
337 | desiredvnodes = maxfiles; |
338 | vnodes_sized = 1; |
339 | tcp_tfo_backlog = 100 * scale; |
340 | if (scale > 4) { |
341 | /* clip somaxconn at 32G level */ |
342 | somaxconn = 2048; |
343 | /* |
344 | * For scale > 4 (> 32G), clip |
345 | * tcp_tcbhashsize to 32K |
346 | */ |
347 | tcp_tcbhashsize = 32 *1024; |
348 | |
349 | if (scale > 7) { |
350 | /* clip at 64G level */ |
351 | max_cached_sock_count = 165000; |
352 | } else { |
353 | max_cached_sock_count = 60000 + ((scale-1) * 15000); |
354 | } |
355 | } else { |
356 | somaxconn = 512*scale; |
357 | tcp_tcbhashsize = 4*1024*scale; |
358 | max_cached_sock_count = 60000 + ((scale-1) * 15000); |
359 | } |
360 | } |
361 | |
362 | if(maxproc > hard_maxproc) { |
363 | hard_maxproc = maxproc; |
364 | } |
365 | #endif |
366 | bsd_exec_setup(scale); |
367 | } |
368 | |
369 | |