/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>
#include <san/kasan.h>

/*
 * We allocate stacks from generic kernel VM.
 *
 * The stack_free_list can only be accessed at splsched,
 * because stack_alloc_try/thread_invoke operate at splsched.
 */

decl_simple_lock_data(static,stack_lock_data)
#define stack_lock()		simple_lock(&stack_lock_data)
#define stack_unlock()		simple_unlock(&stack_lock_data)

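/*
 * Each processor caches up to STACK_CACHE_SIZE free stacks locally,
 * avoiding the global free list (and its lock) on the hot path.
 */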
#define STACK_CACHE_SIZE	2

static vm_offset_t		stack_free_list;

static unsigned int		stack_free_count, stack_free_hiwat;	/* free list count */
static unsigned int		stack_hiwat;
unsigned int			stack_total;		/* current total count */
unsigned long long		stack_allocs;		/* total count of allocations */

static int			stack_fake_zone_index = -1;	/* index in zone_info array */

static unsigned int		stack_free_target;
static int			stack_free_delta;

static unsigned int		stack_new_count;	/* total new stack allocations */

static vm_offset_t		stack_addr_mask;

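/*
 * Stack geometry.  The defaults may be overridden with the
 * kernel_stack_pages boot-arg (see stack_init() below); the masks are
 * derived from the rounded-up stack size once it is known.
 */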
unsigned int			kernel_stack_pages;
vm_offset_t			kernel_stack_size;
vm_offset_t			kernel_stack_mask;
vm_offset_t			kernel_stack_depth_max;

/*
 * The next field is at the base of the stack,
 * so the low end is left unsullied.
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))

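/*
 * NB: despite the name, this returns the number of bits needed to
 * represent 'size' (floor(log2(size)) + 1), not the index of the
 * highest set bit.
 */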
static inline int
log2(vm_offset_t size)
{
	int	result;
	for (result = 0; size > 0; result++)
		size >>= 1;
	return result;
}

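/*
 * Given the log2() above, this evaluates to twice the power of two
 * that 'size' rounds up to, so stacks end up aligned to twice their
 * rounded size.
 */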
static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

	kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
	kernel_stack_size = KERNEL_STACK_SIZE;
	kernel_stack_mask = -KERNEL_STACK_SIZE;
	kernel_stack_depth_max = 0;

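	/*
	 * Allow the stack size to be overridden from the boot command
	 * line, e.g. kernel_stack_pages=8 for an eight-page stack.
	 */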
	if (PE_parse_boot_argn("kernel_stack_pages",
	    &kernel_stack_pages,
	    sizeof(kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
		    kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size))
		panic("stack_init: stack size %p not a multiple of page size %d\n",
		    (void *) kernel_stack_size, PAGE_SIZE);

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}

/*
 * stack_alloc:
 *
 * Allocate a kernel stack for a thread; may block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t	stack = 0;
	spl_t		s;
	int		flags = 0;
	kern_return_t	kr = KERN_SUCCESS;

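	/*
	 * Pop a stack from the global free list if one is available;
	 * otherwise just update the counters and allocate fresh VM below.
	 */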
	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	}
	else {
		if (++stack_total > stack_hiwat)
			stack_hiwat = stack_total;
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {

		/*
		 * Request guard pages on either side of the stack. Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */
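		/*
		 * Resulting layout of the allocation, lower addresses first:
		 *
		 *	+------------+---------------------------------+------------+
		 *	| guard page | stack (kernel_stack_size bytes) | guard page |
		 *	+------------+---------------------------------+------------+
		 */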

		flags = KMA_GUARD_FIRST | KMA_GUARD_LAST | KMA_KSTACK | KMA_KOBJECT | KMA_ZERO;
		kr = kernel_memory_allocate(kernel_map, &stack,
		    kernel_stack_size + (2 * PAGE_SIZE),
		    stack_addr_mask,
		    flags,
		    VM_KERN_MEMORY_STACK);
		if (kr != KERN_SUCCESS) {
			panic("stack_alloc: kernel_memory_allocate(size:0x%llx, mask: 0x%llx, flags: 0x%x) failed with %d\n",
			    (uint64_t)(kernel_stack_size + (2 * PAGE_SIZE)),
			    (uint64_t)stack_addr_mask, flags, kr);
		}

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page. Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{
	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
}

void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
}

/*
 * stack_free:
 *
 * Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t	stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		stack_free_stack(stack);
	}
}

void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
	}
}

static void
stack_free_stack(
	vm_offset_t	stack)
{
	struct stack_cache	*cache;
	spl_t			s;

#if KASAN_DEBUG
	/* Sanity check - stack should be unpoisoned by now */
	assert(kasan_check_shadow(stack, kernel_stack_size, 0));
#endif

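	/*
	 * Prefer the current processor's small local cache; once it holds
	 * STACK_CACHE_SIZE stacks, spill onto the global free list under
	 * the stack lock.
	 */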
	s = splsched();
	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	}
	else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat)
			stack_free_hiwat = stack_free_count;
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 * stack_alloc_try:
 *
 * Non-blocking attempt to allocate a
 * stack for a thread.
 *
 * Returns TRUE on success.
 *
 * Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t	thread)
{
	struct stack_cache	*cache;
	vm_offset_t		stack;

	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	stack = cache->free;
	if (stack != 0) {
		cache->free = stack_next(stack);
		cache->count--;
	}
	else {
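		/*
		 * The unlocked read of stack_free_list is only a hint;
		 * re-check it after taking the lock.
		 */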
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return (TRUE);
	}

	return (FALSE);
}

static unsigned int stack_collect_tick, last_stack_tick;

/*
 * stack_collect:
 *
 * Free excess kernel stacks; may block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int	target;
		vm_offset_t	stack;
		spl_t		s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page. Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was
			 * allocated back in stack_alloc_internal().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(
				stack,
				VM_MAP_PAGE_MASK(kernel_map));
			stack -= PAGE_SIZE;
			if (vm_map_remove(
				kernel_map,
				stack,
				stack + kernel_stack_size + (2 * PAGE_SIZE),
				VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS)
				panic("stack_collect: vm_map_remove");
			stack = 0;

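			/*
			 * The stack lock was dropped around vm_map_remove(),
			 * so the free list and activity counters may have
			 * changed; recompute the target.
			 */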
			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 * compute_stack_target:
 *
 * Computes a new target free list count
 * based on recent alloc / free activity.
 *
 * Limits stack collection to once per
 * computation period.
 */
void
compute_stack_target(
	__unused void		*arg)
{
	spl_t		s;

	s = splsched();
	stack_lock();

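	/*
	 * Decay the previous target by roughly 20% per period, then fold
	 * in the magnitude of the recent alloc/free activity.
	 */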
	if (stack_free_target > 5)
		stack_free_target = (4 * stack_free_target) / 5;
	else
	if (stack_free_target > 0)
		stack_free_target--;

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}

void
stack_fake_zone_init(int zone_index)
{
	stack_fake_zone_index = zone_index;
}

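/*
 * Report stack usage in the style of a zone, so that user-level tools
 * (e.g. zprint) can show kernel stacks alongside the real zones even
 * though stacks are allocated from raw kernel VM.
 */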
void
stack_fake_zone_info(int *count,
    vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
    uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	unsigned int		total, hiwat, free;
	unsigned long long	all;
	spl_t			s;

	s = splsched();
	stack_lock();
	all = stack_allocs;
	total = stack_total;
	hiwat = stack_hiwat;
	free = stack_free_count;
	stack_unlock();
	splx(s);

	*count = total - free;
	*cur_size = kernel_stack_size * total;
	*max_size = kernel_stack_size * hiwat;
	*elem_size = kernel_stack_size;
	*alloc_size = kernel_stack_size;
	*sum_size = all * kernel_stack_size;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

/* OBSOLETE */
void	stack_privilege(
		thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 * Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int	total;
	vm_size_t	maxusage;
	vm_offset_t	maxstack;

	thread_t	*thread_list;
	thread_t	thread;

	unsigned int	actual;		/* this many things */
	unsigned int	i;

	vm_size_t	size, size_needed;
	void		*addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0)
		return KERN_INVALID_ARGUMENT;

	size = 0;
	addr = NULL;

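	/*
	 * Size a buffer for the thread list without holding the lock
	 * across the allocation: take the lock, check whether the current
	 * buffer is big enough, and if not, drop the lock, grow the
	 * buffer, and try again.
	 */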
	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */

		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size)
			break;

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return KERN_RESOURCE_SHORTAGE;
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t)(void *) queue_first(&threads);
	    !queue_end(&threads, (queue_entry_t) thread);
	    thread = (thread_t)(void *) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/*
	 * Count the threads that currently have kernel stacks, and drop
	 * the thread references.  maxusage and maxstack are no longer
	 * computed here and are reported as zero.
	 */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t	threadref = thread_list[--i];

		if (threadref->kernel_stack != 0)
			total++;

		thread_deallocate(threadref);
	}

	if (size != 0)
		kfree(addr, size);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif	/* MACH_DEBUG */
}

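/*
 * Stacks are allocated from kernel_map, so the bounds of kernel_map
 * bound any valid kernel stack address.
 */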
vm_offset_t min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}