/*
 * Copyright (c) 2003-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/lock_group.h>
#include <kern/mach_param.h>
#include <kern/misc_protos.h>
#include <kern/percpu.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <san/kasan.h>

/*
 * We allocate stacks from generic kernel VM.
 *
 * The stack_free_list can only be accessed at splsched,
 * because stack_alloc_try/thread_invoke operate at splsched.
 */
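/*
 * Sketch of the access discipline the routines below follow for the
 * global free list and its counters (the per-CPU cache needs only
 * splsched, not the lock):
 *
 *	s = splsched();
 *	stack_lock();
 *	... touch stack_free_list and the counters ...
 *	stack_unlock();
 *	splx(s);
 */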

static SIMPLE_LOCK_DECLARE(stack_lock_data, 0);
#define stack_lock()	simple_lock(&stack_lock_data, LCK_GRP_NULL)
#define stack_unlock()	simple_unlock(&stack_lock_data)

#define STACK_CACHE_SIZE	2

static vm_offset_t stack_free_list;

static unsigned int stack_free_count, stack_free_hiwat;	/* free list count */
static unsigned int stack_hiwat;
unsigned int stack_total;		/* current total count */
unsigned long long stack_allocs;	/* total count of allocations */

static unsigned int stack_free_target;
static int stack_free_delta;

static unsigned int stack_new_count;	/* total new stack allocations */

static SECURITY_READ_ONLY_LATE(vm_offset_t) stack_addr_mask;
SECURITY_READ_ONLY_LATE(vm_offset_t) kernel_stack_size;
SECURITY_READ_ONLY_LATE(vm_offset_t) kernel_stack_mask;
vm_offset_t kernel_stack_depth_max;

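/*
 * Small per-CPU cache of free kernel stacks. Entries are pushed and
 * popped at splsched without taking stack_lock (see stack_free_stack()
 * and stack_alloc_try() below); overflow spills to the global free list.
 */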
struct stack_cache {
	vm_offset_t	free;
	unsigned int	count;
};
static struct stack_cache PERCPU_DATA(stack_cache);

/*
 * The next field is at the base of the stack,
 * so the low end is left unsullied.
 */
#define stack_next(stack) \
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))
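
/*
 * Resulting layout of one stack allocation (a sketch; kernel stacks
 * grow downward, so the "base" is the high end):
 *
 *	high addresses
 *	+------------------------+
 *	|    upper guard page    |
 *	+------------------------+ <- stack + kernel_stack_size
 *	| stack_next() link word |    (meaningful only while on a free list)
 *	|  ... usable stack ...  |
 *	+------------------------+ <- stack, as returned by stack_alloc_internal()
 *	|    lower guard page    |
 *	+------------------------+ <- address returned by kernel_memory_allocate()
 *	low addresses
 */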

static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	if ((size & (size - 1)) == 0) {
		/* if size is a power of 2 we're good */
		return size;
	}

	return 1ul << flsll(size);
}
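
/*
 * For example, a 12 KB (0x3000) stack size rounds up to 16 KB (0x4000),
 * while a size that is already a power of two is returned unchanged.
 */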

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

static void
stack_init(void)
{
	uint32_t kernel_stack_pages = atop(KERNEL_STACK_SIZE);

	kernel_stack_size = KERNEL_STACK_SIZE;
	kernel_stack_mask = -KERNEL_STACK_SIZE;

	if (PE_parse_boot_argn("kernel_stack_pages",
	    &kernel_stack_pages,
	    sizeof(kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
	}

	if (kernel_stack_size < round_page(kernel_stack_size)) {
		panic("stack_init: stack size %p not a multiple of page size %d",
		    (void *) kernel_stack_size, PAGE_SIZE);
	}

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}
STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, stack_init);
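
/*
 * For illustration (assuming 4 KB pages), booting with kernel_stack_pages=4
 * yields kernel_stack_size = 16 KB, stack_addr_mask = 0x3fff (which
 * stack_alloc_internal() passes to kernel_memory_allocate() as an
 * alignment mask) and kernel_stack_mask = ~0x3fff.
 */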

/*
 * stack_alloc:
 *
 * Allocate a stack for a thread, may
 * block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t stack = 0;
	spl_t s;
	kma_flags_t flags = KMA_NOFAIL | KMA_GUARD_FIRST | KMA_GUARD_LAST |
	    KMA_KSTACK | KMA_KOBJECT | KMA_ZERO | KMA_SPRAYQTN;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	} else {
		if (++stack_total > stack_hiwat) {
			stack_hiwat = stack_total;
		}
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {
		/*
		 * Request guard pages on either side of the stack. Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */

		kernel_memory_allocate(kernel_map, &stack,
		    kernel_stack_size + ptoa(2), stack_addr_mask,
		    flags, VM_KERN_MEMORY_STACK);

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page. Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}
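
/*
 * Size arithmetic, for illustration (assuming 4 KB pages and a 16 KB
 * stack): kernel_stack_size + ptoa(2) = 16 KB + 8 KB = 24 KB, i.e. four
 * stack pages bracketed by one guard page on each side, with the value
 * returned to the caller pointing just past the lower guard page.
 */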

void
stack_alloc(
	thread_t	thread)
{
	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
}

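/*
 * stack_handoff:
 *
 * Move the current thread's kernel stack to 'to' as part of a handoff
 * context switch, instead of detaching and reallocating it; the
 * machine-dependent work is done by machine_stack_handoff().
 */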
void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
}

/*
 * stack_free:
 *
 * Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		stack_free_stack(stack);
	}
}

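/*
 * stack_free_reserved:
 *
 * Free the thread's reserved stack, unless it is currently attached
 * as the thread's kernel stack.
 */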
void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
	}
}

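/*
 * stack_free_stack:
 *
 * Return a stack to the current CPU's free cache, or to the global
 * free list once the cache already holds STACK_CACHE_SIZE stacks.
 */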
static void
stack_free_stack(
	vm_offset_t	stack)
{
	struct stack_cache *cache;
	spl_t s;

#if KASAN_DEBUG
	/* Sanity check - stack should be unpoisoned by now */
	assert(kasan_check_shadow(stack, kernel_stack_size, 0));
#endif

	s = splsched();
	cache = PERCPU_GET(stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	} else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat) {
			stack_free_hiwat = stack_free_count;
		}
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 * stack_alloc_try:
 *
 * Non-blocking attempt to allocate a
 * stack for a thread.
 *
 * Returns TRUE on success.
 *
 * Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t	thread)
{
	struct stack_cache *cache;
	vm_offset_t stack;

	cache = PERCPU_GET(stack_cache);
	stack = cache->free;
	if (stack != 0) {
		cache->free = stack_next(stack);
		cache->count--;
	} else {
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return TRUE;
	}

	return FALSE;
}
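
/*
 * Note on stack_alloc_try(): the per-CPU cache is consulted first and can
 * be popped without stack_lock because the caller is already at splsched
 * (see the comment at the top of this file); stack_free_list is peeked at
 * without the lock only as a hint, and is re-checked under stack_lock
 * before being modified.
 */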

static unsigned int stack_collect_tick, last_stack_tick;

/*
 * stack_collect:
 *
 * Free excess kernel stacks, may
 * block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int target;
		vm_offset_t stack;
		spl_t s;

		s = splsched();
		stack_lock();

		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page. Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(
				stack,
				VM_MAP_PAGE_MASK(kernel_map));
			stack -= PAGE_SIZE;
			kmem_free(kernel_map, stack, kernel_stack_size + ptoa(2));
			stack = 0;

			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 * compute_stack_target:
 *
 * Computes a new target free list count
 * based on recent alloc / free activity.
 *
 * Limits stack collection to once per
 * computation period.
 */
void
compute_stack_target(
	__unused void		*arg)
{
	spl_t s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5) {
		stack_free_target = (4 * stack_free_target) / 5;
	} else if (stack_free_target > 0) {
		stack_free_target--;
	}

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}
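
/*
 * For example, a period that starts with stack_free_target == 10 and ends
 * with |stack_free_delta| == 3 produces a new target of (4 * 10) / 5 + 3
 * == 11; with no alloc/free activity the target simply decays toward zero.
 */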

/* OBSOLETE */
void	stack_privilege(
	thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 * Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if DEVELOPMENT || DEBUG
	unsigned int total = 0;
	thread_t thread;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0) {
		return KERN_INVALID_ARGUMENT;
	}

	lck_mtx_lock(&tasks_threads_lock);

	queue_iterate(&threads, thread, thread_t, threads) {
		total += (thread->kernel_stack != 0);
	}

	lck_mtx_unlock(&tasks_threads_lock);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = 0;
	*maxstackp = 0;
	return KERN_SUCCESS;

#else
#pragma unused(pset, totalp, spacep, residentp, maxusagep, maxstackp)
	return KERN_NOT_SUPPORTED;
#endif /* DEVELOPMENT || DEBUG */
}

vm_offset_t
min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t
max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}