/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/affinity.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#include <machine/cpu_affinity.h>

/*
 * Affinity involves 2 objects:
 * - affinity namespace:
 *	shared by a task family, this controls affinity tag lookup and
 *	allocation; it anchors all affinity sets in one namespace
 * - affinity set:
 *	anchors all threads that are members of this affinity set and
 *	share an affinity tag in the owning namespace.
 *
 * Locking:
 * - The task lock protects the creation of an affinity namespace.
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition the thread_lock is taken to write thread->affinity_set, since
 *   this field (representing the active affinity set) is read by the
 *   scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */
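
/*
 * For context: userspace reaches this mechanism through the
 * THREAD_AFFINITY_POLICY thread policy declared in
 * osfmk/mach/thread_policy.h. A minimal userspace sketch (the tag value
 * 1 is arbitrary; any non-null tag groups cooperating threads, and error
 * handling is omitted):
 *
 *	thread_affinity_policy_data_t policy = { .affinity_tag = 1 };
 *	thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
 *	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
 *
 * Threads sharing a non-null tag within a namespace are hinted onto the
 * same processor set (typically one sharing an L2 cache) where the
 * topology allows it.
 */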

#if AFFINITY_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

struct affinity_space {
	lck_mtx_t       aspc_lock;
	uint32_t        aspc_task_count;
	queue_head_t    aspc_affinities;
};
typedef struct affinity_space *affinity_space_t;

static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);

/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled - disables hinting if cleared
 *   kern.affinity_sets_mapping - controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded targets, which typically have
 * only a single pset, and where last-processor affinity is more important
 * than pset affinity.
 */
#if !defined(XNU_TARGET_OS_OSX)
boolean_t affinity_sets_enabled = FALSE;
int affinity_sets_mapping = 0;
#else /* !defined(XNU_TARGET_OS_OSX) */
boolean_t affinity_sets_enabled = TRUE;
int affinity_sets_mapping = 1;
#endif /* !defined(XNU_TARGET_OS_OSX) */
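
/*
 * A minimal userspace sketch of reading one of the tunables above with
 * sysctlbyname() (names as listed in the comment; error handling omitted):
 *
 *	int enabled = 0;
 *	size_t len = sizeof(enabled);
 *	sysctlbyname("kern.affinity_sets_enabled", &enabled, &len, NULL, 0);
 */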

boolean_t
thread_affinity_is_supported(void)
{
	return ml_get_max_affinity_sets() != 0;
}


/*
 * thread_affinity_get()
 * Return the affinity tag for a thread.
 * Called with the thread mutex held.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
	uint32_t tag;

	if (thread->affinity_set != NULL) {
		tag = thread->affinity_set->aset_tag;
	} else {
		tag = THREAD_AFFINITY_TAG_NULL;
	}

	return tag;
}


/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * Called with thread referenced but not locked.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	task_t task = get_threadtask(thread);
	affinity_set_t aset;
	affinity_set_t empty_aset = NULL;
	affinity_space_t aspc;
	affinity_space_t new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	task_lock(task);
	aspc = task->affinity_space;
	if (aspc == NULL) {
		task_unlock(task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL) {
			return KERN_RESOURCE_SHORTAGE;
		}
		task_lock(task);
		if (task->affinity_space == NULL) {
			task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = task->affinity_space;
	}
	task_unlock(task);
	if (new_aspc) {
		affinity_space_free(new_aspc);
	}

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
		    thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
			    thread, tag, aset);
		} else {
			/*
			 * Use a new affinity set, add this thread
			 * and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
			    thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL) {
		affinity_set_free(empty_aset);
	}

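	/*
	 * Note: when the caller changed its own affinity, blocking briefly
	 * lets the scheduler re-evaluate placement against the newly
	 * selected pset rather than keep running on the current processor.
	 */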
	if (thread == current_thread()) {
		thread_block(THREAD_CONTINUE_NULL);
	}

	return KERN_SUCCESS;
}

/*
 * task_affinity_create()
 * Called from task create.
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
	affinity_space_t aspc = parent_task->affinity_space;

	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

	assert(aspc);

	/*
	 * Bump the task reference count on the shared namespace and
	 * give it to the child.
	 */
	lck_mtx_lock(&aspc->aspc_lock);
	aspc->aspc_task_count++;
	child_task->affinity_space = aspc;
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * task_affinity_deallocate()
 * Called from task_deallocate() when there's a namespace to dereference.
 */
void
task_affinity_deallocate(task_t task)
{
	affinity_space_t aspc = task->affinity_space;

	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
	    task, aspc, aspc->aspc_task_count);

	lck_mtx_lock(&aspc->aspc_lock);
	if (--(aspc->aspc_task_count) == 0) {
		assert(queue_empty(&aspc->aspc_affinities));
		lck_mtx_unlock(&aspc->aspc_lock);
		affinity_space_free(aspc);
	} else {
		lck_mtx_unlock(&aspc->aspc_lock);
	}
}

/*
 * task_affinity_info()
 * Return affinity tag info (number, min, max) for the task.
 *
 * Conditions: task is locked.
 */
kern_return_t
task_affinity_info(
	task_t task,
	task_info_t task_info_out,
	mach_msg_type_number_t *task_info_count)
{
	affinity_set_t aset;
	affinity_space_t aspc;
	task_affinity_tag_info_t info;

	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
	info = (task_affinity_tag_info_t) task_info_out;
	info->set_count = 0;
	info->task_count = 0;
	info->min = THREAD_AFFINITY_TAG_NULL;
	info->max = THREAD_AFFINITY_TAG_NULL;

	aspc = task->affinity_space;
	if (aspc) {
		lck_mtx_lock(&aspc->aspc_lock);
		queue_iterate(&aspc->aspc_affinities,
		    aset, affinity_set_t, aset_affinities) {
			info->set_count++;
			if (info->min == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag < (uint32_t) info->min) {
				info->min = aset->aset_tag;
			}
			if (info->max == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag > (uint32_t) info->max) {
				info->max = aset->aset_tag;
			}
		}
		info->task_count = aspc->aspc_task_count;
		lck_mtx_unlock(&aspc->aspc_lock);
	}
	return KERN_SUCCESS;
}
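
/*
 * A minimal userspace sketch of retrieving this information with
 * task_info() (TASK_AFFINITY_TAG_INFO and task_affinity_tag_info_data_t
 * are declared in osfmk/mach/task_info.h; error handling omitted):
 *
 *	task_affinity_tag_info_data_t info;
 *	mach_msg_type_number_t count = TASK_AFFINITY_TAG_INFO_COUNT;
 *	kern_return_t kr = task_info(mach_task_self(),
 *	    TASK_AFFINITY_TAG_INFO, (task_info_t)&info, &count);
 */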

/*
 * thread_affinity_dup()
 * Called from thread_dup() during fork() with the child's mutex held.
 * Set the child into the parent's affinity set.
 * Note the affinity space is shared.
 */
void
thread_affinity_dup(thread_t parent, thread_t child)
{
	affinity_set_t aset;
	affinity_space_t aspc;

	thread_mtx_lock(parent);
	aset = parent->affinity_set;
	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
	if (aset == NULL) {
		thread_mtx_unlock(parent);
		return;
	}

	aspc = aset->aset_space;
	assert(aspc == get_threadtask(parent)->affinity_space);
	assert(aspc == get_threadtask(child)->affinity_space);

	lck_mtx_lock(&aspc->aspc_lock);
	affinity_set_add(aset, child);
	lck_mtx_unlock(&aspc->aspc_lock);

	thread_mtx_unlock(parent);
}

/*
 * thread_affinity_terminate()
 * Remove thread from any affinity set.
 * Called with the thread mutex locked.
 */
void
thread_affinity_terminate(thread_t thread)
{
	affinity_set_t aset = thread->affinity_set;
	affinity_space_t aspc;

	DBG("thread_affinity_terminate(%p)\n", thread);

	aspc = aset->aset_space;
	lck_mtx_lock(&aspc->aspc_lock);
	if (affinity_set_remove(aset, thread)) {
		affinity_set_free(aset);
	}
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * thread_affinity_exec()
 * Called from execve() to cancel any current affinity - a new image implies
 * the calling thread terminates any expressed or inherited affinity.
 */
void
thread_affinity_exec(thread_t thread)
{
	if (thread->affinity_set != AFFINITY_SET_NULL) {
		thread_affinity_terminate(thread);
	}
}
/*
 * Create an empty affinity namespace data structure.
 */
static affinity_space_t
affinity_space_alloc(void)
{
	affinity_space_t aspc;

	aspc = kalloc_type(struct affinity_space, Z_WAITOK | Z_NOFAIL);

	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
	queue_init(&aspc->aspc_affinities);
	aspc->aspc_task_count = 1;

	DBG("affinity_space_create() returns %p\n", aspc);
	return aspc;
}

/*
 * Destroy the given empty affinity namespace data structure.
 */
static void
affinity_space_free(affinity_space_t aspc)
{
	assert(queue_empty(&aspc->aspc_affinities));

	lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
	DBG("affinity_space_free(%p)\n", aspc);
	kfree_type(struct affinity_space, aspc);
}


/*
 * Create an empty affinity set data structure
 * entering it into a list anchored by the owning task.
 */
static affinity_set_t
affinity_set_alloc(void)
{
	affinity_set_t aset;

	aset = kalloc_type(struct affinity_set, Z_WAITOK | Z_NOFAIL);

	aset->aset_thread_count = 0;
	queue_init(&aset->aset_affinities);
	queue_init(&aset->aset_threads);
	aset->aset_num = 0;
	aset->aset_pset = PROCESSOR_SET_NULL;
	aset->aset_space = NULL;

	DBG("affinity_set_create() returns %p\n", aset);
	return aset;
}

/*
 * Destroy the given empty affinity set data structure
 * after removing it from the parent task.
 */
static void
affinity_set_free(affinity_set_t aset)
{
	assert(queue_empty(&aset->aset_threads));

	DBG("affinity_set_free(%p)\n", aset);
	kfree_type(struct affinity_set, aset);
}

/*
 * Add a thread to an affinity set.
 * The caller must have the thread mutex and space locked.
 */
static void
affinity_set_add(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	DBG("affinity_set_add(%p,%p)\n", aset, thread);
	queue_enter(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	aset->aset_thread_count++;
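	/*
	 * Per the locking rules at the top of this file, the thread spinlock
	 * is taken at splsched() to publish the new affinity_set value,
	 * because the scheduler reads thread->affinity_set under that lock.
	 */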
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = aset;
	thread_unlock(thread);
	splx(s);
}

/*
 * Remove a thread from an affinity set, returning the set if now empty.
 * The caller must have the thread mutex and space locked.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
	spl_t s;

	s = splsched();
	thread_lock(thread);
	thread->affinity_set = NULL;
	thread_unlock(thread);
	splx(s);

	aset->aset_thread_count--;
	queue_remove(&aset->aset_threads,
	    thread, thread_t, affinity_threads);
	if (queue_empty(&aset->aset_threads)) {
		queue_remove(&aset->aset_space->aspc_affinities,
		    aset, affinity_set_t, aset_affinities);
		assert(aset->aset_thread_count == 0);
		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
		aset->aset_num = 0;
		aset->aset_pset = PROCESSOR_SET_NULL;
		aset->aset_space = NULL;
		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
		return aset;
	} else {
		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
		return NULL;
	}
}

/*
 * Find an affinity set in the parent task with the given affinity tag.
 * The caller must have the space locked.
 */
static affinity_set_t
affinity_set_find(affinity_space_t space, uint32_t tag)
{
	affinity_set_t aset;

	queue_iterate(&space->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_tag == tag) {
			DBG("affinity_set_find(%p,%u) finds %p\n",
			    space, tag, aset);
			return aset;
		}
	}
	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
	return NULL;
}

/*
 * affinity_set_place() assigns an affinity set to a suitable processor_set.
 * The selection criterion is:
 * - the affinity number currently occupied by the fewest affinity sets
 *   belonging to the owning task.
 * The caller must have the space locked.
 */
static void
affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
{
	unsigned short set_occupancy[MAX_CPUS] = { 0 };
	unsigned num_cpu_asets = ml_get_max_affinity_sets();
	unsigned i_least_occupied;
	affinity_set_t aset;

	if (__improbable(num_cpu_asets > MAX_CPUS)) {
		// If this triggers then the array needs to be made bigger.
		panic("num_cpu_asets = %d > %d too big in %s", num_cpu_asets, MAX_CPUS, __FUNCTION__);
	}

	/*
	 * Scan the affinity sets, counting how many occupy each of the
	 * available physical affinities.
	 */
	queue_iterate(&aspc->aspc_affinities,
	    aset, affinity_set_t, aset_affinities) {
		if (aset->aset_num < num_cpu_asets) {
			set_occupancy[aset->aset_num]++;
		} else {
			panic("aset_num = %d in %s", aset->aset_num, __FUNCTION__);
		}
	}

	/*
	 * Find the least occupied set (or the first empty set).
	 * To distribute placements somewhat, start searching from
	 * a cpu affinity chosen randomly per namespace:
	 *   [(unsigned int)aspc % 127] % num_cpu_asets
	 * unless this mapping policy is overridden.
	 */
	if (affinity_sets_mapping == 0) {
		i_least_occupied = 0;
	} else {
		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
	}
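
	/*
	 * Worked example (hypothetical values): with num_cpu_asets == 4 and
	 * a namespace whose pointer bits give (uintptr_t)aspc % 127 == 114,
	 * the scan below starts at index 114 % 4 == 2 and wraps around.
	 */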
	for (unsigned i = 0; i < num_cpu_asets; i++) {
		unsigned int j = (i_least_occupied + i) % num_cpu_asets;
		if (set_occupancy[j] == 0) {
			i_least_occupied = j;
			break;
		}
		if (set_occupancy[j] < set_occupancy[i_least_occupied]) {
			i_least_occupied = j;
		}
	}
	new_aset->aset_num = i_least_occupied;
	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);

	/* Add the new affinity set to the group */
	new_aset->aset_space = aspc;
	queue_enter(&aspc->aspc_affinities,
	    new_aset, affinity_set_t, aset_affinities);

	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
}