1 | /* |
2 | * Copyright (c) 2007 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <kern/affinity.h> |
30 | #include <kern/task.h> |
31 | #include <kern/kalloc.h> |
32 | #include <machine/cpu_affinity.h> |
33 | |
34 | /* |
35 | * Affinity involves 2 objects: |
36 | * - affinity namespace: |
37 | * shared by a task family, this controls affinity tag lookup and |
38 | * allocation; it anchors all affinity sets in one namespace |
39 | * - affinity set: |
40 | * anchors all threads with membership of this affinity set |
41 | * and which share an affinity tag in the owning namespace. |
42 | * |
43 | * Locking: |
44 | * - The task lock protects the creation of an affinity namespace. |
45 | * - The affinity namespace mutex protects the inheritance of a namespace |
46 | * and its thread membership. This includes its destruction when the task |
47 | * reference count goes to zero. |
 *  - The thread mutex protects a thread's affinity set membership, but in
 *    addition, the thread_lock is taken to write thread->affinity_set since this
 *    field (representing the active affinity set) is read by the scheduler.
51 | * |
52 | * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock. |
53 | */ |
54 | |
/*
 * DBG() is the file-local trace macro. When the file is built with
 * AFFINITY_DEBUG set it forwards its arguments to kprintf(), prefixing the
 * format string with "DBG: ". Otherwise it expands to nothing, so its
 * arguments are not evaluated at all.
 */
#if AFFINITY_DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif
60 | |
/*
 * Affinity namespace: anchors every affinity set created by the tasks that
 * share it.
 */
struct affinity_space {
	lck_mtx_t aspc_lock;		/* taken around set lookup, placement and membership changes */
	uint32_t aspc_task_count;	/* tasks sharing this namespace; starts at 1, freed at 0 */
	queue_head_t aspc_affinities;	/* affinity sets, linked through aset_affinities */
};
typedef struct affinity_space *affinity_space_t;
67 | |
/*
 * Forward declarations of the file-local helpers that manage affinity
 * namespaces (affinity_space_*) and affinity sets (affinity_set_*).
 */
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
76 | |
/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled	- disables hinting if cleared
 *   kern.affinity_sets_mapping	- controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Within this file:
 *   - affinity_sets_enabled is consulted by affinity_set_add(): when it is
 *     clear, a thread is still queued on its set but thread->affinity_set
 *     is left NULL.
 *   - affinity_sets_mapping is consulted by affinity_set_place(): when it is
 *     0 the placement search starts at cpu affinity 0, otherwise it starts at
 *     an index derived from the namespace address.
 *
 * Affinity sets are not used on embedded, which typically only
 * has a single pset, and last-processor affinity is
 * more important than pset affinity.
 */
#if CONFIG_EMBEDDED
boolean_t affinity_sets_enabled = FALSE;
int affinity_sets_mapping = 0;
#else /* !CONFIG_EMBEDDED */
boolean_t affinity_sets_enabled = TRUE;
int affinity_sets_mapping = 1;
#endif /* !CONFIG_EMBEDDED */
94 | |
95 | boolean_t |
96 | thread_affinity_is_supported(void) |
97 | { |
98 | return (ml_get_max_affinity_sets() != 0); |
99 | } |
100 | |
101 | |
102 | /* |
103 | * thread_affinity_get() |
104 | * Return the affinity tag for a thread. |
105 | * Called with the thread mutex held. |
106 | */ |
107 | uint32_t |
108 | thread_affinity_get(thread_t thread) |
109 | { |
110 | uint32_t tag; |
111 | |
112 | if (thread->affinity_set != NULL) |
113 | tag = thread->affinity_set->aset_tag; |
114 | else |
115 | tag = THREAD_AFFINITY_TAG_NULL; |
116 | |
117 | return tag; |
118 | } |
119 | |
120 | |
121 | /* |
122 | * thread_affinity_set() |
123 | * Place a thread in an affinity set identified by a tag. |
124 | * Called with thread referenced but not locked. |
125 | */ |
126 | kern_return_t |
127 | thread_affinity_set(thread_t thread, uint32_t tag) |
128 | { |
129 | affinity_set_t aset; |
130 | affinity_set_t empty_aset = NULL; |
131 | affinity_space_t aspc; |
132 | affinity_space_t new_aspc = NULL; |
133 | |
134 | DBG("thread_affinity_set(%p,%u)\n" , thread, tag); |
135 | |
136 | task_lock(thread->task); |
137 | aspc = thread->task->affinity_space; |
138 | if (aspc == NULL) { |
139 | task_unlock(thread->task); |
140 | new_aspc = affinity_space_alloc(); |
141 | if (new_aspc == NULL) |
142 | return KERN_RESOURCE_SHORTAGE; |
143 | task_lock(thread->task); |
144 | if (thread->task->affinity_space == NULL) { |
145 | thread->task->affinity_space = new_aspc; |
146 | new_aspc = NULL; |
147 | } |
148 | aspc = thread->task->affinity_space; |
149 | } |
150 | task_unlock(thread->task); |
151 | if (new_aspc) |
152 | affinity_space_free(new_aspc); |
153 | |
154 | thread_mtx_lock(thread); |
155 | if (!thread->active) { |
156 | /* Beaten to lock and the thread is dead */ |
157 | thread_mtx_unlock(thread); |
158 | return KERN_TERMINATED; |
159 | } |
160 | |
161 | lck_mtx_lock(&aspc->aspc_lock); |
162 | aset = thread->affinity_set; |
163 | if (aset != NULL) { |
164 | /* |
165 | * Remove thread from current affinity set |
166 | */ |
167 | DBG("thread_affinity_set(%p,%u) removing from aset %p\n" , |
168 | thread, tag, aset); |
169 | empty_aset = affinity_set_remove(aset, thread); |
170 | } |
171 | |
172 | if (tag != THREAD_AFFINITY_TAG_NULL) { |
173 | aset = affinity_set_find(aspc, tag); |
174 | if (aset != NULL) { |
175 | /* |
176 | * Add thread to existing affinity set |
177 | */ |
178 | DBG("thread_affinity_set(%p,%u) found aset %p\n" , |
179 | thread, tag, aset); |
180 | } else { |
181 | /* |
182 | * Use the new affinity set, add this thread |
183 | * and place it in a suitable processor set. |
184 | */ |
185 | if (empty_aset != NULL) { |
186 | aset = empty_aset; |
187 | empty_aset = NULL; |
188 | } else { |
189 | aset = affinity_set_alloc(); |
190 | if (aset == NULL) { |
191 | lck_mtx_unlock(&aspc->aspc_lock); |
192 | thread_mtx_unlock(thread); |
193 | return KERN_RESOURCE_SHORTAGE; |
194 | } |
195 | } |
196 | DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n" , |
197 | thread, tag, aset); |
198 | aset->aset_tag = tag; |
199 | affinity_set_place(aspc, aset); |
200 | } |
201 | affinity_set_add(aset, thread); |
202 | } |
203 | |
204 | lck_mtx_unlock(&aspc->aspc_lock); |
205 | thread_mtx_unlock(thread); |
206 | |
207 | /* |
208 | * If we wound up not using an empty aset we created, |
209 | * free it here. |
210 | */ |
211 | if (empty_aset != NULL) |
212 | affinity_set_free(empty_aset); |
213 | |
214 | if (thread == current_thread()) |
215 | thread_block(THREAD_CONTINUE_NULL); |
216 | |
217 | return KERN_SUCCESS; |
218 | } |
219 | |
220 | /* |
221 | * task_affinity_create() |
222 | * Called from task create. |
223 | */ |
224 | void |
225 | task_affinity_create(task_t parent_task, task_t child_task) |
226 | { |
227 | affinity_space_t aspc = parent_task->affinity_space; |
228 | |
229 | DBG("task_affinity_create(%p,%p)\n" , parent_task, child_task); |
230 | |
231 | assert(aspc); |
232 | |
233 | /* |
234 | * Bump the task reference count on the shared namespace and |
235 | * give it to the child. |
236 | */ |
237 | lck_mtx_lock(&aspc->aspc_lock); |
238 | aspc->aspc_task_count++; |
239 | child_task->affinity_space = aspc; |
240 | lck_mtx_unlock(&aspc->aspc_lock); |
241 | } |
242 | |
243 | /* |
244 | * task_affinity_deallocate() |
245 | * Called from task_deallocate() when there's a namespace to dereference. |
246 | */ |
247 | void |
248 | task_affinity_deallocate(task_t task) |
249 | { |
250 | affinity_space_t aspc = task->affinity_space; |
251 | |
252 | DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n" , |
253 | task, aspc, aspc->aspc_task_count); |
254 | |
255 | lck_mtx_lock(&aspc->aspc_lock); |
256 | if (--(aspc->aspc_task_count) == 0) { |
257 | assert(queue_empty(&aspc->aspc_affinities)); |
258 | lck_mtx_unlock(&aspc->aspc_lock); |
259 | affinity_space_free(aspc); |
260 | } else { |
261 | lck_mtx_unlock(&aspc->aspc_lock); |
262 | } |
263 | } |
264 | |
265 | /* |
266 | * task_affinity_info() |
267 | * Return affinity tag info (number, min, max) for the task. |
268 | * |
269 | * Conditions: task is locked. |
270 | */ |
271 | kern_return_t |
272 | task_affinity_info( |
273 | task_t task, |
274 | task_info_t task_info_out, |
275 | mach_msg_type_number_t *task_info_count) |
276 | { |
277 | affinity_set_t aset; |
278 | affinity_space_t aspc; |
279 | task_affinity_tag_info_t info; |
280 | |
281 | *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT; |
282 | info = (task_affinity_tag_info_t) task_info_out; |
283 | info->set_count = 0; |
284 | info->task_count = 0; |
285 | info->min = THREAD_AFFINITY_TAG_NULL; |
286 | info->max = THREAD_AFFINITY_TAG_NULL; |
287 | |
288 | aspc = task->affinity_space; |
289 | if (aspc) { |
290 | lck_mtx_lock(&aspc->aspc_lock); |
291 | queue_iterate(&aspc->aspc_affinities, |
292 | aset, affinity_set_t, aset_affinities) { |
293 | info->set_count++; |
294 | if (info->min == THREAD_AFFINITY_TAG_NULL || |
295 | aset->aset_tag < (uint32_t) info->min) |
296 | info->min = aset->aset_tag; |
297 | if (info->max == THREAD_AFFINITY_TAG_NULL || |
298 | aset->aset_tag > (uint32_t) info->max) |
299 | info->max = aset->aset_tag; |
300 | } |
301 | info->task_count = aspc->aspc_task_count; |
302 | lck_mtx_unlock(&aspc->aspc_lock); |
303 | } |
304 | return KERN_SUCCESS; |
305 | } |
306 | |
307 | /* |
308 | * Called from thread_dup() during fork() with child's mutex held. |
309 | * Set the child into the parent's affinity set. |
310 | * Note the affinity space is shared. |
311 | */ |
312 | void |
313 | thread_affinity_dup(thread_t parent, thread_t child) |
314 | { |
315 | affinity_set_t aset; |
316 | affinity_space_t aspc; |
317 | |
318 | thread_mtx_lock(parent); |
319 | aset = parent->affinity_set; |
320 | DBG("thread_affinity_dup(%p,%p) aset %p\n" , parent, child, aset); |
321 | if (aset == NULL) { |
322 | thread_mtx_unlock(parent); |
323 | return; |
324 | } |
325 | |
326 | aspc = aset->aset_space; |
327 | assert(aspc == parent->task->affinity_space); |
328 | assert(aspc == child->task->affinity_space); |
329 | |
330 | lck_mtx_lock(&aspc->aspc_lock); |
331 | affinity_set_add(aset, child); |
332 | lck_mtx_unlock(&aspc->aspc_lock); |
333 | |
334 | thread_mtx_unlock(parent); |
335 | } |
336 | |
337 | /* |
338 | * thread_affinity_terminate() |
339 | * Remove thread from any affinity set. |
340 | * Called with the thread mutex locked. |
341 | */ |
342 | void |
343 | thread_affinity_terminate(thread_t thread) |
344 | { |
345 | affinity_set_t aset = thread->affinity_set; |
346 | affinity_space_t aspc; |
347 | |
348 | DBG("thread_affinity_terminate(%p)\n" , thread); |
349 | |
350 | aspc = aset->aset_space; |
351 | lck_mtx_lock(&aspc->aspc_lock); |
352 | if (affinity_set_remove(aset, thread)) { |
353 | affinity_set_free(aset); |
354 | } |
355 | lck_mtx_unlock(&aspc->aspc_lock); |
356 | } |
357 | |
358 | /* |
359 | * thread_affinity_exec() |
360 | * Called from execve() to cancel any current affinity - a new image implies |
361 | * the calling thread terminates any expressed or inherited affinity. |
362 | */ |
363 | void |
364 | thread_affinity_exec(thread_t thread) |
365 | { |
366 | if (thread->affinity_set != AFFINITY_SET_NULL) |
367 | thread_affinity_terminate(thread); |
368 | } |
369 | |
370 | /* |
371 | * Create an empty affinity namespace data structure. |
372 | */ |
373 | static affinity_space_t |
374 | affinity_space_alloc(void) |
375 | { |
376 | affinity_space_t aspc; |
377 | |
378 | aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space)); |
379 | if (aspc == NULL) |
380 | return NULL; |
381 | |
382 | lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr); |
383 | queue_init(&aspc->aspc_affinities); |
384 | aspc->aspc_task_count = 1; |
385 | |
386 | DBG("affinity_space_create() returns %p\n" , aspc); |
387 | return aspc; |
388 | } |
389 | |
390 | /* |
391 | * Destroy the given empty affinity namespace data structure. |
392 | */ |
393 | static void |
394 | affinity_space_free(affinity_space_t aspc) |
395 | { |
396 | assert(queue_empty(&aspc->aspc_affinities)); |
397 | |
398 | lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp); |
399 | DBG("affinity_space_free(%p)\n" , aspc); |
400 | kfree(aspc, sizeof(struct affinity_space)); |
401 | } |
402 | |
403 | |
404 | /* |
405 | * Create an empty affinity set data structure |
406 | * entering it into a list anchored by the owning task. |
407 | */ |
408 | static affinity_set_t |
409 | affinity_set_alloc(void) |
410 | { |
411 | affinity_set_t aset; |
412 | |
413 | aset = (affinity_set_t) kalloc(sizeof(struct affinity_set)); |
414 | if (aset == NULL) |
415 | return NULL; |
416 | |
417 | aset->aset_thread_count = 0; |
418 | queue_init(&aset->aset_affinities); |
419 | queue_init(&aset->aset_threads); |
420 | aset->aset_num = 0; |
421 | aset->aset_pset = PROCESSOR_SET_NULL; |
422 | aset->aset_space = NULL; |
423 | |
424 | DBG("affinity_set_create() returns %p\n" , aset); |
425 | return aset; |
426 | } |
427 | |
428 | /* |
429 | * Destroy the given empty affinity set data structure |
430 | * after removing it from the parent task. |
431 | */ |
432 | static void |
433 | affinity_set_free(affinity_set_t aset) |
434 | { |
435 | assert(queue_empty(&aset->aset_threads)); |
436 | |
437 | DBG("affinity_set_free(%p)\n" , aset); |
438 | kfree(aset, sizeof(struct affinity_set)); |
439 | } |
440 | |
441 | /* |
442 | * Add a thread to an affinity set. |
443 | * The caller must have the thread mutex and space locked. |
444 | */ |
445 | static void |
446 | affinity_set_add(affinity_set_t aset, thread_t thread) |
447 | { |
448 | spl_t s; |
449 | |
450 | DBG("affinity_set_add(%p,%p)\n" , aset, thread); |
451 | queue_enter(&aset->aset_threads, |
452 | thread, thread_t, affinity_threads); |
453 | aset->aset_thread_count++; |
454 | s = splsched(); |
455 | thread_lock(thread); |
456 | thread->affinity_set = affinity_sets_enabled ? aset : NULL; |
457 | thread_unlock(thread); |
458 | splx(s); |
459 | } |
460 | |
461 | /* |
462 | * Remove a thread from an affinity set returning the set if now empty. |
463 | * The caller must have the thread mutex and space locked. |
464 | */ |
465 | static affinity_set_t |
466 | affinity_set_remove(affinity_set_t aset, thread_t thread) |
467 | { |
468 | spl_t s; |
469 | |
470 | s = splsched(); |
471 | thread_lock(thread); |
472 | thread->affinity_set = NULL; |
473 | thread_unlock(thread); |
474 | splx(s); |
475 | |
476 | aset->aset_thread_count--; |
477 | queue_remove(&aset->aset_threads, |
478 | thread, thread_t, affinity_threads); |
479 | if (queue_empty(&aset->aset_threads)) { |
480 | queue_remove(&aset->aset_space->aspc_affinities, |
481 | aset, affinity_set_t, aset_affinities); |
482 | assert(aset->aset_thread_count == 0); |
483 | aset->aset_tag = THREAD_AFFINITY_TAG_NULL; |
484 | aset->aset_num = 0; |
485 | aset->aset_pset = PROCESSOR_SET_NULL; |
486 | aset->aset_space = NULL; |
487 | DBG("affinity_set_remove(%p,%p) set now empty\n" , aset, thread); |
488 | return aset; |
489 | } else { |
490 | DBG("affinity_set_remove(%p,%p)\n" , aset, thread); |
491 | return NULL; |
492 | } |
493 | } |
494 | |
495 | /* |
496 | * Find an affinity set in the parent task with the given affinity tag. |
497 | * The caller must have the space locked. |
498 | */ |
499 | static affinity_set_t |
500 | affinity_set_find(affinity_space_t space, uint32_t tag) |
501 | { |
502 | affinity_set_t aset; |
503 | |
504 | queue_iterate(&space->aspc_affinities, |
505 | aset, affinity_set_t, aset_affinities) { |
506 | if (aset->aset_tag == tag) { |
507 | DBG("affinity_set_find(%p,%u) finds %p\n" , |
508 | space, tag, aset); |
509 | return aset; |
510 | } |
511 | } |
512 | DBG("affinity_set_find(%p,%u) not found\n" , space, tag); |
513 | return NULL; |
514 | } |
515 | |
516 | /* |
517 | * affinity_set_place() assigns an affinity set to a suitable processor_set. |
518 | * The selection criteria is: |
519 | * - the set currently occupied by the least number of affinities |
520 | * belonging to the owning the task. |
521 | * The caller must have the space locked. |
522 | */ |
523 | static void |
524 | affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset) |
525 | { |
526 | unsigned int num_cpu_asets = ml_get_max_affinity_sets(); |
527 | unsigned int set_occupancy[num_cpu_asets]; |
528 | unsigned int i; |
529 | unsigned int i_least_occupied; |
530 | affinity_set_t aset; |
531 | |
532 | for (i = 0; i < num_cpu_asets; i++) |
533 | set_occupancy[i] = 0; |
534 | |
535 | /* |
536 | * Scan the affinity sets calculating the number of sets |
537 | * occupy the available physical affinities. |
538 | */ |
539 | queue_iterate(&aspc->aspc_affinities, |
540 | aset, affinity_set_t, aset_affinities) { |
541 | if(aset->aset_num < num_cpu_asets) |
542 | set_occupancy[aset->aset_num]++; |
543 | else |
544 | panic("aset_num = %d in %s\n" , aset->aset_num, __FUNCTION__); |
545 | } |
546 | |
547 | /* |
548 | * Find the least occupied set (or the first empty set). |
549 | * To distribute placements somewhat, start searching from |
550 | * a cpu affinity chosen randomly per namespace: |
551 | * [(unsigned int)aspc % 127] % num_cpu_asets |
552 | * unless this mapping policy is overridden. |
553 | */ |
554 | if (affinity_sets_mapping == 0) |
555 | i_least_occupied = 0; |
556 | else |
557 | i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets); |
558 | for (i = 0; i < num_cpu_asets; i++) { |
559 | unsigned int j = (i_least_occupied + i) % num_cpu_asets; |
560 | if (set_occupancy[j] == 0) { |
561 | i_least_occupied = j; |
562 | break; |
563 | } |
564 | if (set_occupancy[j] < set_occupancy[i_least_occupied]) |
565 | i_least_occupied = j; |
566 | } |
567 | new_aset->aset_num = i_least_occupied; |
568 | new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied); |
569 | |
570 | /* Add the new affinity set to the group */ |
571 | new_aset->aset_space = aspc; |
572 | queue_enter(&aspc->aspc_affinities, |
573 | new_aset, affinity_set_t, aset_affinities); |
574 | |
575 | DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n" , |
576 | aspc, new_aset, new_aset->aset_num, new_aset->aset_pset); |
577 | } |
578 | |