1/*
2 * Copyright (c) 2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/affinity.h>
30#include <kern/task.h>
31#include <kern/kalloc.h>
32#include <machine/cpu_affinity.h>
33
34/*
35 * Affinity involves 2 objects:
36 * - affinity namespace:
37 * shared by a task family, this controls affinity tag lookup and
38 * allocation; it anchors all affinity sets in one namespace
39 * - affinity set:
40 * anchors all threads with membership of this affinity set
41 * and which share an affinity tag in the owning namespace.
42 *
43 * Locking:
44 * - The task lock protects the creation of an affinity namespace.
45 * - The affinity namespace mutex protects the inheritance of a namespace
46 * and its thread membership. This includes its destruction when the task
47 * reference count goes to zero.
48 * - The thread mutex protects a thread's affinity set membership, but in
49 * addition, the thread_lock is taken to write thread->affinity_set since this
 * field (representing the active affinity set) is read by the scheduler.
51 *
52 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
53 */
54
/*
 * Debug tracing.  When AFFINITY_DEBUG is set, DBG() hands its arguments to
 * kprintf() with "DBG: " pasted in front of the format string; otherwise
 * DBG() expands to nothing.
 */
#if AFFINITY_DEBUG
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif
60
/*
 * Affinity namespace: anchors the affinity sets created under it.
 */
struct affinity_space {
	lck_mtx_t aspc_lock;		/* namespace mutex, held around set lookup/add/remove */
	uint32_t aspc_task_count;	/* tasks referencing this namespace; freed when it drops to 0 */
	queue_head_t aspc_affinities;	/* queue of affinity sets placed in this namespace */
};
typedef struct affinity_space *affinity_space_t;
67
/*
 * Forward declarations of the file-local helpers defined further down:
 * namespace alloc/free, and affinity set alloc/free/find/place/add/remove.
 */
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
76
77/*
78 * The following globals may be modified by the sysctls
79 * kern.affinity_sets_enabled - disables hinting if cleared
80 * kern.affinity_sets_mapping - controls cache distribution policy
81 * See bsd/kern_sysctl.c
82 *
83 * Affinity sets are not used on embedded, which typically only
84 * has a single pset, and last-processor affinity is
85 * more important than pset affinity.
86 */
/*
 * affinity_sets_enabled: when clear, affinity_set_add() records NULL rather
 *	than the set in thread->affinity_set.
 * affinity_sets_mapping: when 0, affinity_set_place() starts its search at
 *	cpu affinity 0; otherwise the starting point is derived from the
 *	namespace address.
 */
#if CONFIG_EMBEDDED
boolean_t affinity_sets_enabled = FALSE;
int affinity_sets_mapping = 0;
#else /* !CONFIG_EMBEDDED */
boolean_t affinity_sets_enabled = TRUE;
int affinity_sets_mapping = 1;
#endif /* !CONFIG_EMBEDDED */
94
95boolean_t
96thread_affinity_is_supported(void)
97{
98 return (ml_get_max_affinity_sets() != 0);
99}
100
101
102/*
103 * thread_affinity_get()
104 * Return the affinity tag for a thread.
105 * Called with the thread mutex held.
106 */
107uint32_t
108thread_affinity_get(thread_t thread)
109{
110 uint32_t tag;
111
112 if (thread->affinity_set != NULL)
113 tag = thread->affinity_set->aset_tag;
114 else
115 tag = THREAD_AFFINITY_TAG_NULL;
116
117 return tag;
118}
119
120
121/*
122 * thread_affinity_set()
123 * Place a thread in an affinity set identified by a tag.
124 * Called with thread referenced but not locked.
125 */
126kern_return_t
127thread_affinity_set(thread_t thread, uint32_t tag)
128{
129 affinity_set_t aset;
130 affinity_set_t empty_aset = NULL;
131 affinity_space_t aspc;
132 affinity_space_t new_aspc = NULL;
133
134 DBG("thread_affinity_set(%p,%u)\n", thread, tag);
135
136 task_lock(thread->task);
137 aspc = thread->task->affinity_space;
138 if (aspc == NULL) {
139 task_unlock(thread->task);
140 new_aspc = affinity_space_alloc();
141 if (new_aspc == NULL)
142 return KERN_RESOURCE_SHORTAGE;
143 task_lock(thread->task);
144 if (thread->task->affinity_space == NULL) {
145 thread->task->affinity_space = new_aspc;
146 new_aspc = NULL;
147 }
148 aspc = thread->task->affinity_space;
149 }
150 task_unlock(thread->task);
151 if (new_aspc)
152 affinity_space_free(new_aspc);
153
154 thread_mtx_lock(thread);
155 if (!thread->active) {
156 /* Beaten to lock and the thread is dead */
157 thread_mtx_unlock(thread);
158 return KERN_TERMINATED;
159 }
160
161 lck_mtx_lock(&aspc->aspc_lock);
162 aset = thread->affinity_set;
163 if (aset != NULL) {
164 /*
165 * Remove thread from current affinity set
166 */
167 DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
168 thread, tag, aset);
169 empty_aset = affinity_set_remove(aset, thread);
170 }
171
172 if (tag != THREAD_AFFINITY_TAG_NULL) {
173 aset = affinity_set_find(aspc, tag);
174 if (aset != NULL) {
175 /*
176 * Add thread to existing affinity set
177 */
178 DBG("thread_affinity_set(%p,%u) found aset %p\n",
179 thread, tag, aset);
180 } else {
181 /*
182 * Use the new affinity set, add this thread
183 * and place it in a suitable processor set.
184 */
185 if (empty_aset != NULL) {
186 aset = empty_aset;
187 empty_aset = NULL;
188 } else {
189 aset = affinity_set_alloc();
190 if (aset == NULL) {
191 lck_mtx_unlock(&aspc->aspc_lock);
192 thread_mtx_unlock(thread);
193 return KERN_RESOURCE_SHORTAGE;
194 }
195 }
196 DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
197 thread, tag, aset);
198 aset->aset_tag = tag;
199 affinity_set_place(aspc, aset);
200 }
201 affinity_set_add(aset, thread);
202 }
203
204 lck_mtx_unlock(&aspc->aspc_lock);
205 thread_mtx_unlock(thread);
206
207 /*
208 * If we wound up not using an empty aset we created,
209 * free it here.
210 */
211 if (empty_aset != NULL)
212 affinity_set_free(empty_aset);
213
214 if (thread == current_thread())
215 thread_block(THREAD_CONTINUE_NULL);
216
217 return KERN_SUCCESS;
218}
219
220/*
221 * task_affinity_create()
222 * Called from task create.
223 */
224void
225task_affinity_create(task_t parent_task, task_t child_task)
226{
227 affinity_space_t aspc = parent_task->affinity_space;
228
229 DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);
230
231 assert(aspc);
232
233 /*
234 * Bump the task reference count on the shared namespace and
235 * give it to the child.
236 */
237 lck_mtx_lock(&aspc->aspc_lock);
238 aspc->aspc_task_count++;
239 child_task->affinity_space = aspc;
240 lck_mtx_unlock(&aspc->aspc_lock);
241}
242
243/*
244 * task_affinity_deallocate()
245 * Called from task_deallocate() when there's a namespace to dereference.
246 */
247void
248task_affinity_deallocate(task_t task)
249{
250 affinity_space_t aspc = task->affinity_space;
251
252 DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
253 task, aspc, aspc->aspc_task_count);
254
255 lck_mtx_lock(&aspc->aspc_lock);
256 if (--(aspc->aspc_task_count) == 0) {
257 assert(queue_empty(&aspc->aspc_affinities));
258 lck_mtx_unlock(&aspc->aspc_lock);
259 affinity_space_free(aspc);
260 } else {
261 lck_mtx_unlock(&aspc->aspc_lock);
262 }
263}
264
265/*
266 * task_affinity_info()
267 * Return affinity tag info (number, min, max) for the task.
268 *
269 * Conditions: task is locked.
270 */
271kern_return_t
272task_affinity_info(
273 task_t task,
274 task_info_t task_info_out,
275 mach_msg_type_number_t *task_info_count)
276{
277 affinity_set_t aset;
278 affinity_space_t aspc;
279 task_affinity_tag_info_t info;
280
281 *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
282 info = (task_affinity_tag_info_t) task_info_out;
283 info->set_count = 0;
284 info->task_count = 0;
285 info->min = THREAD_AFFINITY_TAG_NULL;
286 info->max = THREAD_AFFINITY_TAG_NULL;
287
288 aspc = task->affinity_space;
289 if (aspc) {
290 lck_mtx_lock(&aspc->aspc_lock);
291 queue_iterate(&aspc->aspc_affinities,
292 aset, affinity_set_t, aset_affinities) {
293 info->set_count++;
294 if (info->min == THREAD_AFFINITY_TAG_NULL ||
295 aset->aset_tag < (uint32_t) info->min)
296 info->min = aset->aset_tag;
297 if (info->max == THREAD_AFFINITY_TAG_NULL ||
298 aset->aset_tag > (uint32_t) info->max)
299 info->max = aset->aset_tag;
300 }
301 info->task_count = aspc->aspc_task_count;
302 lck_mtx_unlock(&aspc->aspc_lock);
303 }
304 return KERN_SUCCESS;
305}
306
307/*
308 * Called from thread_dup() during fork() with child's mutex held.
309 * Set the child into the parent's affinity set.
310 * Note the affinity space is shared.
311 */
312void
313thread_affinity_dup(thread_t parent, thread_t child)
314{
315 affinity_set_t aset;
316 affinity_space_t aspc;
317
318 thread_mtx_lock(parent);
319 aset = parent->affinity_set;
320 DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
321 if (aset == NULL) {
322 thread_mtx_unlock(parent);
323 return;
324 }
325
326 aspc = aset->aset_space;
327 assert(aspc == parent->task->affinity_space);
328 assert(aspc == child->task->affinity_space);
329
330 lck_mtx_lock(&aspc->aspc_lock);
331 affinity_set_add(aset, child);
332 lck_mtx_unlock(&aspc->aspc_lock);
333
334 thread_mtx_unlock(parent);
335}
336
337/*
338 * thread_affinity_terminate()
339 * Remove thread from any affinity set.
340 * Called with the thread mutex locked.
341 */
342void
343thread_affinity_terminate(thread_t thread)
344{
345 affinity_set_t aset = thread->affinity_set;
346 affinity_space_t aspc;
347
348 DBG("thread_affinity_terminate(%p)\n", thread);
349
350 aspc = aset->aset_space;
351 lck_mtx_lock(&aspc->aspc_lock);
352 if (affinity_set_remove(aset, thread)) {
353 affinity_set_free(aset);
354 }
355 lck_mtx_unlock(&aspc->aspc_lock);
356}
357
358/*
359 * thread_affinity_exec()
360 * Called from execve() to cancel any current affinity - a new image implies
361 * the calling thread terminates any expressed or inherited affinity.
362 */
363void
364thread_affinity_exec(thread_t thread)
365{
366 if (thread->affinity_set != AFFINITY_SET_NULL)
367 thread_affinity_terminate(thread);
368}
369
370/*
371 * Create an empty affinity namespace data structure.
372 */
373static affinity_space_t
374affinity_space_alloc(void)
375{
376 affinity_space_t aspc;
377
378 aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
379 if (aspc == NULL)
380 return NULL;
381
382 lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
383 queue_init(&aspc->aspc_affinities);
384 aspc->aspc_task_count = 1;
385
386 DBG("affinity_space_create() returns %p\n", aspc);
387 return aspc;
388}
389
390/*
391 * Destroy the given empty affinity namespace data structure.
392 */
393static void
394affinity_space_free(affinity_space_t aspc)
395{
396 assert(queue_empty(&aspc->aspc_affinities));
397
398 lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
399 DBG("affinity_space_free(%p)\n", aspc);
400 kfree(aspc, sizeof(struct affinity_space));
401}
402
403
404/*
405 * Create an empty affinity set data structure
406 * entering it into a list anchored by the owning task.
407 */
408static affinity_set_t
409affinity_set_alloc(void)
410{
411 affinity_set_t aset;
412
413 aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
414 if (aset == NULL)
415 return NULL;
416
417 aset->aset_thread_count = 0;
418 queue_init(&aset->aset_affinities);
419 queue_init(&aset->aset_threads);
420 aset->aset_num = 0;
421 aset->aset_pset = PROCESSOR_SET_NULL;
422 aset->aset_space = NULL;
423
424 DBG("affinity_set_create() returns %p\n", aset);
425 return aset;
426}
427
428/*
429 * Destroy the given empty affinity set data structure
430 * after removing it from the parent task.
431 */
432static void
433affinity_set_free(affinity_set_t aset)
434{
435 assert(queue_empty(&aset->aset_threads));
436
437 DBG("affinity_set_free(%p)\n", aset);
438 kfree(aset, sizeof(struct affinity_set));
439}
440
441/*
442 * Add a thread to an affinity set.
443 * The caller must have the thread mutex and space locked.
444 */
445static void
446affinity_set_add(affinity_set_t aset, thread_t thread)
447{
448 spl_t s;
449
450 DBG("affinity_set_add(%p,%p)\n", aset, thread);
451 queue_enter(&aset->aset_threads,
452 thread, thread_t, affinity_threads);
453 aset->aset_thread_count++;
454 s = splsched();
455 thread_lock(thread);
456 thread->affinity_set = affinity_sets_enabled ? aset : NULL;
457 thread_unlock(thread);
458 splx(s);
459}
460
461/*
462 * Remove a thread from an affinity set returning the set if now empty.
463 * The caller must have the thread mutex and space locked.
464 */
465static affinity_set_t
466affinity_set_remove(affinity_set_t aset, thread_t thread)
467{
468 spl_t s;
469
470 s = splsched();
471 thread_lock(thread);
472 thread->affinity_set = NULL;
473 thread_unlock(thread);
474 splx(s);
475
476 aset->aset_thread_count--;
477 queue_remove(&aset->aset_threads,
478 thread, thread_t, affinity_threads);
479 if (queue_empty(&aset->aset_threads)) {
480 queue_remove(&aset->aset_space->aspc_affinities,
481 aset, affinity_set_t, aset_affinities);
482 assert(aset->aset_thread_count == 0);
483 aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
484 aset->aset_num = 0;
485 aset->aset_pset = PROCESSOR_SET_NULL;
486 aset->aset_space = NULL;
487 DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
488 return aset;
489 } else {
490 DBG("affinity_set_remove(%p,%p)\n", aset, thread);
491 return NULL;
492 }
493}
494
495/*
496 * Find an affinity set in the parent task with the given affinity tag.
497 * The caller must have the space locked.
498 */
499static affinity_set_t
500affinity_set_find(affinity_space_t space, uint32_t tag)
501{
502 affinity_set_t aset;
503
504 queue_iterate(&space->aspc_affinities,
505 aset, affinity_set_t, aset_affinities) {
506 if (aset->aset_tag == tag) {
507 DBG("affinity_set_find(%p,%u) finds %p\n",
508 space, tag, aset);
509 return aset;
510 }
511 }
512 DBG("affinity_set_find(%p,%u) not found\n", space, tag);
513 return NULL;
514}
515
516/*
517 * affinity_set_place() assigns an affinity set to a suitable processor_set.
518 * The selection criteria is:
519 * - the set currently occupied by the least number of affinities
520 * belonging to the owning the task.
521 * The caller must have the space locked.
522 */
523static void
524affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
525{
526 unsigned int num_cpu_asets = ml_get_max_affinity_sets();
527 unsigned int set_occupancy[num_cpu_asets];
528 unsigned int i;
529 unsigned int i_least_occupied;
530 affinity_set_t aset;
531
532 for (i = 0; i < num_cpu_asets; i++)
533 set_occupancy[i] = 0;
534
535 /*
536 * Scan the affinity sets calculating the number of sets
537 * occupy the available physical affinities.
538 */
539 queue_iterate(&aspc->aspc_affinities,
540 aset, affinity_set_t, aset_affinities) {
541 if(aset->aset_num < num_cpu_asets)
542 set_occupancy[aset->aset_num]++;
543 else
544 panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
545 }
546
547 /*
548 * Find the least occupied set (or the first empty set).
549 * To distribute placements somewhat, start searching from
550 * a cpu affinity chosen randomly per namespace:
551 * [(unsigned int)aspc % 127] % num_cpu_asets
552 * unless this mapping policy is overridden.
553 */
554 if (affinity_sets_mapping == 0)
555 i_least_occupied = 0;
556 else
557 i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
558 for (i = 0; i < num_cpu_asets; i++) {
559 unsigned int j = (i_least_occupied + i) % num_cpu_asets;
560 if (set_occupancy[j] == 0) {
561 i_least_occupied = j;
562 break;
563 }
564 if (set_occupancy[j] < set_occupancy[i_least_occupied])
565 i_least_occupied = j;
566 }
567 new_aset->aset_num = i_least_occupied;
568 new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);
569
570 /* Add the new affinity set to the group */
571 new_aset->aset_space = aspc;
572 queue_enter(&aspc->aspc_affinities,
573 new_aset, affinity_set_t, aset_affinities);
574
575 DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
576 aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
577}
578