1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/types.h>
30#include <sys/proc.h>
31#include <sys/proc_internal.h>
32#include <sys/systm.h>
33#include <sys/user.h>
34#include <sys/dtrace_ptss.h>
35
36#include <mach/vm_map.h>
37#include <mach/vm_param.h>
38#include <mach/mach_vm.h>
39
40#include <kern/task.h>
41
42#include <vm/vm_map.h>
43
44/*
45 * This function requires the sprlock to be held
46 *
47 * In general, it will not block. If it needs to allocate a new
48 * page of memory, the underlying kernel kalloc may block.
49 */
50struct dtrace_ptss_page_entry*
51dtrace_ptss_claim_entry_locked(struct proc* p)
52{
53 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
54
55 struct dtrace_ptss_page_entry* entry = NULL;
56
57 while (TRUE) {
58 struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
59
60 if (temp == NULL) {
61 // Nothing on the free list. Allocate a new page, its okay if multiple threads race here.
62 struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p);
63
64 // Make sure we actually got a page
65 if (page == NULL) {
66 return NULL;
67 }
68
69 // Add the page to the page list
70 page->next = p->p_dtrace_ptss_pages;
71 p->p_dtrace_ptss_pages = page;
72
73 // CAS the entries onto the free list.
74 do {
75 page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = p->p_dtrace_ptss_free_list;
76 } while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next,
77 (void *)&page->entries[0],
78 (void * volatile *)&p->p_dtrace_ptss_free_list));
79
80 // Now that we've added to the free list, try again.
81 continue;
82 }
83
84 // Claim temp
85 if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
86 continue;
87 }
88
89 // At this point, we own temp.
90 entry = temp;
91
92 break;
93 }
94
95 return entry;
96}
97
98/*
99 * This function does not require any locks to be held on entry.
100 */
101struct dtrace_ptss_page_entry*
102dtrace_ptss_claim_entry(struct proc* p)
103{
104 // Verify no locks held on entry
105 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
106 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
107
108 struct dtrace_ptss_page_entry* entry = NULL;
109
110 while (TRUE) {
111 struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list;
112
113 if (temp == NULL) {
114 lck_mtx_lock(lck: &p->p_dtrace_sprlock);
115 temp = dtrace_ptss_claim_entry_locked(p);
116 lck_mtx_unlock(lck: &p->p_dtrace_sprlock);
117 return temp;
118 }
119
120 // Claim temp
121 if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) {
122 continue;
123 }
124
125 // At this point, we own temp.
126 entry = temp;
127
128 break;
129 }
130
131 return entry;
132}
133
134/*
135 * This function does not require any locks to be held on entry.
136 *
137 * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can
138 * no longer be referenced safely. When found in this state, the chore
139 * of releasing an entry to the free list is ignored.
140 */
141void
142dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e)
143{
144 if (p && p->p_dtrace_ptss_pages && e) {
145 do {
146 e->next = p->p_dtrace_ptss_free_list;
147 } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list));
148 }
149}
150
151/*
152 * This function allocates a new page in the target process's address space.
153 *
154 * It returns a dtrace_ptss_page that has its entries chained, with the last
155 * entries next field set to NULL. It does not add the page or the entries to
156 * the process's page/entry lists.
157 *
158 * This function does not require that any locks be held when it is invoked.
159 */
160struct dtrace_ptss_page*
161dtrace_ptss_allocate_page(struct proc* p)
162{
163 // Allocate the kernel side data
164 struct dtrace_ptss_page* ptss_page = kalloc_type(struct dtrace_ptss_page, Z_ZERO | Z_WAITOK);
165 if (ptss_page == NULL) {
166 return NULL;
167 }
168
169 // Now allocate a page in user space and set its protections to allow execute.
170 task_t task = proc_task(p);
171 vm_map_t map = get_task_map_reference(task);
172 if (map == NULL) {
173 goto err;
174 }
175
176 mach_vm_size_t size = PAGE_MAX_SIZE;
177 mach_vm_offset_t addr = 0;
178 mach_vm_offset_t write_addr = 0;
179 /*
180 * The embedded OS has extra permissions for writable and executable pages.
181 * To ensure correct permissions, we must set the page protections separately.
182 */
183 vm_prot_t cur_protection = VM_PROT_READ | VM_PROT_EXECUTE;
184 vm_prot_t max_protection = VM_PROT_READ | VM_PROT_EXECUTE;
185 kern_return_t kr;
186
187 kr = mach_vm_map_kernel(target_map: map, address: &addr, initial_size: size, mask: 0,
188 VM_MAP_KERNEL_FLAGS_ANYWHERE(), IPC_PORT_NULL, offset: 0, FALSE,
189 cur_protection, max_protection, VM_INHERIT_DEFAULT);
190 if (kr != KERN_SUCCESS) {
191 goto err;
192 }
193
194 /*
195 * To ensure the page is properly marked as user debug, temporarily change
196 * the permissions to rw and then back again to rx. The VM will keep track
197 * of this remapping and on fault will pass PMAP_OPTIONS_XNU_USER_DEBUG
198 * properly to the PMAP layer.
199 */
200 kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)addr, size: (mach_vm_size_t)size, set_maximum: 0,
201 VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
202 if (kr != KERN_SUCCESS) {
203 goto err;
204 }
205
206 kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)addr, size: (mach_vm_size_t)size, set_maximum: 0,
207 VM_PROT_READ | VM_PROT_EXECUTE);
208 if (kr != KERN_SUCCESS) {
209 goto err;
210 }
211
212 /*
213 * If on embedded, remap the scratch space as writable at another
214 * virtual address
215 */
216 kr = mach_vm_remap_kernel(target_map: map, address: &write_addr, size, mask: 0, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, src_map: map, memory_address: addr, FALSE, cur_protection: &cur_protection, max_protection: &max_protection, VM_INHERIT_DEFAULT);
217 if (kr != KERN_SUCCESS || !(max_protection & VM_PROT_WRITE)) {
218 goto err;
219 }
220
221 kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)write_addr, size: (mach_vm_size_t)size, set_maximum: 0, VM_PROT_READ | VM_PROT_WRITE);
222 if (kr != KERN_SUCCESS) {
223 goto err;
224 }
225
226 // Chain the page entries.
227 int i;
228 for (i = 0; i < DTRACE_PTSS_ENTRIES_PER_PAGE; i++) {
229 ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
230 ptss_page->entries[i].write_addr = write_addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD);
231 ptss_page->entries[i].next = &ptss_page->entries[i + 1];
232 }
233
234 // The last entry should point to NULL
235 ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = NULL;
236
237 vm_map_deallocate(map);
238
239 return ptss_page;
240
241err:
242 kfree_type(struct dtrace_ptss_page, ptss_page);
243
244 if (map) {
245 vm_map_deallocate(map);
246 }
247
248 return NULL;
249}
250
251/*
252 * This function frees an existing page in the target process's address space.
253 *
254 * It does not alter any of the process's page/entry lists.
255 *
256 * TODO: Inline in dtrace_ptrace_exec_exit?
257 */
258void
259dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page)
260{
261 // Grab the task and get a reference to its vm_map
262 task_t task = proc_task(p);
263 vm_map_t map = get_task_map_reference(task);
264
265 mach_vm_address_t addr = ptss_page->entries[0].addr;
266 mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!!
267
268 // Silent failures, no point in checking return code.
269 mach_vm_deallocate(target: map, address: addr, size);
270
271 mach_vm_address_t write_addr = ptss_page->entries[0].write_addr;
272 mach_vm_deallocate(target: map, address: write_addr, size);
273
274 vm_map_deallocate(map);
275}
276
277/*
278 * This function assumes that the target process has been
279 * suspended, and the proc_lock & sprlock is held
280 */
281void
282dtrace_ptss_enable(struct proc* p)
283{
284 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
285 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_OWNED);
286
287 struct uthread* uth;
288 /*
289 * XXX There has been a concern raised about holding the proc_lock
290 * while calling dtrace_ptss_claim_entry(), due to the fact
291 * that dtrace_ptss_claim_entry() can potentially malloc.
292 */
293 TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) {
294 uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p);
295 }
296}
297
298/*
299 * This function is not thread safe.
300 *
301 * It assumes the sprlock is held, and the proc_lock is not.
302 */
303void
304dtrace_ptss_exec_exit(struct proc* p)
305{
306 /*
307 * Should hold sprlock to touch the pages list. Must not
308 * hold the proc lock to avoid deadlock.
309 */
310 LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
311 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
312
313 p->p_dtrace_ptss_free_list = NULL;
314
315 struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages;
316 p->p_dtrace_ptss_pages = NULL;
317
318 while (temp != NULL) {
319 struct dtrace_ptss_page* next = temp->next;
320
321 // Do we need to specifically mach_vm_deallocate the user pages?
322 // This can be called when the process is exiting, I believe the proc's
323 // vm_map_t may already be toast.
324
325 // Must be certain to free the kernel memory!
326 kfree_type(struct dtrace_ptss_page, temp);
327 temp = next;
328 }
329}
330
331/*
332 * This function is not thread safe.
333 *
334 * The child proc ptss fields are initialized to NULL at fork time.
335 * Pages allocated in the parent are copied as part of the vm_map copy, though.
336 * We need to deallocate those pages.
337 *
338 * Parent and child sprlock should be held, and proc_lock must NOT be held.
339 */
340void
341dtrace_ptss_fork(struct proc* parent, struct proc* child)
342{
343 // The child should not have any pages/entries allocated at this point.
344 // ASSERT(child->p_dtrace_ptss_pages == NULL);
345 // ASSERT(child->p_dtrace_ptss_free_list == NULL);
346
347 /*
348 * The parent's sprlock should be held, to protect its pages list
349 * from changing while the child references it. The child's sprlock
350 * must also be held, because we are modifying its pages list.
351 * Finally, to prevent a deadlock with the fasttrap cleanup code,
352 * neither the parent or child proc_lock should be held.
353 */
354 LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
355 LCK_MTX_ASSERT(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
356 LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED);
357 LCK_MTX_ASSERT(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
358
359 // Get page list from *PARENT*
360 struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages;
361
362 while (temp != NULL) {
363 // Freeing the page in the *CHILD*
364 dtrace_ptss_free_page(p: child, ptss_page: temp);
365
366 // Do not free the kernel memory, it belong to the parent.
367 temp = temp->next;
368 }
369}
370