| 1 | /* |
| 2 | * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #include <sys/types.h> |
| 30 | #include <sys/proc.h> |
| 31 | #include <sys/proc_internal.h> |
| 32 | #include <sys/systm.h> |
| 33 | #include <sys/user.h> |
| 34 | #include <sys/dtrace_ptss.h> |
| 35 | |
| 36 | #include <mach/vm_map.h> |
| 37 | #include <mach/vm_param.h> |
| 38 | #include <mach/mach_vm.h> |
| 39 | |
| 40 | #include <kern/task.h> |
| 41 | |
| 42 | #include <vm/vm_map.h> |
| 43 | |
| 44 | /* |
| 45 | * This function requires the sprlock to be held |
| 46 | * |
| 47 | * In general, it will not block. If it needs to allocate a new |
| 48 | * page of memory, the underlying kernel kalloc may block. |
| 49 | */ |
| 50 | struct dtrace_ptss_page_entry* |
| 51 | dtrace_ptss_claim_entry_locked(struct proc* p) |
| 52 | { |
| 53 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
| 54 | |
| 55 | struct dtrace_ptss_page_entry* entry = NULL; |
| 56 | |
| 57 | while (TRUE) { |
| 58 | struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list; |
| 59 | |
| 60 | if (temp == NULL) { |
| 61 | // Nothing on the free list. Allocate a new page, its okay if multiple threads race here. |
| 62 | struct dtrace_ptss_page* page = dtrace_ptss_allocate_page(p); |
| 63 | |
| 64 | // Make sure we actually got a page |
| 65 | if (page == NULL) { |
| 66 | return NULL; |
| 67 | } |
| 68 | |
| 69 | // Add the page to the page list |
| 70 | page->next = p->p_dtrace_ptss_pages; |
| 71 | p->p_dtrace_ptss_pages = page; |
| 72 | |
| 73 | // CAS the entries onto the free list. |
| 74 | do { |
| 75 | page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = p->p_dtrace_ptss_free_list; |
| 76 | } while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next, |
| 77 | (void *)&page->entries[0], |
| 78 | (void * volatile *)&p->p_dtrace_ptss_free_list)); |
| 79 | |
| 80 | // Now that we've added to the free list, try again. |
| 81 | continue; |
| 82 | } |
| 83 | |
| 84 | // Claim temp |
| 85 | if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) { |
| 86 | continue; |
| 87 | } |
| 88 | |
| 89 | // At this point, we own temp. |
| 90 | entry = temp; |
| 91 | |
| 92 | break; |
| 93 | } |
| 94 | |
| 95 | return entry; |
| 96 | } |
| 97 | |
| 98 | /* |
| 99 | * This function does not require any locks to be held on entry. |
| 100 | */ |
| 101 | struct dtrace_ptss_page_entry* |
| 102 | dtrace_ptss_claim_entry(struct proc* p) |
| 103 | { |
| 104 | // Verify no locks held on entry |
| 105 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); |
| 106 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
| 107 | |
| 108 | struct dtrace_ptss_page_entry* entry = NULL; |
| 109 | |
| 110 | while (TRUE) { |
| 111 | struct dtrace_ptss_page_entry* temp = p->p_dtrace_ptss_free_list; |
| 112 | |
| 113 | if (temp == NULL) { |
| 114 | lck_mtx_lock(lck: &p->p_dtrace_sprlock); |
| 115 | temp = dtrace_ptss_claim_entry_locked(p); |
| 116 | lck_mtx_unlock(lck: &p->p_dtrace_sprlock); |
| 117 | return temp; |
| 118 | } |
| 119 | |
| 120 | // Claim temp |
| 121 | if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) { |
| 122 | continue; |
| 123 | } |
| 124 | |
| 125 | // At this point, we own temp. |
| 126 | entry = temp; |
| 127 | |
| 128 | break; |
| 129 | } |
| 130 | |
| 131 | return entry; |
| 132 | } |
| 133 | |
| 134 | /* |
| 135 | * This function does not require any locks to be held on entry. |
| 136 | * |
| 137 | * (PR-11138709) A NULL p->p_dtrace_ptss_pages means the entry can |
| 138 | * no longer be referenced safely. When found in this state, the chore |
| 139 | * of releasing an entry to the free list is ignored. |
| 140 | */ |
| 141 | void |
| 142 | dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e) |
| 143 | { |
| 144 | if (p && p->p_dtrace_ptss_pages && e) { |
| 145 | do { |
| 146 | e->next = p->p_dtrace_ptss_free_list; |
| 147 | } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list)); |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | /* |
| 152 | * This function allocates a new page in the target process's address space. |
| 153 | * |
| 154 | * It returns a dtrace_ptss_page that has its entries chained, with the last |
| 155 | * entries next field set to NULL. It does not add the page or the entries to |
| 156 | * the process's page/entry lists. |
| 157 | * |
| 158 | * This function does not require that any locks be held when it is invoked. |
| 159 | */ |
| 160 | struct dtrace_ptss_page* |
| 161 | dtrace_ptss_allocate_page(struct proc* p) |
| 162 | { |
| 163 | // Allocate the kernel side data |
| 164 | struct dtrace_ptss_page* ptss_page = kalloc_type(struct dtrace_ptss_page, Z_ZERO | Z_WAITOK); |
| 165 | if (ptss_page == NULL) { |
| 166 | return NULL; |
| 167 | } |
| 168 | |
| 169 | // Now allocate a page in user space and set its protections to allow execute. |
| 170 | task_t task = proc_task(p); |
| 171 | vm_map_t map = get_task_map_reference(task); |
| 172 | if (map == NULL) { |
| 173 | goto err; |
| 174 | } |
| 175 | |
| 176 | mach_vm_size_t size = PAGE_MAX_SIZE; |
| 177 | mach_vm_offset_t addr = 0; |
| 178 | mach_vm_offset_t write_addr = 0; |
| 179 | /* |
| 180 | * The embedded OS has extra permissions for writable and executable pages. |
| 181 | * To ensure correct permissions, we must set the page protections separately. |
| 182 | */ |
| 183 | vm_prot_t cur_protection = VM_PROT_READ | VM_PROT_EXECUTE; |
| 184 | vm_prot_t max_protection = VM_PROT_READ | VM_PROT_EXECUTE; |
| 185 | kern_return_t kr; |
| 186 | |
| 187 | kr = mach_vm_map_kernel(target_map: map, address: &addr, initial_size: size, mask: 0, |
| 188 | VM_MAP_KERNEL_FLAGS_ANYWHERE(), IPC_PORT_NULL, offset: 0, FALSE, |
| 189 | cur_protection, max_protection, VM_INHERIT_DEFAULT); |
| 190 | if (kr != KERN_SUCCESS) { |
| 191 | goto err; |
| 192 | } |
| 193 | |
| 194 | /* |
| 195 | * To ensure the page is properly marked as user debug, temporarily change |
| 196 | * the permissions to rw and then back again to rx. The VM will keep track |
| 197 | * of this remapping and on fault will pass PMAP_OPTIONS_XNU_USER_DEBUG |
| 198 | * properly to the PMAP layer. |
| 199 | */ |
| 200 | kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)addr, size: (mach_vm_size_t)size, set_maximum: 0, |
| 201 | VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY); |
| 202 | if (kr != KERN_SUCCESS) { |
| 203 | goto err; |
| 204 | } |
| 205 | |
| 206 | kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)addr, size: (mach_vm_size_t)size, set_maximum: 0, |
| 207 | VM_PROT_READ | VM_PROT_EXECUTE); |
| 208 | if (kr != KERN_SUCCESS) { |
| 209 | goto err; |
| 210 | } |
| 211 | |
| 212 | /* |
| 213 | * If on embedded, remap the scratch space as writable at another |
| 214 | * virtual address |
| 215 | */ |
| 216 | kr = mach_vm_remap_kernel(target_map: map, address: &write_addr, size, mask: 0, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, src_map: map, memory_address: addr, FALSE, cur_protection: &cur_protection, max_protection: &max_protection, VM_INHERIT_DEFAULT); |
| 217 | if (kr != KERN_SUCCESS || !(max_protection & VM_PROT_WRITE)) { |
| 218 | goto err; |
| 219 | } |
| 220 | |
| 221 | kr = mach_vm_protect(target_task: map, address: (mach_vm_offset_t)write_addr, size: (mach_vm_size_t)size, set_maximum: 0, VM_PROT_READ | VM_PROT_WRITE); |
| 222 | if (kr != KERN_SUCCESS) { |
| 223 | goto err; |
| 224 | } |
| 225 | |
| 226 | // Chain the page entries. |
| 227 | int i; |
| 228 | for (i = 0; i < DTRACE_PTSS_ENTRIES_PER_PAGE; i++) { |
| 229 | ptss_page->entries[i].addr = addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD); |
| 230 | ptss_page->entries[i].write_addr = write_addr + (i * DTRACE_PTSS_SCRATCH_SPACE_PER_THREAD); |
| 231 | ptss_page->entries[i].next = &ptss_page->entries[i + 1]; |
| 232 | } |
| 233 | |
| 234 | // The last entry should point to NULL |
| 235 | ptss_page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE - 1].next = NULL; |
| 236 | |
| 237 | vm_map_deallocate(map); |
| 238 | |
| 239 | return ptss_page; |
| 240 | |
| 241 | err: |
| 242 | kfree_type(struct dtrace_ptss_page, ptss_page); |
| 243 | |
| 244 | if (map) { |
| 245 | vm_map_deallocate(map); |
| 246 | } |
| 247 | |
| 248 | return NULL; |
| 249 | } |
| 250 | |
| 251 | /* |
| 252 | * This function frees an existing page in the target process's address space. |
| 253 | * |
| 254 | * It does not alter any of the process's page/entry lists. |
| 255 | * |
| 256 | * TODO: Inline in dtrace_ptrace_exec_exit? |
| 257 | */ |
| 258 | void |
| 259 | dtrace_ptss_free_page(struct proc* p, struct dtrace_ptss_page* ptss_page) |
| 260 | { |
| 261 | // Grab the task and get a reference to its vm_map |
| 262 | task_t task = proc_task(p); |
| 263 | vm_map_t map = get_task_map_reference(task); |
| 264 | |
| 265 | mach_vm_address_t addr = ptss_page->entries[0].addr; |
| 266 | mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!! |
| 267 | |
| 268 | // Silent failures, no point in checking return code. |
| 269 | mach_vm_deallocate(target: map, address: addr, size); |
| 270 | |
| 271 | mach_vm_address_t write_addr = ptss_page->entries[0].write_addr; |
| 272 | mach_vm_deallocate(target: map, address: write_addr, size); |
| 273 | |
| 274 | vm_map_deallocate(map); |
| 275 | } |
| 276 | |
| 277 | /* |
| 278 | * This function assumes that the target process has been |
| 279 | * suspended, and the proc_lock & sprlock is held |
| 280 | */ |
| 281 | void |
| 282 | dtrace_ptss_enable(struct proc* p) |
| 283 | { |
| 284 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
| 285 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_OWNED); |
| 286 | |
| 287 | struct uthread* uth; |
| 288 | /* |
| 289 | * XXX There has been a concern raised about holding the proc_lock |
| 290 | * while calling dtrace_ptss_claim_entry(), due to the fact |
| 291 | * that dtrace_ptss_claim_entry() can potentially malloc. |
| 292 | */ |
| 293 | TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { |
| 294 | uth->t_dtrace_scratch = dtrace_ptss_claim_entry_locked(p); |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | /* |
| 299 | * This function is not thread safe. |
| 300 | * |
| 301 | * It assumes the sprlock is held, and the proc_lock is not. |
| 302 | */ |
| 303 | void |
| 304 | dtrace_ptss_exec_exit(struct proc* p) |
| 305 | { |
| 306 | /* |
| 307 | * Should hold sprlock to touch the pages list. Must not |
| 308 | * hold the proc lock to avoid deadlock. |
| 309 | */ |
| 310 | LCK_MTX_ASSERT(&p->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
| 311 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
| 312 | |
| 313 | p->p_dtrace_ptss_free_list = NULL; |
| 314 | |
| 315 | struct dtrace_ptss_page* temp = p->p_dtrace_ptss_pages; |
| 316 | p->p_dtrace_ptss_pages = NULL; |
| 317 | |
| 318 | while (temp != NULL) { |
| 319 | struct dtrace_ptss_page* next = temp->next; |
| 320 | |
| 321 | // Do we need to specifically mach_vm_deallocate the user pages? |
| 322 | // This can be called when the process is exiting, I believe the proc's |
| 323 | // vm_map_t may already be toast. |
| 324 | |
| 325 | // Must be certain to free the kernel memory! |
| 326 | kfree_type(struct dtrace_ptss_page, temp); |
| 327 | temp = next; |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | /* |
| 332 | * This function is not thread safe. |
| 333 | * |
| 334 | * The child proc ptss fields are initialized to NULL at fork time. |
| 335 | * Pages allocated in the parent are copied as part of the vm_map copy, though. |
| 336 | * We need to deallocate those pages. |
| 337 | * |
| 338 | * Parent and child sprlock should be held, and proc_lock must NOT be held. |
| 339 | */ |
| 340 | void |
| 341 | dtrace_ptss_fork(struct proc* parent, struct proc* child) |
| 342 | { |
| 343 | // The child should not have any pages/entries allocated at this point. |
| 344 | // ASSERT(child->p_dtrace_ptss_pages == NULL); |
| 345 | // ASSERT(child->p_dtrace_ptss_free_list == NULL); |
| 346 | |
| 347 | /* |
| 348 | * The parent's sprlock should be held, to protect its pages list |
| 349 | * from changing while the child references it. The child's sprlock |
| 350 | * must also be held, because we are modifying its pages list. |
| 351 | * Finally, to prevent a deadlock with the fasttrap cleanup code, |
| 352 | * neither the parent or child proc_lock should be held. |
| 353 | */ |
| 354 | LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
| 355 | LCK_MTX_ASSERT(&parent->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
| 356 | LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_OWNED); |
| 357 | LCK_MTX_ASSERT(&child->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
| 358 | |
| 359 | // Get page list from *PARENT* |
| 360 | struct dtrace_ptss_page* temp = parent->p_dtrace_ptss_pages; |
| 361 | |
| 362 | while (temp != NULL) { |
| 363 | // Freeing the page in the *CHILD* |
| 364 | dtrace_ptss_free_page(p: child, ptss_page: temp); |
| 365 | |
| 366 | // Do not free the kernel memory, it belong to the parent. |
| 367 | temp = temp->next; |
| 368 | } |
| 369 | } |
| 370 | |