1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * File: bsd/kern/kern_shutdown.c
30 *
31 * Copyright (C) 1989, NeXT, Inc.
32 *
33 */
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/vm.h>
39#include <sys/proc_internal.h>
40#include <sys/user.h>
41#include <sys/reboot.h>
42#include <sys/conf.h>
43#include <sys/vnode_internal.h>
44#include <sys/file_internal.h>
45#include <sys/clist.h>
46#include <sys/callout.h>
47#include <sys/mbuf.h>
48#include <sys/msgbuf.h>
49#include <sys/ioctl.h>
50#include <sys/signal.h>
51#include <sys/tty.h>
52#include <kern/task.h>
53#include <sys/quota.h>
54#include <vm/vm_kern.h>
55#include <mach/vm_param.h>
56#include <sys/filedesc.h>
57#include <mach/host_priv.h>
58#include <mach/host_reboot.h>
59
60#include <security/audit/audit.h>
61
62#include <kern/sched_prim.h> /* for thread_block() */
63#include <kern/host.h> /* for host_priv_self() */
64#include <net/if_var.h> /* for if_down_all() */
65#include <sys/buf_internal.h> /* for count_busy_buffers() */
66#include <sys/mount_internal.h> /* for vfs_unmountall() */
67#include <mach/task.h> /* for task_suspend() */
68#include <sys/sysproto.h> /* abused for sync() */
69#include <kern/clock.h> /* for delay_for_interval() */
70#include <libkern/OSAtomic.h>
71#include <IOKit/IOPlatformExpert.h>
72
73#include <sys/kdebug.h>
74
/*
 * Shutdown-in-progress flag.  reboot_kernel() moves it from 0 to 1 with
 * OSCompareAndSwap(); get_system_inshutdown() returns its value.
 */
uint32_t system_inshutdown = 0;

/* XXX should be in a header file somewhere, but isn't */
extern void (*unmountroot_pre_hook)(void);

/*
 * Incremented (under the proc list lock) by sd_callback1()/sd_callback2()
 * for each process they mark P_LIST_EXITCOUNT.  proc_shutdown() sleeps on
 * the address of this counter while waiting for those processes.
 */
unsigned int proc_shutdown_exitcount = 0;

/* Shutdown log helpers and the process-termination driver defined below. */
static int sd_openlog(vfs_context_t);
static int sd_closelog(vfs_context_t);
static void sd_log(vfs_context_t, const char *, ...);
static void proc_shutdown(void);
static void zprint_panic_info(void);
/* Defined outside this file; reboot_kernel() passes it a label, a 0 pc and an elapsed time. */
extern void halt_log_enter(const char * what, const void * pc, uint64_t time);

#if DEVELOPMENT || DEBUG
/* Consulted by reboot_kernel() before unmounting filesystems on an RB_PANIC request. */
extern boolean_t kdp_has_polled_corefile(void);
#endif /* DEVELOPMENT || DEBUG */
92
/*
 * Argument block passed (as void *) to the sd_filt1()/sd_filt2() filters.
 */
struct sd_filterargs{
	int delayterm;		/* when 0, processes with P_LDELAYTERM set are filtered out */
	int shutdownstate;	/* value compared against p->p_shutdownstate by the filters */
};
97
98
/*
 * Argument block passed (as void *) to the sd_callback1/2/3() callbacks.
 */
struct sd_iterargs {
	int signo;		/* the signal to be posted */
	int setsdstate;		/* shutdown state to be set */
	int countproc;		/* count processes on action */
	int activecount;	/* number of processes on which action was done */
};
105
/* Vnode of the shutdown log; NULLVP until sd_openlog() opens it successfully. */
static vnode_t sd_logvp = NULLVP;
/* File offset used by sd_log() for its next write; advanced after each write. */
static off_t sd_log_offset = 0;


/* Filter/callback routines that proc_shutdown() hands to proc_rebootscan(). */
static int sd_filt1(proc_t, void *);
static int sd_filt2(proc_t, void *);
static int sd_callback1(proc_t p, void * arg);
static int sd_callback2(proc_t p, void * arg);
static int sd_callback3(proc_t p, void * arg);

/* Panic-time memory diagnostics state; set up by zprint_panic_info(). */
extern boolean_t panic_include_zprint;
extern mach_memory_info_t *panic_kext_memory_info;
extern vm_size_t panic_kext_memory_size;
119
120static void
121zprint_panic_info(void)
122{
123 unsigned int num_sites;
124 kern_return_t kr;
125
126 panic_include_zprint = TRUE;
127 panic_kext_memory_info = NULL;
128 panic_kext_memory_size = 0;
129
130 num_sites = vm_page_diagnose_estimate();
131 panic_kext_memory_size = num_sites * sizeof(panic_kext_memory_info[0]);
132
133 kr = kmem_alloc(kernel_map, (vm_offset_t *)&panic_kext_memory_info, round_page(panic_kext_memory_size), VM_KERN_MEMORY_OSFMK);
134 if (kr != KERN_SUCCESS) {
135 panic_kext_memory_info = NULL;
136 return;
137 }
138
139 vm_page_diagnose(panic_kext_memory_info, num_sites, 0);
140}
141
142int
143get_system_inshutdown()
144{
145 return (system_inshutdown);
146}
147
148static void
149panic_kernel(int howto, char *message)
150{
151 if ((howto & RB_PANIC_ZPRINT) == RB_PANIC_ZPRINT) {
152 zprint_panic_info();
153 }
154 panic("userspace panic: %s", message);
155}
156
157int
158reboot_kernel(int howto, char *message)
159{
160 int hostboot_option=0;
161 uint64_t startTime;
162
163 if ((howto & (RB_PANIC | RB_QUICK)) == (RB_PANIC | RB_QUICK)) {
164 panic_kernel(howto, message);
165 }
166
167 if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
168 if ( (howto&RB_QUICK) == RB_QUICK)
169 goto force_reboot;
170 return (EBUSY);
171 }
172 /*
173 * Notify the power management root domain that the system will shut down.
174 */
175 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageProcessExit);
176
177 if ((howto&RB_QUICK)==RB_QUICK) {
178 printf("Quick reboot...\n");
179 if ((howto&RB_NOSYNC)==0) {
180 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
181 }
182 }
183 else if ((howto&RB_NOSYNC)==0) {
184 int iter, nbusy;
185
186 printf("syncing disks... ");
187
188 /*
189 * Release vnodes held by texts before sync.
190 */
191
192 /* handle live procs (deallocate their root and current directories), suspend initproc */
193
194 startTime = mach_absolute_time();
195 proc_shutdown();
196 halt_log_enter("proc_shutdown", 0, mach_absolute_time() - startTime);
197
198#if CONFIG_AUDIT
199 startTime = mach_absolute_time();
200 audit_shutdown();
201 halt_log_enter("audit_shutdown", 0, mach_absolute_time() - startTime);
202#endif
203
204 if (unmountroot_pre_hook != NULL)
205 unmountroot_pre_hook();
206
207 startTime = mach_absolute_time();
208 sync((proc_t)NULL, (void *)NULL, (int *)NULL);
209
210 if (kdebug_enable) {
211 startTime = mach_absolute_time();
212 kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace");
213 halt_log_enter("shutdown.trace", 0, mach_absolute_time() - startTime);
214 }
215
216 IOSystemShutdownNotification(kIOSystemShutdownNotificationStageRootUnmount);
217
218 /*
219 * Unmount filesystems
220 */
221
222#if DEVELOPMENT || DEBUG
223 if (!(howto & RB_PANIC) || !kdp_has_polled_corefile())
224#endif /* DEVELOPMENT || DEBUG */
225 {
226 startTime = mach_absolute_time();
227 vfs_unmountall();
228 halt_log_enter("vfs_unmountall", 0, mach_absolute_time() - startTime);
229 }
230
231 /* Wait for the buffer cache to clean remaining dirty buffers */
232 startTime = mach_absolute_time();
233 for (iter = 0; iter < 100; iter++) {
234 nbusy = count_busy_buffers();
235 if (nbusy == 0)
236 break;
237 printf("%d ", nbusy);
238 delay_for_interval( 1 * nbusy, 1000 * 1000);
239 }
240 if (nbusy)
241 printf("giving up\n");
242 else
243 printf("done\n");
244 halt_log_enter("bufferclean", 0, mach_absolute_time() - startTime);
245 }
246#if NETWORKING
247 /*
248 * Can't just use an splnet() here to disable the network
249 * because that will lock out softints which the disk
250 * drivers depend on to finish DMAs.
251 */
252 startTime = mach_absolute_time();
253 if_down_all();
254 halt_log_enter("if_down_all", 0, mach_absolute_time() - startTime);
255#endif /* NETWORKING */
256
257force_reboot:
258
259 if (howto & RB_PANIC) {
260 panic_kernel(howto, message);
261 }
262
263 if (howto & RB_POWERDOWN)
264 hostboot_option = HOST_REBOOT_HALT;
265 if (howto & RB_HALT)
266 hostboot_option = HOST_REBOOT_HALT;
267
268 if (howto & RB_UPSDELAY) {
269 hostboot_option = HOST_REBOOT_UPSDELAY;
270 }
271
272 host_reboot(host_priv_self(), hostboot_option);
273 /*
274 * should not be reached
275 */
276 return (0);
277}
278
279static int
280sd_openlog(vfs_context_t ctx)
281{
282 int error = 0;
283 struct timeval tv;
284
285 /* Open shutdown log */
286 if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) {
287 printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error);
288 sd_logvp = NULLVP;
289 return error;
290 }
291
292 vnode_setsize(sd_logvp, (off_t)0, 0, ctx);
293
294 /* Write a little header */
295 microtime(&tv);
296 sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec);
297
298 return 0;
299}
300
301static int
302sd_closelog(vfs_context_t ctx)
303{
304 int error = 0;
305 if (sd_logvp != NULLVP) {
306 VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx);
307 error = vnode_close(sd_logvp, FWRITE, ctx);
308 }
309
310 return error;
311}
312
313static void
314sd_log(vfs_context_t ctx, const char *fmt, ...)
315{
316 int resid, log_error, len;
317 char logbuf[100];
318 va_list arglist;
319
320 /* If the log isn't open yet, open it */
321 if (sd_logvp == NULLVP) {
322 if (sd_openlog(ctx) != 0) {
323 /* Couldn't open, we fail out */
324 return;
325 }
326 }
327
328 va_start(arglist, fmt);
329 len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist);
330 log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset,
331 UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
332 if (log_error == EIO || log_error == 0) {
333 sd_log_offset += (len - resid);
334 }
335
336 va_end(arglist);
337
338}
339
340static int
341sd_filt1(proc_t p, void * args)
342{
343 proc_t self = current_proc();
344 struct sd_filterargs * sf = (struct sd_filterargs *)args;
345 int delayterm = sf-> delayterm;
346 int shutdownstate = sf->shutdownstate;
347
348 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
349 ||(p == self) || (p->p_stat == SZOMB)
350 || (p->p_shutdownstate != shutdownstate)
351 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))
352 || ((p->p_sigcatch & sigmask(SIGTERM))== 0)) {
353 return(0);
354 }
355 else
356 return(1);
357}
358
359
360static int
361sd_callback1(proc_t p, void * args)
362{
363 struct sd_iterargs * sd = (struct sd_iterargs *)args;
364 int signo = sd->signo;
365 int setsdstate = sd->setsdstate;
366 int countproc = sd->countproc;
367
368 proc_lock(p);
369 p->p_shutdownstate = setsdstate;
370 if (p->p_stat != SZOMB) {
371 proc_unlock(p);
372 if (countproc != 0) {
373 proc_list_lock();
374 p->p_listflag |= P_LIST_EXITCOUNT;
375 proc_shutdown_exitcount++;
376 proc_list_unlock();
377 }
378
379 psignal(p, signo);
380 if (countproc != 0)
381 sd->activecount++;
382 } else {
383 proc_unlock(p);
384 }
385
386 return PROC_RETURNED;
387}
388
389static int
390sd_filt2(proc_t p, void * args)
391{
392 proc_t self = current_proc();
393 struct sd_filterargs * sf = (struct sd_filterargs *)args;
394 int delayterm = sf-> delayterm;
395 int shutdownstate = sf->shutdownstate;
396
397 if (((p->p_flag&P_SYSTEM) != 0) || (p->p_ppid == 0)
398 ||(p == self) || (p->p_stat == SZOMB)
399 || (p->p_shutdownstate == shutdownstate)
400 ||((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM))) {
401 return(0);
402 }
403 else
404 return(1);
405}
406
407static int
408sd_callback2(proc_t p, void * args)
409{
410 struct sd_iterargs * sd = (struct sd_iterargs *)args;
411 int signo = sd->signo;
412 int setsdstate = sd->setsdstate;
413 int countproc = sd->countproc;
414
415 proc_lock(p);
416 p->p_shutdownstate = setsdstate;
417 if (p->p_stat != SZOMB) {
418 proc_unlock(p);
419 if (countproc != 0) {
420 proc_list_lock();
421 p->p_listflag |= P_LIST_EXITCOUNT;
422 proc_shutdown_exitcount++;
423 proc_list_unlock();
424 }
425 psignal(p, signo);
426 if (countproc != 0)
427 sd->activecount++;
428 } else {
429 proc_unlock(p);
430 }
431
432 return PROC_RETURNED;
433}
434
435static int
436sd_callback3(proc_t p, void * args)
437{
438 struct sd_iterargs * sd = (struct sd_iterargs *)args;
439 vfs_context_t ctx = vfs_context_current();
440
441 int setsdstate = sd->setsdstate;
442
443 proc_lock(p);
444 p->p_shutdownstate = setsdstate;
445 if (p->p_stat != SZOMB) {
446 /*
447 * NOTE: following code ignores sig_lock and plays
448 * with exit_thread correctly. This is OK unless we
449 * are a multiprocessor, in which case I do not
450 * understand the sig_lock. This needs to be fixed.
451 * XXX
452 */
453 if (p->exit_thread) { /* someone already doing it */
454 proc_unlock(p);
455 /* give him a chance */
456 thread_block(THREAD_CONTINUE_NULL);
457 } else {
458 p->exit_thread = current_thread();
459 printf(".");
460
461 sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);
462
463 proc_unlock(p);
464 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
465 p->p_pid, 0, 1, 0, 0);
466 sd->activecount++;
467 exit1(p, 1, (int *)NULL);
468 }
469 } else {
470 proc_unlock(p);
471 }
472
473 return PROC_RETURNED;
474}
475
476
477/*
478 * proc_shutdown()
479 *
480 * Shutdown down proc system (release references to current and root
481 * dirs for each process).
482 *
483 * POSIX modifications:
484 *
485 * For POSIX fcntl() file locking call vno_lockrelease() on
486 * the file to release all of its record locks, if any.
487 */
488
489static void
490proc_shutdown(void)
491{
492 vfs_context_t ctx = vfs_context_current();
493 struct proc *p, *self;
494 int delayterm = 0;
495 struct sd_filterargs sfargs;
496 struct sd_iterargs sdargs;
497 int error = 0;
498 struct timespec ts;
499
500 /*
501 * Kill as many procs as we can. (Except ourself...)
502 */
503 self = (struct proc *)current_proc();
504
505 /*
506 * Signal the init with SIGTERM so that he does not launch
507 * new processes
508 */
509 p = proc_find(1);
510 if (p && p != self) {
511 psignal(p, SIGTERM);
512 }
513 proc_rele(p);
514
515 printf("Killing all processes ");
516
517sigterm_loop:
518 /*
519 * send SIGTERM to those procs interested in catching one
520 */
521 sfargs.delayterm = delayterm;
522 sfargs.shutdownstate = 0;
523 sdargs.signo = SIGTERM;
524 sdargs.setsdstate = 1;
525 sdargs.countproc = 1;
526 sdargs.activecount = 0;
527
528 error = 0;
529 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
530 proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);
531
532 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
533 proc_list_lock();
534 if (proc_shutdown_exitcount != 0) {
535 /*
536 * now wait for up to 3 seconds to allow those procs catching SIGTERM
537 * to digest it
538 * as soon as these procs have exited, we'll continue on to the next step
539 */
540 ts.tv_sec = 3;
541 ts.tv_nsec = 0;
542 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
543 if (error != 0) {
544 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
545 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
546 p->p_listflag &= ~P_LIST_EXITCOUNT;
547 }
548 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
549 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
550 p->p_listflag &= ~P_LIST_EXITCOUNT;
551 }
552 }
553 }
554 proc_list_unlock();
555 }
556 if (error == ETIMEDOUT) {
557 /*
558 * log the names of the unresponsive tasks
559 */
560
561 proc_list_lock();
562
563 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
564 if (p->p_shutdownstate == 1) {
565 printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
566 sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
567 }
568 }
569
570 proc_list_unlock();
571 }
572
573 /*
574 * send a SIGKILL to all the procs still hanging around
575 */
576 sfargs.delayterm = delayterm;
577 sfargs.shutdownstate = 2;
578 sdargs.signo = SIGKILL;
579 sdargs.setsdstate = 2;
580 sdargs.countproc = 1;
581 sdargs.activecount = 0;
582
583 /* post a SIGKILL to all that catch SIGTERM and not marked for delay */
584 proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);
585
586 error = 0;
587
588 if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
589 proc_list_lock();
590 if (proc_shutdown_exitcount != 0) {
591 /*
592 * wait for up to 60 seconds to allow these procs to exit normally
593 *
594 * History: The delay interval was changed from 100 to 200
595 * for NFS requests in particular.
596 */
597 ts.tv_sec = 10;
598 ts.tv_nsec = 0;
599 error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
600 if (error != 0) {
601 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
602 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
603 p->p_listflag &= ~P_LIST_EXITCOUNT;
604 }
605 for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
606 if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
607 p->p_listflag &= ~P_LIST_EXITCOUNT;
608 }
609 }
610 }
611 proc_list_unlock();
612 }
613
614 if (error == ETIMEDOUT) {
615 /*
616 * log the names of the unresponsive tasks
617 */
618
619 proc_list_lock();
620
621 for (p = allproc.lh_first; p; p = p->p_list.le_next) {
622 if (p->p_shutdownstate == 2) {
623 printf("%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
624 sd_log(ctx, "%s[%d]: didn't act on SIGKILL\n", p->p_comm, p->p_pid);
625 }
626 }
627
628 proc_list_unlock();
629 }
630
631 /*
632 * if we still have procs that haven't exited, then brute force 'em
633 */
634 sfargs.delayterm = delayterm;
635 sfargs.shutdownstate = 3;
636 sdargs.signo = 0;
637 sdargs.setsdstate = 3;
638 sdargs.countproc = 0;
639 sdargs.activecount = 0;
640
641
642
643 /* post a SIGTERM to all that catch SIGTERM and not marked for delay */
644 proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
645 printf("\n");
646
647 /* Now start the termination of processes that are marked for delayed termn */
648 if (delayterm == 0) {
649 delayterm = 1;
650 goto sigterm_loop;
651 }
652
653 sd_closelog(ctx);
654
655 /*
656 * Now that all other processes have been terminated, suspend init
657 */
658 task_suspend_internal(initproc->task);
659
660 /* drop the ref on initproc */
661 proc_rele(initproc);
662 printf("continuing\n");
663}
664
665