1 | /* |
2 | * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* Copyright (c) 1998 Apple Computer, Inc. All rights reserved. |
29 | * |
30 | * File: bsd/kern/kern_symfile.c |
31 | * |
32 | * HISTORY |
33 | */ |
34 | |
35 | #include <mach/vm_param.h> |
36 | |
37 | #include <sys/param.h> |
38 | #include <sys/systm.h> |
39 | #include <sys/signalvar.h> |
40 | #include <sys/resourcevar.h> |
41 | #include <sys/namei.h> |
42 | #include <sys/vnode_internal.h> |
43 | #include <sys/proc_internal.h> |
44 | #include <sys/kauth.h> |
45 | #include <sys/timeb.h> |
46 | #include <sys/times.h> |
47 | #include <sys/acct.h> |
48 | #include <sys/file_internal.h> |
49 | #include <sys/uio.h> |
50 | #include <sys/kernel.h> |
51 | #include <sys/stat.h> |
52 | #include <sys/disk.h> |
53 | #include <sys/conf.h> |
54 | #include <sys/content_protection.h> |
55 | #include <sys/fsctl.h> |
56 | |
57 | #include <mach-o/loader.h> |
58 | #include <mach-o/nlist.h> |
59 | |
60 | #include <kern/kalloc.h> |
61 | #include <vm/vm_kern.h> |
62 | #include <pexpert/pexpert.h> |
63 | #include <IOKit/IOPolledInterface.h> |
64 | |
/*
 * Lowest logical block address at which a hibernation extent may start,
 * for devices with 512-byte and 4096-byte blocks respectively.  The value
 * is multiplied by the device block size to obtain a minimum byte offset
 * that reported physical extents are asserted against.
 * NOTE(review): presumably this keeps the image clear of the partition
 * map at the start of the disk -- confirm.
 */
#define HIBERNATE_MIN_PHYSICAL_LBA_512  (34)
#define HIBERNATE_MIN_PHYSICAL_LBA_4096 (6)

/*
 * Smallest size, in bytes, the image file is resized to once the
 * write-back-cache space has been deducted from the requested size.
 */
#define HIBERNATE_MIN_FILE_SIZE         (1024 * 1024)
68 | |
69 | /* This function is called from kern_sysctl in the current process context; |
70 | * it is exported with the System6.0.exports, but this appears to be a legacy |
71 | * export, as there are no internal consumers. |
72 | */ |
73 | int |
74 | get_kernel_symfile(__unused proc_t p, __unused char const **symfile); |
75 | int |
76 | get_kernel_symfile(__unused proc_t p, __unused char const **symfile) |
77 | { |
78 | return KERN_FAILURE; |
79 | } |
80 | |
81 | struct kern_direct_file_io_ref_t { |
82 | vfs_context_t ctx; |
83 | struct vnode * vp; |
84 | char * name; |
85 | size_t namesize; |
86 | dev_t device; |
87 | uint32_t blksize; |
88 | off_t filelength; |
89 | char cf; |
90 | char pinned; |
91 | char frozen; |
92 | char wbcranged; |
93 | }; |
94 | |
95 | |
96 | static int |
97 | file_ioctl(void * p1, void * p2, u_long theIoctl, caddr_t result) |
98 | { |
99 | dev_t device = *(dev_t*) p1; |
100 | |
101 | return (*bdevsw[major(device)].d_ioctl) |
102 | (device, theIoctl, result, S_IFBLK, p2); |
103 | } |
104 | |
105 | static int |
106 | device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t result) |
107 | { |
108 | return VNOP_IOCTL(vp: p1, command: theIoctl, data: result, fflag: 0, ctx: p2); |
109 | } |
110 | |
111 | static int |
112 | kern_ioctl_file_extents(struct kern_direct_file_io_ref_t * ref, u_long theIoctl, off_t offset, off_t end) |
113 | { |
114 | int error = 0; |
115 | int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); |
116 | void * p1; |
117 | void * p2; |
118 | uint64_t fileblk = 0; |
119 | size_t filechunk = 0; |
120 | dk_extent_t extent; |
121 | dk_unmap_t unmap; |
122 | _dk_cs_pin_t pin; |
123 | |
124 | bzero(s: &extent, n: sizeof(dk_extent_t)); |
125 | bzero(s: &unmap, n: sizeof(dk_unmap_t)); |
126 | bzero(s: &pin, n: sizeof(pin)); |
127 | if (ref->vp->v_type == VREG) { |
128 | p1 = &ref->device; |
129 | p2 = kernproc; |
130 | do_ioctl = &file_ioctl; |
131 | } else { |
132 | /* Partition. */ |
133 | p1 = ref->vp; |
134 | p2 = ref->ctx; |
135 | do_ioctl = &device_ioctl; |
136 | } |
137 | |
138 | if (_DKIOCCSPINEXTENT == theIoctl) { |
139 | /* Tell CS the image size, so it knows whether to place the subsequent pins SSD/HDD */ |
140 | pin.cp_extent.length = end; |
141 | pin.cp_flags = _DKIOCCSHIBERNATEIMGSIZE; |
142 | (void) do_ioctl(p1, p2, _DKIOCCSPINEXTENT, (caddr_t)&pin); |
143 | } else if (_DKIOCCSUNPINEXTENT == theIoctl) { |
144 | /* Tell CS hibernation is done, so it can stop blocking overlapping writes */ |
145 | pin.cp_flags = _DKIOCCSPINDISCARDDENYLIST; |
146 | (void) do_ioctl(p1, p2, _DKIOCCSUNPINEXTENT, (caddr_t)&pin); |
147 | } |
148 | |
149 | for (; offset < end; offset += filechunk) { |
150 | if (ref->vp->v_type == VREG) { |
151 | daddr64_t blkno; |
152 | filechunk = 1 * 1024 * 1024 * 1024; |
153 | if (filechunk > (size_t)(end - offset)) { |
154 | filechunk = (size_t)(end - offset); |
155 | } |
156 | error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, |
157 | &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL); |
158 | if (error) { |
159 | break; |
160 | } |
161 | if (-1LL == blkno) { |
162 | continue; |
163 | } |
164 | fileblk = blkno * ref->blksize; |
165 | } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { |
166 | fileblk = offset; |
167 | filechunk = (unsigned long)((ref->filelength > ULONG_MAX) ? ULONG_MAX: ref->filelength); |
168 | } |
169 | |
170 | if (DKIOCUNMAP == theIoctl) { |
171 | extent.offset = fileblk; |
172 | extent.length = filechunk; |
173 | unmap.extents = &extent; |
174 | unmap.extentsCount = 1; |
175 | error = do_ioctl(p1, p2, theIoctl, (caddr_t)&unmap); |
176 | // printf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length); |
177 | } else if (_DKIOCCSPINEXTENT == theIoctl) { |
178 | pin.cp_extent.offset = fileblk; |
179 | pin.cp_extent.length = filechunk; |
180 | pin.cp_flags = _DKIOCCSPINFORHIBERNATION; |
181 | error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); |
182 | if (error && (ENOTTY != error)) { |
183 | printf("_DKIOCCSPINEXTENT(%d) 0x%qx, 0x%qx\n" , error, pin.cp_extent.offset, pin.cp_extent.length); |
184 | } |
185 | } else if (_DKIOCCSUNPINEXTENT == theIoctl) { |
186 | pin.cp_extent.offset = fileblk; |
187 | pin.cp_extent.length = filechunk; |
188 | pin.cp_flags = _DKIOCCSPINFORHIBERNATION; |
189 | error = do_ioctl(p1, p2, theIoctl, (caddr_t)&pin); |
190 | if (error && (ENOTTY != error)) { |
191 | printf("_DKIOCCSUNPINEXTENT(%d) 0x%qx, 0x%qx\n" , error, pin.cp_extent.offset, pin.cp_extent.length); |
192 | } |
193 | } else { |
194 | error = EINVAL; |
195 | } |
196 | |
197 | if (error) { |
198 | break; |
199 | } |
200 | } |
201 | return error; |
202 | } |
203 | |
204 | extern uint32_t freespace_mb(vnode_t vp); |
205 | |
206 | struct kern_direct_file_io_ref_t * |
207 | kern_open_file_for_direct_io(const char * name, |
208 | uint32_t iflags, |
209 | kern_get_file_extents_callback_t callback, |
210 | void * callback_ref, |
211 | off_t set_file_size, |
212 | off_t fs_free_size, |
213 | off_t write_file_offset, |
214 | void * write_file_addr, |
215 | size_t write_file_len, |
216 | dev_t * partition_device_result, |
217 | dev_t * image_device_result, |
218 | uint64_t * partitionbase_result, |
219 | uint64_t * maxiocount_result, |
220 | uint32_t * oflags) |
221 | { |
222 | struct kern_direct_file_io_ref_t * ref; |
223 | |
224 | proc_t p; |
225 | struct vnode_attr va; |
226 | dk_apfs_wbc_range_t wbc_range; |
227 | int error; |
228 | off_t f_offset; |
229 | uint64_t fileblk = 0; |
230 | size_t filechunk = 0; |
231 | uint64_t physoffset, minoffset; |
232 | dev_t device; |
233 | dev_t target = 0; |
234 | int isssd = 0; |
235 | uint32_t flags = 0; |
236 | uint32_t blksize; |
237 | off_t maxiocount, count, segcount, wbctotal; |
238 | boolean_t locked = FALSE; |
239 | int fmode; |
240 | mode_t cmode; |
241 | struct nameidata nd; |
242 | u_int32_t ndflags; |
243 | off_t mpFree; |
244 | |
245 | wbc_range.count = 0; |
246 | |
247 | int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); |
248 | do_ioctl = NULL; |
249 | void * p1 = NULL; |
250 | void * p2 = NULL; |
251 | |
252 | error = EFAULT; |
253 | |
254 | ref = kalloc_type(struct kern_direct_file_io_ref_t, |
255 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
256 | |
257 | p = kernproc; |
258 | ref->ctx = vfs_context_kernel(); |
259 | ref->namesize = strlen(s: name) + 1; |
260 | ref->name = kalloc_data(ref->namesize, Z_WAITOK | Z_NOFAIL); |
261 | strlcpy(dst: ref->name, src: name, n: ref->namesize); |
262 | |
263 | fmode = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE; |
264 | cmode = S_IRUSR | S_IWUSR; |
265 | ndflags = NOFOLLOW; |
266 | NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(ref->name), ref->ctx); |
267 | VATTR_INIT(&va); |
268 | VATTR_SET(&va, va_mode, cmode); |
269 | VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); |
270 | VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D); |
271 | if ((error = vn_open_auth(ndp: &nd, fmode: &fmode, &va, NULLVP))) { |
272 | kprintf(fmt: "vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n" , fmode, cmode, error); |
273 | goto out; |
274 | } |
275 | |
276 | ref->vp = nd.ni_vp; |
277 | if (ref->vp->v_type == VREG) { |
278 | vnode_lock_spin(ref->vp); |
279 | SET(ref->vp->v_flag, VSWAP); |
280 | vnode_unlock(ref->vp); |
281 | } |
282 | |
283 | if (write_file_addr && write_file_len) { |
284 | if ((error = kern_write_file(ref, offset: write_file_offset, addr: write_file_addr, len: write_file_len, IO_SKIP_ENCRYPTION))) { |
285 | kprintf(fmt: "kern_write_file() failed with error: %d\n" , error); |
286 | goto out; |
287 | } |
288 | } |
289 | |
290 | VATTR_INIT(&va); |
291 | VATTR_WANTED(&va, va_rdev); |
292 | VATTR_WANTED(&va, va_fsid); |
293 | VATTR_WANTED(&va, va_devid); |
294 | VATTR_WANTED(&va, va_data_size); |
295 | VATTR_WANTED(&va, va_data_alloc); |
296 | VATTR_WANTED(&va, va_nlink); |
297 | error = EFAULT; |
298 | if (vnode_getattr(vp: ref->vp, vap: &va, ctx: ref->ctx)) { |
299 | goto out; |
300 | } |
301 | |
302 | wbctotal = 0; |
303 | mpFree = freespace_mb(vp: ref->vp); |
304 | mpFree <<= 20; |
305 | kprintf(fmt: "kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n" , |
306 | ref->name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size); |
307 | |
308 | if (ref->vp->v_type == VREG) { |
309 | /* Don't dump files with links. */ |
310 | if (va.va_nlink != 1) { |
311 | goto out; |
312 | } |
313 | |
314 | /* Don't dump on fs without backing device. */ |
315 | if (!VATTR_IS_SUPPORTED(&va, va_devid)) { |
316 | kprintf(fmt: "kern_direct_file(%s): Not backed by block device.\n" , ref->name); |
317 | error = ENODEV; |
318 | goto out; |
319 | } |
320 | device = va.va_devid; |
321 | ref->filelength = va.va_data_size; |
322 | |
323 | p1 = &device; |
324 | p2 = p; |
325 | do_ioctl = &file_ioctl; |
326 | |
327 | if (kIOPolledFileHibernate & iflags) { |
328 | error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range); |
329 | ref->wbcranged = (error == 0); |
330 | } |
331 | if (ref->wbcranged) { |
332 | uint32_t idx; |
333 | assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0]))); |
334 | for (idx = 0; idx < wbc_range.count; idx++) { |
335 | wbctotal += wbc_range.extents[idx].length; |
336 | } |
337 | kprintf(fmt: "kern_direct_file(%s): wbc %qd\n" , ref->name, wbctotal); |
338 | if (wbctotal) { |
339 | target = wbc_range.dev; |
340 | } |
341 | } |
342 | |
343 | if (set_file_size) { |
344 | if (wbctotal) { |
345 | if (wbctotal >= set_file_size) { |
346 | set_file_size = HIBERNATE_MIN_FILE_SIZE; |
347 | } else { |
348 | set_file_size -= wbctotal; |
349 | if (set_file_size < HIBERNATE_MIN_FILE_SIZE) { |
350 | set_file_size = HIBERNATE_MIN_FILE_SIZE; |
351 | } |
352 | } |
353 | } |
354 | if (fs_free_size) { |
355 | mpFree += va.va_data_alloc; |
356 | if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size)) { |
357 | error = ENOSPC; |
358 | goto out; |
359 | } |
360 | } |
361 | error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx); |
362 | if (error) { |
363 | goto out; |
364 | } |
365 | ref->filelength = set_file_size; |
366 | } |
367 | } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { |
368 | /* Partition. */ |
369 | device = va.va_rdev; |
370 | |
371 | p1 = ref->vp; |
372 | p2 = ref->ctx; |
373 | do_ioctl = &device_ioctl; |
374 | } else { |
375 | /* Don't dump to non-regular files. */ |
376 | error = EFAULT; |
377 | goto out; |
378 | } |
379 | ref->device = device; |
380 | |
381 | // probe for CF |
382 | dk_corestorage_info_t cs_info; |
383 | memset(s: &cs_info, c: 0, n: sizeof(dk_corestorage_info_t)); |
384 | error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info); |
385 | ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES); |
386 | |
387 | // get block size |
388 | |
389 | error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); |
390 | if (error) { |
391 | goto out; |
392 | } |
393 | |
394 | if (ref->blksize == 4096) { |
395 | minoffset = HIBERNATE_MIN_PHYSICAL_LBA_4096 * ref->blksize; |
396 | } else { |
397 | minoffset = HIBERNATE_MIN_PHYSICAL_LBA_512 * ref->blksize; |
398 | } |
399 | |
400 | if (ref->vp->v_type != VREG) { |
401 | error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); |
402 | if (error) { |
403 | goto out; |
404 | } |
405 | ref->filelength = fileblk * ref->blksize; |
406 | } |
407 | |
408 | // pin logical extents, CS version |
409 | |
410 | error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, offset: 0, end: ref->filelength); |
411 | if (error && (ENOTTY != error)) { |
412 | goto out; |
413 | } |
414 | ref->pinned = (error == 0); |
415 | |
416 | // pin logical extents, apfs version |
417 | |
418 | error = VNOP_IOCTL(vp: ref->vp, FSCTL_FREEZE_EXTENTS, NULL, fflag: 0, ctx: ref->ctx); |
419 | if (error && (ENOTTY != error)) { |
420 | goto out; |
421 | } |
422 | ref->frozen = (error == 0); |
423 | |
424 | // generate the block list |
425 | |
426 | error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); |
427 | if (error) { |
428 | goto out; |
429 | } |
430 | locked = TRUE; |
431 | |
432 | f_offset = 0; |
433 | for (; f_offset < ref->filelength; f_offset += filechunk) { |
434 | if (ref->vp->v_type == VREG) { |
435 | filechunk = 1 * 1024 * 1024 * 1024; |
436 | daddr64_t blkno; |
437 | |
438 | error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, |
439 | &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL); |
440 | if (error) { |
441 | goto out; |
442 | } |
443 | if (-1LL == blkno) { |
444 | continue; |
445 | } |
446 | fileblk = blkno * ref->blksize; |
447 | } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { |
448 | fileblk = f_offset; |
449 | filechunk = f_offset ? 0 : (unsigned long)ref->filelength; |
450 | } |
451 | |
452 | physoffset = 0; |
453 | while (physoffset < filechunk) { |
454 | dk_physical_extent_t getphysreq; |
455 | bzero(s: &getphysreq, n: sizeof(getphysreq)); |
456 | |
457 | getphysreq.offset = fileblk + physoffset; |
458 | getphysreq.length = (filechunk - physoffset); |
459 | error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq); |
460 | if (error) { |
461 | goto out; |
462 | } |
463 | if (!target) { |
464 | target = getphysreq.dev; |
465 | } else if (target != getphysreq.dev) { |
466 | error = ENOTSUP; |
467 | goto out; |
468 | } |
469 | |
470 | assert(getphysreq.offset >= minoffset); |
471 | |
472 | #if HIBFRAGMENT |
473 | uint64_t rev; |
474 | for (rev = 4096; rev <= getphysreq.length; rev += 4096) { |
475 | callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096); |
476 | } |
477 | #else |
478 | callback(callback_ref, getphysreq.offset, getphysreq.length); |
479 | #endif |
480 | physoffset += getphysreq.length; |
481 | } |
482 | } |
483 | if (ref->wbcranged) { |
484 | uint32_t idx; |
485 | for (idx = 0; idx < wbc_range.count; idx++) { |
486 | assert(wbc_range.extents[idx].offset >= minoffset); |
487 | callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length); |
488 | } |
489 | } |
490 | callback(callback_ref, 0ULL, 0ULL); |
491 | |
492 | if (ref->vp->v_type == VREG) { |
493 | p1 = ⌖ |
494 | } else { |
495 | p1 = ⌖ |
496 | p2 = p; |
497 | do_ioctl = &file_ioctl; |
498 | } |
499 | |
500 | // get partition base |
501 | |
502 | if (partitionbase_result) { |
503 | error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); |
504 | if (error) { |
505 | goto out; |
506 | } |
507 | } |
508 | |
509 | // get block size & constraints |
510 | |
511 | error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize); |
512 | if (error) { |
513 | goto out; |
514 | } |
515 | |
516 | maxiocount = 1 * 1024 * 1024 * 1024; |
517 | |
518 | error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count); |
519 | if (error) { |
520 | count = 0; |
521 | } |
522 | count *= blksize; |
523 | if (count && (count < maxiocount)) { |
524 | maxiocount = count; |
525 | } |
526 | |
527 | error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count); |
528 | if (error) { |
529 | count = 0; |
530 | } |
531 | count *= blksize; |
532 | if (count && (count < maxiocount)) { |
533 | maxiocount = count; |
534 | } |
535 | |
536 | error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count); |
537 | if (error) { |
538 | count = 0; |
539 | } |
540 | if (count && (count < maxiocount)) { |
541 | maxiocount = count; |
542 | } |
543 | |
544 | error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count); |
545 | if (error) { |
546 | count = 0; |
547 | } |
548 | if (count && (count < maxiocount)) { |
549 | maxiocount = count; |
550 | } |
551 | |
552 | error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count); |
553 | if (!error) { |
554 | error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount); |
555 | } |
556 | if (error) { |
557 | count = segcount = 0; |
558 | } |
559 | count *= segcount; |
560 | if (count && (count < maxiocount)) { |
561 | maxiocount = count; |
562 | } |
563 | |
564 | error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count); |
565 | if (!error) { |
566 | error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount); |
567 | } |
568 | if (error) { |
569 | count = segcount = 0; |
570 | } |
571 | count *= segcount; |
572 | if (count && (count < maxiocount)) { |
573 | maxiocount = count; |
574 | } |
575 | |
576 | kprintf(fmt: "max io 0x%qx bytes\n" , maxiocount); |
577 | if (maxiocount_result) { |
578 | *maxiocount_result = maxiocount; |
579 | } |
580 | |
581 | error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd); |
582 | if (!error && isssd) { |
583 | flags |= kIOPolledFileSSD; |
584 | } |
585 | |
586 | if (partition_device_result) { |
587 | *partition_device_result = device; |
588 | } |
589 | if (image_device_result) { |
590 | *image_device_result = target; |
591 | } |
592 | if (oflags) { |
593 | *oflags = flags; |
594 | } |
595 | |
596 | if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { |
597 | vnode_close(vp: ref->vp, FWRITE, ctx: ref->ctx); |
598 | ref->vp = NULLVP; |
599 | ref->ctx = NULL; |
600 | } |
601 | |
602 | out: |
603 | printf("kern_open_file_for_direct_io(%p, %d)\n" , ref, error); |
604 | |
605 | |
606 | if (error && locked) { |
607 | p1 = &device; |
608 | if (do_ioctl) { |
609 | (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); |
610 | } |
611 | } |
612 | |
613 | if (error && ref) { |
614 | if (ref->vp) { |
615 | (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, offset: 0, end: (ref->pinned && ref->cf) ? ref->filelength : 0); |
616 | |
617 | if (ref->frozen) { |
618 | (void) VNOP_IOCTL(vp: ref->vp, FSCTL_THAW_EXTENTS, NULL, fflag: 0, ctx: ref->ctx); |
619 | } |
620 | if (ref->wbcranged) { |
621 | if (do_ioctl) { |
622 | (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL); |
623 | } |
624 | } |
625 | vnode_close(vp: ref->vp, FWRITE, ctx: ref->ctx); |
626 | ref->vp = NULLVP; |
627 | } |
628 | ref->ctx = NULL; |
629 | if (ref->name) { |
630 | kfree_data(ref->name, ref->namesize); |
631 | ref->name = NULL; |
632 | } |
633 | kfree_type(struct kern_direct_file_io_ref_t, ref); |
634 | ref = NULL; |
635 | } |
636 | |
637 | return ref; |
638 | } |
639 | |
640 | int |
641 | kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) |
642 | { |
643 | assert(len <= INT32_MAX); |
644 | return vn_rdwr(rw: UIO_WRITE, vp: ref->vp, |
645 | base: addr, len: (int)len, offset, |
646 | segflg: UIO_SYSSPACE, ioflg: ioflag | IO_SYNC | IO_NODELOCKED | IO_UNIT, |
647 | cred: vfs_context_ucred(ctx: ref->ctx), aresid: (int *) 0, |
648 | p: vfs_context_proc(ctx: ref->ctx)); |
649 | } |
650 | |
651 | int |
652 | kern_read_file(struct kern_direct_file_io_ref_t * ref, off_t offset, void * addr, size_t len, int ioflag) |
653 | { |
654 | assert(len <= INT32_MAX); |
655 | return vn_rdwr(rw: UIO_READ, vp: ref->vp, |
656 | base: addr, len: (int)len, offset, |
657 | segflg: UIO_SYSSPACE, ioflg: ioflag | IO_SYNC | IO_NODELOCKED | IO_UNIT, |
658 | cred: vfs_context_ucred(ctx: ref->ctx), aresid: (int *) 0, |
659 | p: vfs_context_proc(ctx: ref->ctx)); |
660 | } |
661 | |
662 | |
663 | struct mount * |
664 | kern_file_mount(struct kern_direct_file_io_ref_t * ref) |
665 | { |
666 | return ref->vp->v_mount; |
667 | } |
668 | |
669 | void |
670 | kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, |
671 | off_t write_offset, void * addr, size_t write_length, |
672 | off_t discard_offset, off_t discard_end, bool unlink) |
673 | { |
674 | int error; |
675 | printf("kern_close_file_for_direct_io(%p)\n" , ref); |
676 | |
677 | if (!ref) { |
678 | return; |
679 | } |
680 | |
681 | if (ref->vp) { |
682 | int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); |
683 | void * p1; |
684 | void * p2; |
685 | |
686 | discard_offset = ((discard_offset + ref->blksize - 1) & ~(((off_t) ref->blksize) - 1)); |
687 | discard_end = ((discard_end) & ~(((off_t) ref->blksize) - 1)); |
688 | |
689 | if (ref->vp->v_type == VREG) { |
690 | p1 = &ref->device; |
691 | p2 = kernproc; |
692 | do_ioctl = &file_ioctl; |
693 | } else { |
694 | /* Partition. */ |
695 | p1 = ref->vp; |
696 | p2 = ref->ctx; |
697 | do_ioctl = &device_ioctl; |
698 | } |
699 | (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); |
700 | |
701 | //XXX If unmapping extents then don't also need to unpin; except ... |
702 | //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and |
703 | //XXX unmap is subset, so save extra walk over file extents (and the risk |
704 | //XXX that CF drain starts) vs leaving partial units pinned to SSD |
705 | //XXX (until whatever was sharing also unmaps). Err on cleaning up fully. |
706 | boolean_t will_unmap = (!ref->pinned || ref->cf) && (discard_end > discard_offset); |
707 | boolean_t will_unpin = (ref->pinned && ref->cf /* && !will_unmap */); |
708 | |
709 | (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, offset: 0, end: (will_unpin) ? ref->filelength : 0); |
710 | |
711 | if (will_unmap) { |
712 | (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, offset: discard_offset, end: (ref->cf) ? ref->filelength : discard_end); |
713 | } |
714 | |
715 | if (ref->frozen) { |
716 | (void) VNOP_IOCTL(vp: ref->vp, FSCTL_THAW_EXTENTS, NULL, fflag: 0, ctx: ref->ctx); |
717 | } |
718 | if (ref->wbcranged) { |
719 | (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL); |
720 | } |
721 | |
722 | if (addr && write_length) { |
723 | (void) kern_write_file(ref, offset: write_offset, addr, len: write_length, IO_SKIP_ENCRYPTION); |
724 | } |
725 | |
726 | error = vnode_close(vp: ref->vp, FWRITE, ctx: ref->ctx); |
727 | |
728 | ref->vp = NULLVP; |
729 | kprintf(fmt: "vnode_close(%d)\n" , error); |
730 | |
731 | |
732 | if (unlink) { |
733 | int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int); |
734 | |
735 | error = unlink1(ref->ctx, NULLVP, CAST_USER_ADDR_T(ref->name), UIO_SYSSPACE, 0); |
736 | kprintf(fmt: "%s: unlink1(%d)\n" , __func__, error); |
737 | } |
738 | } |
739 | |
740 | ref->ctx = NULL; |
741 | |
742 | kfree_data(ref->name, ref->namesize); |
743 | ref->name = NULL; |
744 | |
745 | kfree_type(struct kern_direct_file_io_ref_t, ref); |
746 | } |
747 | |