1 | /* |
2 | * Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /*- |
29 | * Copyright (c) 1990, 1993 |
30 | * The Regents of the University of California. All rights reserved. |
31 | * |
32 | * This code is derived from software contributed to Berkeley by |
33 | * Chris Torek. |
34 | * |
35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions |
37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. All advertising materials mentioning features or use of this software |
44 | * must display the following acknowledgement: |
45 | * This product includes software developed by the University of |
46 | * California, Berkeley and its contributors. |
47 | * 4. Neither the name of the University nor the names of its contributors |
48 | * may be used to endorse or promote products derived from this software |
49 | * without specific prior written permission. |
50 | * |
51 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
52 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
53 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
54 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
55 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
56 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
57 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
58 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
59 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
60 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
61 | * SUCH DAMAGE. |
62 | */ |
63 | |
64 | #include <sys/cdefs.h> |
65 | |
66 | #if 0 /* XXX coming soon */ |
67 | #include <ctype.h> |
68 | #else |
/*
 * Minimal stand-in for the <ctype.h> isspace(): returns non-zero when `c'
 * is one of the six white-space characters of the "C" locale (space,
 * horizontal tab, newline, vertical tab, form feed, carriage return) and
 * 0 for every other character.
 */
static inline int
isspace(char c)
{
	return (c == ' ' || c == '\t' || c == '\n' ||
	    c == '\v' || c == '\f' || c == '\r');
}
74 | #endif |
75 | #include <stdarg.h> |
76 | #include <string.h> |
77 | #include <sys/param.h> |
78 | #include <sys/systm.h> |
79 | |
80 | #define BUF 32 /* Maximum length of numeric string. */ |
81 | |
82 | /* |
83 | * Flags used during conversion. |
84 | */ |
85 | #define LONG 0x01 /* l: long or double */ |
86 | #define SHORT 0x04 /* h: short */ |
87 | #define SUPPRESS 0x08 /* *: suppress assignment */ |
88 | #define POINTER 0x10 /* p: void * (as hex) */ |
89 | #define NOSKIP 0x20 /* [ or c: do not skip blanks */ |
90 | #define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ |
91 | #define SHORTSHORT 0x4000 /* hh: char */ |
92 | #define UNSIGNED 0x8000 /* %[oupxX] conversions */ |
93 | |
94 | /* |
95 | * The following are used in numeric conversions only: |
96 | * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; |
97 | * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. |
98 | */ |
99 | #define SIGNOK 0x40 /* +/- is (still) legal */ |
100 | #define NDIGITS 0x80 /* no digits detected */ |
101 | |
102 | #define DPTOK 0x100 /* (float) decimal point is still legal */ |
103 | #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ |
104 | |
105 | #define PFXOK 0x100 /* 0x prefix is (still) legal */ |
106 | #define NZDIGITS 0x200 /* no zero digits detected */ |
107 | |
108 | /* |
109 | * Conversion types. |
110 | */ |
111 | #define CT_CHAR 0 /* %c conversion */ |
112 | #define CT_CCL 1 /* %[...] conversion */ |
113 | #define CT_STRING 2 /* %s conversion */ |
114 | #define CT_INT 3 /* %[dioupxX] conversion */ |
115 | |
116 | static const u_char *__sccl(char *, const u_char *); |
117 | |
/*
 * sscanf --
 *	Variadic front end to vsscanf(): gathers the trailing arguments into
 *	a va_list, lets vsscanf() do the scanning of `ibuf' under `fmt', and
 *	hands back vsscanf()'s return value unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nassigned;

	va_start(args, fmt);
	nassigned = vsscanf(ibuf, fmt, args);
	va_end(args);

	return nassigned;
}
129 | |
130 | int |
131 | vsscanf(const char *inp, char const *fmt0, va_list ap) |
132 | { |
133 | int inr; |
134 | const u_char *fmt = (const u_char *)fmt0; |
135 | int c; /* character from format, or conversion */ |
136 | size_t width; /* field width, or 0 */ |
137 | char *p; /* points into all kinds of strings */ |
138 | int n; /* handy integer */ |
139 | int flags; /* flags as defined above */ |
140 | char *p0; /* saves original value of p when necessary */ |
141 | int nassigned; /* number of fields assigned */ |
142 | int nconversions; /* number of conversions */ |
143 | int nread; /* number of characters consumed from fp */ |
144 | int base; /* base argument to conversion function */ |
145 | char ccltab[256]; /* character class table for %[...] */ |
146 | char buf[BUF]; /* buffer for numeric conversions */ |
147 | |
148 | /* `basefix' is used to avoid `if' tests in the integer scanner */ |
149 | static short basefix[17] = |
150 | { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; |
151 | |
152 | inr = strlen(inp); |
153 | |
154 | nassigned = 0; |
155 | nconversions = 0; |
156 | nread = 0; |
157 | base = 0; /* XXX just to keep gcc happy */ |
158 | for (;;) { |
159 | c = *fmt++; |
160 | if (c == 0) |
161 | return (nassigned); |
162 | if (isspace(c)) { |
163 | while (inr > 0 && isspace(*inp)) { |
164 | nread++; |
165 | inr--; |
166 | inp++; |
167 | } |
168 | continue; |
169 | } |
170 | if (c != '%') |
171 | goto literal; |
172 | width = 0; |
173 | flags = 0; |
174 | /* |
175 | * switch on the format. continue if done; |
176 | * break once format type is derived. |
177 | */ |
178 | again: c = *fmt++; |
179 | switch (c) { |
180 | case '%': |
181 | literal: |
182 | if (inr <= 0) |
183 | goto input_failure; |
184 | if (*inp != c) |
185 | goto match_failure; |
186 | inr--; |
187 | inp++; |
188 | nread++; |
189 | continue; |
190 | |
191 | case '*': |
192 | flags |= SUPPRESS; |
193 | goto again; |
194 | case 'l': |
195 | if (flags & LONG) { |
196 | flags &= ~LONG; |
197 | flags |= LONGLONG; |
198 | } else |
199 | flags |= LONG; |
200 | goto again; |
201 | case 'q': |
202 | flags |= LONGLONG; /* not quite */ |
203 | goto again; |
204 | case 'h': |
205 | if (flags & SHORT) { |
206 | flags &= ~SHORT; |
207 | flags |= SHORTSHORT; |
208 | } else |
209 | flags |= SHORT; |
210 | goto again; |
211 | |
212 | case '0': case '1': case '2': case '3': case '4': |
213 | case '5': case '6': case '7': case '8': case '9': |
214 | width = width * 10 + c - '0'; |
215 | goto again; |
216 | |
217 | /* |
218 | * Conversions. |
219 | */ |
220 | case 'd': |
221 | c = CT_INT; |
222 | base = 10; |
223 | break; |
224 | |
225 | case 'i': |
226 | c = CT_INT; |
227 | base = 0; |
228 | break; |
229 | |
230 | case 'o': |
231 | c = CT_INT; |
232 | flags |= UNSIGNED; |
233 | base = 8; |
234 | break; |
235 | |
236 | case 'u': |
237 | c = CT_INT; |
238 | flags |= UNSIGNED; |
239 | base = 10; |
240 | break; |
241 | |
242 | case 'X': |
243 | case 'x': |
244 | flags |= PFXOK; /* enable 0x prefixing */ |
245 | c = CT_INT; |
246 | flags |= UNSIGNED; |
247 | base = 16; |
248 | break; |
249 | |
250 | case 's': |
251 | c = CT_STRING; |
252 | break; |
253 | |
254 | case '[': |
255 | fmt = __sccl(ccltab, fmt); |
256 | flags |= NOSKIP; |
257 | c = CT_CCL; |
258 | break; |
259 | |
260 | case 'c': |
261 | flags |= NOSKIP; |
262 | c = CT_CHAR; |
263 | break; |
264 | |
265 | case 'p': /* pointer format is like hex */ |
266 | flags |= POINTER | PFXOK; |
267 | c = CT_INT; |
268 | flags |= UNSIGNED; |
269 | base = 16; |
270 | break; |
271 | |
272 | case 'n': |
273 | nconversions++; |
274 | if (flags & SUPPRESS) /* ??? */ |
275 | continue; |
276 | if (flags & SHORTSHORT) |
277 | *va_arg(ap, char *) = nread; |
278 | else if (flags & SHORT) |
279 | *va_arg(ap, short *) = nread; |
280 | else if (flags & LONG) |
281 | *va_arg(ap, long *) = nread; |
282 | else if (flags & LONGLONG) |
283 | *va_arg(ap, long long *) = nread; |
284 | else |
285 | *va_arg(ap, int *) = nread; |
286 | continue; |
287 | } |
288 | |
289 | /* |
290 | * We have a conversion that requires input. |
291 | */ |
292 | if (inr <= 0) |
293 | goto input_failure; |
294 | |
295 | /* |
296 | * Consume leading white space, except for formats |
297 | * that suppress this. |
298 | */ |
299 | if ((flags & NOSKIP) == 0) { |
300 | while (isspace(*inp)) { |
301 | nread++; |
302 | if (--inr > 0) |
303 | inp++; |
304 | else |
305 | goto input_failure; |
306 | } |
307 | /* |
308 | * Note that there is at least one character in |
309 | * the buffer, so conversions that do not set NOSKIP |
310 | * can no longer result in an input failure. |
311 | */ |
312 | } |
313 | |
314 | /* |
315 | * Do the conversion. |
316 | */ |
317 | switch (c) { |
318 | |
319 | case CT_CHAR: |
320 | /* scan arbitrary characters (sets NOSKIP) */ |
321 | if (width == 0) |
322 | width = 1; |
323 | if (flags & SUPPRESS) { |
324 | size_t sum = 0; |
325 | for (;;) { |
326 | if ((n = inr) < (int)width) { |
327 | sum += n; |
328 | width -= n; |
329 | inp += n; |
330 | if (sum == 0) |
331 | goto input_failure; |
332 | break; |
333 | } else { |
334 | sum += width; |
335 | inr -= width; |
336 | inp += width; |
337 | break; |
338 | } |
339 | } |
340 | nread += sum; |
341 | } else { |
342 | bcopy(inp, va_arg(ap, char *), width); |
343 | inr -= width; |
344 | inp += width; |
345 | nread += width; |
346 | nassigned++; |
347 | } |
348 | nconversions++; |
349 | break; |
350 | |
351 | case CT_CCL: |
352 | /* scan a (nonempty) character class (sets NOSKIP) */ |
353 | if (width == 0) |
354 | width = (size_t)~0; /* `infinity' */ |
355 | /* take only those things in the class */ |
356 | if (flags & SUPPRESS) { |
357 | n = 0; |
358 | while (ccltab[(unsigned char)*inp]) { |
359 | n++; |
360 | inr--; |
361 | inp++; |
362 | if (--width == 0) |
363 | break; |
364 | if (inr <= 0) { |
365 | if (n == 0) |
366 | goto input_failure; |
367 | break; |
368 | } |
369 | } |
370 | if (n == 0) |
371 | goto match_failure; |
372 | } else { |
373 | p0 = p = va_arg(ap, char *); |
374 | while (ccltab[(unsigned char)*inp]) { |
375 | inr--; |
376 | *p++ = *inp++; |
377 | if (--width == 0) |
378 | break; |
379 | if (inr <= 0) { |
380 | if (p == p0) |
381 | goto input_failure; |
382 | break; |
383 | } |
384 | } |
385 | n = p - p0; |
386 | if (n == 0) |
387 | goto match_failure; |
388 | *p = 0; |
389 | nassigned++; |
390 | } |
391 | nread += n; |
392 | nconversions++; |
393 | break; |
394 | |
395 | case CT_STRING: |
396 | /* like CCL, but zero-length string OK, & no NOSKIP */ |
397 | if (width == 0) |
398 | width = (size_t)~0; |
399 | if (flags & SUPPRESS) { |
400 | n = 0; |
401 | while (!isspace(*inp)) { |
402 | n++; |
403 | inr--; |
404 | inp++; |
405 | if (--width == 0) |
406 | break; |
407 | if (inr <= 0) |
408 | break; |
409 | } |
410 | nread += n; |
411 | } else { |
412 | p0 = p = va_arg(ap, char *); |
413 | while (!isspace(*inp)) { |
414 | inr--; |
415 | *p++ = *inp++; |
416 | if (--width == 0) |
417 | break; |
418 | if (inr <= 0) |
419 | break; |
420 | } |
421 | *p = 0; |
422 | nread += p - p0; |
423 | nassigned++; |
424 | } |
425 | nconversions++; |
426 | continue; |
427 | |
428 | case CT_INT: |
429 | /* scan an integer as if by the conversion function */ |
430 | #ifdef hardway |
431 | if (width == 0 || width > sizeof(buf) - 1) |
432 | width = sizeof(buf) - 1; |
433 | #else |
434 | /* size_t is unsigned, hence this optimisation */ |
435 | if (--width > sizeof(buf) - 2) |
436 | width = sizeof(buf) - 2; |
437 | width++; |
438 | #endif |
439 | flags |= SIGNOK | NDIGITS | NZDIGITS; |
440 | for (p = buf; width; width--) { |
441 | c = *inp; |
442 | /* |
443 | * Switch on the character; `goto ok' |
444 | * if we accept it as a part of number. |
445 | */ |
446 | switch (c) { |
447 | |
448 | /* |
449 | * The digit 0 is always legal, but is |
450 | * special. For %i conversions, if no |
451 | * digits (zero or nonzero) have been |
452 | * scanned (only signs), we will have |
453 | * base==0. In that case, we should set |
454 | * it to 8 and enable 0x prefixing. |
455 | * Also, if we have not scanned zero digits |
456 | * before this, do not turn off prefixing |
457 | * (someone else will turn it off if we |
458 | * have scanned any nonzero digits). |
459 | */ |
460 | case '0': |
461 | if (base == 0) { |
462 | base = 8; |
463 | flags |= PFXOK; |
464 | } |
465 | if (flags & NZDIGITS) |
466 | flags &= ~(SIGNOK|NZDIGITS|NDIGITS); |
467 | else |
468 | flags &= ~(SIGNOK|PFXOK|NDIGITS); |
469 | goto ok; |
470 | |
471 | /* 1 through 7 always legal */ |
472 | case '1': case '2': case '3': |
473 | case '4': case '5': case '6': case '7': |
474 | base = basefix[base]; |
475 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
476 | goto ok; |
477 | |
478 | /* digits 8 and 9 ok iff decimal or hex */ |
479 | case '8': case '9': |
480 | base = basefix[base]; |
481 | if (base <= 8) |
482 | break; /* not legal here */ |
483 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
484 | goto ok; |
485 | |
486 | /* letters ok iff hex */ |
487 | case 'A': case 'B': case 'C': |
488 | case 'D': case 'E': case 'F': |
489 | case 'a': case 'b': case 'c': |
490 | case 'd': case 'e': case 'f': |
491 | /* no need to fix base here */ |
492 | if (base <= 10) |
493 | break; /* not legal here */ |
494 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
495 | goto ok; |
496 | |
497 | /* sign ok only as first character */ |
498 | case '+': case '-': |
499 | if (flags & SIGNOK) { |
500 | flags &= ~SIGNOK; |
501 | goto ok; |
502 | } |
503 | break; |
504 | |
505 | /* x ok iff flag still set & 2nd char */ |
506 | case 'x': case 'X': |
507 | if (flags & PFXOK && p == buf + 1) { |
508 | base = 16; /* if %i */ |
509 | flags &= ~PFXOK; |
510 | goto ok; |
511 | } |
512 | break; |
513 | } |
514 | |
515 | /* |
516 | * If we got here, c is not a legal character |
517 | * for a number. Stop accumulating digits. |
518 | */ |
519 | break; |
520 | ok: |
521 | /* |
522 | * c is legal: store it and look at the next. |
523 | */ |
524 | *p++ = c; |
525 | if (--inr > 0) |
526 | inp++; |
527 | else |
528 | break; /* end of input */ |
529 | } |
530 | /* |
531 | * If we had only a sign, it is no good; push |
532 | * back the sign. If the number ends in `x', |
533 | * it was [sign] '0' 'x', so push back the x |
534 | * and treat it as [sign] '0'. |
535 | */ |
536 | if (flags & NDIGITS) { |
537 | if (p > buf) { |
538 | inp--; |
539 | inr++; |
540 | } |
541 | goto match_failure; |
542 | } |
543 | c = ((u_char *)p)[-1]; |
544 | if (c == 'x' || c == 'X') { |
545 | --p; |
546 | inp--; |
547 | inr++; |
548 | } |
549 | if ((flags & SUPPRESS) == 0) { |
550 | u_quad_t res; |
551 | |
552 | *p = 0; |
553 | if ((flags & UNSIGNED) == 0) |
554 | res = strtoq(buf, (char **)NULL, base); |
555 | else |
556 | res = strtouq(buf, (char **)NULL, base); |
557 | if (flags & POINTER) |
558 | *va_arg(ap, void **) = |
559 | (void *)(uintptr_t)res; |
560 | else if (flags & SHORTSHORT) |
561 | *va_arg(ap, char *) = res; |
562 | else if (flags & SHORT) |
563 | *va_arg(ap, short *) = res; |
564 | else if (flags & LONG) |
565 | *va_arg(ap, long *) = res; |
566 | else if (flags & LONGLONG) |
567 | *va_arg(ap, long long *) = res; |
568 | else |
569 | *va_arg(ap, int *) = res; |
570 | nassigned++; |
571 | } |
572 | nread += p - buf; |
573 | nconversions++; |
574 | break; |
575 | |
576 | } |
577 | } |
578 | input_failure: |
579 | return (nconversions != 0 ? nassigned : -1); |
580 | match_failure: |
581 | return (nassigned); |
582 | } |
583 | |
/*
 * __sccl --
 *	Fill in the given table from the scanset at the given format
 *	(just after `[').
 *
 *	`tab' must have room for 256 entries; it is indexed by character
 *	value.  On return it holds a 1 wherever the character is part of the
 *	scanset and a 0 everywhere else.  A leading `^' negates the set.
 *
 *	Returns a pointer to the character past the closing `]', or to the
 *	terminating NUL when the format ends before a `]' is seen.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* first preset the whole table to the default */
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must reject n == c as well: the fill
			 * loop below pre-increments, so an empty range
			 * would mark c + 1 (and index tab[256] for c == 255).
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; c < n holds on entry */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
673 | |