1 | /* |
2 | * Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /*- |
29 | * Copyright (c) 1990, 1993 |
30 | * The Regents of the University of California. All rights reserved. |
31 | * |
32 | * This code is derived from software contributed to Berkeley by |
33 | * Chris Torek. |
34 | * |
35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions |
37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. All advertising materials mentioning features or use of this software |
44 | * must display the following acknowledgement: |
45 | * This product includes software developed by the University of |
46 | * California, Berkeley and its contributors. |
47 | * 4. Neither the name of the University nor the names of its contributors |
48 | * may be used to endorse or promote products derived from this software |
49 | * without specific prior written permission. |
50 | * |
51 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
52 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
53 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
54 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
55 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
56 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
57 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
58 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
59 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
60 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
61 | * SUCH DAMAGE. |
62 | */ |
63 | |
64 | #include <stdarg.h> |
65 | #include <stddef.h> |
66 | #include <string.h> |
67 | #include <sys/cdefs.h> |
68 | #include <sys/param.h> |
69 | |
70 | quad_t strtoq(const char *, char **, int); |
71 | u_quad_t strtouq(const char *, char **, int); |
72 | |
/*
 * Local replacement for the <ctype.h> classifier: returns nonzero when
 * `c' is one of the C-locale white-space characters (space, horizontal
 * tab, newline, vertical tab, form feed, carriage return), else zero.
 *
 * The previous version compared against '\n' twice ('\12' is the octal
 * spelling of newline), so '\r', '\f' and '\v' were never recognized.
 */
static inline int
isspace(char c)
{
	return c == ' ' || c == '\t' || c == '\n' ||
	       c == '\v' || c == '\f' || c == '\r';
}
78 | |
#define BUF             32      /* Maximum length of numeric string. */

/*
 * Flags used during conversion.  They are OR-ed together into the
 * per-directive `flags' word of the scanner.
 */
#define LONG            0x01    /* l: long */
#define SHORT           0x04    /* h: short */
#define SUPPRESS        0x08    /* *: suppress assignment */
#define POINTER         0x10    /* p: void * (as hex) */
#define NOSKIP          0x20    /* [ or c: do not skip blanks */
#define LONGLONG        0x400   /* ll: long long (+ deprecated q: quad) */
#define SHORTSHORT      0x4000  /* hh: char */
#define UNSIGNED        0x8000  /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral conversions.
 * DPTOK and EXPOK are the floating-point meanings of the same two bits
 * as PFXOK and NZDIGITS; they are kept for reference only, since this
 * file implements no floating-point conversion.
 */
#define SIGNOK          0x40    /* +/- is (still) legal */
#define NDIGITS         0x80    /* no digits detected */

#define DPTOK           0x100   /* (float) decimal point is still legal */
#define EXPOK           0x200   /* (float) exponent (e+3, etc) still legal */

#define PFXOK           0x100   /* 0x prefix is (still) legal */
#define NZDIGITS        0x200   /* no zero digits detected */

/*
 * Conversion types.  After a directive has been parsed, the scanner
 * stores one of these in its `c' variable to select the conversion.
 */
#define CT_CHAR         0       /* %c conversion */
#define CT_CCL          1       /* %[...] conversion */
#define CT_STRING       2       /* %s conversion */
#define CT_INT          3       /* %[dioupxX] conversion */
114 | |
115 | static const u_char *__sccl(char *, const u_char *); |
116 | |
117 | int sscanf(const char *, const char *, ...); |
118 | int vsscanf(const char *, char const *, va_list); |
119 | |
/*
 * sscanf: variadic front end for vsscanf().
 *
 * Scans the NUL-terminated string `ibuf' according to `fmt', storing
 * results through the pointer arguments that follow.  Returns whatever
 * vsscanf() returns for the same input and format.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = vsscanf(ibuf, fmt, ap);
	va_end(ap);
	return ret;
}
131 | |
132 | int |
133 | vsscanf(const char *inp, char const *fmt0, va_list ap) |
134 | { |
135 | ssize_t inr; |
136 | const u_char *fmt = (const u_char *)fmt0; |
137 | ssize_t width; /* field width, or 0 */ |
138 | char *p; /* points into all kinds of strings */ |
139 | int flags; /* flags as defined above */ |
140 | char *p0; /* saves original value of p when necessary */ |
141 | int nassigned = 0; /* number of fields assigned */ |
142 | int nconversions = 0; /* number of conversions */ |
143 | int nread = 0; /* number of characters consumed from fp */ |
144 | int base = 0; /* base argument to conversion function */ |
145 | char ccltab[256]; /* character class table for %[...] */ |
146 | char buf[BUF]; /* buffer for numeric conversions */ |
147 | |
148 | /* `basefix' is used to avoid `if' tests in the integer scanner */ |
149 | static short basefix[17] = |
150 | { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; |
151 | |
152 | inr = (ssize_t)strlen(s: inp); |
153 | |
154 | for (;;) { |
155 | char c = (char)*fmt++; /* character from format, or conversion */ |
156 | if (c == 0) { |
157 | return nassigned; |
158 | } |
159 | if (isspace(c)) { |
160 | while (inr > 0 && isspace(c: *inp)) { |
161 | nread++; |
162 | inr--; |
163 | inp++; |
164 | } |
165 | continue; |
166 | } |
167 | if (c != '%') { |
168 | goto literal; |
169 | } |
170 | width = 0; |
171 | flags = 0; |
172 | /* |
173 | * switch on the format. continue if done; |
174 | * break once format type is derived. |
175 | */ |
176 | again: |
177 | c = (char)*fmt++; |
178 | switch (c) { |
179 | case '%': |
180 | literal: |
181 | if (inr <= 0) { |
182 | goto input_failure; |
183 | } |
184 | if (*inp != c) { |
185 | goto match_failure; |
186 | } |
187 | inr--; |
188 | inp++; |
189 | nread++; |
190 | continue; |
191 | |
192 | case '*': |
193 | flags |= SUPPRESS; |
194 | goto again; |
195 | case 'l': |
196 | if (flags & LONG) { |
197 | flags &= ~LONG; |
198 | flags |= LONGLONG; |
199 | } else { |
200 | flags |= LONG; |
201 | } |
202 | goto again; |
203 | case 'q': |
204 | flags |= LONGLONG; /* not quite */ |
205 | goto again; |
206 | case 'h': |
207 | if (flags & SHORT) { |
208 | flags &= ~SHORT; |
209 | flags |= SHORTSHORT; |
210 | } else { |
211 | flags |= SHORT; |
212 | } |
213 | goto again; |
214 | |
215 | case '0': case '1': case '2': case '3': case '4': |
216 | case '5': case '6': case '7': case '8': case '9': |
217 | width = width * 10 + c - '0'; |
218 | goto again; |
219 | |
220 | /* |
221 | * Conversions. |
222 | */ |
223 | case 'd': |
224 | c = CT_INT; |
225 | base = 10; |
226 | break; |
227 | |
228 | case 'i': |
229 | c = CT_INT; |
230 | base = 0; |
231 | break; |
232 | |
233 | case 'o': |
234 | c = CT_INT; |
235 | flags |= UNSIGNED; |
236 | base = 8; |
237 | break; |
238 | |
239 | case 'u': |
240 | c = CT_INT; |
241 | flags |= UNSIGNED; |
242 | base = 10; |
243 | break; |
244 | |
245 | case 'X': |
246 | case 'x': |
247 | flags |= PFXOK; /* enable 0x prefixing */ |
248 | c = CT_INT; |
249 | flags |= UNSIGNED; |
250 | base = 16; |
251 | break; |
252 | |
253 | case 's': |
254 | c = CT_STRING; |
255 | break; |
256 | |
257 | case '[': |
258 | fmt = __sccl(ccltab, fmt); |
259 | flags |= NOSKIP; |
260 | c = CT_CCL; |
261 | break; |
262 | |
263 | case 'c': |
264 | flags |= NOSKIP; |
265 | c = CT_CHAR; |
266 | break; |
267 | |
268 | case 'p': /* pointer format is like hex */ |
269 | flags |= POINTER | PFXOK; |
270 | c = CT_INT; |
271 | flags |= UNSIGNED; |
272 | base = 16; |
273 | break; |
274 | |
275 | case 'n': |
276 | nconversions++; |
277 | if (flags & SUPPRESS) { /* ??? */ |
278 | continue; |
279 | } |
280 | if (flags & SHORTSHORT) { |
281 | *va_arg(ap, char *) = (char)nread; |
282 | } else if (flags & SHORT) { |
283 | *va_arg(ap, short *) = (short)nread; |
284 | } else if (flags & LONG) { |
285 | *va_arg(ap, long *) = (long)nread; |
286 | } else if (flags & LONGLONG) { |
287 | *va_arg(ap, long long *) = (long long)nread; |
288 | } else { |
289 | *va_arg(ap, int *) = (int)nread; |
290 | } |
291 | continue; |
292 | } |
293 | |
294 | /* |
295 | * We have a conversion that requires input. |
296 | */ |
297 | if (inr <= 0) { |
298 | goto input_failure; |
299 | } |
300 | |
301 | /* |
302 | * Consume leading white space, except for formats |
303 | * that suppress this. |
304 | */ |
305 | if ((flags & NOSKIP) == 0) { |
306 | while (isspace(c: *inp)) { |
307 | nread++; |
308 | if (--inr > 0) { |
309 | inp++; |
310 | } else { |
311 | goto input_failure; |
312 | } |
313 | } |
314 | /* |
315 | * Note that there is at least one character in |
316 | * the buffer, so conversions that do not set NOSKIP |
317 | * can no longer result in an input failure. |
318 | */ |
319 | } |
320 | |
321 | /* |
322 | * Do the conversion. |
323 | */ |
324 | switch (c) { |
325 | case CT_CHAR: |
326 | /* scan arbitrary characters (sets NOSKIP) */ |
327 | if (width == 0) { |
328 | width = 1; |
329 | } |
330 | if (flags & SUPPRESS) { |
331 | size_t sum = 0; |
332 | for (;;) { |
333 | ssize_t n = inr; |
334 | if (n < width) { |
335 | sum += (size_t)n; |
336 | width -= n; |
337 | inp += n; |
338 | if (sum == 0) { |
339 | goto input_failure; |
340 | } |
341 | break; |
342 | } else { |
343 | sum += (size_t)width; |
344 | inr -= width; |
345 | inp += width; |
346 | break; |
347 | } |
348 | } |
349 | nread += sum; |
350 | } else { |
351 | bcopy(src: inp, va_arg(ap, char *), n: width); |
352 | inr -= width; |
353 | inp += width; |
354 | nread += width; |
355 | nassigned++; |
356 | } |
357 | nconversions++; |
358 | break; |
359 | |
360 | case CT_CCL: { |
361 | /* scan a (nonempty) character class (sets NOSKIP) */ |
362 | if (width == 0) { |
363 | width = SSIZE_MAX; /* `infinity' */ |
364 | } |
365 | /* take only those things in the class */ |
366 | ptrdiff_t n; |
367 | if (flags & SUPPRESS) { |
368 | n = 0; |
369 | while (ccltab[(unsigned char)*inp]) { |
370 | n++; |
371 | inr--; |
372 | inp++; |
373 | if (--width == 0) { |
374 | break; |
375 | } |
376 | if (inr <= 0) { |
377 | if (n == 0) { |
378 | goto input_failure; |
379 | } |
380 | break; |
381 | } |
382 | } |
383 | if (n == 0) { |
384 | goto match_failure; |
385 | } |
386 | } else { |
387 | p0 = p = va_arg(ap, char *); |
388 | while (ccltab[(unsigned char)*inp]) { |
389 | inr--; |
390 | *p++ = *inp++; |
391 | if (--width == 0) { |
392 | break; |
393 | } |
394 | if (inr <= 0) { |
395 | if (p == p0) { |
396 | goto input_failure; |
397 | } |
398 | break; |
399 | } |
400 | } |
401 | n = p - p0; |
402 | if (n == 0) { |
403 | goto match_failure; |
404 | } |
405 | *p = 0; |
406 | nassigned++; |
407 | } |
408 | nread += n; |
409 | nconversions++; |
410 | break; |
411 | } |
412 | |
413 | case CT_STRING: |
414 | /* like CCL, but zero-length string OK, & no NOSKIP */ |
415 | if (width == 0) { |
416 | width = SSIZE_MAX; |
417 | } |
418 | if (flags & SUPPRESS) { |
419 | size_t n = 0; |
420 | while (!isspace(c: *inp)) { |
421 | n++; |
422 | inr--; |
423 | inp++; |
424 | if (--width == 0) { |
425 | break; |
426 | } |
427 | if (inr <= 0) { |
428 | break; |
429 | } |
430 | } |
431 | nread += n; |
432 | } else { |
433 | p0 = p = va_arg(ap, char *); |
434 | while (!isspace(c: *inp)) { |
435 | inr--; |
436 | *p++ = *inp++; |
437 | if (--width == 0) { |
438 | break; |
439 | } |
440 | if (inr <= 0) { |
441 | break; |
442 | } |
443 | } |
444 | *p = 0; |
445 | nread += p - p0; |
446 | nassigned++; |
447 | } |
448 | nconversions++; |
449 | continue; |
450 | |
451 | case CT_INT: |
452 | /* scan an integer as if by the conversion function */ |
453 | if (width <= 0 || width > (ssize_t)(sizeof(buf) - 1)) { |
454 | width = sizeof(buf) - 1; |
455 | } |
456 | flags |= SIGNOK | NDIGITS | NZDIGITS; |
457 | for (p = buf; width; width--) { |
458 | c = *inp; |
459 | /* |
460 | * Switch on the character; `goto ok' |
461 | * if we accept it as a part of number. |
462 | */ |
463 | switch (c) { |
464 | /* |
465 | * The digit 0 is always legal, but is |
466 | * special. For %i conversions, if no |
467 | * digits (zero or nonzero) have been |
468 | * scanned (only signs), we will have |
469 | * base==0. In that case, we should set |
470 | * it to 8 and enable 0x prefixing. |
471 | * Also, if we have not scanned zero digits |
472 | * before this, do not turn off prefixing |
473 | * (someone else will turn it off if we |
474 | * have scanned any nonzero digits). |
475 | */ |
476 | case '0': |
477 | if (base == 0) { |
478 | base = 8; |
479 | flags |= PFXOK; |
480 | } |
481 | if (flags & NZDIGITS) { |
482 | flags &= ~(SIGNOK | NZDIGITS | NDIGITS); |
483 | } else { |
484 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
485 | } |
486 | goto ok; |
487 | |
488 | /* 1 through 7 always legal */ |
489 | case '1': case '2': case '3': |
490 | case '4': case '5': case '6': case '7': |
491 | base = basefix[base]; |
492 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
493 | goto ok; |
494 | |
495 | /* digits 8 and 9 ok iff decimal or hex */ |
496 | case '8': case '9': |
497 | base = basefix[base]; |
498 | if (base <= 8) { |
499 | break; /* not legal here */ |
500 | } |
501 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
502 | goto ok; |
503 | |
504 | /* letters ok iff hex */ |
505 | case 'A': case 'B': case 'C': |
506 | case 'D': case 'E': case 'F': |
507 | case 'a': case 'b': case 'c': |
508 | case 'd': case 'e': case 'f': |
509 | /* no need to fix base here */ |
510 | if (base <= 10) { |
511 | break; /* not legal here */ |
512 | } |
513 | flags &= ~(SIGNOK | PFXOK | NDIGITS); |
514 | goto ok; |
515 | |
516 | /* sign ok only as first character */ |
517 | case '+': case '-': |
518 | if (flags & SIGNOK) { |
519 | flags &= ~SIGNOK; |
520 | goto ok; |
521 | } |
522 | break; |
523 | |
524 | /* x ok iff flag still set & 2nd char */ |
525 | case 'x': case 'X': |
526 | if (flags & PFXOK && p == buf + 1) { |
527 | base = 16; /* if %i */ |
528 | flags &= ~PFXOK; |
529 | goto ok; |
530 | } |
531 | break; |
532 | } |
533 | |
534 | /* |
535 | * If we got here, c is not a legal character |
536 | * for a number. Stop accumulating digits. |
537 | */ |
538 | break; |
539 | ok: |
540 | /* |
541 | * c is legal: store it and look at the next. |
542 | */ |
543 | *p++ = c; |
544 | if (--inr > 0) { |
545 | inp++; |
546 | } else { |
547 | break; /* end of input */ |
548 | } |
549 | } |
550 | /* |
551 | * If we had only a sign, it is no good; push |
552 | * back the sign. If the number ends in `x', |
553 | * it was [sign] '0' 'x', so push back the x |
554 | * and treat it as [sign] '0'. |
555 | */ |
556 | if (flags & NDIGITS) { |
557 | if (p > buf) { |
558 | inp--; |
559 | inr++; |
560 | } |
561 | goto match_failure; |
562 | } |
563 | c = p[-1]; |
564 | if (c == 'x' || c == 'X') { |
565 | --p; |
566 | inp--; |
567 | inr++; |
568 | } |
569 | if ((flags & SUPPRESS) == 0) { |
570 | u_quad_t res; |
571 | |
572 | *p = 0; |
573 | if ((flags & UNSIGNED) == 0) { |
574 | res = (u_quad_t)strtoq(buf, (char **)NULL, base); |
575 | } else { |
576 | res = strtouq(buf, (char **)NULL, base); |
577 | } |
578 | if (flags & POINTER) { |
579 | *va_arg(ap, void **) = |
580 | (void *)(uintptr_t)res; |
581 | } else if (flags & SHORTSHORT) { |
582 | *va_arg(ap, char *) = (char)res; |
583 | } else if (flags & SHORT) { |
584 | *va_arg(ap, short *) = (short)res; |
585 | } else if (flags & LONG) { |
586 | *va_arg(ap, long *) = (long)res; |
587 | } else if (flags & LONGLONG) { |
588 | *va_arg(ap, long long *) = (long long)res; |
589 | } else { |
590 | *va_arg(ap, int *) = (int)res; |
591 | } |
592 | nassigned++; |
593 | } |
594 | nread += p - buf; |
595 | nconversions++; |
596 | break; |
597 | } |
598 | } |
599 | input_failure: |
600 | return nconversions != 0 ? nassigned : -1; |
601 | match_failure: |
602 | return nassigned; |
603 | } |
604 | |
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table (256 entries) has a 1 wherever
 * characters should be considered part of the scanset.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	char v;
	int c, n;

	c = *fmt++;             /* first char hat => negated scanset */
	if (c == '^') {
		v = 1;          /* default => accept */
		c = *fmt++;     /* get new first char */
	} else {
		v = 0;          /* default => reject */
	}

	/* first `clear' the whole table */
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0) {
		return fmt - 1; /* format ended before closing ] */
	}
	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = (char)(1 - v);
	for (;;) {
		tab[c] = v;             /* take character c */
doswitch:
		n = *fmt++;
		switch (n) {
		case 0:                 /* format ended too soon */
			return fmt - 1;

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with `<', an equal pair
			 * such as `a-a' would enter the fill loop below and
			 * mark c + 1, which for c == 255 lies past the end
			 * of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;          /* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; c < n <= 255 holds here */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':               /* end of scanset */
			return fmt;

		default:                /* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
694 | |