1/*
2 * Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*-
29 * Copyright (c) 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from software contributed to Berkeley by
33 * Chris Torek.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64#include <stdarg.h>
65#include <stddef.h>
66#include <string.h>
67#include <sys/cdefs.h>
68#include <sys/param.h>
69
70quad_t strtoq(const char *, char **, int);
71u_quad_t strtouq(const char *, char **, int);
72
/*
 * isspace: local replacement for the <ctype.h> classifier.
 *
 * Returns 1 when `c' is one of the C-locale white-space characters
 * (space, horizontal tab, newline, vertical tab, form feed, carriage
 * return) and 0 otherwise.
 *
 * The previous version tested '\12', which is just the octal spelling of
 * '\n', so carriage return, form feed and vertical tab were never
 * recognised as white space.
 */
static inline int
isspace(char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
		return 1;
	default:
		return 0;
	}
}
78
#define BUF             32              /* Maximum length of numeric string. */

/*
 * Flags used during conversion.  Each flag occupies its own bit of the
 * `flags' word, so they are spelled as shifts to make the bit position
 * explicit.
 */
#define LONG            (1 << 0)        /* l: long or double */
#define SHORT           (1 << 2)        /* h: short */
#define SUPPRESS        (1 << 3)        /* *: suppress assignment */
#define POINTER         (1 << 4)        /* p: void * (as hex) */
#define NOSKIP          (1 << 5)        /* [ or c: do not skip blanks */
#define LONGLONG        (1 << 10)       /* ll: long long (+ deprecated q: quad) */
#define SHORTSHORT      (1 << 14)       /* hh: char */
#define UNSIGNED        (1 << 15)       /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 * The floating point and integral sets deliberately share bits 8 and 9.
 */
#define SIGNOK          (1 << 6)        /* +/- is (still) legal */
#define NDIGITS         (1 << 7)        /* no digits detected */

#define DPTOK           (1 << 8)        /* (float) decimal point is still legal */
#define EXPOK           (1 << 9)        /* (float) exponent (e+3, etc) still legal */

#define PFXOK           (1 << 8)        /* 0x prefix is (still) legal */
#define NZDIGITS        (1 << 9)        /* no zero digits detected */

/*
 * Conversion types.
 */
#define CT_CHAR         0               /* %c conversion */
#define CT_CCL          1               /* %[...] conversion */
#define CT_STRING       2               /* %s conversion */
#define CT_INT          3               /* %[dioupxX] conversion */
114
115static const u_char *__sccl(char *, const u_char *);
116
117int sscanf(const char *, const char *, ...);
118int vsscanf(const char *, char const *, va_list);
119
/*
 * sscanf: variadic front end for vsscanf().
 *
 * Packages the trailing arguments into a va_list, hands the input string
 * `ibuf' and the format `fmt' to vsscanf(), and returns whatever
 * vsscanf() returned.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	const int nfields = vsscanf(ibuf, fmt, args);
	va_end(args);

	return nfields;
}
131
132int
133vsscanf(const char *inp, char const *fmt0, va_list ap)
134{
135 ssize_t inr;
136 const u_char *fmt = (const u_char *)fmt0;
137 ssize_t width; /* field width, or 0 */
138 char *p; /* points into all kinds of strings */
139 int flags; /* flags as defined above */
140 char *p0; /* saves original value of p when necessary */
141 int nassigned = 0; /* number of fields assigned */
142 int nconversions = 0; /* number of conversions */
143 int nread = 0; /* number of characters consumed from fp */
144 int base = 0; /* base argument to conversion function */
145 char ccltab[256]; /* character class table for %[...] */
146 char buf[BUF]; /* buffer for numeric conversions */
147
148 /* `basefix' is used to avoid `if' tests in the integer scanner */
149 static short basefix[17] =
150 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
151
152 inr = (ssize_t)strlen(s: inp);
153
154 for (;;) {
155 char c = (char)*fmt++; /* character from format, or conversion */
156 if (c == 0) {
157 return nassigned;
158 }
159 if (isspace(c)) {
160 while (inr > 0 && isspace(c: *inp)) {
161 nread++;
162 inr--;
163 inp++;
164 }
165 continue;
166 }
167 if (c != '%') {
168 goto literal;
169 }
170 width = 0;
171 flags = 0;
172 /*
173 * switch on the format. continue if done;
174 * break once format type is derived.
175 */
176again:
177 c = (char)*fmt++;
178 switch (c) {
179 case '%':
180literal:
181 if (inr <= 0) {
182 goto input_failure;
183 }
184 if (*inp != c) {
185 goto match_failure;
186 }
187 inr--;
188 inp++;
189 nread++;
190 continue;
191
192 case '*':
193 flags |= SUPPRESS;
194 goto again;
195 case 'l':
196 if (flags & LONG) {
197 flags &= ~LONG;
198 flags |= LONGLONG;
199 } else {
200 flags |= LONG;
201 }
202 goto again;
203 case 'q':
204 flags |= LONGLONG; /* not quite */
205 goto again;
206 case 'h':
207 if (flags & SHORT) {
208 flags &= ~SHORT;
209 flags |= SHORTSHORT;
210 } else {
211 flags |= SHORT;
212 }
213 goto again;
214
215 case '0': case '1': case '2': case '3': case '4':
216 case '5': case '6': case '7': case '8': case '9':
217 width = width * 10 + c - '0';
218 goto again;
219
220 /*
221 * Conversions.
222 */
223 case 'd':
224 c = CT_INT;
225 base = 10;
226 break;
227
228 case 'i':
229 c = CT_INT;
230 base = 0;
231 break;
232
233 case 'o':
234 c = CT_INT;
235 flags |= UNSIGNED;
236 base = 8;
237 break;
238
239 case 'u':
240 c = CT_INT;
241 flags |= UNSIGNED;
242 base = 10;
243 break;
244
245 case 'X':
246 case 'x':
247 flags |= PFXOK; /* enable 0x prefixing */
248 c = CT_INT;
249 flags |= UNSIGNED;
250 base = 16;
251 break;
252
253 case 's':
254 c = CT_STRING;
255 break;
256
257 case '[':
258 fmt = __sccl(ccltab, fmt);
259 flags |= NOSKIP;
260 c = CT_CCL;
261 break;
262
263 case 'c':
264 flags |= NOSKIP;
265 c = CT_CHAR;
266 break;
267
268 case 'p': /* pointer format is like hex */
269 flags |= POINTER | PFXOK;
270 c = CT_INT;
271 flags |= UNSIGNED;
272 base = 16;
273 break;
274
275 case 'n':
276 nconversions++;
277 if (flags & SUPPRESS) { /* ??? */
278 continue;
279 }
280 if (flags & SHORTSHORT) {
281 *va_arg(ap, char *) = (char)nread;
282 } else if (flags & SHORT) {
283 *va_arg(ap, short *) = (short)nread;
284 } else if (flags & LONG) {
285 *va_arg(ap, long *) = (long)nread;
286 } else if (flags & LONGLONG) {
287 *va_arg(ap, long long *) = (long long)nread;
288 } else {
289 *va_arg(ap, int *) = (int)nread;
290 }
291 continue;
292 }
293
294 /*
295 * We have a conversion that requires input.
296 */
297 if (inr <= 0) {
298 goto input_failure;
299 }
300
301 /*
302 * Consume leading white space, except for formats
303 * that suppress this.
304 */
305 if ((flags & NOSKIP) == 0) {
306 while (isspace(c: *inp)) {
307 nread++;
308 if (--inr > 0) {
309 inp++;
310 } else {
311 goto input_failure;
312 }
313 }
314 /*
315 * Note that there is at least one character in
316 * the buffer, so conversions that do not set NOSKIP
317 * can no longer result in an input failure.
318 */
319 }
320
321 /*
322 * Do the conversion.
323 */
324 switch (c) {
325 case CT_CHAR:
326 /* scan arbitrary characters (sets NOSKIP) */
327 if (width == 0) {
328 width = 1;
329 }
330 if (flags & SUPPRESS) {
331 size_t sum = 0;
332 for (;;) {
333 ssize_t n = inr;
334 if (n < width) {
335 sum += (size_t)n;
336 width -= n;
337 inp += n;
338 if (sum == 0) {
339 goto input_failure;
340 }
341 break;
342 } else {
343 sum += (size_t)width;
344 inr -= width;
345 inp += width;
346 break;
347 }
348 }
349 nread += sum;
350 } else {
351 bcopy(src: inp, va_arg(ap, char *), n: width);
352 inr -= width;
353 inp += width;
354 nread += width;
355 nassigned++;
356 }
357 nconversions++;
358 break;
359
360 case CT_CCL: {
361 /* scan a (nonempty) character class (sets NOSKIP) */
362 if (width == 0) {
363 width = SSIZE_MAX; /* `infinity' */
364 }
365 /* take only those things in the class */
366 ptrdiff_t n;
367 if (flags & SUPPRESS) {
368 n = 0;
369 while (ccltab[(unsigned char)*inp]) {
370 n++;
371 inr--;
372 inp++;
373 if (--width == 0) {
374 break;
375 }
376 if (inr <= 0) {
377 if (n == 0) {
378 goto input_failure;
379 }
380 break;
381 }
382 }
383 if (n == 0) {
384 goto match_failure;
385 }
386 } else {
387 p0 = p = va_arg(ap, char *);
388 while (ccltab[(unsigned char)*inp]) {
389 inr--;
390 *p++ = *inp++;
391 if (--width == 0) {
392 break;
393 }
394 if (inr <= 0) {
395 if (p == p0) {
396 goto input_failure;
397 }
398 break;
399 }
400 }
401 n = p - p0;
402 if (n == 0) {
403 goto match_failure;
404 }
405 *p = 0;
406 nassigned++;
407 }
408 nread += n;
409 nconversions++;
410 break;
411 }
412
413 case CT_STRING:
414 /* like CCL, but zero-length string OK, & no NOSKIP */
415 if (width == 0) {
416 width = SSIZE_MAX;
417 }
418 if (flags & SUPPRESS) {
419 size_t n = 0;
420 while (!isspace(c: *inp)) {
421 n++;
422 inr--;
423 inp++;
424 if (--width == 0) {
425 break;
426 }
427 if (inr <= 0) {
428 break;
429 }
430 }
431 nread += n;
432 } else {
433 p0 = p = va_arg(ap, char *);
434 while (!isspace(c: *inp)) {
435 inr--;
436 *p++ = *inp++;
437 if (--width == 0) {
438 break;
439 }
440 if (inr <= 0) {
441 break;
442 }
443 }
444 *p = 0;
445 nread += p - p0;
446 nassigned++;
447 }
448 nconversions++;
449 continue;
450
451 case CT_INT:
452 /* scan an integer as if by the conversion function */
453 if (width <= 0 || width > (ssize_t)(sizeof(buf) - 1)) {
454 width = sizeof(buf) - 1;
455 }
456 flags |= SIGNOK | NDIGITS | NZDIGITS;
457 for (p = buf; width; width--) {
458 c = *inp;
459 /*
460 * Switch on the character; `goto ok'
461 * if we accept it as a part of number.
462 */
463 switch (c) {
464 /*
465 * The digit 0 is always legal, but is
466 * special. For %i conversions, if no
467 * digits (zero or nonzero) have been
468 * scanned (only signs), we will have
469 * base==0. In that case, we should set
470 * it to 8 and enable 0x prefixing.
471 * Also, if we have not scanned zero digits
472 * before this, do not turn off prefixing
473 * (someone else will turn it off if we
474 * have scanned any nonzero digits).
475 */
476 case '0':
477 if (base == 0) {
478 base = 8;
479 flags |= PFXOK;
480 }
481 if (flags & NZDIGITS) {
482 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
483 } else {
484 flags &= ~(SIGNOK | PFXOK | NDIGITS);
485 }
486 goto ok;
487
488 /* 1 through 7 always legal */
489 case '1': case '2': case '3':
490 case '4': case '5': case '6': case '7':
491 base = basefix[base];
492 flags &= ~(SIGNOK | PFXOK | NDIGITS);
493 goto ok;
494
495 /* digits 8 and 9 ok iff decimal or hex */
496 case '8': case '9':
497 base = basefix[base];
498 if (base <= 8) {
499 break; /* not legal here */
500 }
501 flags &= ~(SIGNOK | PFXOK | NDIGITS);
502 goto ok;
503
504 /* letters ok iff hex */
505 case 'A': case 'B': case 'C':
506 case 'D': case 'E': case 'F':
507 case 'a': case 'b': case 'c':
508 case 'd': case 'e': case 'f':
509 /* no need to fix base here */
510 if (base <= 10) {
511 break; /* not legal here */
512 }
513 flags &= ~(SIGNOK | PFXOK | NDIGITS);
514 goto ok;
515
516 /* sign ok only as first character */
517 case '+': case '-':
518 if (flags & SIGNOK) {
519 flags &= ~SIGNOK;
520 goto ok;
521 }
522 break;
523
524 /* x ok iff flag still set & 2nd char */
525 case 'x': case 'X':
526 if (flags & PFXOK && p == buf + 1) {
527 base = 16; /* if %i */
528 flags &= ~PFXOK;
529 goto ok;
530 }
531 break;
532 }
533
534 /*
535 * If we got here, c is not a legal character
536 * for a number. Stop accumulating digits.
537 */
538 break;
539ok:
540 /*
541 * c is legal: store it and look at the next.
542 */
543 *p++ = c;
544 if (--inr > 0) {
545 inp++;
546 } else {
547 break; /* end of input */
548 }
549 }
550 /*
551 * If we had only a sign, it is no good; push
552 * back the sign. If the number ends in `x',
553 * it was [sign] '0' 'x', so push back the x
554 * and treat it as [sign] '0'.
555 */
556 if (flags & NDIGITS) {
557 if (p > buf) {
558 inp--;
559 inr++;
560 }
561 goto match_failure;
562 }
563 c = p[-1];
564 if (c == 'x' || c == 'X') {
565 --p;
566 inp--;
567 inr++;
568 }
569 if ((flags & SUPPRESS) == 0) {
570 u_quad_t res;
571
572 *p = 0;
573 if ((flags & UNSIGNED) == 0) {
574 res = (u_quad_t)strtoq(buf, (char **)NULL, base);
575 } else {
576 res = strtouq(buf, (char **)NULL, base);
577 }
578 if (flags & POINTER) {
579 *va_arg(ap, void **) =
580 (void *)(uintptr_t)res;
581 } else if (flags & SHORTSHORT) {
582 *va_arg(ap, char *) = (char)res;
583 } else if (flags & SHORT) {
584 *va_arg(ap, short *) = (short)res;
585 } else if (flags & LONG) {
586 *va_arg(ap, long *) = (long)res;
587 } else if (flags & LONGLONG) {
588 *va_arg(ap, long long *) = (long long)res;
589 } else {
590 *va_arg(ap, int *) = (int)res;
591 }
592 nassigned++;
593 }
594 nread += p - buf;
595 nconversions++;
596 break;
597 }
598 }
599input_failure:
600 return nconversions != 0 ? nassigned : -1;
601match_failure:
602 return nassigned;
603}
604
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 * If the format ends before the closing `]', the returned pointer
 * addresses the terminating NUL instead.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	char v;

	/* first `clear' the whole table */
	int c = *fmt++;         /* first char hat => negated scanset */
	if (c == '^') {
		v = 1;          /* default => accept */
		c = *fmt++;     /* get new first char */
	} else {
		v = 0;          /* default => reject */
	}
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0) {
		return fmt - 1; /* format ended before closing ] */
	}
	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		int n;
		tab[c] = v;     /* take character c */
doswitch:
		n = *fmt++;
		switch (n) {
		case 0:         /* format ended too soon */
			return fmt - 1;

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 */
			n = *fmt;
			/*
			 * `<=' rather than `<': with n == c there is no
			 * range to fill, and filling anyway would mark
			 * c + 1 (and write tab[256] when c is 255).
			 */
			if (n == ']' || n <= c) {
				c = '-';
				break;  /* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c + 1 through n) */
			while (c < n) {
				tab[++c] = v;
			}
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':       /* end of scanset */
			return fmt;

		default:        /* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
694