scanf.c source code [xnu/libkern/stdio/scanf.c]

1	/*
2	* Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*-
29	* Copyright (c) 1990, 1993
30	* The Regents of the University of California. All rights reserved.
31	*
32	* This code is derived from software contributed to Berkeley by
33	* Chris Torek.
34	*
35	* Redistribution and use in source and binary forms, with or without
36	* modification, are permitted provided that the following conditions
37	* are met:
38	* 1. Redistributions of source code must retain the above copyright
39	* notice, this list of conditions and the following disclaimer.
40	* 2. Redistributions in binary form must reproduce the above copyright
41	* notice, this list of conditions and the following disclaimer in the
42	* documentation and/or other materials provided with the distribution.
43	* 3. All advertising materials mentioning features or use of this software
44	* must display the following acknowledgement:
45	* This product includes software developed by the University of
46	* California, Berkeley and its contributors.
47	* 4. Neither the name of the University nor the names of its contributors
48	* may be used to endorse or promote products derived from this software
49	* without specific prior written permission.
50	*
51	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61	* SUCH DAMAGE.
62	*/
63
64	#include <sys/cdefs.h>
65
66	#if 0 /* XXX coming soon */
67	#include <ctype.h>
68	#else
/*
 * Local stand-in for the <ctype.h> isspace(): returns nonzero when `c' is
 * one of the C-locale white-space characters (space, horizontal tab,
 * newline, vertical tab, form feed, carriage return), zero otherwise.
 *
 * Note: '\12' is the octal spelling of '\n', so testing both is redundant;
 * the remaining standard white-space characters are tested instead.
 */
static inline int
isspace(char c)
{
	return (c == ' ' || c == '\t' || c == '\n' ||
	    c == '\v' || c == '\f' || c == '\r');
}
74	#endif
75	#include <stdarg.h>
76	#include <string.h>
77	#include <sys/param.h>
78	#include <sys/systm.h>
79
#define	BUF		32	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 * (DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values.)
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
115
116	static const u_char __sccl(char* , const* u_char *);
117
/*
 * sscanf: scan the NUL-terminated string `ibuf' under control of the
 * format `fmt'.  This is a thin variadic front end: it packages its
 * trailing arguments into a va_list, hands them to vsscanf(), and returns
 * vsscanf()'s result unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = vsscanf(ibuf, fmt, ap);
	va_end(ap);
	return (ret);
}
129
130	int
131	vsscanf(const char inp, char* const *fmt0, va_list ap)
132	{
133	int inr;
134	const u_char fmt = (const* u_char *)fmt0;
135	int c; / character from format, or conversion /
136	size_t width; / field width, or 0 /
137	char p; /* points into all kinds of strings /
138	int n; / handy integer /
139	int flags; / flags as defined above /
140	char p0; /* saves original value of p when necessary /
141	int nassigned; / number of fields assigned /
142	int nconversions; / number of conversions /
143	int nread; / number of characters consumed from fp /
144	int base; / base argument to conversion function /
145	char ccltab[`256`]; / character class table for %[...] /
146	char buf[BUF]; / buffer for numeric conversions /
147
148	/ `basefix' is used to avoid `if' tests in the integer scanner /
149	static short basefix[`17`] =
150	{ `10`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16` };
151
152	inr = strlen(inp);
153
154	nassigned = `0`;
155	nconversions = `0`;
156	nread = `0`;
157	base = `0`; / XXX just to keep gcc happy /
158	for (;;) {
159	c = *fmt++;
160	if (c == `0`)
161	return (nassigned);
162	if (isspace(c)) {
163	while (inr > `0` && isspace(*inp)) {
164	nread++;
165	inr--;
166	inp++;
167	}
168	continue;
169	}
170	if (c != `'%'`)
171	goto literal;
172	width = `0`;
173	flags = `0`;
174	/*
175	* switch on the format. continue if done;
176	* break once format type is derived.
177	*/
178	again: c = *fmt++;
179	switch (c) {
180	case `'%'`:
181	literal:
182	if (inr <= `0`)
183	goto input_failure;
184	if (*inp != c)
185	goto match_failure;
186	inr--;
187	inp++;
188	nread++;
189	continue;
190
191	case `'*'`:
192	flags \|= SUPPRESS;
193	goto again;
194	case `'l'`:
195	if (flags & LONG) {
196	flags &= ~LONG;
197	flags \|= LONGLONG;
198	} else
199	flags \|= LONG;
200	goto again;
201	case `'q'`:
202	flags \|= LONGLONG; / not quite /
203	goto again;
204	case `'h'`:
205	if (flags & SHORT) {
206	flags &= ~SHORT;
207	flags \|= SHORTSHORT;
208	} else
209	flags \|= SHORT;
210	goto again;
211
212	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
213	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`:
214	width = width * `10` + c - `'0'`;
215	goto again;
216
217	/*
218	* Conversions.
219	*/
220	case `'d'`:
221	c = CT_INT;
222	base = `10`;
223	break;
224
225	case `'i'`:
226	c = CT_INT;
227	base = `0`;
228	break;
229
230	case `'o'`:
231	c = CT_INT;
232	flags \|= UNSIGNED;
233	base = `8`;
234	break;
235
236	case `'u'`:
237	c = CT_INT;
238	flags \|= UNSIGNED;
239	base = `10`;
240	break;
241
242	case `'X'`:
243	case `'x'`:
244	flags \|= PFXOK; / enable 0x prefixing /
245	c = CT_INT;
246	flags \|= UNSIGNED;
247	base = `16`;
248	break;
249
250	case `'s'`:
251	c = CT_STRING;
252	break;
253
254	case `'['`:
255	fmt = __sccl(ccltab, fmt);
256	flags \|= NOSKIP;
257	c = CT_CCL;
258	break;
259
260	case `'c'`:
261	flags \|= NOSKIP;
262	c = CT_CHAR;
263	break;
264
265	case `'p'`: / pointer format is like hex /
266	flags \|= POINTER \| PFXOK;
267	c = CT_INT;
268	flags \|= UNSIGNED;
269	base = `16`;
270	break;
271
272	case `'n'`:
273	nconversions++;
274	if (flags & SUPPRESS) / ??? /
275	continue;
276	if (flags & SHORTSHORT)
277	va_arg(ap, char* *) = nread;
278	else if (flags & SHORT)
279	va_arg(ap, short* *) = nread;
280	else if (flags & LONG)
281	va_arg(ap, long* *) = nread;
282	else if (flags & LONGLONG)
283	va_arg(ap, long* long *) = nread;
284	else
285	va_arg(ap, int* *) = nread;
286	continue;
287	}
288
289	/*
290	* We have a conversion that requires input.
291	*/
292	if (inr <= `0`)
293	goto input_failure;
294
295	/*
296	* Consume leading white space, except for formats
297	* that suppress this.
298	*/
299	if ((flags & NOSKIP) == `0`) {
300	while (isspace(*inp)) {
301	nread++;
302	if (--inr > `0`)
303	inp++;
304	else
305	goto input_failure;
306	}
307	/*
308	* Note that there is at least one character in
309	* the buffer, so conversions that do not set NOSKIP
310	* can no longer result in an input failure.
311	*/
312	}
313
314	/*
315	* Do the conversion.
316	*/
317	switch (c) {
318
319	case CT_CHAR:
320	/ scan arbitrary characters (sets NOSKIP) /
321	if (width == `0`)
322	width = `1`;
323	if (flags & SUPPRESS) {
324	size_t sum = `0`;
325	for (;;) {
326	if ((n = inr) < (int)width) {
327	sum += n;
328	width -= n;
329	inp += n;
330	if (sum == `0`)
331	goto input_failure;
332	break;
333	} else {
334	sum += width;
335	inr -= width;
336	inp += width;
337	break;
338	}
339	}
340	nread += sum;
341	} else {
342	bcopy(inp, va_arg(ap, char *), width);
343	inr -= width;
344	inp += width;
345	nread += width;
346	nassigned++;
347	}
348	nconversions++;
349	break;
350
351	case CT_CCL:
352	/ scan a (nonempty) character class (sets NOSKIP) /
353	if (width == `0`)
354	width = (size_t)~`0`; / `infinity' /
355	/ take only those things in the class /
356	if (flags & SUPPRESS) {
357	n = `0`;
358	while (ccltab[(unsigned char)*inp]) {
359	n++;
360	inr--;
361	inp++;
362	if (--width == `0`)
363	break;
364	if (inr <= `0`) {
365	if (n == `0`)
366	goto input_failure;
367	break;
368	}
369	}
370	if (n == `0`)
371	goto match_failure;
372	} else {
373	p0 = p = va_arg(ap, char *);
374	while (ccltab[(unsigned char)*inp]) {
375	inr--;
376	p++ = inp++;
377	if (--width == `0`)
378	break;
379	if (inr <= `0`) {
380	if (p == p0)
381	goto input_failure;
382	break;
383	}
384	}
385	n = p - p0;
386	if (n == `0`)
387	goto match_failure;
388	*p = `0`;
389	nassigned++;
390	}
391	nread += n;
392	nconversions++;
393	break;
394
395	case CT_STRING:
396	/ like CCL, but zero-length string OK, & no NOSKIP /
397	if (width == `0`)
398	width = (size_t)~`0`;
399	if (flags & SUPPRESS) {
400	n = `0`;
401	while (!isspace(*inp)) {
402	n++;
403	inr--;
404	inp++;
405	if (--width == `0`)
406	break;
407	if (inr <= `0`)
408	break;
409	}
410	nread += n;
411	} else {
412	p0 = p = va_arg(ap, char *);
413	while (!isspace(*inp)) {
414	inr--;
415	p++ = inp++;
416	if (--width == `0`)
417	break;
418	if (inr <= `0`)
419	break;
420	}
421	*p = `0`;
422	nread += p - p0;
423	nassigned++;
424	}
425	nconversions++;
426	continue;
427
428	case CT_INT:
429	/ scan an integer as if by the conversion function /
430	#ifdef hardway
431	if (width == `0` \|\| width > sizeof(buf) - `1`)
432	width = sizeof(buf) - `1`;
433	#else
434	/ size_t is unsigned, hence this optimisation /
435	if (--width > sizeof(buf) - `2`)
436	width = sizeof(buf) - `2`;
437	width++;
438	#endif
439	flags \|= SIGNOK \| NDIGITS \| NZDIGITS;
440	for (p = buf; width; width--) {
441	c = *inp;
442	/*
443	* Switch on the character; `goto ok'
444	* if we accept it as a part of number.
445	*/
446	switch (c) {
447
448	/*
449	* The digit 0 is always legal, but is
450	* special. For %i conversions, if no
451	* digits (zero or nonzero) have been
452	* scanned (only signs), we will have
453	* base==0. In that case, we should set
454	* it to 8 and enable 0x prefixing.
455	* Also, if we have not scanned zero digits
456	* before this, do not turn off prefixing
457	* (someone else will turn it off if we
458	* have scanned any nonzero digits).
459	*/
460	case `'0'`:
461	if (base == `0`) {
462	base = `8`;
463	flags \|= PFXOK;
464	}
465	if (flags & NZDIGITS)
466	flags &= ~(SIGNOK\|NZDIGITS\|NDIGITS);
467	else
468	flags &= ~(SIGNOK\|PFXOK\|NDIGITS);
469	goto ok;
470
471	/ 1 through 7 always legal /
472	case `'1'`: case `'2'`: case `'3'`:
473	case `'4'`: case `'5'`: case `'6'`: case `'7'`:
474	base = basefix[base];
475	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
476	goto ok;
477
478	/ digits 8 and 9 ok iff decimal or hex /
479	case `'8'`: case `'9'`:
480	base = basefix[base];
481	if (base <= `8`)
482	break; / not legal here /
483	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
484	goto ok;
485
486	/ letters ok iff hex /
487	case `'A'`: case `'B'`: case `'C'`:
488	case `'D'`: case `'E'`: case `'F'`:
489	case `'a'`: case `'b'`: case `'c'`:
490	case `'d'`: case `'e'`: case `'f'`:
491	/ no need to fix base here /
492	if (base <= `10`)
493	break; / not legal here /
494	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
495	goto ok;
496
497	/ sign ok only as first character /
498	case `'+'`: case `'-'`:
499	if (flags & SIGNOK) {
500	flags &= ~SIGNOK;
501	goto ok;
502	}
503	break;
504
505	/ x ok iff flag still set & 2nd char /
506	case `'x'`: case `'X'`:
507	if (flags & PFXOK && p == buf + `1`) {
508	base = `16`; / if %i /
509	flags &= ~PFXOK;
510	goto ok;
511	}
512	break;
513	}
514
515	/*
516	* If we got here, c is not a legal character
517	* for a number. Stop accumulating digits.
518	*/
519	break;
520	ok:
521	/*
522	* c is legal: store it and look at the next.
523	*/
524	*p++ = c;
525	if (--inr > `0`)
526	inp++;
527	else
528	break; / end of input /
529	}
530	/*
531	* If we had only a sign, it is no good; push
532	* back the sign. If the number ends in `x',
533	* it was [sign] '0' 'x', so push back the x
534	* and treat it as [sign] '0'.
535	*/
536	if (flags & NDIGITS) {
537	if (p > buf) {
538	inp--;
539	inr++;
540	}
541	goto match_failure;
542	}
543	c = ((u_char *)p)[-`1`];
544	if (c == `'x'` \|\| c == `'X'`) {
545	--p;
546	inp--;
547	inr++;
548	}
549	if ((flags & SUPPRESS) == `0`) {
550	u_quad_t res;
551
552	*p = `0`;
553	if ((flags & UNSIGNED) == `0`)
554	res = strtoq(buf, (char **)NULL, base);
555	else
556	res = strtouq(buf, (char **)NULL, base);
557	if (flags & POINTER)
558	va_arg(ap, void* **) =
559	(void *)(uintptr_t)res;
560	else if (flags & SHORTSHORT)
561	va_arg(ap, char* *) = res;
562	else if (flags & SHORT)
563	va_arg(ap, short* *) = res;
564	else if (flags & LONG)
565	va_arg(ap, long* *) = res;
566	else if (flags & LONGLONG)
567	va_arg(ap, long* long *) = res;
568	else
569	va_arg(ap, int* *) = res;
570	nassigned++;
571	}
572	nread += p - buf;
573	nconversions++;
574	break;
575
576	}
577	}
578	input_failure:
579	return (nconversions != `0` ? nassigned : -`1`);
580	match_failure:
581	return (nassigned);
582	}
583
584	/*
585	* Fill in the given table from the scanset at the given format
586	* (just after `['). Return a pointer to the character past the
587	* closing `]'. The table has a 1 wherever characters should be
588	* considered part of the scanset.
589	*/
590	static const u_char *
591	__sccl(char tab, const* u_char *fmt)
592	{
593	int c, n, v;
594
595	/ first `clear' the whole table /
596	c = fmt++; /* first char hat => negated scanset /
597	if (c == `'^'`) {
598	v = `1`; / default => accept /
599	c = fmt++; /* get new first char /
600	} else
601	v = `0`; / default => reject /
602
603	/ XXX: Will not work if sizeof(tab) > sizeof(char) /*
604	(void) memset(tab, v, `256`);
605
606	if (c == `0`)
607	return (fmt - `1`);/ format ended before closing ] /
608
609	/*
610	* Now set the entries corresponding to the actual scanset
611	* to the opposite of the above.
612	*
613	* The first character may be ']' (or '-') without being special;
614	* the last character may be '-'.
615	*/
616	v = `1` - v;
617	for (;;) {
618	tab[c] = v; / take character c /
619	doswitch:
620	n = fmt++; /* and examine the next /
621	switch (n) {
622
623	case `0`: / format ended too soon /
624	return (fmt - `1`);
625
626	case `'-'`:
627	/*
628	* A scanset of the form
629	* [01+-]
630	* is defined as `the digit 0, the digit 1,
631	* the character +, the character -', but
632	* the effect of a scanset such as
633	* [a-zA-Z0-9]
634	* is implementation defined. The V7 Unix
635	* scanf treats `a-z' as `the letters a through
636	* z', but treats `a-a' as `the letter a, the
637	* character -, and the letter a'.
638	*
639	* For compatibility, the `-' is not considerd
640	* to define a range if the character following
641	* it is either a close bracket (required by ANSI)
642	* or is not numerically greater than the character
643	* we just stored in the table (c).
644	*/
645	n = *fmt;
646	if (n == `']'` \|\| n < c) {
647	c = `'-'`;
648	break; / resume the for(;;) /
649	}
650	fmt++;
651	/ fill in the range /
652	do {
653	tab[++c] = v;
654	} while (c < n);
655	c = n;
656	/*
657	* Alas, the V7 Unix scanf also treats formats
658	* such as [a-c-e] as `the letters a through e'.
659	* This too is permitted by the standard....
660	*/
661	goto doswitch;
662
663	case `']'`: / end of scanset /
664	return (fmt);
665
666	default: / just another character /
667	c = n;
668	break;
669	}
670	}
671	/ NOTREACHED /
672	}
673

Browse the source code of xnu/libkern/stdio/scanf.c