scanf.c source code [xnu/libkern/stdio/scanf.c]

1	/*
2	* Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*-
29	* Copyright (c) 1990, 1993
30	* The Regents of the University of California. All rights reserved.
31	*
32	* This code is derived from software contributed to Berkeley by
33	* Chris Torek.
34	*
35	* Redistribution and use in source and binary forms, with or without
36	* modification, are permitted provided that the following conditions
37	* are met:
38	* 1. Redistributions of source code must retain the above copyright
39	* notice, this list of conditions and the following disclaimer.
40	* 2. Redistributions in binary form must reproduce the above copyright
41	* notice, this list of conditions and the following disclaimer in the
42	* documentation and/or other materials provided with the distribution.
43	* 3. All advertising materials mentioning features or use of this software
44	* must display the following acknowledgement:
45	* This product includes software developed by the University of
46	* California, Berkeley and its contributors.
47	* 4. Neither the name of the University nor the names of its contributors
48	* may be used to endorse or promote products derived from this software
49	* without specific prior written permission.
50	*
51	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61	* SUCH DAMAGE.
62	*/
63
64	#include <stdarg.h>
65	#include <stddef.h>
66	#include <string.h>
67	#include <sys/cdefs.h>
68	#include <sys/param.h>
69
70	quad_t strtoq(const char , char* *, int*);
71	u_quad_t strtouq(const char , char* *, int*);
72
/*
 * Return nonzero when `c' is one of the six "C"-locale white-space
 * characters: space, horizontal tab, newline, vertical tab, form feed or
 * carriage return; return zero otherwise.
 *
 * The earlier version tested '\n' twice (once spelled '\12') and so never
 * recognised '\r', '\f' or '\v'.
 */
static inline int
isspace(char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
		return 1;
	default:
		return 0;
	}
}
78
#define BUF             32      /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define LONG            0x01    /* l: long or double */
#define SHORT           0x04    /* h: short */
#define SUPPRESS        0x08    /* *: suppress assignment */
#define POINTER         0x10    /* p: void * (as hex) */
#define NOSKIP          0x20    /* [ or c: do not skip blanks */
#define LONGLONG        0x400   /* ll: long long (+ deprecated q: quad) */
#define SHORTSHORT      0x4000  /* hh: char */
#define UNSIGNED        0x8000  /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values.
 */
#define SIGNOK          0x40    /* +/- is (still) legal */
#define NDIGITS         0x80    /* no digits detected */

#define DPTOK           0x100   /* (float) decimal point is still legal */
#define EXPOK           0x200   /* (float) exponent (e+3, etc) still legal */

#define PFXOK           0x100   /* 0x prefix is (still) legal */
#define NZDIGITS        0x200   /* no zero digits detected */

/*
 * Conversion types.
 */
#define CT_CHAR         0       /* %c conversion */
#define CT_CCL          1       /* %[...] conversion */
#define CT_STRING       2       /* %s conversion */
#define CT_INT          3       /* %[dioupxX] conversion */
114
115	static const u_char __sccl(char* , const* u_char *);
116
117	int sscanf(const char , const* char *, ...);
118	int vsscanf(const char , char* const *, va_list);
119
/*
 * sscanf --
 *	Variadic front end for vsscanf(): collect the trailing arguments
 *	into a va_list and hand the whole job to vsscanf().
 *
 *	ibuf	NUL-terminated input string to scan
 *	fmt	scanf-style format string
 *	...	pointers that receive the converted values
 *
 *	Returns whatever vsscanf() returns.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = vsscanf(ibuf, fmt, ap);
	va_end(ap);
	return ret;
}
131
132	int
133	vsscanf(const char inp, char* const *fmt0, va_list ap)
134	{
135	ssize_t inr;
136	const u_char fmt = (const* u_char *)fmt0;
137	ssize_t width; / field width, or 0 /
138	char p; /* points into all kinds of strings /
139	int flags; / flags as defined above /
140	char p0; /* saves original value of p when necessary /
141	int nassigned = `0`; / number of fields assigned /
142	int nconversions = `0`; / number of conversions /
143	int nread = `0`; / number of characters consumed from fp /
144	int base = `0`; / base argument to conversion function /
145	char ccltab[`256`]; / character class table for %[...] /
146	char buf[BUF]; / buffer for numeric conversions /
147
148	/ `basefix' is used to avoid `if' tests in the integer scanner /
149	static short basefix[`17`] =
150	{ `10`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`, `16` };
151
152	inr = (ssize_t)strlen(s: inp);
153
154	for (;;) {
155	char c = (char)fmt++; /* character from format, or conversion /
156	if (c == `0`) {
157	return nassigned;
158	}
159	if (isspace(c)) {
160	while (inr > `0` && isspace(c: *inp)) {
161	nread++;
162	inr--;
163	inp++;
164	}
165	continue;
166	}
167	if (c != `'%'`) {
168	goto literal;
169	}
170	width = `0`;
171	flags = `0`;
172	/*
173	* switch on the format. continue if done;
174	* break once format type is derived.
175	*/
176	again:
177	c = (char)*fmt++;
178	switch (c) {
179	case `'%'`:
180	literal:
181	if (inr <= `0`) {
182	goto input_failure;
183	}
184	if (*inp != c) {
185	goto match_failure;
186	}
187	inr--;
188	inp++;
189	nread++;
190	continue;
191
192	case `'*'`:
193	flags \|= SUPPRESS;
194	goto again;
195	case `'l'`:
196	if (flags & LONG) {
197	flags &= ~LONG;
198	flags \|= LONGLONG;
199	} else {
200	flags \|= LONG;
201	}
202	goto again;
203	case `'q'`:
204	flags \|= LONGLONG; / not quite /
205	goto again;
206	case `'h'`:
207	if (flags & SHORT) {
208	flags &= ~SHORT;
209	flags \|= SHORTSHORT;
210	} else {
211	flags \|= SHORT;
212	}
213	goto again;
214
215	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
216	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`:
217	width = width * `10` + c - `'0'`;
218	goto again;
219
220	/*
221	* Conversions.
222	*/
223	case `'d'`:
224	c = CT_INT;
225	base = `10`;
226	break;
227
228	case `'i'`:
229	c = CT_INT;
230	base = `0`;
231	break;
232
233	case `'o'`:
234	c = CT_INT;
235	flags \|= UNSIGNED;
236	base = `8`;
237	break;
238
239	case `'u'`:
240	c = CT_INT;
241	flags \|= UNSIGNED;
242	base = `10`;
243	break;
244
245	case `'X'`:
246	case `'x'`:
247	flags \|= PFXOK; / enable 0x prefixing /
248	c = CT_INT;
249	flags \|= UNSIGNED;
250	base = `16`;
251	break;
252
253	case `'s'`:
254	c = CT_STRING;
255	break;
256
257	case `'['`:
258	fmt = __sccl(ccltab, fmt);
259	flags \|= NOSKIP;
260	c = CT_CCL;
261	break;
262
263	case `'c'`:
264	flags \|= NOSKIP;
265	c = CT_CHAR;
266	break;
267
268	case `'p'`: / pointer format is like hex /
269	flags \|= POINTER \| PFXOK;
270	c = CT_INT;
271	flags \|= UNSIGNED;
272	base = `16`;
273	break;
274
275	case `'n'`:
276	nconversions++;
277	if (flags & SUPPRESS) { / ??? /
278	continue;
279	}
280	if (flags & SHORTSHORT) {
281	va_arg(ap, char* ) = (char*)nread;
282	} else if (flags & SHORT) {
283	va_arg(ap, short* ) = (short*)nread;
284	} else if (flags & LONG) {
285	va_arg(ap, long* ) = (long*)nread;
286	} else if (flags & LONGLONG) {
287	va_arg(ap, long* long ) = (long* long)nread;
288	} else {
289	va_arg(ap, int* ) = (int*)nread;
290	}
291	continue;
292	}
293
294	/*
295	* We have a conversion that requires input.
296	*/
297	if (inr <= `0`) {
298	goto input_failure;
299	}
300
301	/*
302	* Consume leading white space, except for formats
303	* that suppress this.
304	*/
305	if ((flags & NOSKIP) == `0`) {
306	while (isspace(c: *inp)) {
307	nread++;
308	if (--inr > `0`) {
309	inp++;
310	} else {
311	goto input_failure;
312	}
313	}
314	/*
315	* Note that there is at least one character in
316	* the buffer, so conversions that do not set NOSKIP
317	* can no longer result in an input failure.
318	*/
319	}
320
321	/*
322	* Do the conversion.
323	*/
324	switch (c) {
325	case CT_CHAR:
326	/ scan arbitrary characters (sets NOSKIP) /
327	if (width == `0`) {
328	width = `1`;
329	}
330	if (flags & SUPPRESS) {
331	size_t sum = `0`;
332	for (;;) {
333	ssize_t n = inr;
334	if (n < width) {
335	sum += (size_t)n;
336	width -= n;
337	inp += n;
338	if (sum == `0`) {
339	goto input_failure;
340	}
341	break;
342	} else {
343	sum += (size_t)width;
344	inr -= width;
345	inp += width;
346	break;
347	}
348	}
349	nread += sum;
350	} else {
351	bcopy(src: inp, va_arg(ap, char *), n: width);
352	inr -= width;
353	inp += width;
354	nread += width;
355	nassigned++;
356	}
357	nconversions++;
358	break;
359
360	case CT_CCL: {
361	/ scan a (nonempty) character class (sets NOSKIP) /
362	if (width == `0`) {
363	width = SSIZE_MAX; / `infinity' /
364	}
365	/ take only those things in the class /
366	ptrdiff_t n;
367	if (flags & SUPPRESS) {
368	n = `0`;
369	while (ccltab[(unsigned char)*inp]) {
370	n++;
371	inr--;
372	inp++;
373	if (--width == `0`) {
374	break;
375	}
376	if (inr <= `0`) {
377	if (n == `0`) {
378	goto input_failure;
379	}
380	break;
381	}
382	}
383	if (n == `0`) {
384	goto match_failure;
385	}
386	} else {
387	p0 = p = va_arg(ap, char *);
388	while (ccltab[(unsigned char)*inp]) {
389	inr--;
390	p++ = inp++;
391	if (--width == `0`) {
392	break;
393	}
394	if (inr <= `0`) {
395	if (p == p0) {
396	goto input_failure;
397	}
398	break;
399	}
400	}
401	n = p - p0;
402	if (n == `0`) {
403	goto match_failure;
404	}
405	*p = `0`;
406	nassigned++;
407	}
408	nread += n;
409	nconversions++;
410	break;
411	}
412
413	case CT_STRING:
414	/ like CCL, but zero-length string OK, & no NOSKIP /
415	if (width == `0`) {
416	width = SSIZE_MAX;
417	}
418	if (flags & SUPPRESS) {
419	size_t n = `0`;
420	while (!isspace(c: *inp)) {
421	n++;
422	inr--;
423	inp++;
424	if (--width == `0`) {
425	break;
426	}
427	if (inr <= `0`) {
428	break;
429	}
430	}
431	nread += n;
432	} else {
433	p0 = p = va_arg(ap, char *);
434	while (!isspace(c: *inp)) {
435	inr--;
436	p++ = inp++;
437	if (--width == `0`) {
438	break;
439	}
440	if (inr <= `0`) {
441	break;
442	}
443	}
444	*p = `0`;
445	nread += p - p0;
446	nassigned++;
447	}
448	nconversions++;
449	continue;
450
451	case CT_INT:
452	/ scan an integer as if by the conversion function /
453	if (width <= `0` \|\| width > (ssize_t)(sizeof(buf) - `1`)) {
454	width = sizeof(buf) - `1`;
455	}
456	flags \|= SIGNOK \| NDIGITS \| NZDIGITS;
457	for (p = buf; width; width--) {
458	c = *inp;
459	/*
460	* Switch on the character; `goto ok'
461	* if we accept it as a part of number.
462	*/
463	switch (c) {
464	/*
465	* The digit 0 is always legal, but is
466	* special. For %i conversions, if no
467	* digits (zero or nonzero) have been
468	* scanned (only signs), we will have
469	* base==0. In that case, we should set
470	* it to 8 and enable 0x prefixing.
471	* Also, if we have not scanned zero digits
472	* before this, do not turn off prefixing
473	* (someone else will turn it off if we
474	* have scanned any nonzero digits).
475	*/
476	case `'0'`:
477	if (base == `0`) {
478	base = `8`;
479	flags \|= PFXOK;
480	}
481	if (flags & NZDIGITS) {
482	flags &= ~(SIGNOK \| NZDIGITS \| NDIGITS);
483	} else {
484	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
485	}
486	goto ok;
487
488	/ 1 through 7 always legal /
489	case `'1'`: case `'2'`: case `'3'`:
490	case `'4'`: case `'5'`: case `'6'`: case `'7'`:
491	base = basefix[base];
492	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
493	goto ok;
494
495	/ digits 8 and 9 ok iff decimal or hex /
496	case `'8'`: case `'9'`:
497	base = basefix[base];
498	if (base <= `8`) {
499	break; / not legal here /
500	}
501	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
502	goto ok;
503
504	/ letters ok iff hex /
505	case `'A'`: case `'B'`: case `'C'`:
506	case `'D'`: case `'E'`: case `'F'`:
507	case `'a'`: case `'b'`: case `'c'`:
508	case `'d'`: case `'e'`: case `'f'`:
509	/ no need to fix base here /
510	if (base <= `10`) {
511	break; / not legal here /
512	}
513	flags &= ~(SIGNOK \| PFXOK \| NDIGITS);
514	goto ok;
515
516	/ sign ok only as first character /
517	case `'+'`: case `'-'`:
518	if (flags & SIGNOK) {
519	flags &= ~SIGNOK;
520	goto ok;
521	}
522	break;
523
524	/ x ok iff flag still set & 2nd char /
525	case `'x'`: case `'X'`:
526	if (flags & PFXOK && p == buf + `1`) {
527	base = `16`; / if %i /
528	flags &= ~PFXOK;
529	goto ok;
530	}
531	break;
532	}
533
534	/*
535	* If we got here, c is not a legal character
536	* for a number. Stop accumulating digits.
537	*/
538	break;
539	ok:
540	/*
541	* c is legal: store it and look at the next.
542	*/
543	*p++ = c;
544	if (--inr > `0`) {
545	inp++;
546	} else {
547	break; / end of input /
548	}
549	}
550	/*
551	* If we had only a sign, it is no good; push
552	* back the sign. If the number ends in `x',
553	* it was [sign] '0' 'x', so push back the x
554	* and treat it as [sign] '0'.
555	*/
556	if (flags & NDIGITS) {
557	if (p > buf) {
558	inp--;
559	inr++;
560	}
561	goto match_failure;
562	}
563	c = p[-`1`];
564	if (c == `'x'` \|\| c == `'X'`) {
565	--p;
566	inp--;
567	inr++;
568	}
569	if ((flags & SUPPRESS) == `0`) {
570	u_quad_t res;
571
572	*p = `0`;
573	if ((flags & UNSIGNED) == `0`) {
574	res = (u_quad_t)strtoq(buf, (char **)NULL, base);
575	} else {
576	res = strtouq(buf, (char **)NULL, base);
577	}
578	if (flags & POINTER) {
579	va_arg(ap, void* **) =
580	(void *)(uintptr_t)res;
581	} else if (flags & SHORTSHORT) {
582	va_arg(ap, char* ) = (char*)res;
583	} else if (flags & SHORT) {
584	va_arg(ap, short* ) = (short*)res;
585	} else if (flags & LONG) {
586	va_arg(ap, long* ) = (long*)res;
587	} else if (flags & LONGLONG) {
588	va_arg(ap, long* long ) = (long* long)res;
589	} else {
590	va_arg(ap, int* ) = (int*)res;
591	}
592	nassigned++;
593	}
594	nread += p - buf;
595	nconversions++;
596	break;
597	}
598	}
599	input_failure:
600	return nconversions != `0` ? nassigned : -`1`;
601	match_failure:
602	return nassigned;
603	}
604
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 *	tab	256-entry table, indexed by unsigned character value
 *	fmt	format text immediately following the `['
 *
 * If the format ends before the closing `]', the returned pointer
 * addresses the terminating NUL instead.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	char v;

	/* first `clear' the whole table */
	int c = *fmt++;         /* first char hat => negated scanset */
	if (c == '^') {
		v = 1;          /* default => accept */
		c = *fmt++;     /* get new first char */
	} else {
		v = 0;          /* default => reject */
	}
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0) {
		return fmt - 1; /* format ended before closing ] */
	}
	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		int n;
		tab[c] = v;     /* take character c */
doswitch:
		n = *fmt++;
		switch (n) {
		case 0:         /* format ended too soon */
			return fmt - 1;

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with `<', an `a-a' range
			 * would run the fill loop once and mark c + 1, and
			 * for c == 255 that store lands outside the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;  /* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c, so c stops exactly at n */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':       /* end of scanset */
			return fmt;

		default:        /* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
694

Browse the source code of xnu/libkern/stdio/scanf.c