1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Copyright (c) 2011 The FreeBSD Foundation
8 *
9 * Copyright (c) 2023 Dag-Erling Smørgrav
10 *
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * This code is derived from software contributed to Berkeley by
15 * Chris Torek.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 #include "namespace.h"
43 #include <ctype.h>
44 #include <inttypes.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stddef.h>
48 #include <stdarg.h>
49 #include <string.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 #include "un-namespace.h"
53
54 #include "collate.h"
55 #include "libc_private.h"
56 #include "local.h"
57 #include "xlocale_private.h"
58
59 #include <locale.h>
60
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Bits collected in `flags' while a conversion specification is parsed.
 */
#define	LONG		0x0001	/* l: long or double */
#define	LONGDBL		0x0002	/* L: long double */
#define	SHORT		0x0004	/* h: short */
#define	SUPPRESS	0x0008	/* *: suppress assignment */
#define	POINTER		0x0010	/* p: void * (as hex) */
#define	NOSKIP		0x0020	/* [ or c: do not skip blanks */
#define	FASTINT		0x0200	/* wfN: int_fastN_t */
#define	LONGLONG	0x0400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x0800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Conversion classes; the format character is mapped to one of these
 * before the conversion is carried out.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
88
89 static const u_char *__sccl(char *, const u_char *);
90 static int parsefloat(FILE *, char *, char *, locale_t);
91
92 __weak_reference(__vfscanf, vfscanf);
93
94 /*
95 * Conversion functions are passed a pointer to this object instead of
96 * a real parameter to indicate that the assignment-suppression (*)
97 * flag was specified. We could use a NULL pointer to indicate this,
98 * but that would mask bugs in applications that call scanf() with a
99 * NULL pointer.
100 */
101 static const int suppress;
102 #define SUPPRESS_PTR ((void *)&suppress)
103
104 static const mbstate_t initial_mbs;
105
106 /*
107 * The following conversion functions return the number of characters consumed,
108 * or -1 on input failure. Character class conversion returns 0 on match
109 * failure.
110 */
111
112 static __inline int
convert_char(FILE * fp,char * p,int width)113 convert_char(FILE *fp, char * p, int width)
114 {
115 int n;
116
117 if (p == SUPPRESS_PTR) {
118 size_t sum = 0;
119 for (;;) {
120 if ((n = fp->_r) < width) {
121 sum += n;
122 width -= n;
123 fp->_p += n;
124 if (__srefill(fp)) {
125 if (sum == 0)
126 return (-1);
127 break;
128 }
129 } else {
130 sum += width;
131 fp->_r -= width;
132 fp->_p += width;
133 break;
134 }
135 }
136 return (sum);
137 } else {
138 size_t r = __fread(p, 1, width, fp);
139
140 if (r == 0)
141 return (-1);
142 return (r);
143 }
144 }
145
146 static __inline int
convert_wchar(FILE * fp,wchar_t * wcp,int width,locale_t locale)147 convert_wchar(FILE *fp, wchar_t *wcp, int width, locale_t locale)
148 {
149 mbstate_t mbs;
150 int n, nread;
151 wint_t wi;
152
153 mbs = initial_mbs;
154 n = 0;
155 while (width-- != 0 &&
156 (wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF) {
157 if (wcp != SUPPRESS_PTR)
158 *wcp++ = (wchar_t)wi;
159 n += nread;
160 }
161 if (n == 0)
162 return (-1);
163 return (n);
164 }
165
166 static __inline int
convert_ccl(FILE * fp,char * p,int width,const char * ccltab)167 convert_ccl(FILE *fp, char * p, int width, const char *ccltab)
168 {
169 char *p0;
170 int n;
171
172 if (p == SUPPRESS_PTR) {
173 n = 0;
174 while (ccltab[*fp->_p]) {
175 n++, fp->_r--, fp->_p++;
176 if (--width == 0)
177 break;
178 if (fp->_r <= 0 && __srefill(fp)) {
179 if (n == 0)
180 return (-1);
181 break;
182 }
183 }
184 } else {
185 p0 = p;
186 while (ccltab[*fp->_p]) {
187 fp->_r--;
188 *p++ = *fp->_p++;
189 if (--width == 0)
190 break;
191 if (fp->_r <= 0 && __srefill(fp)) {
192 if (p == p0)
193 return (-1);
194 break;
195 }
196 }
197 n = p - p0;
198 if (n == 0)
199 return (0);
200 *p = 0;
201 }
202 return (n);
203 }
204
205 static __inline int
convert_wccl(FILE * fp,wchar_t * wcp,int width,const char * ccltab,locale_t locale)206 convert_wccl(FILE *fp, wchar_t *wcp, int width, const char *ccltab,
207 locale_t locale)
208 {
209 mbstate_t mbs;
210 wint_t wi;
211 int n, nread;
212
213 mbs = initial_mbs;
214 n = 0;
215 if (wcp == SUPPRESS_PTR) {
216 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
217 width-- != 0 && ccltab[wctob(wi)])
218 n += nread;
219 if (wi != WEOF)
220 __ungetwc(wi, fp, __get_locale());
221 } else {
222 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
223 width-- != 0 && ccltab[wctob(wi)]) {
224 *wcp++ = (wchar_t)wi;
225 n += nread;
226 }
227 if (wi != WEOF)
228 __ungetwc(wi, fp, __get_locale());
229 if (n == 0)
230 return (0);
231 *wcp = 0;
232 }
233 return (n);
234 }
235
236 static __inline int
convert_string(FILE * fp,char * p,int width)237 convert_string(FILE *fp, char * p, int width)
238 {
239 char *p0;
240 int n;
241
242 if (p == SUPPRESS_PTR) {
243 n = 0;
244 while (!isspace(*fp->_p)) {
245 n++, fp->_r--, fp->_p++;
246 if (--width == 0)
247 break;
248 if (fp->_r <= 0 && __srefill(fp))
249 break;
250 }
251 } else {
252 p0 = p;
253 while (!isspace(*fp->_p)) {
254 fp->_r--;
255 *p++ = *fp->_p++;
256 if (--width == 0)
257 break;
258 if (fp->_r <= 0 && __srefill(fp))
259 break;
260 }
261 *p = 0;
262 n = p - p0;
263 }
264 return (n);
265 }
266
267 static __inline int
convert_wstring(FILE * fp,wchar_t * wcp,int width,locale_t locale)268 convert_wstring(FILE *fp, wchar_t *wcp, int width, locale_t locale)
269 {
270 mbstate_t mbs;
271 wint_t wi;
272 int n, nread;
273
274 mbs = initial_mbs;
275 n = 0;
276 if (wcp == SUPPRESS_PTR) {
277 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
278 width-- != 0 && !iswspace(wi))
279 n += nread;
280 if (wi != WEOF)
281 __ungetwc(wi, fp, __get_locale());
282 } else {
283 while ((wi = __fgetwc_mbs(fp, &mbs, &nread, locale)) != WEOF &&
284 width-- != 0 && !iswspace(wi)) {
285 *wcp++ = (wchar_t)wi;
286 n += nread;
287 }
288 if (wi != WEOF)
289 __ungetwc(wi, fp, __get_locale());
290 *wcp = '\0';
291 }
292 return (n);
293 }
294
/*
 * States of the integer recogniser.
 */
enum parseint_state {
	begin,		/* nothing accepted yet */
	havesign,	/* accepted a sign only */
	havezero,	/* accepted a single leading zero */
	haveprefix,	/* accepted "0b"/"0x" but no digit after it */
	any,		/* accepted at least one digit of the number */
};

/*
 * Feed one character to the integer recogniser.
 *
 * c      the candidate character
 * state  current state, updated when the character is accepted
 * base   conversion base; 0 means "detect", in which case it is set
 *        to 2, 8, 10 or 16 as soon as the input determines it
 *
 * Returns 1 if c extends the number and 0 if it does not.
 */
static __inline int
parseint_fsm(int c, enum parseint_state *state, int *base)
{
	int digit, isdec;

	/* A sign is only valid as the very first character. */
	if (c == '+' || c == '-') {
		if (*state != begin)
			return 0;
		*state = havesign;
		return 1;
	}

	/* The first zero may turn out to introduce a base prefix. */
	if (c == '0' && (*state == begin || *state == havesign)) {
		*state = havezero;
		return 1;
	}

	/* Directly after that zero, b/B or x/X may select the base. */
	if (*state == havezero) {
		if ((c == 'b' || c == 'B') && (*base == 0 || *base == 2)) {
			*state = haveprefix;
			*base = 2;
			return 1;
		}
		if ((c == 'x' || c == 'X') && (*base == 0 || *base == 16)) {
			*state = haveprefix;
			*base = 16;
			return 1;
		}
	}

	/* Everything else must be a digit; work out its value. */
	isdec = (c >= '0' && c <= '9');
	if (isdec)
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 10;
	else
		return 0;

	/* An octal digit after a lone zero fixes an undetermined base at 8. */
	if (isdec && digit <= 7 && *state == havezero && *base == 0)
		*base = 8;
	/* A leading decimal digit fixes an undetermined base at 10. */
	if (isdec && (*state == begin || *state == havesign) && *base == 0)
		*base = 10;

	if (*base > digit) {
		*state = any;
		return 1;
	}
	return 0;
}
413
414 /*
415 * Read an integer, storing it in buf.
416 *
417 * Return 0 on a match failure, and the number of characters read
418 * otherwise.
419 */
420 static __inline int
parseint(FILE * fp,char * __restrict buf,int width,int base)421 parseint(FILE *fp, char * __restrict buf, int width, int base)
422 {
423 enum parseint_state state = begin;
424 char *p;
425 int c;
426
427 for (p = buf; width; width--) {
428 c = __sgetc(fp);
429 if (c == EOF)
430 break;
431 if (!parseint_fsm(c, &state, &base))
432 break;
433 *p++ = c;
434 }
435 /*
436 * If we only had a sign, push it back. If we only had a 0b or 0x
437 * prefix (possibly preceded by a sign), we view it as "0" and
438 * push back the letter. In all other cases, if we stopped
439 * because we read a non-number character, push it back.
440 */
441 if (state == havesign) {
442 p--;
443 (void) __ungetc(*(u_char *)p, fp);
444 } else if (state == haveprefix) {
445 p--;
446 (void) __ungetc(c, fp);
447 } else if (width && c != EOF) {
448 (void) __ungetc(c, fp);
449 }
450 return (p - buf);
451 }
452
/*
 * __vfscanf - MT-safe version
 *
 * Calls __svfscanf() with the locale returned by __get_locale(),
 * bracketed by FLOCKFILE_CANCELSAFE()/FUNLOCKFILE_CANCELSAFE(), and
 * returns its result unchanged.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int nmatched;

	FLOCKFILE_CANCELSAFE(fp);
	nmatched = __svfscanf(fp, __get_locale(), fmt0, ap);
	FUNLOCKFILE_CANCELSAFE();
	return (nmatched);
}
466 int
vfscanf_l(FILE * fp,locale_t locale,char const * fmt0,va_list ap)467 vfscanf_l(FILE *fp, locale_t locale, char const *fmt0, va_list ap)
468 {
469 int ret;
470 FIX_LOCALE(locale);
471
472 FLOCKFILE_CANCELSAFE(fp);
473 ret = __svfscanf(fp, locale, fmt0, ap);
474 FUNLOCKFILE_CANCELSAFE();
475 return (ret);
476 }
477
478 /*
479 * __svfscanf - non-MT-safe version of __vfscanf
480 */
481 int
__svfscanf(FILE * fp,locale_t locale,const char * fmt0,va_list ap)482 __svfscanf(FILE *fp, locale_t locale, const char *fmt0, va_list ap)
483 {
484 #define GETARG(type) ((flags & SUPPRESS) ? SUPPRESS_PTR : va_arg(ap, type))
485 const u_char *fmt = (const u_char *)fmt0;
486 int c; /* character from format, or conversion */
487 size_t width; /* field width, or 0 */
488 int flags; /* flags as defined above */
489 int nassigned; /* number of fields assigned */
490 int nconversions; /* number of conversions */
491 int nr; /* characters read by the current conversion */
492 int nread; /* number of characters consumed from fp */
493 int base; /* base argument to conversion function */
494 char ccltab[256]; /* character class table for %[...] */
495 char buf[BUF]; /* buffer for numeric conversions */
496
497 ORIENT(fp, -1);
498
499 nassigned = 0;
500 nconversions = 0;
501 nread = 0;
502 for (;;) {
503 c = *fmt++;
504 if (c == 0)
505 return (nassigned);
506 if (isspace(c)) {
507 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
508 nread++, fp->_r--, fp->_p++;
509 continue;
510 }
511 if (c != '%')
512 goto literal;
513 width = 0;
514 flags = 0;
515 /*
516 * switch on the format. continue if done;
517 * break once format type is derived.
518 */
519 again: c = *fmt++;
520 switch (c) {
521 case '%':
522 literal:
523 if (fp->_r <= 0 && __srefill(fp))
524 goto input_failure;
525 if (*fp->_p != c)
526 goto match_failure;
527 fp->_r--, fp->_p++;
528 nread++;
529 continue;
530
531 case '*':
532 flags |= SUPPRESS;
533 goto again;
534 case 'j':
535 flags |= INTMAXT;
536 goto again;
537 case 'l':
538 if (flags & LONG) {
539 flags &= ~LONG;
540 flags |= LONGLONG;
541 } else
542 flags |= LONG;
543 goto again;
544 case 'q':
545 flags |= LONGLONG; /* not quite */
546 goto again;
547 case 't':
548 flags |= PTRDIFFT;
549 goto again;
550 case 'w':
551 /*
552 * Fixed-width integer types. On all platforms we
553 * support, int8_t is equivalent to char, int16_t
554 * is equivalent to short, int32_t is equivalent
555 * to int, int64_t is equivalent to long long int.
556 * Furthermore, int_fast8_t, int_fast16_t and
557 * int_fast32_t are equivalent to int, and
558 * int_fast64_t is equivalent to long long int.
559 */
560 flags &= ~(SHORTSHORT|SHORT|LONG|LONGLONG|SIZET|INTMAXT|PTRDIFFT);
561 if (fmt[0] == 'f') {
562 flags |= FASTINT;
563 fmt++;
564 } else {
565 flags &= ~FASTINT;
566 }
567 if (fmt[0] == '8') {
568 if (!(flags & FASTINT))
569 flags |= SHORTSHORT;
570 else
571 /* no flag set = 32 */ ;
572 fmt += 1;
573 } else if (fmt[0] == '1' && fmt[1] == '6') {
574 if (!(flags & FASTINT))
575 flags |= SHORT;
576 else
577 /* no flag set = 32 */ ;
578 fmt += 2;
579 } else if (fmt[0] == '3' && fmt[1] == '2') {
580 /* no flag set = 32 */ ;
581 fmt += 2;
582 } else if (fmt[0] == '6' && fmt[1] == '4') {
583 flags |= LONGLONG;
584 fmt += 2;
585 } else {
586 goto match_failure;
587 }
588 goto again;
589 case 'z':
590 flags |= SIZET;
591 goto again;
592 case 'L':
593 flags |= LONGDBL;
594 goto again;
595 case 'h':
596 if (flags & SHORT) {
597 flags &= ~SHORT;
598 flags |= SHORTSHORT;
599 } else
600 flags |= SHORT;
601 goto again;
602
603 case '0': case '1': case '2': case '3': case '4':
604 case '5': case '6': case '7': case '8': case '9':
605 width = width * 10 + c - '0';
606 goto again;
607
608 /*
609 * Conversions.
610 */
611 case 'B':
612 case 'b':
613 c = CT_INT;
614 flags |= UNSIGNED;
615 base = 2;
616 break;
617
618 case 'd':
619 c = CT_INT;
620 base = 10;
621 break;
622
623 case 'i':
624 c = CT_INT;
625 base = 0;
626 break;
627
628 case 'o':
629 c = CT_INT;
630 flags |= UNSIGNED;
631 base = 8;
632 break;
633
634 case 'u':
635 c = CT_INT;
636 flags |= UNSIGNED;
637 base = 10;
638 break;
639
640 case 'X':
641 case 'x':
642 c = CT_INT;
643 flags |= UNSIGNED;
644 base = 16;
645 break;
646
647 case 'A': case 'E': case 'F': case 'G':
648 case 'a': case 'e': case 'f': case 'g':
649 c = CT_FLOAT;
650 break;
651
652 case 'S':
653 flags |= LONG;
654 /* FALLTHROUGH */
655 case 's':
656 c = CT_STRING;
657 break;
658
659 case '[':
660 fmt = __sccl(ccltab, fmt);
661 flags |= NOSKIP;
662 c = CT_CCL;
663 break;
664
665 case 'C':
666 flags |= LONG;
667 /* FALLTHROUGH */
668 case 'c':
669 flags |= NOSKIP;
670 c = CT_CHAR;
671 break;
672
673 case 'p': /* pointer format is like hex */
674 flags |= POINTER;
675 c = CT_INT; /* assumes sizeof(uintmax_t) */
676 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */
677 base = 16;
678 break;
679
680 case 'n':
681 if (flags & SUPPRESS) /* ??? */
682 continue;
683 if (flags & SHORTSHORT)
684 *va_arg(ap, char *) = nread;
685 else if (flags & SHORT)
686 *va_arg(ap, short *) = nread;
687 else if (flags & LONG)
688 *va_arg(ap, long *) = nread;
689 else if (flags & LONGLONG)
690 *va_arg(ap, long long *) = nread;
691 else if (flags & INTMAXT)
692 *va_arg(ap, intmax_t *) = nread;
693 else if (flags & SIZET)
694 *va_arg(ap, size_t *) = nread;
695 else if (flags & PTRDIFFT)
696 *va_arg(ap, ptrdiff_t *) = nread;
697 else
698 *va_arg(ap, int *) = nread;
699 continue;
700
701 default:
702 goto match_failure;
703
704 /*
705 * Disgusting backwards compatibility hack. XXX
706 */
707 case '\0': /* compat */
708 return (EOF);
709 }
710
711 /*
712 * We have a conversion that requires input.
713 */
714 if (fp->_r <= 0 && __srefill(fp))
715 goto input_failure;
716
717 /*
718 * Consume leading white space, except for formats
719 * that suppress this.
720 */
721 if ((flags & NOSKIP) == 0) {
722 while (isspace(*fp->_p)) {
723 nread++;
724 if (--fp->_r > 0)
725 fp->_p++;
726 else if (__srefill(fp))
727 goto input_failure;
728 }
729 /*
730 * Note that there is at least one character in
731 * the buffer, so conversions that do not set NOSKIP
732 * ca no longer result in an input failure.
733 */
734 }
735
736 /*
737 * Do the conversion.
738 */
739 switch (c) {
740
741 case CT_CHAR:
742 /* scan arbitrary characters (sets NOSKIP) */
743 if (width == 0)
744 width = 1;
745 if (flags & LONG) {
746 nr = convert_wchar(fp, GETARG(wchar_t *),
747 width, locale);
748 } else {
749 nr = convert_char(fp, GETARG(char *), width);
750 }
751 if (nr < 0)
752 goto input_failure;
753 break;
754
755 case CT_CCL:
756 /* scan a (nonempty) character class (sets NOSKIP) */
757 if (width == 0)
758 width = (size_t)~0; /* `infinity' */
759 if (flags & LONG) {
760 nr = convert_wccl(fp, GETARG(wchar_t *), width,
761 ccltab, locale);
762 } else {
763 nr = convert_ccl(fp, GETARG(char *), width,
764 ccltab);
765 }
766 if (nr <= 0) {
767 if (nr < 0)
768 goto input_failure;
769 else /* nr == 0 */
770 goto match_failure;
771 }
772 break;
773
774 case CT_STRING:
775 /* like CCL, but zero-length string OK, & no NOSKIP */
776 if (width == 0)
777 width = (size_t)~0;
778 if (flags & LONG) {
779 nr = convert_wstring(fp, GETARG(wchar_t *),
780 width, locale);
781 } else {
782 nr = convert_string(fp, GETARG(char *), width);
783 }
784 if (nr < 0)
785 goto input_failure;
786 break;
787
788 case CT_INT:
789 /* scan an integer as if by the conversion function */
790 #ifdef hardway
791 if (width == 0 || width > sizeof(buf) - 1)
792 width = sizeof(buf) - 1;
793 #else
794 /* size_t is unsigned, hence this optimisation */
795 if (--width > sizeof(buf) - 2)
796 width = sizeof(buf) - 2;
797 width++;
798 #endif
799 nr = parseint(fp, buf, width, base);
800 if (nr == 0)
801 goto match_failure;
802 if ((flags & SUPPRESS) == 0) {
803 uintmax_t res;
804
805 buf[nr] = '\0';
806 if ((flags & UNSIGNED) == 0)
807 res = strtoimax_l(buf, (char **)NULL, base, locale);
808 else
809 res = strtoumax_l(buf, (char **)NULL, base, locale);
810 if (flags & POINTER)
811 *va_arg(ap, void **) =
812 (void *)(uintptr_t)res;
813 else if (flags & SHORTSHORT)
814 *va_arg(ap, char *) = res;
815 else if (flags & SHORT)
816 *va_arg(ap, short *) = res;
817 else if (flags & LONG)
818 *va_arg(ap, long *) = res;
819 else if (flags & LONGLONG)
820 *va_arg(ap, long long *) = res;
821 else if (flags & INTMAXT)
822 *va_arg(ap, intmax_t *) = res;
823 else if (flags & PTRDIFFT)
824 *va_arg(ap, ptrdiff_t *) = res;
825 else if (flags & SIZET)
826 *va_arg(ap, size_t *) = res;
827 else
828 *va_arg(ap, int *) = res;
829 }
830 break;
831
832 case CT_FLOAT:
833 /* scan a floating point number as if by strtod */
834 if (width == 0 || width > sizeof(buf) - 1)
835 width = sizeof(buf) - 1;
836 nr = parsefloat(fp, buf, buf + width, locale);
837 if (nr == 0)
838 goto match_failure;
839 if ((flags & SUPPRESS) == 0) {
840 if (flags & LONGDBL) {
841 long double res = strtold_l(buf, NULL,
842 locale);
843 *va_arg(ap, long double *) = res;
844 } else if (flags & LONG) {
845 double res = strtod_l(buf, NULL,
846 locale);
847 *va_arg(ap, double *) = res;
848 } else {
849 float res = strtof_l(buf, NULL, locale);
850 *va_arg(ap, float *) = res;
851 }
852 }
853 break;
854 }
855 if (!(flags & SUPPRESS))
856 nassigned++;
857 nread += nr;
858 nconversions++;
859 }
860 input_failure:
861 return (nconversions != 0 ? nassigned : EOF);
862 match_failure:
863 return (nassigned);
864 }
865
866 /*
867 * Fill in the given table from the scanset at the given format
868 * (just after `['). Return a pointer to the character past the
869 * closing `]'. The table has a 1 wherever characters should be
870 * considered part of the scanset.
871 */
872 static const u_char *
__sccl(char * tab,const u_char * fmt)873 __sccl(char *tab, const u_char *fmt)
874 {
875 int c, n, v, i;
876 struct xlocale_collate *table =
877 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
878
879 /* first `clear' the whole table */
880 c = *fmt++; /* first char hat => negated scanset */
881 if (c == '^') {
882 v = 1; /* default => accept */
883 c = *fmt++; /* get new first char */
884 } else
885 v = 0; /* default => reject */
886
887 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
888 (void) memset(tab, v, 256);
889
890 if (c == 0)
891 return (fmt - 1);/* format ended before closing ] */
892
893 /*
894 * Now set the entries corresponding to the actual scanset
895 * to the opposite of the above.
896 *
897 * The first character may be ']' (or '-') without being special;
898 * the last character may be '-'.
899 */
900 v = 1 - v;
901 for (;;) {
902 tab[c] = v; /* take character c */
903 doswitch:
904 n = *fmt++; /* and examine the next */
905 switch (n) {
906
907 case 0: /* format ended too soon */
908 return (fmt - 1);
909
910 case '-':
911 /*
912 * A scanset of the form
913 * [01+-]
914 * is defined as `the digit 0, the digit 1,
915 * the character +, the character -', but
916 * the effect of a scanset such as
917 * [a-zA-Z0-9]
918 * is implementation defined. The V7 Unix
919 * scanf treats `a-z' as `the letters a through
920 * z', but treats `a-a' as `the letter a, the
921 * character -, and the letter a'.
922 *
923 * For compatibility, the `-' is not considered
924 * to define a range if the character following
925 * it is either a close bracket (required by ANSI)
926 * or is not numerically greater than the character
927 * we just stored in the table (c).
928 */
929 n = *fmt;
930 if (n == ']'
931 || (table->__collate_load_error ? n < c :
932 __collate_range_cmp(n, c) < 0
933 )
934 ) {
935 c = '-';
936 break; /* resume the for(;;) */
937 }
938 fmt++;
939 /* fill in the range */
940 if (table->__collate_load_error) {
941 do {
942 tab[++c] = v;
943 } while (c < n);
944 } else {
945 for (i = 0; i < 256; i ++)
946 if (__collate_range_cmp(c, i) <= 0 &&
947 __collate_range_cmp(i, n) <= 0
948 )
949 tab[i] = v;
950 }
951 #if 1 /* XXX another disgusting compatibility hack */
952 c = n;
953 /*
954 * Alas, the V7 Unix scanf also treats formats
955 * such as [a-c-e] as `the letters a through e'.
956 * This too is permitted by the standard....
957 */
958 goto doswitch;
959 #else
960 c = *fmt++;
961 if (c == 0)
962 return (fmt - 1);
963 if (c == ']')
964 return (fmt);
965 #endif
966 break;
967
968 case ']': /* end of scanset */
969 return (fmt);
970
971 default: /* just another character */
972 c = n;
973 break;
974 }
975 }
976 /* NOTREACHED */
977 }
978
979 static int
parsefloat(FILE * fp,char * buf,char * end,locale_t locale)980 parsefloat(FILE *fp, char *buf, char *end, locale_t locale)
981 {
982 char *commit, *p;
983 int infnanpos = 0, decptpos = 0;
984 enum {
985 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
986 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
987 } state = S_START;
988 unsigned char c;
989 const char *decpt = localeconv_l(locale)->decimal_point;
990 _Bool gotmantdig = 0, ishex = 0;
991
992 /*
993 * We set commit = p whenever the string we have read so far
994 * constitutes a valid representation of a floating point
995 * number by itself. At some point, the parse will complete
996 * or fail, and we will ungetc() back to the last commit point.
997 * To ensure that the file offset gets updated properly, it is
998 * always necessary to read at least one character that doesn't
999 * match; thus, we can't short-circuit "infinity" or "nan(...)".
1000 */
1001 commit = buf - 1;
1002 for (p = buf; p < end; ) {
1003 c = *fp->_p;
1004 reswitch:
1005 switch (state) {
1006 case S_START:
1007 state = S_GOTSIGN;
1008 if (c == '-' || c == '+')
1009 break;
1010 else
1011 goto reswitch;
1012 case S_GOTSIGN:
1013 switch (c) {
1014 case '0':
1015 state = S_MAYBEHEX;
1016 commit = p;
1017 break;
1018 case 'I':
1019 case 'i':
1020 state = S_INF;
1021 break;
1022 case 'N':
1023 case 'n':
1024 state = S_NAN;
1025 break;
1026 default:
1027 state = S_DIGITS;
1028 goto reswitch;
1029 }
1030 break;
1031 case S_INF:
1032 if (infnanpos > 6 ||
1033 (c != "nfinity"[infnanpos] &&
1034 c != "NFINITY"[infnanpos]))
1035 goto parsedone;
1036 if (infnanpos == 1 || infnanpos == 6)
1037 commit = p; /* inf or infinity */
1038 infnanpos++;
1039 break;
1040 case S_NAN:
1041 switch (infnanpos) {
1042 case 0:
1043 if (c != 'A' && c != 'a')
1044 goto parsedone;
1045 break;
1046 case 1:
1047 if (c != 'N' && c != 'n')
1048 goto parsedone;
1049 else
1050 commit = p;
1051 break;
1052 case 2:
1053 if (c != '(')
1054 goto parsedone;
1055 break;
1056 default:
1057 if (c == ')') {
1058 commit = p;
1059 state = S_DONE;
1060 } else if (!isalnum(c) && c != '_')
1061 goto parsedone;
1062 break;
1063 }
1064 infnanpos++;
1065 break;
1066 case S_DONE:
1067 goto parsedone;
1068 case S_MAYBEHEX:
1069 state = S_DIGITS;
1070 if (c == 'X' || c == 'x') {
1071 ishex = 1;
1072 break;
1073 } else { /* we saw a '0', but no 'x' */
1074 gotmantdig = 1;
1075 goto reswitch;
1076 }
1077 case S_DIGITS:
1078 if ((ishex && isxdigit(c)) || isdigit(c)) {
1079 gotmantdig = 1;
1080 commit = p;
1081 break;
1082 } else {
1083 state = S_DECPT;
1084 goto reswitch;
1085 }
1086 case S_DECPT:
1087 if (c == decpt[decptpos]) {
1088 if (decpt[++decptpos] == '\0') {
1089 /* We read the complete decpt seq. */
1090 state = S_FRAC;
1091 if (gotmantdig)
1092 commit = p;
1093 }
1094 break;
1095 } else if (!decptpos) {
1096 /* We didn't read any decpt characters. */
1097 state = S_FRAC;
1098 goto reswitch;
1099 } else {
1100 /*
1101 * We read part of a multibyte decimal point,
1102 * but the rest is invalid, so bail.
1103 */
1104 goto parsedone;
1105 }
1106 case S_FRAC:
1107 if (((c == 'E' || c == 'e') && !ishex) ||
1108 ((c == 'P' || c == 'p') && ishex)) {
1109 if (!gotmantdig)
1110 goto parsedone;
1111 else
1112 state = S_EXP;
1113 } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1114 commit = p;
1115 gotmantdig = 1;
1116 } else
1117 goto parsedone;
1118 break;
1119 case S_EXP:
1120 state = S_EXPDIGITS;
1121 if (c == '-' || c == '+')
1122 break;
1123 else
1124 goto reswitch;
1125 case S_EXPDIGITS:
1126 if (isdigit(c))
1127 commit = p;
1128 else
1129 goto parsedone;
1130 break;
1131 default:
1132 abort();
1133 }
1134 *p++ = c;
1135 if (--fp->_r > 0)
1136 fp->_p++;
1137 else if (__srefill(fp))
1138 break; /* EOF */
1139 }
1140
1141 parsedone:
1142 while (commit < --p)
1143 __ungetc(*(u_char *)p, fp);
1144 *++commit = '\0';
1145 return (commit - buf);
1146 }
1147