printf-parse.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /* Formatted output to strings.
  2. Copyright (C) 1999-2000, 2002-2003, 2006-2020 Free Software Foundation, Inc.
  3. This program is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 3, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License along
  12. with this program; if not, see <https://www.gnu.org/licenses/>. */
  13. /* This file can be parametrized with the following macros:
  14. CHAR_T The element type of the format string.
  15. CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
  16. in the format string are ASCII.
  17. DIRECTIVE Structure denoting a format directive.
  18. Depends on CHAR_T.
  19. DIRECTIVES Structure denoting the set of format directives of a
  20. format string. Depends on CHAR_T.
  21. PRINTF_PARSE Function that parses a format string.
  22. Depends on CHAR_T.
  23. STATIC Set to 'static' to declare the function static.
  24. ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
  25. #ifndef PRINTF_PARSE
  26. # include <config.h>
  27. #endif
  28. /* Specification. */
  29. #ifndef PRINTF_PARSE
  30. # include "printf-parse.h"
  31. #endif
  32. /* Default parameters. */
  33. #ifndef PRINTF_PARSE
  34. # define PRINTF_PARSE printf_parse
  35. # define CHAR_T char
  36. # define DIRECTIVE char_directive
  37. # define DIRECTIVES char_directives
  38. #endif
  39. /* Get size_t, NULL. */
  40. #include <stddef.h>
  41. /* Get intmax_t. */
  42. #if defined IN_LIBINTL || defined IN_LIBASPRINTF
  43. # if HAVE_STDINT_H_WITH_UINTMAX
  44. # include <stdint.h>
  45. # endif
  46. # if HAVE_INTTYPES_H_WITH_UINTMAX
  47. # include <inttypes.h>
  48. # endif
  49. #else
  50. # include <stdint.h>
  51. #endif
  52. /* malloc(), realloc(), free(). */
  53. #include <stdlib.h>
  54. /* memcpy(). */
  55. #include <string.h>
  56. /* errno. */
  57. #include <errno.h>
  58. /* Checked size_t computations. */
  59. #include "xsize.h"
  60. #if CHAR_T_ONLY_ASCII
  61. /* c_isascii(). */
  62. # include "c-ctype.h"
  63. #endif
  64. #ifdef STATIC
  65. STATIC
  66. #endif
  67. int
  68. PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
  69. {
  70. const CHAR_T *cp = format; /* pointer into format */
  71. size_t arg_posn = 0; /* number of regular arguments consumed */
  72. size_t d_allocated; /* allocated elements of d->dir */
  73. size_t a_allocated; /* allocated elements of a->arg */
  74. size_t max_width_length = 0;
  75. size_t max_precision_length = 0;
  76. d->count = 0;
  77. d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
  78. d->dir = d->direct_alloc_dir;
  79. a->count = 0;
  80. a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
  81. a->arg = a->direct_alloc_arg;
  82. #define REGISTER_ARG(_index_,_type_) \
  83. { \
  84. size_t n = (_index_); \
  85. if (n >= a_allocated) \
  86. { \
  87. size_t memory_size; \
  88. argument *memory; \
  89. \
  90. a_allocated = xtimes (a_allocated, 2); \
  91. if (a_allocated <= n) \
  92. a_allocated = xsum (n, 1); \
  93. memory_size = xtimes (a_allocated, sizeof (argument)); \
  94. if (size_overflow_p (memory_size)) \
  95. /* Overflow, would lead to out of memory. */ \
  96. goto out_of_memory; \
  97. memory = (argument *) (a->arg != a->direct_alloc_arg \
  98. ? realloc (a->arg, memory_size) \
  99. : malloc (memory_size)); \
  100. if (memory == NULL) \
  101. /* Out of memory. */ \
  102. goto out_of_memory; \
  103. if (a->arg == a->direct_alloc_arg) \
  104. memcpy (memory, a->arg, a->count * sizeof (argument)); \
  105. a->arg = memory; \
  106. } \
  107. while (a->count <= n) \
  108. a->arg[a->count++].type = TYPE_NONE; \
  109. if (a->arg[n].type == TYPE_NONE) \
  110. a->arg[n].type = (_type_); \
  111. else if (a->arg[n].type != (_type_)) \
  112. /* Ambiguous type for positional argument. */ \
  113. goto error; \
  114. }
  115. while (*cp != '\0')
  116. {
  117. CHAR_T c = *cp++;
  118. if (c == '%')
  119. {
  120. size_t arg_index = ARG_NONE;
  121. DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
  122. /* Initialize the next directive. */
  123. dp->dir_start = cp - 1;
  124. dp->flags = 0;
  125. dp->width_start = NULL;
  126. dp->width_end = NULL;
  127. dp->width_arg_index = ARG_NONE;
  128. dp->precision_start = NULL;
  129. dp->precision_end = NULL;
  130. dp->precision_arg_index = ARG_NONE;
  131. dp->arg_index = ARG_NONE;
  132. /* Test for positional argument. */
  133. if (*cp >= '0' && *cp <= '9')
  134. {
  135. const CHAR_T *np;
  136. for (np = cp; *np >= '0' && *np <= '9'; np++)
  137. ;
  138. if (*np == '$')
  139. {
  140. size_t n = 0;
  141. for (np = cp; *np >= '0' && *np <= '9'; np++)
  142. n = xsum (xtimes (n, 10), *np - '0');
  143. if (n == 0)
  144. /* Positional argument 0. */
  145. goto error;
  146. if (size_overflow_p (n))
  147. /* n too large, would lead to out of memory later. */
  148. goto error;
  149. arg_index = n - 1;
  150. cp = np + 1;
  151. }
  152. }
  153. /* Read the flags. */
  154. for (;;)
  155. {
  156. if (*cp == '\'')
  157. {
  158. dp->flags |= FLAG_GROUP;
  159. cp++;
  160. }
  161. else if (*cp == '-')
  162. {
  163. dp->flags |= FLAG_LEFT;
  164. cp++;
  165. }
  166. else if (*cp == '+')
  167. {
  168. dp->flags |= FLAG_SHOWSIGN;
  169. cp++;
  170. }
  171. else if (*cp == ' ')
  172. {
  173. dp->flags |= FLAG_SPACE;
  174. cp++;
  175. }
  176. else if (*cp == '#')
  177. {
  178. dp->flags |= FLAG_ALT;
  179. cp++;
  180. }
  181. else if (*cp == '0')
  182. {
  183. dp->flags |= FLAG_ZERO;
  184. cp++;
  185. }
  186. #if __GLIBC__ >= 2 && !defined __UCLIBC__
  187. else if (*cp == 'I')
  188. {
  189. dp->flags |= FLAG_LOCALIZED;
  190. cp++;
  191. }
  192. #endif
  193. else
  194. break;
  195. }
  196. /* Parse the field width. */
  197. if (*cp == '*')
  198. {
  199. dp->width_start = cp;
  200. cp++;
  201. dp->width_end = cp;
  202. if (max_width_length < 1)
  203. max_width_length = 1;
  204. /* Test for positional argument. */
  205. if (*cp >= '0' && *cp <= '9')
  206. {
  207. const CHAR_T *np;
  208. for (np = cp; *np >= '0' && *np <= '9'; np++)
  209. ;
  210. if (*np == '$')
  211. {
  212. size_t n = 0;
  213. for (np = cp; *np >= '0' && *np <= '9'; np++)
  214. n = xsum (xtimes (n, 10), *np - '0');
  215. if (n == 0)
  216. /* Positional argument 0. */
  217. goto error;
  218. if (size_overflow_p (n))
  219. /* n too large, would lead to out of memory later. */
  220. goto error;
  221. dp->width_arg_index = n - 1;
  222. cp = np + 1;
  223. }
  224. }
  225. if (dp->width_arg_index == ARG_NONE)
  226. {
  227. dp->width_arg_index = arg_posn++;
  228. if (dp->width_arg_index == ARG_NONE)
  229. /* arg_posn wrapped around. */
  230. goto error;
  231. }
  232. REGISTER_ARG (dp->width_arg_index, TYPE_INT);
  233. }
  234. else if (*cp >= '0' && *cp <= '9')
  235. {
  236. size_t width_length;
  237. dp->width_start = cp;
  238. for (; *cp >= '0' && *cp <= '9'; cp++)
  239. ;
  240. dp->width_end = cp;
  241. width_length = dp->width_end - dp->width_start;
  242. if (max_width_length < width_length)
  243. max_width_length = width_length;
  244. }
  245. /* Parse the precision. */
  246. if (*cp == '.')
  247. {
  248. cp++;
  249. if (*cp == '*')
  250. {
  251. dp->precision_start = cp - 1;
  252. cp++;
  253. dp->precision_end = cp;
  254. if (max_precision_length < 2)
  255. max_precision_length = 2;
  256. /* Test for positional argument. */
  257. if (*cp >= '0' && *cp <= '9')
  258. {
  259. const CHAR_T *np;
  260. for (np = cp; *np >= '0' && *np <= '9'; np++)
  261. ;
  262. if (*np == '$')
  263. {
  264. size_t n = 0;
  265. for (np = cp; *np >= '0' && *np <= '9'; np++)
  266. n = xsum (xtimes (n, 10), *np - '0');
  267. if (n == 0)
  268. /* Positional argument 0. */
  269. goto error;
  270. if (size_overflow_p (n))
  271. /* n too large, would lead to out of memory
  272. later. */
  273. goto error;
  274. dp->precision_arg_index = n - 1;
  275. cp = np + 1;
  276. }
  277. }
  278. if (dp->precision_arg_index == ARG_NONE)
  279. {
  280. dp->precision_arg_index = arg_posn++;
  281. if (dp->precision_arg_index == ARG_NONE)
  282. /* arg_posn wrapped around. */
  283. goto error;
  284. }
  285. REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
  286. }
  287. else
  288. {
  289. size_t precision_length;
  290. dp->precision_start = cp - 1;
  291. for (; *cp >= '0' && *cp <= '9'; cp++)
  292. ;
  293. dp->precision_end = cp;
  294. precision_length = dp->precision_end - dp->precision_start;
  295. if (max_precision_length < precision_length)
  296. max_precision_length = precision_length;
  297. }
  298. }
  299. {
  300. arg_type type;
  301. /* Parse argument type/size specifiers. */
  302. {
  303. int flags = 0;
  304. for (;;)
  305. {
  306. if (*cp == 'h')
  307. {
  308. flags |= (1 << (flags & 1));
  309. cp++;
  310. }
  311. else if (*cp == 'L')
  312. {
  313. flags |= 4;
  314. cp++;
  315. }
  316. else if (*cp == 'l')
  317. {
  318. flags += 8;
  319. cp++;
  320. }
  321. else if (*cp == 'j')
  322. {
  323. if (sizeof (intmax_t) > sizeof (long))
  324. {
  325. /* intmax_t = long long */
  326. flags += 16;
  327. }
  328. else if (sizeof (intmax_t) > sizeof (int))
  329. {
  330. /* intmax_t = long */
  331. flags += 8;
  332. }
  333. cp++;
  334. }
  335. else if (*cp == 'z' || *cp == 'Z')
  336. {
  337. /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
  338. because the warning facility in gcc-2.95.2 understands
  339. only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
  340. if (sizeof (size_t) > sizeof (long))
  341. {
  342. /* size_t = long long */
  343. flags += 16;
  344. }
  345. else if (sizeof (size_t) > sizeof (int))
  346. {
  347. /* size_t = long */
  348. flags += 8;
  349. }
  350. cp++;
  351. }
  352. else if (*cp == 't')
  353. {
  354. if (sizeof (ptrdiff_t) > sizeof (long))
  355. {
  356. /* ptrdiff_t = long long */
  357. flags += 16;
  358. }
  359. else if (sizeof (ptrdiff_t) > sizeof (int))
  360. {
  361. /* ptrdiff_t = long */
  362. flags += 8;
  363. }
  364. cp++;
  365. }
  366. #if defined __APPLE__ && defined __MACH__
  367. /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
  368. We cannot change it to "lld" because PRIdMAX must also
  369. be understood by the system's printf routines. */
  370. else if (*cp == 'q')
  371. {
  372. if (64 / 8 > sizeof (long))
  373. {
  374. /* int64_t = long long */
  375. flags += 16;
  376. }
  377. else
  378. {
  379. /* int64_t = long */
  380. flags += 8;
  381. }
  382. cp++;
  383. }
  384. #endif
  385. #if defined _WIN32 && ! defined __CYGWIN__
  386. /* On native Windows, PRIdMAX is defined as "I64d".
  387. We cannot change it to "lld" because PRIdMAX must also
  388. be understood by the system's printf routines. */
  389. else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
  390. {
  391. if (64 / 8 > sizeof (long))
  392. {
  393. /* __int64 = long long */
  394. flags += 16;
  395. }
  396. else
  397. {
  398. /* __int64 = long */
  399. flags += 8;
  400. }
  401. cp += 3;
  402. }
  403. #endif
  404. else
  405. break;
  406. }
  407. /* Read the conversion character. */
  408. c = *cp++;
  409. switch (c)
  410. {
  411. case 'd': case 'i':
  412. /* If 'long long' is larger than 'long': */
  413. if (flags >= 16 || (flags & 4))
  414. type = TYPE_LONGLONGINT;
  415. else
  416. /* If 'long long' is the same as 'long', we parse "lld" into
  417. TYPE_LONGINT. */
  418. if (flags >= 8)
  419. type = TYPE_LONGINT;
  420. else if (flags & 2)
  421. type = TYPE_SCHAR;
  422. else if (flags & 1)
  423. type = TYPE_SHORT;
  424. else
  425. type = TYPE_INT;
  426. break;
  427. case 'o': case 'u': case 'x': case 'X':
  428. /* If 'unsigned long long' is larger than 'unsigned long': */
  429. if (flags >= 16 || (flags & 4))
  430. type = TYPE_ULONGLONGINT;
  431. else
  432. /* If 'unsigned long long' is the same as 'unsigned long', we
  433. parse "llu" into TYPE_ULONGINT. */
  434. if (flags >= 8)
  435. type = TYPE_ULONGINT;
  436. else if (flags & 2)
  437. type = TYPE_UCHAR;
  438. else if (flags & 1)
  439. type = TYPE_USHORT;
  440. else
  441. type = TYPE_UINT;
  442. break;
  443. case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
  444. case 'a': case 'A':
  445. if (flags >= 16 || (flags & 4))
  446. type = TYPE_LONGDOUBLE;
  447. else
  448. type = TYPE_DOUBLE;
  449. break;
  450. case 'c':
  451. if (flags >= 8)
  452. #if HAVE_WINT_T
  453. type = TYPE_WIDE_CHAR;
  454. #else
  455. goto error;
  456. #endif
  457. else
  458. type = TYPE_CHAR;
  459. break;
  460. #if HAVE_WINT_T
  461. case 'C':
  462. type = TYPE_WIDE_CHAR;
  463. c = 'c';
  464. break;
  465. #endif
  466. case 's':
  467. if (flags >= 8)
  468. #if HAVE_WCHAR_T
  469. type = TYPE_WIDE_STRING;
  470. #else
  471. goto error;
  472. #endif
  473. else
  474. type = TYPE_STRING;
  475. break;
  476. #if HAVE_WCHAR_T
  477. case 'S':
  478. type = TYPE_WIDE_STRING;
  479. c = 's';
  480. break;
  481. #endif
  482. case 'p':
  483. type = TYPE_POINTER;
  484. break;
  485. case 'n':
  486. /* If 'long long' is larger than 'long': */
  487. if (flags >= 16 || (flags & 4))
  488. type = TYPE_COUNT_LONGLONGINT_POINTER;
  489. else
  490. /* If 'long long' is the same as 'long', we parse "lln" into
  491. TYPE_COUNT_LONGINT_POINTER. */
  492. if (flags >= 8)
  493. type = TYPE_COUNT_LONGINT_POINTER;
  494. else if (flags & 2)
  495. type = TYPE_COUNT_SCHAR_POINTER;
  496. else if (flags & 1)
  497. type = TYPE_COUNT_SHORT_POINTER;
  498. else
  499. type = TYPE_COUNT_INT_POINTER;
  500. break;
  501. #if ENABLE_UNISTDIO
  502. /* The unistdio extensions. */
  503. case 'U':
  504. if (flags >= 16)
  505. type = TYPE_U32_STRING;
  506. else if (flags >= 8)
  507. type = TYPE_U16_STRING;
  508. else
  509. type = TYPE_U8_STRING;
  510. break;
  511. #endif
  512. case '%':
  513. type = TYPE_NONE;
  514. break;
  515. default:
  516. /* Unknown conversion character. */
  517. goto error;
  518. }
  519. }
  520. if (type != TYPE_NONE)
  521. {
  522. dp->arg_index = arg_index;
  523. if (dp->arg_index == ARG_NONE)
  524. {
  525. dp->arg_index = arg_posn++;
  526. if (dp->arg_index == ARG_NONE)
  527. /* arg_posn wrapped around. */
  528. goto error;
  529. }
  530. REGISTER_ARG (dp->arg_index, type);
  531. }
  532. dp->conversion = c;
  533. dp->dir_end = cp;
  534. }
  535. d->count++;
  536. if (d->count >= d_allocated)
  537. {
  538. size_t memory_size;
  539. DIRECTIVE *memory;
  540. d_allocated = xtimes (d_allocated, 2);
  541. memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
  542. if (size_overflow_p (memory_size))
  543. /* Overflow, would lead to out of memory. */
  544. goto out_of_memory;
  545. memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
  546. ? realloc (d->dir, memory_size)
  547. : malloc (memory_size));
  548. if (memory == NULL)
  549. /* Out of memory. */
  550. goto out_of_memory;
  551. if (d->dir == d->direct_alloc_dir)
  552. memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
  553. d->dir = memory;
  554. }
  555. }
  556. #if CHAR_T_ONLY_ASCII
  557. else if (!c_isascii (c))
  558. {
  559. /* Non-ASCII character. Not supported. */
  560. goto error;
  561. }
  562. #endif
  563. }
  564. d->dir[d->count].dir_start = cp;
  565. d->max_width_length = max_width_length;
  566. d->max_precision_length = max_precision_length;
  567. return 0;
  568. error:
  569. if (a->arg != a->direct_alloc_arg)
  570. free (a->arg);
  571. if (d->dir != d->direct_alloc_dir)
  572. free (d->dir);
  573. errno = EINVAL;
  574. return -1;
  575. out_of_memory:
  576. if (a->arg != a->direct_alloc_arg)
  577. free (a->arg);
  578. if (d->dir != d->direct_alloc_dir)
  579. free (d->dir);
  580. errno = ENOMEM;
  581. return -1;
  582. }
  583. #undef PRINTF_PARSE
  584. #undef DIRECTIVES
  585. #undef DIRECTIVE
  586. #undef CHAR_T_ONLY_ASCII
  587. #undef CHAR_T