sfparse.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. /*
  2. * sfparse
  3. *
  4. * Copyright (c) 2023 sfparse contributors
  5. * Copyright (c) 2019 nghttp3 contributors
  6. * Copyright (c) 2015 nghttp2 contributors
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the
  10. * "Software"), to deal in the Software without restriction, including
  11. * without limitation the rights to use, copy, modify, merge, publish,
  12. * distribute, sublicense, and/or sell copies of the Software, and to
  13. * permit persons to whom the Software is furnished to do so, subject to
  14. * the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be
  17. * included in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  23. * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24. * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. #include "sfparse.h"
  28. #include <string.h>
  29. #include <assert.h>
  30. #include <stdlib.h>
  /*
   * Parser state bits.
   *
   * One of SF_STATE_DICT / SF_STATE_LIST / SF_STATE_ITEM selects the
   * top-level structure, SF_STATE_INNER_LIST is OR-ed in while an inner
   * list is open, and the two low bits (SF_STATE_OP_MASK) hold the
   * current operation.
   */
  #define SF_STATE_DICT 0x08u
  #define SF_STATE_LIST 0x10u
  #define SF_STATE_ITEM 0x18u

  #define SF_STATE_INNER_LIST 0x04u

  #define SF_STATE_BEFORE 0x00u
  #define SF_STATE_BEFORE_PARAMS 0x01u
  #define SF_STATE_PARAMS 0x02u
  #define SF_STATE_AFTER 0x03u

  #define SF_STATE_OP_MASK 0x03u

  /* Builders for the composite states; NAME is DICT, LIST or ITEM. */
  #define SF_SET_STATE_AFTER(NAME) (SF_STATE_##NAME | SF_STATE_AFTER)
  #define SF_SET_STATE_BEFORE_PARAMS(NAME)                                     \
    (SF_STATE_##NAME | SF_STATE_BEFORE_PARAMS)
  #define SF_SET_STATE_INNER_LIST_BEFORE(NAME)                                 \
    (SF_STATE_##NAME | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

  /* Composite states for a dictionary. */
  #define SF_STATE_DICT_AFTER (SF_STATE_DICT | SF_STATE_AFTER)
  #define SF_STATE_DICT_BEFORE_PARAMS (SF_STATE_DICT | SF_STATE_BEFORE_PARAMS)
  #define SF_STATE_DICT_INNER_LIST_BEFORE                                      \
    (SF_STATE_DICT | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

  /* Composite states for a list. */
  #define SF_STATE_LIST_AFTER (SF_STATE_LIST | SF_STATE_AFTER)
  #define SF_STATE_LIST_BEFORE_PARAMS (SF_STATE_LIST | SF_STATE_BEFORE_PARAMS)
  #define SF_STATE_LIST_INNER_LIST_BEFORE                                      \
    (SF_STATE_LIST | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

  /* Composite states for an item. */
  #define SF_STATE_ITEM_AFTER (SF_STATE_ITEM | SF_STATE_AFTER)
  #define SF_STATE_ITEM_BEFORE_PARAMS (SF_STATE_ITEM | SF_STATE_BEFORE_PARAMS)
  #define SF_STATE_ITEM_INNER_LIST_BEFORE                                      \
    (SF_STATE_ITEM | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

  /* State of a freshly initialized parser. */
  #define SF_STATE_INITIAL 0x00u

  /*
   * Case-label lists for use inside a switch.  Each macro expands to a
   * run of case labels WITHOUT the trailing colon of the last label, so
   * it is written as `DIGIT_CASES:` at the point of use.
   */
  #define DIGIT_CASES                                                          \
    case '0': case '1': case '2': case '3': case '4':                          \
    case '5': case '6': case '7': case '8': case '9'

  #define LCALPHA_CASES                                                        \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':      \
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':      \
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':      \
    case 'v': case 'w': case 'x': case 'y': case 'z'

  #define UCALPHA_CASES                                                        \
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':      \
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':      \
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':      \
    case 'V': case 'W': case 'X': case 'Y': case 'Z'

  #define ALPHA_CASES                                                          \
    UCALPHA_CASES:                                                             \
    LCALPHA_CASES

  /* ' ' and '!' */
  #define X20_21_CASES                                                         \
    case ' ': case '!'

  /* '#' through '[' */
  #define X23_5B_CASES                                                         \
    case '#': case '$': case '%': case '&': case '\'': case '(': case ')':     \
    case '*': case '+': case ',': case '-': case '.': case '/':                \
    DIGIT_CASES:                                                               \
    case ':': case ';': case '<': case '=': case '>': case '?': case '@':      \
    UCALPHA_CASES:                                                             \
    case '['

  /* ']' through '~' */
  #define X5D_7E_CASES                                                         \
    case ']': case '^': case '_': case '`':                                    \
    LCALPHA_CASES:                                                             \
    case '{': case '|': case '}': case '~'
  /* Returns 1 when |c| is a space or a horizontal tab, 0 otherwise. */
  static int is_ws(uint8_t c) {
    return c == ' ' || c == '\t';
  }
  169. static int parser_eof(sf_parser *sfp) { return sfp->pos == sfp->end; }
  170. static void parser_discard_ows(sf_parser *sfp) {
  171. for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
  172. ;
  173. }
  174. static void parser_discard_sp(sf_parser *sfp) {
  175. for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
  176. ;
  177. }
  178. static void parser_set_op_state(sf_parser *sfp, uint32_t op) {
  179. sfp->state &= ~SF_STATE_OP_MASK;
  180. sfp->state |= op;
  181. }
  182. static void parser_unset_inner_list_state(sf_parser *sfp) {
  183. sfp->state &= ~SF_STATE_INNER_LIST;
  184. }
  185. static int parser_key(sf_parser *sfp, sf_vec *dest) {
  186. const uint8_t *base;
  187. switch (*sfp->pos) {
  188. case '*':
  189. LCALPHA_CASES:
  190. break;
  191. default:
  192. return SF_ERR_PARSE_ERROR;
  193. }
  194. base = sfp->pos++;
  195. for (; !parser_eof(sfp); ++sfp->pos) {
  196. switch (*sfp->pos) {
  197. case '_':
  198. case '-':
  199. case '.':
  200. case '*':
  201. DIGIT_CASES:
  202. LCALPHA_CASES:
  203. continue;
  204. }
  205. break;
  206. }
  207. if (dest) {
  208. dest->base = (uint8_t *)base;
  209. dest->len = (size_t)(sfp->pos - dest->base);
  210. }
  211. return 0;
  212. }
  213. static int parser_number(sf_parser *sfp, sf_value *dest) {
  214. int sign = 1;
  215. int64_t value = 0;
  216. size_t len = 0;
  217. size_t fpos = 0;
  218. if (*sfp->pos == '-') {
  219. ++sfp->pos;
  220. if (parser_eof(sfp)) {
  221. return SF_ERR_PARSE_ERROR;
  222. }
  223. sign = -1;
  224. }
  225. assert(!parser_eof(sfp));
  226. for (; !parser_eof(sfp); ++sfp->pos) {
  227. switch (*sfp->pos) {
  228. DIGIT_CASES:
  229. if (++len > 15) {
  230. return SF_ERR_PARSE_ERROR;
  231. }
  232. value *= 10;
  233. value += *sfp->pos - '0';
  234. continue;
  235. }
  236. break;
  237. }
  238. if (len == 0) {
  239. return SF_ERR_PARSE_ERROR;
  240. }
  241. if (parser_eof(sfp) || *sfp->pos != '.') {
  242. if (dest) {
  243. dest->type = SF_TYPE_INTEGER;
  244. dest->flags = SF_VALUE_FLAG_NONE;
  245. dest->integer = value * sign;
  246. }
  247. return 0;
  248. }
  249. /* decimal */
  250. if (len > 12) {
  251. return SF_ERR_PARSE_ERROR;
  252. }
  253. fpos = len;
  254. ++sfp->pos;
  255. for (; !parser_eof(sfp); ++sfp->pos) {
  256. switch (*sfp->pos) {
  257. DIGIT_CASES:
  258. if (++len > 15) {
  259. return SF_ERR_PARSE_ERROR;
  260. }
  261. value *= 10;
  262. value += *sfp->pos - '0';
  263. continue;
  264. }
  265. break;
  266. }
  267. if (fpos == len || len - fpos > 3) {
  268. return SF_ERR_PARSE_ERROR;
  269. }
  270. if (dest) {
  271. dest->type = SF_TYPE_DECIMAL;
  272. dest->flags = SF_VALUE_FLAG_NONE;
  273. dest->decimal.numer = value * sign;
  274. switch (len - fpos) {
  275. case 1:
  276. dest->decimal.denom = 10;
  277. break;
  278. case 2:
  279. dest->decimal.denom = 100;
  280. break;
  281. case 3:
  282. dest->decimal.denom = 1000;
  283. break;
  284. }
  285. }
  286. return 0;
  287. }
  288. static int parser_date(sf_parser *sfp, sf_value *dest) {
  289. int rv;
  290. sf_value val;
  291. /* The first byte has already been validated by the caller. */
  292. assert('@' == *sfp->pos);
  293. ++sfp->pos;
  294. if (parser_eof(sfp)) {
  295. return SF_ERR_PARSE_ERROR;
  296. }
  297. rv = parser_number(sfp, &val);
  298. if (rv != 0) {
  299. return rv;
  300. }
  301. if (val.type != SF_TYPE_INTEGER) {
  302. return SF_ERR_PARSE_ERROR;
  303. }
  304. if (dest) {
  305. *dest = val;
  306. dest->type = SF_TYPE_DATE;
  307. }
  308. return 0;
  309. }
  310. static int parser_string(sf_parser *sfp, sf_value *dest) {
  311. const uint8_t *base;
  312. uint32_t flags = SF_VALUE_FLAG_NONE;
  313. /* The first byte has already been validated by the caller. */
  314. assert('"' == *sfp->pos);
  315. base = ++sfp->pos;
  316. for (; !parser_eof(sfp); ++sfp->pos) {
  317. switch (*sfp->pos) {
  318. X20_21_CASES:
  319. X23_5B_CASES:
  320. X5D_7E_CASES:
  321. break;
  322. case '\\':
  323. ++sfp->pos;
  324. if (parser_eof(sfp)) {
  325. return SF_ERR_PARSE_ERROR;
  326. }
  327. switch (*sfp->pos) {
  328. case '"':
  329. case '\\':
  330. flags = SF_VALUE_FLAG_ESCAPED_STRING;
  331. break;
  332. default:
  333. return SF_ERR_PARSE_ERROR;
  334. }
  335. break;
  336. case '"':
  337. if (dest) {
  338. dest->type = SF_TYPE_STRING;
  339. dest->flags = flags;
  340. dest->vec.len = (size_t)(sfp->pos - base);
  341. dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  342. }
  343. ++sfp->pos;
  344. return 0;
  345. default:
  346. return SF_ERR_PARSE_ERROR;
  347. }
  348. }
  349. return SF_ERR_PARSE_ERROR;
  350. }
  351. static int parser_token(sf_parser *sfp, sf_value *dest) {
  352. const uint8_t *base;
  353. /* The first byte has already been validated by the caller. */
  354. base = sfp->pos++;
  355. for (; !parser_eof(sfp); ++sfp->pos) {
  356. switch (*sfp->pos) {
  357. case '!':
  358. case '#':
  359. case '$':
  360. case '%':
  361. case '&':
  362. case '\'':
  363. case '*':
  364. case '+':
  365. case '-':
  366. case '.':
  367. case '^':
  368. case '_':
  369. case '`':
  370. case '|':
  371. case '~':
  372. case ':':
  373. case '/':
  374. DIGIT_CASES:
  375. ALPHA_CASES:
  376. continue;
  377. }
  378. break;
  379. }
  380. if (dest) {
  381. dest->type = SF_TYPE_TOKEN;
  382. dest->flags = SF_VALUE_FLAG_NONE;
  383. dest->vec.base = (uint8_t *)base;
  384. dest->vec.len = (size_t)(sfp->pos - base);
  385. }
  386. return 0;
  387. }
  388. static int parser_byteseq(sf_parser *sfp, sf_value *dest) {
  389. const uint8_t *base;
  390. /* The first byte has already been validated by the caller. */
  391. assert(':' == *sfp->pos);
  392. base = ++sfp->pos;
  393. for (; !parser_eof(sfp); ++sfp->pos) {
  394. switch (*sfp->pos) {
  395. case '+':
  396. case '/':
  397. DIGIT_CASES:
  398. ALPHA_CASES:
  399. continue;
  400. case '=':
  401. switch ((sfp->pos - base) & 0x3) {
  402. case 0:
  403. case 1:
  404. return SF_ERR_PARSE_ERROR;
  405. case 2:
  406. switch (*(sfp->pos - 1)) {
  407. case 'A':
  408. case 'Q':
  409. case 'g':
  410. case 'w':
  411. break;
  412. default:
  413. return SF_ERR_PARSE_ERROR;
  414. }
  415. ++sfp->pos;
  416. if (parser_eof(sfp) || *sfp->pos != '=') {
  417. return SF_ERR_PARSE_ERROR;
  418. }
  419. break;
  420. case 3:
  421. switch (*(sfp->pos - 1)) {
  422. case 'A':
  423. case 'E':
  424. case 'I':
  425. case 'M':
  426. case 'Q':
  427. case 'U':
  428. case 'Y':
  429. case 'c':
  430. case 'g':
  431. case 'k':
  432. case 'o':
  433. case 's':
  434. case 'w':
  435. case '0':
  436. case '4':
  437. case '8':
  438. break;
  439. default:
  440. return SF_ERR_PARSE_ERROR;
  441. }
  442. break;
  443. }
  444. ++sfp->pos;
  445. if (parser_eof(sfp) || *sfp->pos != ':') {
  446. return SF_ERR_PARSE_ERROR;
  447. }
  448. goto fin;
  449. case ':':
  450. if ((sfp->pos - base) & 0x3) {
  451. return SF_ERR_PARSE_ERROR;
  452. }
  453. goto fin;
  454. default:
  455. return SF_ERR_PARSE_ERROR;
  456. }
  457. }
  458. return SF_ERR_PARSE_ERROR;
  459. fin:
  460. if (dest) {
  461. dest->type = SF_TYPE_BYTESEQ;
  462. dest->flags = SF_VALUE_FLAG_NONE;
  463. dest->vec.len = (size_t)(sfp->pos - base);
  464. dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  465. }
  466. ++sfp->pos;
  467. return 0;
  468. }
  469. static int parser_boolean(sf_parser *sfp, sf_value *dest) {
  470. int b;
  471. /* The first byte has already been validated by the caller. */
  472. assert('?' == *sfp->pos);
  473. ++sfp->pos;
  474. if (parser_eof(sfp)) {
  475. return SF_ERR_PARSE_ERROR;
  476. }
  477. switch (*sfp->pos) {
  478. case '0':
  479. b = 0;
  480. break;
  481. case '1':
  482. b = 1;
  483. break;
  484. default:
  485. return SF_ERR_PARSE_ERROR;
  486. }
  487. ++sfp->pos;
  488. if (dest) {
  489. dest->type = SF_TYPE_BOOLEAN;
  490. dest->flags = SF_VALUE_FLAG_NONE;
  491. dest->boolean = b;
  492. }
  493. return 0;
  494. }
  495. static int parser_bare_item(sf_parser *sfp, sf_value *dest) {
  496. switch (*sfp->pos) {
  497. case '"':
  498. return parser_string(sfp, dest);
  499. case '-':
  500. DIGIT_CASES:
  501. return parser_number(sfp, dest);
  502. case '@':
  503. return parser_date(sfp, dest);
  504. case ':':
  505. return parser_byteseq(sfp, dest);
  506. case '?':
  507. return parser_boolean(sfp, dest);
  508. case '*':
  509. ALPHA_CASES:
  510. return parser_token(sfp, dest);
  511. default:
  512. return SF_ERR_PARSE_ERROR;
  513. }
  514. }
  515. static int parser_skip_inner_list(sf_parser *sfp);
  516. int sf_parser_param(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) {
  517. int rv;
  518. switch (sfp->state & SF_STATE_OP_MASK) {
  519. case SF_STATE_BEFORE:
  520. rv = parser_skip_inner_list(sfp);
  521. if (rv != 0) {
  522. return rv;
  523. }
  524. /* fall through */
  525. case SF_STATE_BEFORE_PARAMS:
  526. parser_set_op_state(sfp, SF_STATE_PARAMS);
  527. break;
  528. case SF_STATE_PARAMS:
  529. break;
  530. default:
  531. assert(0);
  532. abort();
  533. }
  534. if (parser_eof(sfp) || *sfp->pos != ';') {
  535. parser_set_op_state(sfp, SF_STATE_AFTER);
  536. return SF_ERR_EOF;
  537. }
  538. ++sfp->pos;
  539. parser_discard_sp(sfp);
  540. if (parser_eof(sfp)) {
  541. return SF_ERR_PARSE_ERROR;
  542. }
  543. rv = parser_key(sfp, dest_key);
  544. if (rv != 0) {
  545. return rv;
  546. }
  547. if (parser_eof(sfp) || *sfp->pos != '=') {
  548. if (dest_value) {
  549. dest_value->type = SF_TYPE_BOOLEAN;
  550. dest_value->flags = SF_VALUE_FLAG_NONE;
  551. dest_value->boolean = 1;
  552. }
  553. return 0;
  554. }
  555. ++sfp->pos;
  556. if (parser_eof(sfp)) {
  557. return SF_ERR_PARSE_ERROR;
  558. }
  559. return parser_bare_item(sfp, dest_value);
  560. }
  561. static int parser_skip_params(sf_parser *sfp) {
  562. int rv;
  563. for (;;) {
  564. rv = sf_parser_param(sfp, NULL, NULL);
  565. switch (rv) {
  566. case 0:
  567. break;
  568. case SF_ERR_EOF:
  569. return 0;
  570. case SF_ERR_PARSE_ERROR:
  571. return rv;
  572. default:
  573. assert(0);
  574. abort();
  575. }
  576. }
  577. }
  578. int sf_parser_inner_list(sf_parser *sfp, sf_value *dest) {
  579. int rv;
  580. switch (sfp->state & SF_STATE_OP_MASK) {
  581. case SF_STATE_BEFORE:
  582. parser_discard_sp(sfp);
  583. if (parser_eof(sfp)) {
  584. return SF_ERR_PARSE_ERROR;
  585. }
  586. break;
  587. case SF_STATE_BEFORE_PARAMS:
  588. rv = parser_skip_params(sfp);
  589. if (rv != 0) {
  590. return rv;
  591. }
  592. /* Technically, we are entering SF_STATE_AFTER, but we will set
  593. another state without reading the state. */
  594. /* parser_set_op_state(sfp, SF_STATE_AFTER); */
  595. /* fall through */
  596. case SF_STATE_AFTER:
  597. if (parser_eof(sfp)) {
  598. return SF_ERR_PARSE_ERROR;
  599. }
  600. switch (*sfp->pos) {
  601. case ' ':
  602. parser_discard_sp(sfp);
  603. if (parser_eof(sfp)) {
  604. return SF_ERR_PARSE_ERROR;
  605. }
  606. break;
  607. case ')':
  608. break;
  609. default:
  610. return SF_ERR_PARSE_ERROR;
  611. }
  612. break;
  613. default:
  614. assert(0);
  615. abort();
  616. }
  617. if (*sfp->pos == ')') {
  618. ++sfp->pos;
  619. parser_unset_inner_list_state(sfp);
  620. parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS);
  621. return SF_ERR_EOF;
  622. }
  623. rv = parser_bare_item(sfp, dest);
  624. if (rv != 0) {
  625. return rv;
  626. }
  627. parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS);
  628. return 0;
  629. }
  630. static int parser_skip_inner_list(sf_parser *sfp) {
  631. int rv;
  632. for (;;) {
  633. rv = sf_parser_inner_list(sfp, NULL);
  634. switch (rv) {
  635. case 0:
  636. break;
  637. case SF_ERR_EOF:
  638. return 0;
  639. case SF_ERR_PARSE_ERROR:
  640. return rv;
  641. default:
  642. assert(0);
  643. abort();
  644. }
  645. }
  646. }
  647. static int parser_next_key_or_item(sf_parser *sfp) {
  648. parser_discard_ows(sfp);
  649. if (parser_eof(sfp)) {
  650. return SF_ERR_EOF;
  651. }
  652. if (*sfp->pos != ',') {
  653. return SF_ERR_PARSE_ERROR;
  654. }
  655. ++sfp->pos;
  656. parser_discard_ows(sfp);
  657. if (parser_eof(sfp)) {
  658. return SF_ERR_PARSE_ERROR;
  659. }
  660. return 0;
  661. }
  662. static int parser_dict_value(sf_parser *sfp, sf_value *dest) {
  663. int rv;
  664. if (parser_eof(sfp) || *(sfp->pos) != '=') {
  665. /* Boolean true */
  666. if (dest) {
  667. dest->type = SF_TYPE_BOOLEAN;
  668. dest->flags = SF_VALUE_FLAG_NONE;
  669. dest->boolean = 1;
  670. }
  671. sfp->state = SF_STATE_DICT_BEFORE_PARAMS;
  672. return 0;
  673. }
  674. ++sfp->pos;
  675. if (parser_eof(sfp)) {
  676. return SF_ERR_PARSE_ERROR;
  677. }
  678. if (*sfp->pos == '(') {
  679. if (dest) {
  680. dest->type = SF_TYPE_INNER_LIST;
  681. dest->flags = SF_VALUE_FLAG_NONE;
  682. }
  683. ++sfp->pos;
  684. sfp->state = SF_STATE_DICT_INNER_LIST_BEFORE;
  685. return 0;
  686. }
  687. rv = parser_bare_item(sfp, dest);
  688. if (rv != 0) {
  689. return rv;
  690. }
  691. sfp->state = SF_STATE_DICT_BEFORE_PARAMS;
  692. return 0;
  693. }
  694. int sf_parser_dict(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) {
  695. int rv;
  696. switch (sfp->state) {
  697. case SF_STATE_DICT_INNER_LIST_BEFORE:
  698. rv = parser_skip_inner_list(sfp);
  699. if (rv != 0) {
  700. return rv;
  701. }
  702. /* fall through */
  703. case SF_STATE_DICT_BEFORE_PARAMS:
  704. rv = parser_skip_params(sfp);
  705. if (rv != 0) {
  706. return rv;
  707. }
  708. /* fall through */
  709. case SF_STATE_DICT_AFTER:
  710. rv = parser_next_key_or_item(sfp);
  711. if (rv != 0) {
  712. return rv;
  713. }
  714. break;
  715. case SF_STATE_INITIAL:
  716. parser_discard_sp(sfp);
  717. if (parser_eof(sfp)) {
  718. return SF_ERR_EOF;
  719. }
  720. break;
  721. default:
  722. assert(0);
  723. abort();
  724. }
  725. rv = parser_key(sfp, dest_key);
  726. if (rv != 0) {
  727. return rv;
  728. }
  729. return parser_dict_value(sfp, dest_value);
  730. }
  731. int sf_parser_list(sf_parser *sfp, sf_value *dest) {
  732. int rv;
  733. switch (sfp->state) {
  734. case SF_STATE_LIST_INNER_LIST_BEFORE:
  735. rv = parser_skip_inner_list(sfp);
  736. if (rv != 0) {
  737. return rv;
  738. }
  739. /* fall through */
  740. case SF_STATE_LIST_BEFORE_PARAMS:
  741. rv = parser_skip_params(sfp);
  742. if (rv != 0) {
  743. return rv;
  744. }
  745. /* fall through */
  746. case SF_STATE_LIST_AFTER:
  747. rv = parser_next_key_or_item(sfp);
  748. if (rv != 0) {
  749. return rv;
  750. }
  751. break;
  752. case SF_STATE_INITIAL:
  753. parser_discard_sp(sfp);
  754. if (parser_eof(sfp)) {
  755. return SF_ERR_EOF;
  756. }
  757. break;
  758. default:
  759. assert(0);
  760. abort();
  761. }
  762. if (*sfp->pos == '(') {
  763. if (dest) {
  764. dest->type = SF_TYPE_INNER_LIST;
  765. dest->flags = SF_VALUE_FLAG_NONE;
  766. }
  767. ++sfp->pos;
  768. sfp->state = SF_STATE_LIST_INNER_LIST_BEFORE;
  769. return 0;
  770. }
  771. rv = parser_bare_item(sfp, dest);
  772. if (rv != 0) {
  773. return rv;
  774. }
  775. sfp->state = SF_STATE_LIST_BEFORE_PARAMS;
  776. return 0;
  777. }
  778. int sf_parser_item(sf_parser *sfp, sf_value *dest) {
  779. int rv;
  780. switch (sfp->state) {
  781. case SF_STATE_INITIAL:
  782. parser_discard_sp(sfp);
  783. if (parser_eof(sfp)) {
  784. return SF_ERR_PARSE_ERROR;
  785. }
  786. break;
  787. case SF_STATE_ITEM_INNER_LIST_BEFORE:
  788. rv = parser_skip_inner_list(sfp);
  789. if (rv != 0) {
  790. return rv;
  791. }
  792. /* fall through */
  793. case SF_STATE_ITEM_BEFORE_PARAMS:
  794. rv = parser_skip_params(sfp);
  795. if (rv != 0) {
  796. return rv;
  797. }
  798. /* fall through */
  799. case SF_STATE_ITEM_AFTER:
  800. parser_discard_sp(sfp);
  801. if (!parser_eof(sfp)) {
  802. return SF_ERR_PARSE_ERROR;
  803. }
  804. return SF_ERR_EOF;
  805. default:
  806. assert(0);
  807. abort();
  808. }
  809. if (*sfp->pos == '(') {
  810. if (dest) {
  811. dest->type = SF_TYPE_INNER_LIST;
  812. dest->flags = SF_VALUE_FLAG_NONE;
  813. }
  814. ++sfp->pos;
  815. sfp->state = SF_STATE_ITEM_INNER_LIST_BEFORE;
  816. return 0;
  817. }
  818. rv = parser_bare_item(sfp, dest);
  819. if (rv != 0) {
  820. return rv;
  821. }
  822. sfp->state = SF_STATE_ITEM_BEFORE_PARAMS;
  823. return 0;
  824. }
  825. void sf_parser_init(sf_parser *sfp, const uint8_t *data, size_t datalen) {
  826. if (datalen == 0) {
  827. sfp->pos = sfp->end = NULL;
  828. } else {
  829. sfp->pos = data;
  830. sfp->end = data + datalen;
  831. }
  832. sfp->state = SF_STATE_INITIAL;
  833. }
  834. void sf_unescape(sf_vec *dest, const sf_vec *src) {
  835. const uint8_t *p, *q;
  836. uint8_t *o;
  837. size_t len, slen;
  838. if (src->len == 0) {
  839. *dest = *src;
  840. return;
  841. }
  842. o = dest->base;
  843. p = src->base;
  844. len = src->len;
  845. for (;;) {
  846. q = memchr(p, '\\', len);
  847. if (q == NULL) {
  848. if (len == src->len) {
  849. *dest = *src;
  850. return;
  851. }
  852. memcpy(o, p, len);
  853. o += len;
  854. break;
  855. }
  856. slen = (size_t)(q - p);
  857. memcpy(o, p, slen);
  858. o += slen;
  859. p = q + 1;
  860. *o++ = *p++;
  861. len -= slen + 2;
  862. }
  863. dest->len = (size_t)(o - dest->base);
  864. }
  865. void sf_base64decode(sf_vec *dest, const sf_vec *src) {
  866. static const int index_tbl[] = {
  867. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  868. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  869. -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57,
  870. 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6,
  871. 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
  872. 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
  873. 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1,
  874. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  875. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  876. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  877. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  878. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  879. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  880. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  881. -1, -1, -1, -1};
  882. uint8_t *o;
  883. const uint8_t *p, *end;
  884. uint32_t n;
  885. size_t i;
  886. int idx;
  887. assert((src->len & 0x3) == 0);
  888. if (src->len == 0) {
  889. *dest = *src;
  890. return;
  891. }
  892. o = dest->base;
  893. p = src->base;
  894. end = src->base + src->len;
  895. for (; p != end;) {
  896. n = 0;
  897. for (i = 1; i <= 4; ++i, ++p) {
  898. idx = index_tbl[*p];
  899. if (idx == -1) {
  900. assert(i > 2);
  901. if (i == 3) {
  902. assert(*p == '=' && *(p + 1) == '=' && p + 2 == end);
  903. *o++ = (uint8_t)(n >> 16);
  904. goto fin;
  905. }
  906. assert(*p == '=' && p + 1 == end);
  907. *o++ = (uint8_t)(n >> 16);
  908. *o++ = (n >> 8) & 0xffu;
  909. goto fin;
  910. }
  911. n += (uint32_t)(idx << (24 - i * 6));
  912. }
  913. *o++ = (uint8_t)(n >> 16);
  914. *o++ = (n >> 8) & 0xffu;
  915. *o++ = n & 0xffu;
  916. }
  917. fin:
  918. dest->len = (size_t)(o - dest->base);
  919. }