sfparse.c 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787
  1. /*
  2. * sfparse
  3. *
  4. * Copyright (c) 2023 sfparse contributors
  5. * Copyright (c) 2019 nghttp3 contributors
  6. * Copyright (c) 2015 nghttp2 contributors
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the
  10. * "Software"), to deal in the Software without restriction, including
  11. * without limitation the rights to use, copy, modify, merge, publish,
  12. * distribute, sublicense, and/or sell copies of the Software, and to
  13. * permit persons to whom the Software is furnished to do so, subject to
  14. * the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be
  17. * included in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  23. * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24. * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. #include "sfparse.h"
  28. #include <string.h>
  29. #include <assert.h>
  30. #include <stdlib.h>
  31. #ifdef __AVX2__
  32. # include <immintrin.h>
  33. #endif /* __AVX2__ */
  /*
   * Parser state layout, as implied by the constants below:
   *   bits 0-1 (SFPARSE_STATE_OP_MASK): phase within the current member
   *   bit  2   (SFPARSE_STATE_INNER_LIST): inner-list marker
   *   bits 3-4: DICT / LIST / ITEM selector
   */

  /* Phase values stored under SFPARSE_STATE_OP_MASK. */
  #define SFPARSE_STATE_OP_MASK 0x03u
  #define SFPARSE_STATE_BEFORE 0x00u
  #define SFPARSE_STATE_BEFORE_PARAMS 0x01u
  #define SFPARSE_STATE_PARAMS 0x02u
  #define SFPARSE_STATE_AFTER 0x03u

  /* Inner-list marker bit. */
  #define SFPARSE_STATE_INNER_LIST 0x04u

  /* Structure selector values. */
  #define SFPARSE_STATE_DICT 0x08u
  #define SFPARSE_STATE_LIST 0x10u
  #define SFPARSE_STATE_ITEM 0x18u

  /* State of a parser on which nothing has been parsed yet. */
  #define SFPARSE_STATE_INITIAL 0x00u

  /* Builders that combine a structure selector NAME with a phase. */
  #define SFPARSE_SET_STATE_AFTER(NAME) \
    (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)

  #define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME) \
    (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)

  #define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME) \
    (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)

  /* Dictionary states. */
  #define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
  #define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
  #define SFPARSE_STATE_DICT_INNER_LIST_BEFORE \
    SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)

  /* List states. */
  #define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
  #define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
  #define SFPARSE_STATE_LIST_INNER_LIST_BEFORE \
    SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)

  /* Item states. */
  #define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
  #define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
  #define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE \
    SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)
  /*
   * Character-class helpers for switch statements.  Each macro expands
   * to a run of case labels without the final colon, so it is used as
   * `DIGIT_CASES:` inside a switch body.
   */

  /* '0'..'9' */
  #define DIGIT_CASES \
    case '0': case '1': case '2': case '3': case '4': \
    case '5': case '6': case '7': case '8': case '9'

  /* 'a'..'z' */
  #define LCALPHA_CASES \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
    case 'v': case 'w': case 'x': case 'y': case 'z'

  /* 'A'..'Z' */
  #define UCALPHA_CASES \
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
    case 'V': case 'W': case 'X': case 'Y': case 'Z'

  /* 'A'..'Z' and 'a'..'z' */
  #define ALPHA_CASES \
    UCALPHA_CASES: \
    LCALPHA_CASES

  /* Characters accepted by the token scanner after the first byte. */
  #define TOKEN_CASES \
    case '!': case '#': case '$': case '%': case '&': case '\'': \
    case '*': case '+': case '-': case '.': case '/': \
    DIGIT_CASES: \
    case ':': \
    UCALPHA_CASES: \
    case '^': case '_': case '`': \
    LCALPHA_CASES: \
    case '|': case '~'

  /* Lower-case hexadecimal letters 'a'..'f'. */
  #define LCHEXALPHA_CASES \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f'

  /* Bytes 0x00..0x1f. */
  #define X00_1F_CASES \
    case 0x00: case 0x01: case 0x02: case 0x03: \
    case 0x04: case 0x05: case 0x06: case 0x07: \
    case 0x08: case 0x09: case 0x0a: case 0x0b: \
    case 0x0c: case 0x0d: case 0x0e: case 0x0f: \
    case 0x10: case 0x11: case 0x12: case 0x13: \
    case 0x14: case 0x15: case 0x16: case 0x17: \
    case 0x18: case 0x19: case 0x1a: case 0x1b: \
    case 0x1c: case 0x1d: case 0x1e: case 0x1f

  /* Bytes 0x20..0x21 (space and '!'). */
  #define X20_21_CASES \
    case ' ': case '!'

  /* Bytes 0x23..0x5b ('#' through '['); skips '"' below and '\\' above. */
  #define X23_5B_CASES \
    case '#': case '$': case '%': case '&': case '\'': case '(': case ')': \
    case '*': case '+': case ',': case '-': case '.': case '/': \
    DIGIT_CASES: \
    case ':': case ';': case '<': case '=': case '>': case '?': case '@': \
    UCALPHA_CASES: \
    case '['

  /* Bytes 0x5d..0x7e (']' through '~'). */
  #define X5D_7E_CASES \
    case ']': case '^': case '_': case '`': \
    LCALPHA_CASES: \
    case '{': case '|': case '}': case '~'

  /* Bytes 0x7f..0xff. */
  #define X7F_FF_CASES \
    case 0x7f: \
    case 0x80: case 0x81: case 0x82: case 0x83: \
    case 0x84: case 0x85: case 0x86: case 0x87: \
    case 0x88: case 0x89: case 0x8a: case 0x8b: \
    case 0x8c: case 0x8d: case 0x8e: case 0x8f: \
    case 0x90: case 0x91: case 0x92: case 0x93: \
    case 0x94: case 0x95: case 0x96: case 0x97: \
    case 0x98: case 0x99: case 0x9a: case 0x9b: \
    case 0x9c: case 0x9d: case 0x9e: case 0x9f: \
    case 0xa0: case 0xa1: case 0xa2: case 0xa3: \
    case 0xa4: case 0xa5: case 0xa6: case 0xa7: \
    case 0xa8: case 0xa9: case 0xaa: case 0xab: \
    case 0xac: case 0xad: case 0xae: case 0xaf: \
    case 0xb0: case 0xb1: case 0xb2: case 0xb3: \
    case 0xb4: case 0xb5: case 0xb6: case 0xb7: \
    case 0xb8: case 0xb9: case 0xba: case 0xbb: \
    case 0xbc: case 0xbd: case 0xbe: case 0xbf: \
    case 0xc0: case 0xc1: case 0xc2: case 0xc3: \
    case 0xc4: case 0xc5: case 0xc6: case 0xc7: \
    case 0xc8: case 0xc9: case 0xca: case 0xcb: \
    case 0xcc: case 0xcd: case 0xce: case 0xcf: \
    case 0xd0: case 0xd1: case 0xd2: case 0xd3: \
    case 0xd4: case 0xd5: case 0xd6: case 0xd7: \
    case 0xd8: case 0xd9: case 0xda: case 0xdb: \
    case 0xdc: case 0xdd: case 0xde: case 0xdf: \
    case 0xe0: case 0xe1: case 0xe2: case 0xe3: \
    case 0xe4: case 0xe5: case 0xe6: case 0xe7: \
    case 0xe8: case 0xe9: case 0xea: case 0xeb: \
    case 0xec: case 0xed: case 0xee: case 0xef: \
    case 0xf0: case 0xf1: case 0xf2: case 0xf3: \
    case 0xf4: case 0xf5: case 0xf6: case 0xf7: \
    case 0xf8: case 0xf9: case 0xfa: case 0xfb: \
    case 0xfc: case 0xfd: case 0xfe: case 0xff
  /*
   * is_ws returns 1 if |c| is a space or a horizontal tab, and 0 for
   * any other byte.
   */
  static int is_ws(uint8_t c) {
    return c == ' ' || c == '\t';
  }
  #ifdef __AVX2__
  #  ifdef _MSC_VER
  #    include <intrin.h>

  /*
   * ctz returns the index of the least significant set bit of |v|.
   * |v| must not be 0; the result of _BitScanForward is not checked.
   */
  static int ctz(unsigned int v) {
    unsigned long bit_index;

    _BitScanForward(&bit_index, v);

    return (int)bit_index;
  }
  #  else /* !_MSC_VER */
  /* Other compilers: use the builtin directly. */
  #    define ctz __builtin_ctz
  #  endif /* !_MSC_VER */
  #endif /* __AVX2__ */
  380. static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
  381. static void parser_discard_ows(sfparse_parser *sfp) {
  382. for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
  383. ;
  384. }
  385. static void parser_discard_sp(sfparse_parser *sfp) {
  386. for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
  387. ;
  388. }
  389. static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
  390. sfp->state &= ~SFPARSE_STATE_OP_MASK;
  391. sfp->state |= op;
  392. }
  393. static void parser_unset_inner_list_state(sfparse_parser *sfp) {
  394. sfp->state &= ~SFPARSE_STATE_INNER_LIST;
  395. }
  #ifdef __AVX2__
  /*
   * find_char_key scans [first, last) in 32-byte steps and returns a
   * pointer to the first byte that is not one of '_', '-', '.', '*',
   * '0'..'9' or 'a'..'z'.  It returns |last| if every byte qualifies.
   *
   * The loop ends only when the cursor equals |last|, so last - first
   * must be a multiple of 32.
   */
  static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
    const __m256i underscore = _mm256_set1_epi8('_');
    const __m256i dash = _mm256_set1_epi8('-');
    const __m256i period = _mm256_set1_epi8('.');
    const __m256i asterisk = _mm256_set1_epi8('*');
    /* Exclusive bounds for the signed "greater than" range checks. */
    const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
    const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
    const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
    const __m256i lower_hi = _mm256_set1_epi8('z' + 1);
    const uint8_t *cur;
    __m256i chunk, punct, in_digits, in_lower, accepted;
    uint32_t rejected;

    for (cur = first; cur != last; cur += 32) {
      chunk = _mm256_loadu_si256((const __m256i *)cur);

      punct = _mm256_or_si256(
        _mm256_or_si256(_mm256_cmpeq_epi8(chunk, underscore),
                        _mm256_cmpeq_epi8(chunk, dash)),
        _mm256_or_si256(_mm256_cmpeq_epi8(chunk, period),
                        _mm256_cmpeq_epi8(chunk, asterisk)));
      in_digits = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                                   _mm256_cmpgt_epi8(digit_hi, chunk));
      in_lower = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                                  _mm256_cmpgt_epi8(lower_hi, chunk));
      accepted = _mm256_or_si256(punct, _mm256_or_si256(in_digits, in_lower));

      /* One bit per byte; a set bit marks a byte outside the key set. */
      rejected = ~(uint32_t)_mm256_movemask_epi8(accepted);
      if (rejected != 0) {
        return cur + ctz(rejected);
      }
    }

    return last;
  }
  #endif /* __AVX2__ */
  428. static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
  429. const uint8_t *base;
  430. #ifdef __AVX2__
  431. const uint8_t *last;
  432. #endif /* __AVX2__ */
  433. switch (*sfp->pos) {
  434. case '*':
  435. LCALPHA_CASES:
  436. break;
  437. default:
  438. return SFPARSE_ERR_PARSE;
  439. }
  440. base = sfp->pos++;
  441. #ifdef __AVX2__
  442. if (sfp->end - sfp->pos >= 32) {
  443. last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
  444. sfp->pos = find_char_key(sfp->pos, last);
  445. if (sfp->pos != last) {
  446. goto fin;
  447. }
  448. }
  449. #endif /* __AVX2__ */
  450. for (; !parser_eof(sfp); ++sfp->pos) {
  451. switch (*sfp->pos) {
  452. case '_':
  453. case '-':
  454. case '.':
  455. case '*':
  456. DIGIT_CASES:
  457. LCALPHA_CASES:
  458. continue;
  459. }
  460. break;
  461. }
  462. #ifdef __AVX2__
  463. fin:
  464. #endif /* __AVX2__ */
  465. if (dest) {
  466. dest->base = (uint8_t *)base;
  467. dest->len = (size_t)(sfp->pos - dest->base);
  468. }
  469. return 0;
  470. }
  471. static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
  472. int sign = 1;
  473. int64_t value = 0;
  474. size_t len = 0;
  475. size_t fpos = 0;
  476. if (*sfp->pos == '-') {
  477. ++sfp->pos;
  478. if (parser_eof(sfp)) {
  479. return SFPARSE_ERR_PARSE;
  480. }
  481. sign = -1;
  482. }
  483. assert(!parser_eof(sfp));
  484. for (; !parser_eof(sfp); ++sfp->pos) {
  485. switch (*sfp->pos) {
  486. DIGIT_CASES:
  487. if (++len > 15) {
  488. return SFPARSE_ERR_PARSE;
  489. }
  490. value *= 10;
  491. value += *sfp->pos - '0';
  492. continue;
  493. }
  494. break;
  495. }
  496. if (len == 0) {
  497. return SFPARSE_ERR_PARSE;
  498. }
  499. if (parser_eof(sfp) || *sfp->pos != '.') {
  500. if (dest) {
  501. dest->type = SFPARSE_TYPE_INTEGER;
  502. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  503. dest->integer = value * sign;
  504. }
  505. return 0;
  506. }
  507. /* decimal */
  508. if (len > 12) {
  509. return SFPARSE_ERR_PARSE;
  510. }
  511. fpos = len;
  512. ++sfp->pos;
  513. for (; !parser_eof(sfp); ++sfp->pos) {
  514. switch (*sfp->pos) {
  515. DIGIT_CASES:
  516. if (++len > 15) {
  517. return SFPARSE_ERR_PARSE;
  518. }
  519. value *= 10;
  520. value += *sfp->pos - '0';
  521. continue;
  522. }
  523. break;
  524. }
  525. if (fpos == len || len - fpos > 3) {
  526. return SFPARSE_ERR_PARSE;
  527. }
  528. if (dest) {
  529. dest->type = SFPARSE_TYPE_DECIMAL;
  530. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  531. dest->decimal.numer = value * sign;
  532. switch (len - fpos) {
  533. case 1:
  534. dest->decimal.denom = 10;
  535. break;
  536. case 2:
  537. dest->decimal.denom = 100;
  538. break;
  539. case 3:
  540. dest->decimal.denom = 1000;
  541. break;
  542. }
  543. }
  544. return 0;
  545. }
  546. static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
  547. int rv;
  548. sfparse_value val;
  549. /* The first byte has already been validated by the caller. */
  550. assert('@' == *sfp->pos);
  551. ++sfp->pos;
  552. if (parser_eof(sfp)) {
  553. return SFPARSE_ERR_PARSE;
  554. }
  555. rv = parser_number(sfp, &val);
  556. if (rv != 0) {
  557. return rv;
  558. }
  559. if (val.type != SFPARSE_TYPE_INTEGER) {
  560. return SFPARSE_ERR_PARSE;
  561. }
  562. if (dest) {
  563. *dest = val;
  564. dest->type = SFPARSE_TYPE_DATE;
  565. }
  566. return 0;
  567. }
  #ifdef __AVX2__
  /*
   * find_char_string scans [first, last) in 32-byte steps and returns
   * a pointer to the first byte that needs individual handling inside
   * a quoted string: '\\', '"', 0x7f, or any byte that compares below
   * ' ' as a signed 8-bit value (0x00..0x1f and 0x80..0xff).  It
   * returns |last| if no such byte is found.
   *
   * The loop ends only when the cursor equals |last|, so last - first
   * must be a multiple of 32.
   */
  static const uint8_t *find_char_string(const uint8_t *first,
                                         const uint8_t *last) {
    const __m256i backslash = _mm256_set1_epi8('\\');
    const __m256i dquote = _mm256_set1_epi8('"');
    const __m256i del_byte = _mm256_set1_epi8(0x7f);
    const __m256i space = _mm256_set1_epi8(' ');
    const uint8_t *cur;
    __m256i chunk, below_space, delimiters, special;
    uint32_t hits;

    for (cur = first; cur != last; cur += 32) {
      chunk = _mm256_loadu_si256((const __m256i *)cur);

      below_space = _mm256_cmpgt_epi8(space, chunk);
      delimiters = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, backslash),
                                   _mm256_cmpeq_epi8(chunk, dquote));
      special = _mm256_or_si256(
        _mm256_or_si256(below_space, delimiters),
        _mm256_cmpeq_epi8(chunk, del_byte));

      /* One bit per byte; a set bit marks a byte of interest. */
      hits = (uint32_t)_mm256_movemask_epi8(special);
      if (hits != 0) {
        return cur + ctz(hits);
      }
    }

    return last;
  }
  #endif /* __AVX2__ */
  591. static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
  592. const uint8_t *base;
  593. #ifdef __AVX2__
  594. const uint8_t *last;
  595. #endif /* __AVX2__ */
  596. uint32_t flags = SFPARSE_VALUE_FLAG_NONE;
  597. /* The first byte has already been validated by the caller. */
  598. assert('"' == *sfp->pos);
  599. base = ++sfp->pos;
  600. #ifdef __AVX2__
  601. for (; sfp->end - sfp->pos >= 32; ++sfp->pos) {
  602. last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
  603. sfp->pos = find_char_string(sfp->pos, last);
  604. if (sfp->pos == last) {
  605. break;
  606. }
  607. switch (*sfp->pos) {
  608. case '\\':
  609. ++sfp->pos;
  610. if (parser_eof(sfp)) {
  611. return SFPARSE_ERR_PARSE;
  612. }
  613. switch (*sfp->pos) {
  614. case '"':
  615. case '\\':
  616. flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
  617. break;
  618. default:
  619. return SFPARSE_ERR_PARSE;
  620. }
  621. break;
  622. case '"':
  623. goto fin;
  624. default:
  625. return SFPARSE_ERR_PARSE;
  626. }
  627. }
  628. #endif /* __AVX2__ */
  629. for (; !parser_eof(sfp); ++sfp->pos) {
  630. switch (*sfp->pos) {
  631. X20_21_CASES:
  632. X23_5B_CASES:
  633. X5D_7E_CASES:
  634. break;
  635. case '\\':
  636. ++sfp->pos;
  637. if (parser_eof(sfp)) {
  638. return SFPARSE_ERR_PARSE;
  639. }
  640. switch (*sfp->pos) {
  641. case '"':
  642. case '\\':
  643. flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
  644. break;
  645. default:
  646. return SFPARSE_ERR_PARSE;
  647. }
  648. break;
  649. case '"':
  650. goto fin;
  651. default:
  652. return SFPARSE_ERR_PARSE;
  653. }
  654. }
  655. return SFPARSE_ERR_PARSE;
  656. fin:
  657. if (dest) {
  658. dest->type = SFPARSE_TYPE_STRING;
  659. dest->flags = flags;
  660. dest->vec.len = (size_t)(sfp->pos - base);
  661. dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  662. }
  663. ++sfp->pos;
  664. return 0;
  665. }
  #ifdef __AVX2__
  /*
   * find_char_token scans [first, last) in 32-byte steps and returns a
   * pointer to the first byte outside the accepted set, or |last| if
   * every byte is accepted.  The accepted set is:
   *
   *   '!'..':' except '"', '(', ')' and ','
   *   'A'..'Z'
   *   '^'..'~' except '{' and '}'
   *
   * The loop ends only when the cursor equals |last|, so last - first
   * must be a multiple of 32.
   */
  static const uint8_t *find_char_token(const uint8_t *first,
                                        const uint8_t *last) {
    /* Exclusive bounds for the signed "greater than" range checks. */
    const __m256i low_lo = _mm256_set1_epi8('!' - 1);
    const __m256i low_hi = _mm256_set1_epi8(':' + 1);
    const __m256i upper_lo = _mm256_set1_epi8('A' - 1);
    const __m256i upper_hi = _mm256_set1_epi8('Z' + 1);
    const __m256i high_lo = _mm256_set1_epi8('^' - 1);
    const __m256i high_hi = _mm256_set1_epi8('~' + 1);
    /* Bytes carved out of the ranges above. */
    const __m256i dquote = _mm256_set1_epi8('"');
    const __m256i lparen = _mm256_set1_epi8('(');
    const __m256i rparen = _mm256_set1_epi8(')');
    const __m256i comma = _mm256_set1_epi8(',');
    const __m256i lbrace = _mm256_set1_epi8('{');
    const __m256i rbrace = _mm256_set1_epi8('}');
    const uint8_t *cur;
    __m256i chunk, low_excl, high_excl, in_low, in_upper, in_high, accepted;
    uint32_t rejected;

    for (cur = first; cur != last; cur += 32) {
      chunk = _mm256_loadu_si256((const __m256i *)cur);

      low_excl = _mm256_or_si256(
        _mm256_or_si256(_mm256_cmpeq_epi8(chunk, dquote),
                        _mm256_cmpeq_epi8(chunk, lparen)),
        _mm256_or_si256(_mm256_cmpeq_epi8(chunk, rparen),
                        _mm256_cmpeq_epi8(chunk, comma)));
      in_low = _mm256_andnot_si256(
        low_excl, _mm256_and_si256(_mm256_cmpgt_epi8(chunk, low_lo),
                                   _mm256_cmpgt_epi8(low_hi, chunk)));

      in_upper = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, upper_lo),
                                  _mm256_cmpgt_epi8(upper_hi, chunk));

      high_excl = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, lbrace),
                                  _mm256_cmpeq_epi8(chunk, rbrace));
      in_high = _mm256_andnot_si256(
        high_excl, _mm256_and_si256(_mm256_cmpgt_epi8(chunk, high_lo),
                                    _mm256_cmpgt_epi8(high_hi, chunk)));

      accepted = _mm256_or_si256(in_low, _mm256_or_si256(in_upper, in_high));

      /* One bit per byte; a set bit marks a byte outside the set. */
      rejected = ~(uint32_t)_mm256_movemask_epi8(accepted);
      if (rejected != 0) {
        return cur + ctz(rejected);
      }
    }

    return last;
  }
  #endif /* __AVX2__ */
  715. static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
  716. const uint8_t *base;
  717. #ifdef __AVX2__
  718. const uint8_t *last;
  719. #endif /* __AVX2__ */
  720. /* The first byte has already been validated by the caller. */
  721. base = sfp->pos++;
  722. #ifdef __AVX2__
  723. if (sfp->end - sfp->pos >= 32) {
  724. last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
  725. sfp->pos = find_char_token(sfp->pos, last);
  726. if (sfp->pos != last) {
  727. goto fin;
  728. }
  729. }
  730. #endif /* __AVX2__ */
  731. for (; !parser_eof(sfp); ++sfp->pos) {
  732. switch (*sfp->pos) {
  733. TOKEN_CASES:
  734. continue;
  735. }
  736. break;
  737. }
  738. #ifdef __AVX2__
  739. fin:
  740. #endif /* __AVX2__ */
  741. if (dest) {
  742. dest->type = SFPARSE_TYPE_TOKEN;
  743. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  744. dest->vec.base = (uint8_t *)base;
  745. dest->vec.len = (size_t)(sfp->pos - base);
  746. }
  747. return 0;
  748. }
  #ifdef __AVX2__
  /*
   * find_char_byteseq scans [first, last) in 32-byte steps and returns
   * a pointer to the first byte that is not '+', '/', '0'..'9',
   * 'A'..'Z' or 'a'..'z'.  It returns |last| if every byte qualifies.
   *
   * The loop ends only when the cursor equals |last|, so last - first
   * must be a multiple of 32.
   */
  static const uint8_t *find_char_byteseq(const uint8_t *first,
                                          const uint8_t *last) {
    const __m256i plus = _mm256_set1_epi8('+');
    const __m256i slash = _mm256_set1_epi8('/');
    /* Exclusive bounds for the signed "greater than" range checks. */
    const __m256i digit_lo = _mm256_set1_epi8('0' - 1);
    const __m256i digit_hi = _mm256_set1_epi8('9' + 1);
    const __m256i upper_lo = _mm256_set1_epi8('A' - 1);
    const __m256i upper_hi = _mm256_set1_epi8('Z' + 1);
    const __m256i lower_lo = _mm256_set1_epi8('a' - 1);
    const __m256i lower_hi = _mm256_set1_epi8('z' + 1);
    const uint8_t *cur;
    __m256i chunk, punct, in_digits, in_upper, in_lower, accepted;
    uint32_t rejected;

    for (cur = first; cur != last; cur += 32) {
      chunk = _mm256_loadu_si256((const __m256i *)cur);

      punct = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, plus),
                              _mm256_cmpeq_epi8(chunk, slash));
      in_digits = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, digit_lo),
                                   _mm256_cmpgt_epi8(digit_hi, chunk));
      in_upper = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, upper_lo),
                                  _mm256_cmpgt_epi8(upper_hi, chunk));
      in_lower = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, lower_lo),
                                  _mm256_cmpgt_epi8(lower_hi, chunk));
      accepted = _mm256_or_si256(_mm256_or_si256(punct, in_digits),
                                 _mm256_or_si256(in_upper, in_lower));

      /* One bit per byte; a set bit marks a byte outside the set. */
      rejected = ~(uint32_t)_mm256_movemask_epi8(accepted);
      if (rejected != 0) {
        return cur + ctz(rejected);
      }
    }

    return last;
  }
  #endif /* __AVX2__ */
  783. static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
  784. const uint8_t *base;
  785. #ifdef __AVX2__
  786. const uint8_t *last;
  787. #endif /* __AVX2__ */
  788. /* The first byte has already been validated by the caller. */
  789. assert(':' == *sfp->pos);
  790. base = ++sfp->pos;
  791. #ifdef __AVX2__
  792. if (sfp->end - sfp->pos >= 32) {
  793. last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
  794. sfp->pos = find_char_byteseq(sfp->pos, last);
  795. }
  796. #endif /* __AVX2__ */
  797. for (; !parser_eof(sfp); ++sfp->pos) {
  798. switch (*sfp->pos) {
  799. case '+':
  800. case '/':
  801. DIGIT_CASES:
  802. ALPHA_CASES:
  803. continue;
  804. case '=':
  805. switch ((sfp->pos - base) & 0x3) {
  806. case 0:
  807. case 1:
  808. return SFPARSE_ERR_PARSE;
  809. case 2:
  810. ++sfp->pos;
  811. if (parser_eof(sfp)) {
  812. return SFPARSE_ERR_PARSE;
  813. }
  814. if (*sfp->pos == '=') {
  815. ++sfp->pos;
  816. }
  817. break;
  818. case 3:
  819. ++sfp->pos;
  820. break;
  821. }
  822. if (parser_eof(sfp) || *sfp->pos != ':') {
  823. return SFPARSE_ERR_PARSE;
  824. }
  825. goto fin;
  826. case ':':
  827. if (((sfp->pos - base) & 0x3) == 1) {
  828. return SFPARSE_ERR_PARSE;
  829. }
  830. goto fin;
  831. default:
  832. return SFPARSE_ERR_PARSE;
  833. }
  834. }
  835. return SFPARSE_ERR_PARSE;
  836. fin:
  837. if (dest) {
  838. dest->type = SFPARSE_TYPE_BYTESEQ;
  839. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  840. dest->vec.len = (size_t)(sfp->pos - base);
  841. dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  842. }
  843. ++sfp->pos;
  844. return 0;
  845. }
  846. static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
  847. int b;
  848. /* The first byte has already been validated by the caller. */
  849. assert('?' == *sfp->pos);
  850. ++sfp->pos;
  851. if (parser_eof(sfp)) {
  852. return SFPARSE_ERR_PARSE;
  853. }
  854. switch (*sfp->pos) {
  855. case '0':
  856. b = 0;
  857. break;
  858. case '1':
  859. b = 1;
  860. break;
  861. default:
  862. return SFPARSE_ERR_PARSE;
  863. }
  864. ++sfp->pos;
  865. if (dest) {
  866. dest->type = SFPARSE_TYPE_BOOLEAN;
  867. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  868. dest->boolean = b;
  869. }
  870. return 0;
  871. }
  /*
   * pctdecode decodes two lower-case hexadecimal digits at |*ppos|
   * into one byte.
   *
   * On success it stores the byte in |*pc|, advances |*ppos| by 2 and
   * returns 0.  It returns -1 if either character is not in '0'..'9'
   * or 'a'..'f'; |*pc| is then left untouched, and |*ppos| has been
   * advanced by 1 if the failure was on the second character.
   *
   * No bounds check is done here: the second character is read
   * whenever the first one is valid.
   */
  static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
    uint8_t ch = **ppos;
    uint8_t high;
    uint8_t low;

    if (ch >= '0' && ch <= '9') {
      high = (uint8_t)(ch - '0');
    } else if (ch >= 'a' && ch <= 'f') {
      high = (uint8_t)(ch - 'a' + 10);
    } else {
      return -1;
    }

    ++*ppos;
    ch = **ppos;

    if (ch >= '0' && ch <= '9') {
      low = (uint8_t)(ch - '0');
    } else if (ch >= 'a' && ch <= 'f') {
      low = (uint8_t)(ch - 'a' + 10);
    } else {
      return -1;
    }

    *pc = (uint8_t)((high << 4) | low);
    ++*ppos;

    return 0;
  }
  899. /* Start of utf8 dfa */
  900. /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
  901. * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
  902. *
  903. * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
  904. *
  905. * Permission is hereby granted, free of charge, to any person
  906. * obtaining a copy of this software and associated documentation
  907. * files (the "Software"), to deal in the Software without
  908. * restriction, including without limitation the rights to use, copy,
  909. * modify, merge, publish, distribute, sublicense, and/or sell copies
  910. * of the Software, and to permit persons to whom the Software is
  911. * furnished to do so, subject to the following conditions:
  912. *
  913. * The above copyright notice and this permission notice shall be
  914. * included in all copies or substantial portions of the Software.
  915. *
  916. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  917. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  918. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  919. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  920. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  921. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  922. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  923. * SOFTWARE.
  924. */
/*
 * States of the UTF-8 decoding automaton.  A state value is used
 * directly as an offset into the transition part of utf8d (see
 * utf8_decode).  UTF8_ACCEPT is the start state and the state the
 * automaton must be in when a string ends; UTF8_REJECT is entered on
 * an invalid byte sequence.
 */
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

/* clang-format off */
static const uint8_t utf8d[] = {
  /*
   * The first part of the table (256 entries, indexed by byte value)
   * maps bytes to character classes in order to reduce the size of
   * the transition table and create bitmasks.
   */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /*
   * The second part is a transition table that maps a combination
   * of a state of the automaton and a character class to a state.
   */
  0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};
/* clang-format on */
  952. static void utf8_decode(uint32_t *state, uint8_t byte) {
  953. *state = utf8d[256 + *state + utf8d[byte]];
  954. }
  955. /* End of utf8 dfa */
  956. static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
  957. const uint8_t *base;
  958. uint8_t c;
  959. uint32_t utf8state = UTF8_ACCEPT;
  960. assert('%' == *sfp->pos);
  961. ++sfp->pos;
  962. if (parser_eof(sfp) || *sfp->pos != '"') {
  963. return SFPARSE_ERR_PARSE;
  964. }
  965. base = ++sfp->pos;
  966. for (; !parser_eof(sfp);) {
  967. switch (*sfp->pos) {
  968. X00_1F_CASES:
  969. X7F_FF_CASES:
  970. return SFPARSE_ERR_PARSE;
  971. case '%':
  972. ++sfp->pos;
  973. if (sfp->pos + 2 > sfp->end) {
  974. return SFPARSE_ERR_PARSE;
  975. }
  976. if (pctdecode(&c, &sfp->pos) != 0) {
  977. return SFPARSE_ERR_PARSE;
  978. }
  979. utf8_decode(&utf8state, c);
  980. if (utf8state == UTF8_REJECT) {
  981. return SFPARSE_ERR_PARSE;
  982. }
  983. break;
  984. case '"':
  985. if (utf8state != UTF8_ACCEPT) {
  986. return SFPARSE_ERR_PARSE;
  987. }
  988. if (dest) {
  989. dest->type = SFPARSE_TYPE_DISPSTRING;
  990. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  991. dest->vec.len = (size_t)(sfp->pos - base);
  992. dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  993. }
  994. ++sfp->pos;
  995. return 0;
  996. default:
  997. if (utf8state != UTF8_ACCEPT) {
  998. return SFPARSE_ERR_PARSE;
  999. }
  1000. ++sfp->pos;
  1001. }
  1002. }
  1003. return SFPARSE_ERR_PARSE;
  1004. }
  1005. static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
  1006. switch (*sfp->pos) {
  1007. case '"':
  1008. return parser_string(sfp, dest);
  1009. case '-':
  1010. DIGIT_CASES:
  1011. return parser_number(sfp, dest);
  1012. case '@':
  1013. return parser_date(sfp, dest);
  1014. case ':':
  1015. return parser_byteseq(sfp, dest);
  1016. case '?':
  1017. return parser_boolean(sfp, dest);
  1018. case '*':
  1019. ALPHA_CASES:
  1020. return parser_token(sfp, dest);
  1021. case '%':
  1022. return parser_dispstring(sfp, dest);
  1023. default:
  1024. return SFPARSE_ERR_PARSE;
  1025. }
  1026. }
  1027. static int parser_skip_inner_list(sfparse_parser *sfp);
  1028. int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
  1029. sfparse_value *dest_value) {
  1030. int rv;
  1031. switch (sfp->state & SFPARSE_STATE_OP_MASK) {
  1032. case SFPARSE_STATE_BEFORE:
  1033. rv = parser_skip_inner_list(sfp);
  1034. if (rv != 0) {
  1035. return rv;
  1036. }
  1037. /* fall through */
  1038. case SFPARSE_STATE_BEFORE_PARAMS:
  1039. parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
  1040. break;
  1041. case SFPARSE_STATE_PARAMS:
  1042. break;
  1043. default:
  1044. assert(0);
  1045. abort();
  1046. }
  1047. if (parser_eof(sfp) || *sfp->pos != ';') {
  1048. parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
  1049. return SFPARSE_ERR_EOF;
  1050. }
  1051. ++sfp->pos;
  1052. parser_discard_sp(sfp);
  1053. if (parser_eof(sfp)) {
  1054. return SFPARSE_ERR_PARSE;
  1055. }
  1056. rv = parser_key(sfp, dest_key);
  1057. if (rv != 0) {
  1058. return rv;
  1059. }
  1060. if (parser_eof(sfp) || *sfp->pos != '=') {
  1061. if (dest_value) {
  1062. dest_value->type = SFPARSE_TYPE_BOOLEAN;
  1063. dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
  1064. dest_value->boolean = 1;
  1065. }
  1066. return 0;
  1067. }
  1068. ++sfp->pos;
  1069. if (parser_eof(sfp)) {
  1070. return SFPARSE_ERR_PARSE;
  1071. }
  1072. return parser_bare_item(sfp, dest_value);
  1073. }
  1074. static int parser_skip_params(sfparse_parser *sfp) {
  1075. int rv;
  1076. for (;;) {
  1077. rv = sfparse_parser_param(sfp, NULL, NULL);
  1078. switch (rv) {
  1079. case 0:
  1080. break;
  1081. case SFPARSE_ERR_EOF:
  1082. return 0;
  1083. case SFPARSE_ERR_PARSE:
  1084. return rv;
  1085. default:
  1086. assert(0);
  1087. abort();
  1088. }
  1089. }
  1090. }
/*
 * sfparse_parser_inner_list reads the next item of the inner list the
 * parser is currently inside of and stores it in |dest|.
 * parser_skip_inner_list passes NULL as |dest| to discard the items.
 *
 * It returns 0 if an item was read (its parameters can then be read
 * or are skipped on the next call), SFPARSE_ERR_EOF if the closing ')'
 * was consumed instead, or SFPARSE_ERR_PARSE on malformed input,
 * which includes running out of input before ')'.
 */
int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state & SFPARSE_STATE_OP_MASK) {
  case SFPARSE_STATE_BEFORE:
    /* Presumably the state right after '(' was consumed: leading
       spaces are optional here. */
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }
    break;
  case SFPARSE_STATE_BEFORE_PARAMS:
    /* The parameters of the previously returned item have not been
       read by the caller; skip them. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }
    /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
       another state without reading the state. */
    /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */
    /* fall through */
  case SFPARSE_STATE_AFTER:
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }
    /* After an item, only a space (separating the next item) or the
       closing ')' is acceptable. */
    switch (*sfp->pos) {
    case ' ':
      parser_discard_sp(sfp);
      if (parser_eof(sfp)) {
        return SFPARSE_ERR_PARSE;
      }
      break;
    case ')':
      break;
    default:
      return SFPARSE_ERR_PARSE;
    }
    break;
  default:
    assert(0);
    abort();
  }

  /* Every path reaching this point has verified that input remains. */
  if (*sfp->pos == ')') {
    /* End of the inner list: consume ')' and leave the inner list
       state.  The state set here allows parameters to be read or
       skipped next. */
    ++sfp->pos;
    parser_unset_inner_list_state(sfp);
    parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
    return SFPARSE_ERR_EOF;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
  return 0;
}
  1143. static int parser_skip_inner_list(sfparse_parser *sfp) {
  1144. int rv;
  1145. for (;;) {
  1146. rv = sfparse_parser_inner_list(sfp, NULL);
  1147. switch (rv) {
  1148. case 0:
  1149. break;
  1150. case SFPARSE_ERR_EOF:
  1151. return 0;
  1152. case SFPARSE_ERR_PARSE:
  1153. return rv;
  1154. default:
  1155. assert(0);
  1156. abort();
  1157. }
  1158. }
  1159. }
  1160. static int parser_next_key_or_item(sfparse_parser *sfp) {
  1161. parser_discard_ows(sfp);
  1162. if (parser_eof(sfp)) {
  1163. return SFPARSE_ERR_EOF;
  1164. }
  1165. if (*sfp->pos != ',') {
  1166. return SFPARSE_ERR_PARSE;
  1167. }
  1168. ++sfp->pos;
  1169. parser_discard_ows(sfp);
  1170. if (parser_eof(sfp)) {
  1171. return SFPARSE_ERR_PARSE;
  1172. }
  1173. return 0;
  1174. }
  1175. static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
  1176. int rv;
  1177. if (parser_eof(sfp) || *(sfp->pos) != '=') {
  1178. /* Boolean true */
  1179. if (dest) {
  1180. dest->type = SFPARSE_TYPE_BOOLEAN;
  1181. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  1182. dest->boolean = 1;
  1183. }
  1184. sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
  1185. return 0;
  1186. }
  1187. ++sfp->pos;
  1188. if (parser_eof(sfp)) {
  1189. return SFPARSE_ERR_PARSE;
  1190. }
  1191. if (*sfp->pos == '(') {
  1192. if (dest) {
  1193. dest->type = SFPARSE_TYPE_INNER_LIST;
  1194. dest->flags = SFPARSE_VALUE_FLAG_NONE;
  1195. }
  1196. ++sfp->pos;
  1197. sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;
  1198. return 0;
  1199. }
  1200. rv = parser_bare_item(sfp, dest);
  1201. if (rv != 0) {
  1202. return rv;
  1203. }
  1204. sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
  1205. return 0;
  1206. }
/*
 * sfparse_parser_dict reads the next member of a dictionary: the key
 * is stored via parser_key into |dest_key| and the value via
 * parser_dict_value into |dest_value|.
 *
 * Anything belonging to the previous member that the caller has not
 * read yet (inner list items, parameters) is skipped first.
 *
 * It returns 0 if a member was read, SFPARSE_ERR_EOF if the input is
 * exhausted, or SFPARSE_ERR_PARSE on malformed input.
 */
int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
                        sfparse_value *dest_value) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
    /* The previous value is an inner list that has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_DICT_BEFORE_PARAMS:
    /* Skip parameters of the previous value that remain unread. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_DICT_AFTER:
    /* Move over the "," separator; SFPARSE_ERR_EOF is passed on if the
       input ends here. */
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }
    break;
  case SFPARSE_STATE_INITIAL:
    /* First call: input that is empty or consists of spaces only is
       reported as SFPARSE_ERR_EOF. */
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_EOF;
    }
    break;
  default:
    assert(0);
    abort();
  }

  rv = parser_key(sfp, dest_key);
  if (rv != 0) {
    return rv;
  }

  return parser_dict_value(sfp, dest_value);
}
/*
 * sfparse_parser_list reads the next member of a list and stores it
 * in |dest| if |dest| is not NULL.  For an inner list only the
 * opening '(' is consumed and |dest| gets type
 * SFPARSE_TYPE_INNER_LIST.
 *
 * Anything belonging to the previous member that the caller has not
 * read yet (inner list items, parameters) is skipped first.
 *
 * It returns 0 if a member was read, SFPARSE_ERR_EOF if the input is
 * exhausted, or SFPARSE_ERR_PARSE on malformed input.
 */
int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
    /* The previous member is an inner list that has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_LIST_BEFORE_PARAMS:
    /* Skip parameters of the previous member that remain unread. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_LIST_AFTER:
    /* Move over the "," separator; SFPARSE_ERR_EOF is passed on if the
       input ends here. */
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }
    break;
  case SFPARSE_STATE_INITIAL:
    /* First call: input that is empty or consists of spaces only is
       reported as SFPARSE_ERR_EOF. */
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_EOF;
    }
    break;
  default:
    assert(0);
    abort();
  }

  /* Every path reaching this point has verified that input remains. */
  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SFPARSE_TYPE_INNER_LIST;
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
    }

    ++sfp->pos;
    sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;
    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;
  return 0;
}
/*
 * sfparse_parser_item reads a single item and stores it in |dest| if
 * |dest| is not NULL.  For an inner list only the opening '(' is
 * consumed and |dest| gets type SFPARSE_TYPE_INNER_LIST.
 *
 * The first call returns 0 if the item was read, or SFPARSE_ERR_PARSE
 * if the input is malformed, empty or consists of spaces only.  A
 * call made after the item has been read skips whatever the caller
 * left unread (inner list items, parameters) and then returns
 * SFPARSE_ERR_EOF if nothing but spaces remains, or SFPARSE_ERR_PARSE
 * otherwise.
 */
int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
  int rv;

  switch (sfp->state) {
  case SFPARSE_STATE_INITIAL:
    /* Unlike for lists and dictionaries, there must be an item. */
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }
    break;
  case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
    /* The item is an inner list that has not been read. */
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
    /* Skip parameters of the item that remain unread. */
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }
    /* fall through */
  case SFPARSE_STATE_ITEM_AFTER:
    /* Only trailing spaces may follow the item. */
    parser_discard_sp(sfp);
    if (!parser_eof(sfp)) {
      return SFPARSE_ERR_PARSE;
    }
    return SFPARSE_ERR_EOF;
  default:
    assert(0);
    abort();
  }

  /* Only the SFPARSE_STATE_INITIAL case reaches this point, and it
     has verified that input remains. */
  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SFPARSE_TYPE_INNER_LIST;
      dest->flags = SFPARSE_VALUE_FLAG_NONE;
    }

    ++sfp->pos;
    sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;
    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;
  return 0;
}
  1339. void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
  1340. size_t datalen) {
  1341. if (datalen == 0) {
  1342. sfp->pos = sfp->end = NULL;
  1343. } else {
  1344. sfp->pos = data;
  1345. sfp->end = data + datalen;
  1346. }
  1347. sfp->state = SFPARSE_STATE_INITIAL;
  1348. }
  1349. void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
  1350. const uint8_t *p, *q;
  1351. uint8_t *o;
  1352. size_t len, slen;
  1353. if (src->len == 0) {
  1354. dest->len = 0;
  1355. return;
  1356. }
  1357. o = dest->base;
  1358. p = src->base;
  1359. len = src->len;
  1360. for (;;) {
  1361. q = memchr(p, '\\', len);
  1362. if (q == NULL) {
  1363. memcpy(o, p, len);
  1364. o += len;
  1365. dest->len = (size_t)(o - dest->base);
  1366. return;
  1367. }
  1368. slen = (size_t)(q - p);
  1369. memcpy(o, p, slen);
  1370. o += slen;
  1371. p = q + 1;
  1372. *o++ = *p++;
  1373. len -= slen + 2;
  1374. }
  1375. }
  1376. void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
  1377. static const int index_tbl[] = {
  1378. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1379. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1380. -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
  1381. 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
  1382. 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
  1383. -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
  1384. 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1385. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1386. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1387. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1388. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1389. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1390. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  1391. -1, -1, -1, -1, -1, -1, -1, -1, -1};
  1392. uint8_t *o;
  1393. const uint8_t *p, *end;
  1394. uint32_t n;
  1395. size_t i, left;
  1396. int idx;
  1397. if (src->len == 0) {
  1398. dest->len = 0;
  1399. return;
  1400. }
  1401. o = dest->base;
  1402. p = src->base;
  1403. left = src->len & 0x3;
  1404. if (left == 0 && src->base[src->len - 1] == '=') {
  1405. left = 4;
  1406. }
  1407. end = src->base + src->len - left;
  1408. for (; p != end;) {
  1409. n = 0;
  1410. for (i = 1; i <= 4; ++i, ++p) {
  1411. idx = index_tbl[*p];
  1412. assert(idx != -1);
  1413. n += (uint32_t)(idx << (24 - i * 6));
  1414. }
  1415. *o++ = (uint8_t)(n >> 16);
  1416. *o++ = (n >> 8) & 0xffu;
  1417. *o++ = n & 0xffu;
  1418. }
  1419. switch (left) {
  1420. case 0:
  1421. goto fin;
  1422. case 1:
  1423. assert(0);
  1424. abort();
  1425. case 3:
  1426. if (src->base[src->len - 1] == '=') {
  1427. left = 2;
  1428. }
  1429. break;
  1430. case 4:
  1431. assert('=' == src->base[src->len - 1]);
  1432. if (src->base[src->len - 2] == '=') {
  1433. left = 2;
  1434. } else {
  1435. left = 3;
  1436. }
  1437. break;
  1438. }
  1439. switch (left) {
  1440. case 2:
  1441. *o = (uint8_t)(index_tbl[*p++] << 2);
  1442. *o++ |= (uint8_t)(index_tbl[*p++] >> 4);
  1443. break;
  1444. case 3:
  1445. n = (uint32_t)(index_tbl[*p++] << 10);
  1446. n += (uint32_t)(index_tbl[*p++] << 4);
  1447. n += (uint32_t)(index_tbl[*p++] >> 2);
  1448. *o++ = (n >> 8) & 0xffu;
  1449. *o++ = n & 0xffu;
  1450. break;
  1451. }
  1452. fin:
  1453. dest->len = (size_t)(o - dest->base);
  1454. }
  1455. void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
  1456. const uint8_t *p, *q;
  1457. uint8_t *o;
  1458. size_t len, slen;
  1459. if (src->len == 0) {
  1460. dest->len = 0;
  1461. return;
  1462. }
  1463. o = dest->base;
  1464. p = src->base;
  1465. len = src->len;
  1466. for (;;) {
  1467. q = memchr(p, '%', len);
  1468. if (q == NULL) {
  1469. memcpy(o, p, len);
  1470. o += len;
  1471. dest->len = (size_t)(o - dest->base);
  1472. return;
  1473. }
  1474. slen = (size_t)(q - p);
  1475. memcpy(o, p, slen);
  1476. o += slen;
  1477. p = q + 1;
  1478. pctdecode(o++, &p);
  1479. len -= slen + 3;
  1480. }
  1481. }