12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787 |
- /*
- * sfparse
- *
- * Copyright (c) 2023 sfparse contributors
- * Copyright (c) 2019 nghttp3 contributors
- * Copyright (c) 2015 nghttp2 contributors
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
- #include "sfparse.h"
- #include <string.h>
- #include <assert.h>
- #include <stdlib.h>
- #ifdef __AVX2__
- # include <immintrin.h>
- #endif /* __AVX2__ */
/*
 * Parser state encoding.
 *
 * The state word is a bitwise OR of three fields:
 *
 *  - bits 3-4 (0x18): the structure kind, one of DICT, LIST or ITEM.
 *    SFPARSE_STATE_INITIAL has none of these bits set.
 *  - bit 2 (0x04): the inner-list flag.
 *  - bits 0-1 (0x03): the operation phase (BEFORE, BEFORE_PARAMS,
 *    PARAMS or AFTER), selected with SFPARSE_STATE_OP_MASK.
 */
#define SFPARSE_STATE_DICT 0x08u
#define SFPARSE_STATE_LIST 0x10u
#define SFPARSE_STATE_ITEM 0x18u
#define SFPARSE_STATE_INNER_LIST 0x04u
#define SFPARSE_STATE_BEFORE 0x00u
#define SFPARSE_STATE_BEFORE_PARAMS 0x01u
#define SFPARSE_STATE_PARAMS 0x02u
#define SFPARSE_STATE_AFTER 0x03u
#define SFPARSE_STATE_OP_MASK 0x03u
/* Combines the structure kind |NAME| with the AFTER phase. */
#define SFPARSE_SET_STATE_AFTER(NAME) \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_AFTER)
/* Combines the structure kind |NAME| with the BEFORE_PARAMS phase. */
#define SFPARSE_SET_STATE_BEFORE_PARAMS(NAME) \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_BEFORE_PARAMS)
/* Combines the structure kind |NAME| with the inner-list flag and the
   BEFORE phase. */
#define SFPARSE_SET_STATE_INNER_LIST_BEFORE(NAME) \
  (SFPARSE_STATE_##NAME | SFPARSE_STATE_INNER_LIST | SFPARSE_STATE_BEFORE)
/* Precomputed combinations for each structure kind. */
#define SFPARSE_STATE_DICT_AFTER SFPARSE_SET_STATE_AFTER(DICT)
#define SFPARSE_STATE_DICT_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(DICT)
#define SFPARSE_STATE_DICT_INNER_LIST_BEFORE \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(DICT)
#define SFPARSE_STATE_LIST_AFTER SFPARSE_SET_STATE_AFTER(LIST)
#define SFPARSE_STATE_LIST_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(LIST)
#define SFPARSE_STATE_LIST_INNER_LIST_BEFORE \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(LIST)
#define SFPARSE_STATE_ITEM_AFTER SFPARSE_SET_STATE_AFTER(ITEM)
#define SFPARSE_STATE_ITEM_BEFORE_PARAMS SFPARSE_SET_STATE_BEFORE_PARAMS(ITEM)
#define SFPARSE_STATE_ITEM_INNER_LIST_BEFORE \
  SFPARSE_SET_STATE_INNER_LIST_BEFORE(ITEM)
/* State value with no structure kind, flag or phase bits set. */
#define SFPARSE_STATE_INITIAL 0x00u
/*
 * Character-class helpers for switch statements.  Each *_CASES macro
 * expands to a run of case labels whose last label has no trailing
 * colon, so a use site is written as `DIGIT_CASES:`.
 */

/* The decimal digits '0' to '9'. */
#define DIGIT_CASES \
  case '0': \
  case '1': \
  case '2': \
  case '3': \
  case '4': \
  case '5': \
  case '6': \
  case '7': \
  case '8': \
  case '9'
/* The lowercase letters 'a' to 'z'. */
#define LCALPHA_CASES \
  case 'a': \
  case 'b': \
  case 'c': \
  case 'd': \
  case 'e': \
  case 'f': \
  case 'g': \
  case 'h': \
  case 'i': \
  case 'j': \
  case 'k': \
  case 'l': \
  case 'm': \
  case 'n': \
  case 'o': \
  case 'p': \
  case 'q': \
  case 'r': \
  case 's': \
  case 't': \
  case 'u': \
  case 'v': \
  case 'w': \
  case 'x': \
  case 'y': \
  case 'z'
/* The uppercase letters 'A' to 'Z'. */
#define UCALPHA_CASES \
  case 'A': \
  case 'B': \
  case 'C': \
  case 'D': \
  case 'E': \
  case 'F': \
  case 'G': \
  case 'H': \
  case 'I': \
  case 'J': \
  case 'K': \
  case 'L': \
  case 'M': \
  case 'N': \
  case 'O': \
  case 'P': \
  case 'Q': \
  case 'R': \
  case 'S': \
  case 'T': \
  case 'U': \
  case 'V': \
  case 'W': \
  case 'X': \
  case 'Y': \
  case 'Z'
/* Uppercase and lowercase letters. */
#define ALPHA_CASES \
  UCALPHA_CASES: \
  LCALPHA_CASES
/* The bytes accepted by the scanning loop of parser_token after the
   first byte of a token. */
#define TOKEN_CASES \
  case '!': \
  case '#': \
  case '$': \
  case '%': \
  case '&': \
  case '\'': \
  case '*': \
  case '+': \
  case '-': \
  case '.': \
  case '/': \
  DIGIT_CASES: \
  case ':': \
  UCALPHA_CASES: \
  case '^': \
  case '_': \
  case '`': \
  LCALPHA_CASES: \
  case '|': \
  case '~'
/* The lowercase hexadecimal letters 'a' to 'f'. */
#define LCHEXALPHA_CASES \
  case 'a': \
  case 'b': \
  case 'c': \
  case 'd': \
  case 'e': \
  case 'f'
/*
 * Byte-range helpers for switch statements, named after the range of
 * byte values they cover.  As with the other *_CASES macros, the last
 * label has no trailing colon.
 */

/* Bytes 0x00 to 0x1f. */
#define X00_1F_CASES \
  case 0x00: \
  case 0x01: \
  case 0x02: \
  case 0x03: \
  case 0x04: \
  case 0x05: \
  case 0x06: \
  case 0x07: \
  case 0x08: \
  case 0x09: \
  case 0x0a: \
  case 0x0b: \
  case 0x0c: \
  case 0x0d: \
  case 0x0e: \
  case 0x0f: \
  case 0x10: \
  case 0x11: \
  case 0x12: \
  case 0x13: \
  case 0x14: \
  case 0x15: \
  case 0x16: \
  case 0x17: \
  case 0x18: \
  case 0x19: \
  case 0x1a: \
  case 0x1b: \
  case 0x1c: \
  case 0x1d: \
  case 0x1e: \
  case 0x1f
/* Space and '!'.  The next byte value, '"', is deliberately left out. */
#define X20_21_CASES \
  case ' ': \
  case '!'
/* '#' to '['.  The next byte value, '\\', is deliberately left out. */
#define X23_5B_CASES \
  case '#': \
  case '$': \
  case '%': \
  case '&': \
  case '\'': \
  case '(': \
  case ')': \
  case '*': \
  case '+': \
  case ',': \
  case '-': \
  case '.': \
  case '/': \
  DIGIT_CASES: \
  case ':': \
  case ';': \
  case '<': \
  case '=': \
  case '>': \
  case '?': \
  case '@': \
  UCALPHA_CASES: \
  case '['
/* ']' to '~'. */
#define X5D_7E_CASES \
  case ']': \
  case '^': \
  case '_': \
  case '`': \
  LCALPHA_CASES: \
  case '{': \
  case '|': \
  case '}': \
  case '~'
/* Bytes 0x7f to 0xff. */
#define X7F_FF_CASES \
  case 0x7f: \
  case 0x80: \
  case 0x81: \
  case 0x82: \
  case 0x83: \
  case 0x84: \
  case 0x85: \
  case 0x86: \
  case 0x87: \
  case 0x88: \
  case 0x89: \
  case 0x8a: \
  case 0x8b: \
  case 0x8c: \
  case 0x8d: \
  case 0x8e: \
  case 0x8f: \
  case 0x90: \
  case 0x91: \
  case 0x92: \
  case 0x93: \
  case 0x94: \
  case 0x95: \
  case 0x96: \
  case 0x97: \
  case 0x98: \
  case 0x99: \
  case 0x9a: \
  case 0x9b: \
  case 0x9c: \
  case 0x9d: \
  case 0x9e: \
  case 0x9f: \
  case 0xa0: \
  case 0xa1: \
  case 0xa2: \
  case 0xa3: \
  case 0xa4: \
  case 0xa5: \
  case 0xa6: \
  case 0xa7: \
  case 0xa8: \
  case 0xa9: \
  case 0xaa: \
  case 0xab: \
  case 0xac: \
  case 0xad: \
  case 0xae: \
  case 0xaf: \
  case 0xb0: \
  case 0xb1: \
  case 0xb2: \
  case 0xb3: \
  case 0xb4: \
  case 0xb5: \
  case 0xb6: \
  case 0xb7: \
  case 0xb8: \
  case 0xb9: \
  case 0xba: \
  case 0xbb: \
  case 0xbc: \
  case 0xbd: \
  case 0xbe: \
  case 0xbf: \
  case 0xc0: \
  case 0xc1: \
  case 0xc2: \
  case 0xc3: \
  case 0xc4: \
  case 0xc5: \
  case 0xc6: \
  case 0xc7: \
  case 0xc8: \
  case 0xc9: \
  case 0xca: \
  case 0xcb: \
  case 0xcc: \
  case 0xcd: \
  case 0xce: \
  case 0xcf: \
  case 0xd0: \
  case 0xd1: \
  case 0xd2: \
  case 0xd3: \
  case 0xd4: \
  case 0xd5: \
  case 0xd6: \
  case 0xd7: \
  case 0xd8: \
  case 0xd9: \
  case 0xda: \
  case 0xdb: \
  case 0xdc: \
  case 0xdd: \
  case 0xde: \
  case 0xdf: \
  case 0xe0: \
  case 0xe1: \
  case 0xe2: \
  case 0xe3: \
  case 0xe4: \
  case 0xe5: \
  case 0xe6: \
  case 0xe7: \
  case 0xe8: \
  case 0xe9: \
  case 0xea: \
  case 0xeb: \
  case 0xec: \
  case 0xed: \
  case 0xee: \
  case 0xef: \
  case 0xf0: \
  case 0xf1: \
  case 0xf2: \
  case 0xf3: \
  case 0xf4: \
  case 0xf5: \
  case 0xf6: \
  case 0xf7: \
  case 0xf8: \
  case 0xf9: \
  case 0xfa: \
  case 0xfb: \
  case 0xfc: \
  case 0xfd: \
  case 0xfe: \
  case 0xff
/* Returns 1 if |c| is a space or a horizontal tab, and 0 otherwise. */
static int is_ws(uint8_t c) { return c == ' ' || c == '\t'; }
#ifdef __AVX2__
#  ifdef _MSC_VER
#    include <intrin.h>

/* Returns the zero-based index of the least significant set bit of
   |v|.  |v| must not be 0. */
static int ctz(unsigned int v) {
  unsigned long index;

  _BitScanForward(&index, v);

  return (int)index;
}
#  else /* !_MSC_VER */
/* Other compilers use the builtin directly. */
#    define ctz __builtin_ctz
#  endif /* !_MSC_VER */
#endif /* __AVX2__ */
- static int parser_eof(sfparse_parser *sfp) { return sfp->pos == sfp->end; }
- static void parser_discard_ows(sfparse_parser *sfp) {
- for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
- ;
- }
- static void parser_discard_sp(sfparse_parser *sfp) {
- for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
- ;
- }
- static void parser_set_op_state(sfparse_parser *sfp, uint32_t op) {
- sfp->state &= ~SFPARSE_STATE_OP_MASK;
- sfp->state |= op;
- }
- static void parser_unset_inner_list_state(sfparse_parser *sfp) {
- sfp->state &= ~SFPARSE_STATE_INNER_LIST;
- }
#ifdef __AVX2__
/*
 * find_char_key scans [first, last) 32 bytes at a time and returns a
 * pointer to the first byte that is not one of '_', '-', '.', '*',
 * '0'-'9' or 'a'-'z'.  It returns |last| if every byte is in that set.
 *
 * The loop only stops when |first| equals |last|, so the length of the
 * range must be a multiple of 32.
 */
static const uint8_t *find_char_key(const uint8_t *first, const uint8_t *last) {
  const __m256i us = _mm256_set1_epi8('_');
  const __m256i ds = _mm256_set1_epi8('-');
  const __m256i dot = _mm256_set1_epi8('.');
  const __m256i ast = _mm256_set1_epi8('*');
  /* Exclusive bounds for the ranges '0'-'9' and 'a'-'z'. */
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
  const __m256i r1l = _mm256_set1_epi8('a' - 1);
  const __m256i r1r = _mm256_set1_epi8('z' + 1);
  __m256i s, x;
  uint32_t m;

  for (; first != last; first += 32) {
    /* Unaligned load; the cast keeps the const qualifier of |first|. */
    s = _mm256_loadu_si256((const __m256i *)first);

    /* |x| has all bits set in every lane that holds an accepted byte. */
    x = _mm256_cmpeq_epi8(s, us);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ds), x);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dot), x);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, ast), x);
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
      x);
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
      x);

    /* Invert the mask so that set bits mark rejected bytes; the lowest
       set bit is the first rejected byte in this chunk. */
    m = ~(uint32_t)_mm256_movemask_epi8(x);
    if (m) {
      return first + ctz(m);
    }
  }

  return last;
}
#endif /* __AVX2__ */
- static int parser_key(sfparse_parser *sfp, sfparse_vec *dest) {
- const uint8_t *base;
- #ifdef __AVX2__
- const uint8_t *last;
- #endif /* __AVX2__ */
- switch (*sfp->pos) {
- case '*':
- LCALPHA_CASES:
- break;
- default:
- return SFPARSE_ERR_PARSE;
- }
- base = sfp->pos++;
- #ifdef __AVX2__
- if (sfp->end - sfp->pos >= 32) {
- last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
- sfp->pos = find_char_key(sfp->pos, last);
- if (sfp->pos != last) {
- goto fin;
- }
- }
- #endif /* __AVX2__ */
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- case '_':
- case '-':
- case '.':
- case '*':
- DIGIT_CASES:
- LCALPHA_CASES:
- continue;
- }
- break;
- }
- #ifdef __AVX2__
- fin:
- #endif /* __AVX2__ */
- if (dest) {
- dest->base = (uint8_t *)base;
- dest->len = (size_t)(sfp->pos - dest->base);
- }
- return 0;
- }
- static int parser_number(sfparse_parser *sfp, sfparse_value *dest) {
- int sign = 1;
- int64_t value = 0;
- size_t len = 0;
- size_t fpos = 0;
- if (*sfp->pos == '-') {
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- sign = -1;
- }
- assert(!parser_eof(sfp));
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- DIGIT_CASES:
- if (++len > 15) {
- return SFPARSE_ERR_PARSE;
- }
- value *= 10;
- value += *sfp->pos - '0';
- continue;
- }
- break;
- }
- if (len == 0) {
- return SFPARSE_ERR_PARSE;
- }
- if (parser_eof(sfp) || *sfp->pos != '.') {
- if (dest) {
- dest->type = SFPARSE_TYPE_INTEGER;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->integer = value * sign;
- }
- return 0;
- }
- /* decimal */
- if (len > 12) {
- return SFPARSE_ERR_PARSE;
- }
- fpos = len;
- ++sfp->pos;
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- DIGIT_CASES:
- if (++len > 15) {
- return SFPARSE_ERR_PARSE;
- }
- value *= 10;
- value += *sfp->pos - '0';
- continue;
- }
- break;
- }
- if (fpos == len || len - fpos > 3) {
- return SFPARSE_ERR_PARSE;
- }
- if (dest) {
- dest->type = SFPARSE_TYPE_DECIMAL;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->decimal.numer = value * sign;
- switch (len - fpos) {
- case 1:
- dest->decimal.denom = 10;
- break;
- case 2:
- dest->decimal.denom = 100;
- break;
- case 3:
- dest->decimal.denom = 1000;
- break;
- }
- }
- return 0;
- }
- static int parser_date(sfparse_parser *sfp, sfparse_value *dest) {
- int rv;
- sfparse_value val;
- /* The first byte has already been validated by the caller. */
- assert('@' == *sfp->pos);
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- rv = parser_number(sfp, &val);
- if (rv != 0) {
- return rv;
- }
- if (val.type != SFPARSE_TYPE_INTEGER) {
- return SFPARSE_ERR_PARSE;
- }
- if (dest) {
- *dest = val;
- dest->type = SFPARSE_TYPE_DATE;
- }
- return 0;
- }
#ifdef __AVX2__
/*
 * find_char_string scans [first, last) 32 bytes at a time and returns
 * a pointer to the first byte that needs individual handling in a
 * string: '\\', '"', 0x7f, or any byte that compares less than ' ' as
 * a signed 8-bit value (0x00-0x1f and 0x80-0xff).  It returns |last|
 * if no such byte is found.
 *
 * The loop only stops when |first| equals |last|, so the length of the
 * range must be a multiple of 32.
 */
static const uint8_t *find_char_string(const uint8_t *first,
                                       const uint8_t *last) {
  const __m256i bs = _mm256_set1_epi8('\\');
  const __m256i dq = _mm256_set1_epi8('"');
  const __m256i del = _mm256_set1_epi8(0x7f);
  const __m256i sp = _mm256_set1_epi8(' ');
  __m256i s, x;
  uint32_t m;

  for (; first != last; first += 32) {
    /* Unaligned load; the cast keeps the const qualifier of |first|. */
    s = _mm256_loadu_si256((const __m256i *)first);

    /* |x| has all bits set in every lane that holds a stop byte.  The
       comparison is signed, so bytes 0x80-0xff also count as less
       than ' '. */
    x = _mm256_cmpgt_epi8(sp, s);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, bs), x);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, dq), x);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, del), x);

    m = (uint32_t)_mm256_movemask_epi8(x);
    if (m) {
      return first + ctz(m);
    }
  }

  return last;
}
#endif /* __AVX2__ */
- static int parser_string(sfparse_parser *sfp, sfparse_value *dest) {
- const uint8_t *base;
- #ifdef __AVX2__
- const uint8_t *last;
- #endif /* __AVX2__ */
- uint32_t flags = SFPARSE_VALUE_FLAG_NONE;
- /* The first byte has already been validated by the caller. */
- assert('"' == *sfp->pos);
- base = ++sfp->pos;
- #ifdef __AVX2__
- for (; sfp->end - sfp->pos >= 32; ++sfp->pos) {
- last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
- sfp->pos = find_char_string(sfp->pos, last);
- if (sfp->pos == last) {
- break;
- }
- switch (*sfp->pos) {
- case '\\':
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- switch (*sfp->pos) {
- case '"':
- case '\\':
- flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
- break;
- default:
- return SFPARSE_ERR_PARSE;
- }
- break;
- case '"':
- goto fin;
- default:
- return SFPARSE_ERR_PARSE;
- }
- }
- #endif /* __AVX2__ */
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- X20_21_CASES:
- X23_5B_CASES:
- X5D_7E_CASES:
- break;
- case '\\':
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- switch (*sfp->pos) {
- case '"':
- case '\\':
- flags = SFPARSE_VALUE_FLAG_ESCAPED_STRING;
- break;
- default:
- return SFPARSE_ERR_PARSE;
- }
- break;
- case '"':
- goto fin;
- default:
- return SFPARSE_ERR_PARSE;
- }
- }
- return SFPARSE_ERR_PARSE;
- fin:
- if (dest) {
- dest->type = SFPARSE_TYPE_STRING;
- dest->flags = flags;
- dest->vec.len = (size_t)(sfp->pos - base);
- dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
- }
- ++sfp->pos;
- return 0;
- }
#ifdef __AVX2__
/*
 * find_char_token scans [first, last) 32 bytes at a time and returns
 * a pointer to the first byte that is not accepted inside a token.
 * It returns |last| if every byte is accepted.
 *
 * The accepted set is built from three ranges:
 *
 *   r0: '!'..':', excluding '"', '(', ')' and ','
 *   r1: 'A'..'Z'
 *   r2: '^'..'~', excluding '{' and '}'
 *
 * The loop only stops when |first| equals |last|, so the length of the
 * range must be a multiple of 32.
 */
static const uint8_t *find_char_token(const uint8_t *first,
                                      const uint8_t *last) {
  const __m256i r0l = _mm256_set1_epi8('!' - 1);
  const __m256i r0r = _mm256_set1_epi8(':' + 1);
  const __m256i dq = _mm256_set1_epi8('"');
  const __m256i prl = _mm256_set1_epi8('(');
  const __m256i prr = _mm256_set1_epi8(')');
  const __m256i comma = _mm256_set1_epi8(',');
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
  const __m256i r2l = _mm256_set1_epi8('^' - 1);
  const __m256i r2r = _mm256_set1_epi8('~' + 1);
  const __m256i cbl = _mm256_set1_epi8('{');
  const __m256i cbr = _mm256_set1_epi8('}');
  __m256i s, x;
  uint32_t m;

  for (; first != last; first += 32) {
    /* Unaligned load; the cast keeps the const qualifier of |first|. */
    s = _mm256_loadu_si256((const __m256i *)first);

    /* r0 with its four exclusions removed. */
    x = _mm256_andnot_si256(
      _mm256_cmpeq_epi8(s, comma),
      _mm256_andnot_si256(
        _mm256_cmpeq_epi8(s, prr),
        _mm256_andnot_si256(
          _mm256_cmpeq_epi8(s, prl),
          _mm256_andnot_si256(_mm256_cmpeq_epi8(s, dq),
                              _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l),
                                               _mm256_cmpgt_epi8(r0r, s))))));
    /* r1 */
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
      x);
    /* r2 with its two exclusions removed. */
    x = _mm256_or_si256(
      _mm256_andnot_si256(
        _mm256_cmpeq_epi8(s, cbr),
        _mm256_andnot_si256(_mm256_cmpeq_epi8(s, cbl),
                            _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l),
                                             _mm256_cmpgt_epi8(r2r, s)))),
      x);

    /* Invert the mask so that set bits mark rejected bytes; the lowest
       set bit is the first rejected byte in this chunk. */
    m = ~(uint32_t)_mm256_movemask_epi8(x);
    if (m) {
      return first + ctz(m);
    }
  }

  return last;
}
#endif /* __AVX2__ */
- static int parser_token(sfparse_parser *sfp, sfparse_value *dest) {
- const uint8_t *base;
- #ifdef __AVX2__
- const uint8_t *last;
- #endif /* __AVX2__ */
- /* The first byte has already been validated by the caller. */
- base = sfp->pos++;
- #ifdef __AVX2__
- if (sfp->end - sfp->pos >= 32) {
- last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
- sfp->pos = find_char_token(sfp->pos, last);
- if (sfp->pos != last) {
- goto fin;
- }
- }
- #endif /* __AVX2__ */
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- TOKEN_CASES:
- continue;
- }
- break;
- }
- #ifdef __AVX2__
- fin:
- #endif /* __AVX2__ */
- if (dest) {
- dest->type = SFPARSE_TYPE_TOKEN;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->vec.base = (uint8_t *)base;
- dest->vec.len = (size_t)(sfp->pos - base);
- }
- return 0;
- }
#ifdef __AVX2__
/*
 * find_char_byteseq scans [first, last) 32 bytes at a time and
 * returns a pointer to the first byte that is not one of '+', '/',
 * '0'-'9', 'A'-'Z' or 'a'-'z'.  It returns |last| if every byte is in
 * that set.
 *
 * The loop only stops when |first| equals |last|, so the length of the
 * range must be a multiple of 32.
 */
static const uint8_t *find_char_byteseq(const uint8_t *first,
                                        const uint8_t *last) {
  const __m256i pls = _mm256_set1_epi8('+');
  const __m256i fs = _mm256_set1_epi8('/');
  /* Exclusive bounds for the ranges '0'-'9', 'A'-'Z' and 'a'-'z'. */
  const __m256i r0l = _mm256_set1_epi8('0' - 1);
  const __m256i r0r = _mm256_set1_epi8('9' + 1);
  const __m256i r1l = _mm256_set1_epi8('A' - 1);
  const __m256i r1r = _mm256_set1_epi8('Z' + 1);
  const __m256i r2l = _mm256_set1_epi8('a' - 1);
  const __m256i r2r = _mm256_set1_epi8('z' + 1);
  __m256i s, x;
  uint32_t m;

  for (; first != last; first += 32) {
    /* Unaligned load; the cast keeps the const qualifier of |first|. */
    s = _mm256_loadu_si256((const __m256i *)first);

    /* |x| has all bits set in every lane that holds an accepted byte. */
    x = _mm256_cmpeq_epi8(s, pls);
    x = _mm256_or_si256(_mm256_cmpeq_epi8(s, fs), x);
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r0l), _mm256_cmpgt_epi8(r0r, s)),
      x);
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r1l), _mm256_cmpgt_epi8(r1r, s)),
      x);
    x = _mm256_or_si256(
      _mm256_and_si256(_mm256_cmpgt_epi8(s, r2l), _mm256_cmpgt_epi8(r2r, s)),
      x);

    /* Invert the mask so that set bits mark rejected bytes; the lowest
       set bit is the first rejected byte in this chunk. */
    m = ~(uint32_t)_mm256_movemask_epi8(x);
    if (m) {
      return first + ctz(m);
    }
  }

  return last;
}
#endif /* __AVX2__ */
- static int parser_byteseq(sfparse_parser *sfp, sfparse_value *dest) {
- const uint8_t *base;
- #ifdef __AVX2__
- const uint8_t *last;
- #endif /* __AVX2__ */
- /* The first byte has already been validated by the caller. */
- assert(':' == *sfp->pos);
- base = ++sfp->pos;
- #ifdef __AVX2__
- if (sfp->end - sfp->pos >= 32) {
- last = sfp->pos + ((sfp->end - sfp->pos) & ~0x1fu);
- sfp->pos = find_char_byteseq(sfp->pos, last);
- }
- #endif /* __AVX2__ */
- for (; !parser_eof(sfp); ++sfp->pos) {
- switch (*sfp->pos) {
- case '+':
- case '/':
- DIGIT_CASES:
- ALPHA_CASES:
- continue;
- case '=':
- switch ((sfp->pos - base) & 0x3) {
- case 0:
- case 1:
- return SFPARSE_ERR_PARSE;
- case 2:
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- if (*sfp->pos == '=') {
- ++sfp->pos;
- }
- break;
- case 3:
- ++sfp->pos;
- break;
- }
- if (parser_eof(sfp) || *sfp->pos != ':') {
- return SFPARSE_ERR_PARSE;
- }
- goto fin;
- case ':':
- if (((sfp->pos - base) & 0x3) == 1) {
- return SFPARSE_ERR_PARSE;
- }
- goto fin;
- default:
- return SFPARSE_ERR_PARSE;
- }
- }
- return SFPARSE_ERR_PARSE;
- fin:
- if (dest) {
- dest->type = SFPARSE_TYPE_BYTESEQ;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->vec.len = (size_t)(sfp->pos - base);
- dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
- }
- ++sfp->pos;
- return 0;
- }
- static int parser_boolean(sfparse_parser *sfp, sfparse_value *dest) {
- int b;
- /* The first byte has already been validated by the caller. */
- assert('?' == *sfp->pos);
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- switch (*sfp->pos) {
- case '0':
- b = 0;
- break;
- case '1':
- b = 1;
- break;
- default:
- return SFPARSE_ERR_PARSE;
- }
- ++sfp->pos;
- if (dest) {
- dest->type = SFPARSE_TYPE_BOOLEAN;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->boolean = b;
- }
- return 0;
- }
/* Returns the value (0-15) of the hexadecimal digit |b|, or -1 if |b|
   is not a digit or a lowercase letter 'a'-'f'.  Uppercase letters are
   not accepted. */
static int pctdecode_nibble(uint8_t b) {
  switch (b) {
  DIGIT_CASES:
    return b - '0';
  LCHEXALPHA_CASES:
    return b - 'a' + 10;
  default:
    return -1;
  }
}

/*
 * pctdecode decodes the two lowercase hexadecimal digits at |*ppos|
 * into one byte.  The caller must make sure that two bytes are
 * readable at |*ppos|.
 *
 * On success it stores the byte in |*pc|, advances |*ppos| by 2 and
 * returns 0.  On failure it returns -1 and leaves |*pc| untouched;
 * |*ppos| is left on the digit that was rejected.
 */
static int pctdecode(uint8_t *pc, const uint8_t **ppos) {
  int hi, lo;

  hi = pctdecode_nibble(**ppos);
  if (hi < 0) {
    return -1;
  }

  ++*ppos;

  lo = pctdecode_nibble(**ppos);
  if (lo < 0) {
    return -1;
  }

  *pc = (uint8_t)((hi << 4) | lo);
  ++*ppos;

  return 0;
}
- /* Start of utf8 dfa */
- /* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
- *
- * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
/* DFA state reached after a complete, valid sequence. */
#define UTF8_ACCEPT 0
/* DFA state reached after an invalid byte. */
#define UTF8_REJECT 12

/* clang-format off */
static const uint8_t utf8d[] = {
  /*
   * The first part of the table maps each byte to a character class,
   * which reduces the size of the transition table and creates
   * bitmasks.
   */
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  /*
   * The second part is a transition table that maps a combination
   * of a state of the automaton and a character class to a state.
   */
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12,
};
/* clang-format on */

/* Feeds |byte| to the UTF-8 DFA and updates |*state| to the next
   state.  Start from UTF8_ACCEPT; after each byte the state is
   UTF8_ACCEPT if a sequence has just been completed, or UTF8_REJECT
   if the byte was not valid at that point. */
static void utf8_decode(uint32_t *state, uint8_t byte) {
  uint32_t char_class = utf8d[byte];

  *state = utf8d[256 + *state + char_class];
}
- /* End of utf8 dfa */
- static int parser_dispstring(sfparse_parser *sfp, sfparse_value *dest) {
- const uint8_t *base;
- uint8_t c;
- uint32_t utf8state = UTF8_ACCEPT;
- assert('%' == *sfp->pos);
- ++sfp->pos;
- if (parser_eof(sfp) || *sfp->pos != '"') {
- return SFPARSE_ERR_PARSE;
- }
- base = ++sfp->pos;
- for (; !parser_eof(sfp);) {
- switch (*sfp->pos) {
- X00_1F_CASES:
- X7F_FF_CASES:
- return SFPARSE_ERR_PARSE;
- case '%':
- ++sfp->pos;
- if (sfp->pos + 2 > sfp->end) {
- return SFPARSE_ERR_PARSE;
- }
- if (pctdecode(&c, &sfp->pos) != 0) {
- return SFPARSE_ERR_PARSE;
- }
- utf8_decode(&utf8state, c);
- if (utf8state == UTF8_REJECT) {
- return SFPARSE_ERR_PARSE;
- }
- break;
- case '"':
- if (utf8state != UTF8_ACCEPT) {
- return SFPARSE_ERR_PARSE;
- }
- if (dest) {
- dest->type = SFPARSE_TYPE_DISPSTRING;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->vec.len = (size_t)(sfp->pos - base);
- dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
- }
- ++sfp->pos;
- return 0;
- default:
- if (utf8state != UTF8_ACCEPT) {
- return SFPARSE_ERR_PARSE;
- }
- ++sfp->pos;
- }
- }
- return SFPARSE_ERR_PARSE;
- }
- static int parser_bare_item(sfparse_parser *sfp, sfparse_value *dest) {
- switch (*sfp->pos) {
- case '"':
- return parser_string(sfp, dest);
- case '-':
- DIGIT_CASES:
- return parser_number(sfp, dest);
- case '@':
- return parser_date(sfp, dest);
- case ':':
- return parser_byteseq(sfp, dest);
- case '?':
- return parser_boolean(sfp, dest);
- case '*':
- ALPHA_CASES:
- return parser_token(sfp, dest);
- case '%':
- return parser_dispstring(sfp, dest);
- default:
- return SFPARSE_ERR_PARSE;
- }
- }
- static int parser_skip_inner_list(sfparse_parser *sfp);
- int sfparse_parser_param(sfparse_parser *sfp, sfparse_vec *dest_key,
- sfparse_value *dest_value) {
- int rv;
- switch (sfp->state & SFPARSE_STATE_OP_MASK) {
- case SFPARSE_STATE_BEFORE:
- rv = parser_skip_inner_list(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_BEFORE_PARAMS:
- parser_set_op_state(sfp, SFPARSE_STATE_PARAMS);
- break;
- case SFPARSE_STATE_PARAMS:
- break;
- default:
- assert(0);
- abort();
- }
- if (parser_eof(sfp) || *sfp->pos != ';') {
- parser_set_op_state(sfp, SFPARSE_STATE_AFTER);
- return SFPARSE_ERR_EOF;
- }
- ++sfp->pos;
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- rv = parser_key(sfp, dest_key);
- if (rv != 0) {
- return rv;
- }
- if (parser_eof(sfp) || *sfp->pos != '=') {
- if (dest_value) {
- dest_value->type = SFPARSE_TYPE_BOOLEAN;
- dest_value->flags = SFPARSE_VALUE_FLAG_NONE;
- dest_value->boolean = 1;
- }
- return 0;
- }
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- return parser_bare_item(sfp, dest_value);
- }
- static int parser_skip_params(sfparse_parser *sfp) {
- int rv;
- for (;;) {
- rv = sfparse_parser_param(sfp, NULL, NULL);
- switch (rv) {
- case 0:
- break;
- case SFPARSE_ERR_EOF:
- return 0;
- case SFPARSE_ERR_PARSE:
- return rv;
- default:
- assert(0);
- abort();
- }
- }
- }
- int sfparse_parser_inner_list(sfparse_parser *sfp, sfparse_value *dest) {
- int rv;
- switch (sfp->state & SFPARSE_STATE_OP_MASK) {
- case SFPARSE_STATE_BEFORE:
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- break;
- case SFPARSE_STATE_BEFORE_PARAMS:
- rv = parser_skip_params(sfp);
- if (rv != 0) {
- return rv;
- }
- /* Technically, we are entering SFPARSE_STATE_AFTER, but we will set
- another state without reading the state. */
- /* parser_set_op_state(sfp, SFPARSE_STATE_AFTER); */
- /* fall through */
- case SFPARSE_STATE_AFTER:
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- switch (*sfp->pos) {
- case ' ':
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- break;
- case ')':
- break;
- default:
- return SFPARSE_ERR_PARSE;
- }
- break;
- default:
- assert(0);
- abort();
- }
- if (*sfp->pos == ')') {
- ++sfp->pos;
- parser_unset_inner_list_state(sfp);
- parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
- return SFPARSE_ERR_EOF;
- }
- rv = parser_bare_item(sfp, dest);
- if (rv != 0) {
- return rv;
- }
- parser_set_op_state(sfp, SFPARSE_STATE_BEFORE_PARAMS);
- return 0;
- }
- static int parser_skip_inner_list(sfparse_parser *sfp) {
- int rv;
- for (;;) {
- rv = sfparse_parser_inner_list(sfp, NULL);
- switch (rv) {
- case 0:
- break;
- case SFPARSE_ERR_EOF:
- return 0;
- case SFPARSE_ERR_PARSE:
- return rv;
- default:
- assert(0);
- abort();
- }
- }
- }
- static int parser_next_key_or_item(sfparse_parser *sfp) {
- parser_discard_ows(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_EOF;
- }
- if (*sfp->pos != ',') {
- return SFPARSE_ERR_PARSE;
- }
- ++sfp->pos;
- parser_discard_ows(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- return 0;
- }
- static int parser_dict_value(sfparse_parser *sfp, sfparse_value *dest) {
- int rv;
- if (parser_eof(sfp) || *(sfp->pos) != '=') {
- /* Boolean true */
- if (dest) {
- dest->type = SFPARSE_TYPE_BOOLEAN;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- dest->boolean = 1;
- }
- sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
- return 0;
- }
- ++sfp->pos;
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- if (*sfp->pos == '(') {
- if (dest) {
- dest->type = SFPARSE_TYPE_INNER_LIST;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- }
- ++sfp->pos;
- sfp->state = SFPARSE_STATE_DICT_INNER_LIST_BEFORE;
- return 0;
- }
- rv = parser_bare_item(sfp, dest);
- if (rv != 0) {
- return rv;
- }
- sfp->state = SFPARSE_STATE_DICT_BEFORE_PARAMS;
- return 0;
- }
- int sfparse_parser_dict(sfparse_parser *sfp, sfparse_vec *dest_key,
- sfparse_value *dest_value) {
- int rv;
- switch (sfp->state) {
- case SFPARSE_STATE_DICT_INNER_LIST_BEFORE:
- rv = parser_skip_inner_list(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_DICT_BEFORE_PARAMS:
- rv = parser_skip_params(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_DICT_AFTER:
- rv = parser_next_key_or_item(sfp);
- if (rv != 0) {
- return rv;
- }
- break;
- case SFPARSE_STATE_INITIAL:
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_EOF;
- }
- break;
- default:
- assert(0);
- abort();
- }
- rv = parser_key(sfp, dest_key);
- if (rv != 0) {
- return rv;
- }
- return parser_dict_value(sfp, dest_value);
- }
- int sfparse_parser_list(sfparse_parser *sfp, sfparse_value *dest) {
- int rv;
- switch (sfp->state) {
- case SFPARSE_STATE_LIST_INNER_LIST_BEFORE:
- rv = parser_skip_inner_list(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_LIST_BEFORE_PARAMS:
- rv = parser_skip_params(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_LIST_AFTER:
- rv = parser_next_key_or_item(sfp);
- if (rv != 0) {
- return rv;
- }
- break;
- case SFPARSE_STATE_INITIAL:
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_EOF;
- }
- break;
- default:
- assert(0);
- abort();
- }
- if (*sfp->pos == '(') {
- if (dest) {
- dest->type = SFPARSE_TYPE_INNER_LIST;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- }
- ++sfp->pos;
- sfp->state = SFPARSE_STATE_LIST_INNER_LIST_BEFORE;
- return 0;
- }
- rv = parser_bare_item(sfp, dest);
- if (rv != 0) {
- return rv;
- }
- sfp->state = SFPARSE_STATE_LIST_BEFORE_PARAMS;
- return 0;
- }
- int sfparse_parser_item(sfparse_parser *sfp, sfparse_value *dest) {
- int rv;
- switch (sfp->state) {
- case SFPARSE_STATE_INITIAL:
- parser_discard_sp(sfp);
- if (parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- break;
- case SFPARSE_STATE_ITEM_INNER_LIST_BEFORE:
- rv = parser_skip_inner_list(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_ITEM_BEFORE_PARAMS:
- rv = parser_skip_params(sfp);
- if (rv != 0) {
- return rv;
- }
- /* fall through */
- case SFPARSE_STATE_ITEM_AFTER:
- parser_discard_sp(sfp);
- if (!parser_eof(sfp)) {
- return SFPARSE_ERR_PARSE;
- }
- return SFPARSE_ERR_EOF;
- default:
- assert(0);
- abort();
- }
- if (*sfp->pos == '(') {
- if (dest) {
- dest->type = SFPARSE_TYPE_INNER_LIST;
- dest->flags = SFPARSE_VALUE_FLAG_NONE;
- }
- ++sfp->pos;
- sfp->state = SFPARSE_STATE_ITEM_INNER_LIST_BEFORE;
- return 0;
- }
- rv = parser_bare_item(sfp, dest);
- if (rv != 0) {
- return rv;
- }
- sfp->state = SFPARSE_STATE_ITEM_BEFORE_PARAMS;
- return 0;
- }
- void sfparse_parser_init(sfparse_parser *sfp, const uint8_t *data,
- size_t datalen) {
- if (datalen == 0) {
- sfp->pos = sfp->end = NULL;
- } else {
- sfp->pos = data;
- sfp->end = data + datalen;
- }
- sfp->state = SFPARSE_STATE_INITIAL;
- }
- void sfparse_unescape(sfparse_vec *dest, const sfparse_vec *src) {
- const uint8_t *p, *q;
- uint8_t *o;
- size_t len, slen;
- if (src->len == 0) {
- dest->len = 0;
- return;
- }
- o = dest->base;
- p = src->base;
- len = src->len;
- for (;;) {
- q = memchr(p, '\\', len);
- if (q == NULL) {
- memcpy(o, p, len);
- o += len;
- dest->len = (size_t)(o - dest->base);
- return;
- }
- slen = (size_t)(q - p);
- memcpy(o, p, slen);
- o += slen;
- p = q + 1;
- *o++ = *p++;
- len -= slen + 2;
- }
- }
- void sfparse_base64decode(sfparse_vec *dest, const sfparse_vec *src) {
- static const int index_tbl[] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
- 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
- -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
- 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1};
- uint8_t *o;
- const uint8_t *p, *end;
- uint32_t n;
- size_t i, left;
- int idx;
- if (src->len == 0) {
- dest->len = 0;
- return;
- }
- o = dest->base;
- p = src->base;
- left = src->len & 0x3;
- if (left == 0 && src->base[src->len - 1] == '=') {
- left = 4;
- }
- end = src->base + src->len - left;
- for (; p != end;) {
- n = 0;
- for (i = 1; i <= 4; ++i, ++p) {
- idx = index_tbl[*p];
- assert(idx != -1);
- n += (uint32_t)(idx << (24 - i * 6));
- }
- *o++ = (uint8_t)(n >> 16);
- *o++ = (n >> 8) & 0xffu;
- *o++ = n & 0xffu;
- }
- switch (left) {
- case 0:
- goto fin;
- case 1:
- assert(0);
- abort();
- case 3:
- if (src->base[src->len - 1] == '=') {
- left = 2;
- }
- break;
- case 4:
- assert('=' == src->base[src->len - 1]);
- if (src->base[src->len - 2] == '=') {
- left = 2;
- } else {
- left = 3;
- }
- break;
- }
- switch (left) {
- case 2:
- *o = (uint8_t)(index_tbl[*p++] << 2);
- *o++ |= (uint8_t)(index_tbl[*p++] >> 4);
- break;
- case 3:
- n = (uint32_t)(index_tbl[*p++] << 10);
- n += (uint32_t)(index_tbl[*p++] << 4);
- n += (uint32_t)(index_tbl[*p++] >> 2);
- *o++ = (n >> 8) & 0xffu;
- *o++ = n & 0xffu;
- break;
- }
- fin:
- dest->len = (size_t)(o - dest->base);
- }
- void sfparse_pctdecode(sfparse_vec *dest, const sfparse_vec *src) {
- const uint8_t *p, *q;
- uint8_t *o;
- size_t len, slen;
- if (src->len == 0) {
- dest->len = 0;
- return;
- }
- o = dest->base;
- p = src->base;
- len = src->len;
- for (;;) {
- q = memchr(p, '%', len);
- if (q == NULL) {
- memcpy(o, p, len);
- o += len;
- dest->len = (size_t)(o - dest->base);
- return;
- }
- slen = (size_t)(q - p);
- memcpy(o, p, slen);
- o += slen;
- p = q + 1;
- pctdecode(o++, &p);
- len -= slen + 3;
- }
- }
|