xmltok_impl.inc 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779
  1. /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
  2. See the file COPYING for copying permission.
  3. */
  4. /* This file is included! */
  5. #ifdef XML_TOK_IMPL_C
  /* Fallback used when the including file supplies no test of its own:
     with this definition no multi-byte sequence is ever reported invalid. */
  #if !defined(IS_INVALID_CHAR)
  #define IS_INVALID_CHAR(enc, ptr, n) (0)
  #endif
  /* Switch case for an n-byte lead byte where any well-formed character is
     acceptable.  `enc` and `end` are taken from the enclosing scope.
       - fewer than n bytes left : return XML_TOK_PARTIAL_CHAR
       - IS_INVALID_CHAR is true : store ptr in *nextTokPtr and
                                   return XML_TOK_INVALID
       - otherwise               : advance ptr by n and leave the switch */
  #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (! IS_INVALID_CHAR(enc, ptr, n)) { \
        ptr += n; \
        break; \
      } \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;
  /* Switch cases shared by every scanner that accepts arbitrary characters:
     the 2-, 3- and 4-byte lead cases (see INVALID_LEAD_CASE), followed by the
     byte types that are rejected outright with XML_TOK_INVALID and
     *nextTokPtr set to the offending position. */
  #define INVALID_CASES(ptr, nextTokPtr) \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    case BT_TRAIL: \
    case BT_MALFORM: \
    case BT_NONXML: \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;
  /* Switch case for an n-byte lead byte occurring inside a name.
       - fewer than n bytes left                : return XML_TOK_PARTIAL_CHAR
       - sequence malformed (IS_INVALID_CHAR) or
         not a name character (IS_NAME_CHAR)    : store ptr in *nextTokPtr and
                                                  return XML_TOK_INVALID
       - otherwise                              : advance ptr by n and leave
                                                  the switch
     The IS_INVALID_CHAR test is required in addition to IS_NAME_CHAR: without
     it a malformed multi-byte sequence is accepted whenever the name test
     happens to pass for its bytes.  INVALID_LEAD_CASE performs the same
     validation for non-name contexts. */
  #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;
  /* Switch cases that consume one name character at ptr and leave the switch.
     A BT_NONASCII character must additionally satisfy IS_NAME_CHAR_MINBPC,
     otherwise XML_TOK_INVALID is returned with *nextTokPtr = ptr.  Multi-byte
     lead bytes are handled by CHECK_NAME_CASE. */
  #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_MINUS: \
    case BT_NAME: \
    case BT_DIGIT: \
    case BT_HEX: \
    case BT_NMSTRT: \
      ptr += MINBPC(enc); \
      break; \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
  /* Switch case for an n-byte lead byte occurring at the start of a name.
       - fewer than n bytes left                  : return XML_TOK_PARTIAL_CHAR
       - sequence malformed (IS_INVALID_CHAR) or
         not a name-start char (IS_NMSTRT_CHAR)   : store ptr in *nextTokPtr
                                                    and return XML_TOK_INVALID
       - otherwise                                : advance ptr by n and leave
                                                    the switch
     The IS_INVALID_CHAR test is required in addition to IS_NMSTRT_CHAR:
     without it a malformed multi-byte sequence is accepted whenever the
     name-start test happens to pass for its bytes.  INVALID_LEAD_CASE performs
     the same validation for non-name contexts. */
  #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;
  /* Switch cases that consume one name-start character at ptr and leave the
     switch.  A BT_NONASCII character must additionally satisfy
     IS_NMSTRT_CHAR_MINBPC, otherwise XML_TOK_INVALID is returned with
     *nextTokPtr = ptr.  Multi-byte lead bytes are handled by
     CHECK_NMSTRT_CASE. */
  #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
        *nextTokPtr = ptr; \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_HEX: \
    case BT_NMSTRT: \
      ptr += MINBPC(enc); \
      break; \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
  /* Name decoration applied to every function defined below; when the
     including file does not define PREFIX the identifiers are used as-is. */
  #if !defined(PREFIX)
  #define PREFIX(ident) ident
  #endif
  /* True when at least `count` characters, of MINBPC(enc) bytes each, lie
     between ptr and end.  Every argument is parenthesized so that compound
     expressions (e.g. a count of `1 + 1`, or `base + len` as end) expand with
     the intended precedence. */
  #define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= (count) * MINBPC(enc))

  /* True when at least one character lies between ptr and end. */
  #define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)
  /* Make the enclosing function return XML_TOK_PARTIAL unless at least
     `count` characters are available between ptr and end (see HAS_CHARS). */
  #define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) \
        return XML_TOK_PARTIAL; \
    }

  /* Single-character form of REQUIRE_CHARS. */
  #define REQUIRE_CHAR(enc, ptr, end) \
    REQUIRE_CHARS(enc, ptr, end, 1)
  92. /* ptr points to character following "<!-" */
  93. static int PTRCALL
  94. PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
  95. const char *end, const char **nextTokPtr)
  96. {
  97. if (HAS_CHAR(enc, ptr, end)) {
  98. if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  99. *nextTokPtr = ptr;
  100. return XML_TOK_INVALID;
  101. }
  102. ptr += MINBPC(enc);
  103. while (HAS_CHAR(enc, ptr, end)) {
  104. switch (BYTE_TYPE(enc, ptr)) {
  105. INVALID_CASES(ptr, nextTokPtr)
  106. case BT_MINUS:
  107. ptr += MINBPC(enc);
  108. REQUIRE_CHAR(enc, ptr, end);
  109. if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
  110. ptr += MINBPC(enc);
  111. REQUIRE_CHAR(enc, ptr, end);
  112. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  113. *nextTokPtr = ptr;
  114. return XML_TOK_INVALID;
  115. }
  116. *nextTokPtr = ptr + MINBPC(enc);
  117. return XML_TOK_COMMENT;
  118. }
  119. break;
  120. default:
  121. ptr += MINBPC(enc);
  122. break;
  123. }
  124. }
  125. }
  126. return XML_TOK_PARTIAL;
  127. }
  128. /* ptr points to character following "<!" */
  129. static int PTRCALL
  130. PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
  131. const char *end, const char **nextTokPtr)
  132. {
  133. REQUIRE_CHAR(enc, ptr, end);
  134. switch (BYTE_TYPE(enc, ptr)) {
  135. case BT_MINUS:
  136. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  137. case BT_LSQB:
  138. *nextTokPtr = ptr + MINBPC(enc);
  139. return XML_TOK_COND_SECT_OPEN;
  140. case BT_NMSTRT:
  141. case BT_HEX:
  142. ptr += MINBPC(enc);
  143. break;
  144. default:
  145. *nextTokPtr = ptr;
  146. return XML_TOK_INVALID;
  147. }
  148. while (HAS_CHAR(enc, ptr, end)) {
  149. switch (BYTE_TYPE(enc, ptr)) {
  150. case BT_PERCNT:
  151. REQUIRE_CHARS(enc, ptr, end, 2);
  152. /* don't allow <!ENTITY% foo "whatever"> */
  153. switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
  154. case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
  155. *nextTokPtr = ptr;
  156. return XML_TOK_INVALID;
  157. }
  158. /* fall through */
  159. case BT_S: case BT_CR: case BT_LF:
  160. *nextTokPtr = ptr;
  161. return XML_TOK_DECL_OPEN;
  162. case BT_NMSTRT:
  163. case BT_HEX:
  164. ptr += MINBPC(enc);
  165. break;
  166. default:
  167. *nextTokPtr = ptr;
  168. return XML_TOK_INVALID;
  169. }
  170. }
  171. return XML_TOK_PARTIAL;
  172. }
  173. static int PTRCALL
  174. PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr,
  175. const char *end, int *tokPtr)
  176. {
  177. int upper = 0;
  178. *tokPtr = XML_TOK_PI;
  179. if (end - ptr != MINBPC(enc)*3)
  180. return 1;
  181. switch (BYTE_TO_ASCII(enc, ptr)) {
  182. case ASCII_x:
  183. break;
  184. case ASCII_X:
  185. upper = 1;
  186. break;
  187. default:
  188. return 1;
  189. }
  190. ptr += MINBPC(enc);
  191. switch (BYTE_TO_ASCII(enc, ptr)) {
  192. case ASCII_m:
  193. break;
  194. case ASCII_M:
  195. upper = 1;
  196. break;
  197. default:
  198. return 1;
  199. }
  200. ptr += MINBPC(enc);
  201. switch (BYTE_TO_ASCII(enc, ptr)) {
  202. case ASCII_l:
  203. break;
  204. case ASCII_L:
  205. upper = 1;
  206. break;
  207. default:
  208. return 1;
  209. }
  210. if (upper)
  211. return 0;
  212. *tokPtr = XML_TOK_XML_DECL;
  213. return 1;
  214. }
  215. /* ptr points to character following "<?" */
  216. static int PTRCALL
  217. PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
  218. const char *end, const char **nextTokPtr)
  219. {
  220. int tok;
  221. const char *target = ptr;
  222. REQUIRE_CHAR(enc, ptr, end);
  223. switch (BYTE_TYPE(enc, ptr)) {
  224. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  225. default:
  226. *nextTokPtr = ptr;
  227. return XML_TOK_INVALID;
  228. }
  229. while (HAS_CHAR(enc, ptr, end)) {
  230. switch (BYTE_TYPE(enc, ptr)) {
  231. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  232. case BT_S: case BT_CR: case BT_LF:
  233. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  234. *nextTokPtr = ptr;
  235. return XML_TOK_INVALID;
  236. }
  237. ptr += MINBPC(enc);
  238. while (HAS_CHAR(enc, ptr, end)) {
  239. switch (BYTE_TYPE(enc, ptr)) {
  240. INVALID_CASES(ptr, nextTokPtr)
  241. case BT_QUEST:
  242. ptr += MINBPC(enc);
  243. REQUIRE_CHAR(enc, ptr, end);
  244. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  245. *nextTokPtr = ptr + MINBPC(enc);
  246. return tok;
  247. }
  248. break;
  249. default:
  250. ptr += MINBPC(enc);
  251. break;
  252. }
  253. }
  254. return XML_TOK_PARTIAL;
  255. case BT_QUEST:
  256. if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
  257. *nextTokPtr = ptr;
  258. return XML_TOK_INVALID;
  259. }
  260. ptr += MINBPC(enc);
  261. REQUIRE_CHAR(enc, ptr, end);
  262. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  263. *nextTokPtr = ptr + MINBPC(enc);
  264. return tok;
  265. }
  266. /* fall through */
  267. default:
  268. *nextTokPtr = ptr;
  269. return XML_TOK_INVALID;
  270. }
  271. }
  272. return XML_TOK_PARTIAL;
  273. }
  274. static int PTRCALL
  275. PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr,
  276. const char *end, const char **nextTokPtr)
  277. {
  278. static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
  279. ASCII_T, ASCII_A, ASCII_LSQB };
  280. int i;
  281. /* CDATA[ */
  282. REQUIRE_CHARS(enc, ptr, end, 6);
  283. for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
  284. if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
  285. *nextTokPtr = ptr;
  286. return XML_TOK_INVALID;
  287. }
  288. }
  289. *nextTokPtr = ptr;
  290. return XML_TOK_CDATA_SECT_OPEN;
  291. }
  292. static int PTRCALL
  293. PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
  294. const char *end, const char **nextTokPtr)
  295. {
  296. if (ptr >= end)
  297. return XML_TOK_NONE;
  298. if (MINBPC(enc) > 1) {
  299. size_t n = end - ptr;
  300. if (n & (MINBPC(enc) - 1)) {
  301. n &= ~(MINBPC(enc) - 1);
  302. if (n == 0)
  303. return XML_TOK_PARTIAL;
  304. end = ptr + n;
  305. }
  306. }
  307. switch (BYTE_TYPE(enc, ptr)) {
  308. case BT_RSQB:
  309. ptr += MINBPC(enc);
  310. REQUIRE_CHAR(enc, ptr, end);
  311. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  312. break;
  313. ptr += MINBPC(enc);
  314. REQUIRE_CHAR(enc, ptr, end);
  315. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  316. ptr -= MINBPC(enc);
  317. break;
  318. }
  319. *nextTokPtr = ptr + MINBPC(enc);
  320. return XML_TOK_CDATA_SECT_CLOSE;
  321. case BT_CR:
  322. ptr += MINBPC(enc);
  323. REQUIRE_CHAR(enc, ptr, end);
  324. if (BYTE_TYPE(enc, ptr) == BT_LF)
  325. ptr += MINBPC(enc);
  326. *nextTokPtr = ptr;
  327. return XML_TOK_DATA_NEWLINE;
  328. case BT_LF:
  329. *nextTokPtr = ptr + MINBPC(enc);
  330. return XML_TOK_DATA_NEWLINE;
  331. INVALID_CASES(ptr, nextTokPtr)
  332. default:
  333. ptr += MINBPC(enc);
  334. break;
  335. }
  336. while (HAS_CHAR(enc, ptr, end)) {
  337. switch (BYTE_TYPE(enc, ptr)) {
  338. #define LEAD_CASE(n) \
  339. case BT_LEAD ## n: \
  340. if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
  341. *nextTokPtr = ptr; \
  342. return XML_TOK_DATA_CHARS; \
  343. } \
  344. ptr += n; \
  345. break;
  346. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  347. #undef LEAD_CASE
  348. case BT_NONXML:
  349. case BT_MALFORM:
  350. case BT_TRAIL:
  351. case BT_CR:
  352. case BT_LF:
  353. case BT_RSQB:
  354. *nextTokPtr = ptr;
  355. return XML_TOK_DATA_CHARS;
  356. default:
  357. ptr += MINBPC(enc);
  358. break;
  359. }
  360. }
  361. *nextTokPtr = ptr;
  362. return XML_TOK_DATA_CHARS;
  363. }
  364. /* ptr points to character following "</" */
  365. static int PTRCALL
  366. PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
  367. const char *end, const char **nextTokPtr)
  368. {
  369. REQUIRE_CHAR(enc, ptr, end);
  370. switch (BYTE_TYPE(enc, ptr)) {
  371. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  372. default:
  373. *nextTokPtr = ptr;
  374. return XML_TOK_INVALID;
  375. }
  376. while (HAS_CHAR(enc, ptr, end)) {
  377. switch (BYTE_TYPE(enc, ptr)) {
  378. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  379. case BT_S: case BT_CR: case BT_LF:
  380. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  381. switch (BYTE_TYPE(enc, ptr)) {
  382. case BT_S: case BT_CR: case BT_LF:
  383. break;
  384. case BT_GT:
  385. *nextTokPtr = ptr + MINBPC(enc);
  386. return XML_TOK_END_TAG;
  387. default:
  388. *nextTokPtr = ptr;
  389. return XML_TOK_INVALID;
  390. }
  391. }
  392. return XML_TOK_PARTIAL;
  393. #ifdef XML_NS
  394. case BT_COLON:
  395. /* no need to check qname syntax here,
  396. since end-tag must match exactly */
  397. ptr += MINBPC(enc);
  398. break;
  399. #endif
  400. case BT_GT:
  401. *nextTokPtr = ptr + MINBPC(enc);
  402. return XML_TOK_END_TAG;
  403. default:
  404. *nextTokPtr = ptr;
  405. return XML_TOK_INVALID;
  406. }
  407. }
  408. return XML_TOK_PARTIAL;
  409. }
  410. /* ptr points to character following "&#X" */
  411. static int PTRCALL
  412. PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
  413. const char *end, const char **nextTokPtr)
  414. {
  415. if (HAS_CHAR(enc, ptr, end)) {
  416. switch (BYTE_TYPE(enc, ptr)) {
  417. case BT_DIGIT:
  418. case BT_HEX:
  419. break;
  420. default:
  421. *nextTokPtr = ptr;
  422. return XML_TOK_INVALID;
  423. }
  424. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  425. switch (BYTE_TYPE(enc, ptr)) {
  426. case BT_DIGIT:
  427. case BT_HEX:
  428. break;
  429. case BT_SEMI:
  430. *nextTokPtr = ptr + MINBPC(enc);
  431. return XML_TOK_CHAR_REF;
  432. default:
  433. *nextTokPtr = ptr;
  434. return XML_TOK_INVALID;
  435. }
  436. }
  437. }
  438. return XML_TOK_PARTIAL;
  439. }
  440. /* ptr points to character following "&#" */
  441. static int PTRCALL
  442. PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
  443. const char *end, const char **nextTokPtr)
  444. {
  445. if (HAS_CHAR(enc, ptr, end)) {
  446. if (CHAR_MATCHES(enc, ptr, ASCII_x))
  447. return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  448. switch (BYTE_TYPE(enc, ptr)) {
  449. case BT_DIGIT:
  450. break;
  451. default:
  452. *nextTokPtr = ptr;
  453. return XML_TOK_INVALID;
  454. }
  455. for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  456. switch (BYTE_TYPE(enc, ptr)) {
  457. case BT_DIGIT:
  458. break;
  459. case BT_SEMI:
  460. *nextTokPtr = ptr + MINBPC(enc);
  461. return XML_TOK_CHAR_REF;
  462. default:
  463. *nextTokPtr = ptr;
  464. return XML_TOK_INVALID;
  465. }
  466. }
  467. }
  468. return XML_TOK_PARTIAL;
  469. }
  470. /* ptr points to character following "&" */
  471. static int PTRCALL
  472. PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
  473. const char **nextTokPtr)
  474. {
  475. REQUIRE_CHAR(enc, ptr, end);
  476. switch (BYTE_TYPE(enc, ptr)) {
  477. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  478. case BT_NUM:
  479. return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  480. default:
  481. *nextTokPtr = ptr;
  482. return XML_TOK_INVALID;
  483. }
  484. while (HAS_CHAR(enc, ptr, end)) {
  485. switch (BYTE_TYPE(enc, ptr)) {
  486. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  487. case BT_SEMI:
  488. *nextTokPtr = ptr + MINBPC(enc);
  489. return XML_TOK_ENTITY_REF;
  490. default:
  491. *nextTokPtr = ptr;
  492. return XML_TOK_INVALID;
  493. }
  494. }
  495. return XML_TOK_PARTIAL;
  496. }
  497. /* ptr points to character following first character of attribute name */
  498. static int PTRCALL
  499. PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
  500. const char **nextTokPtr)
  501. {
  502. #ifdef XML_NS
  503. int hadColon = 0;
  504. #endif
  505. while (HAS_CHAR(enc, ptr, end)) {
  506. switch (BYTE_TYPE(enc, ptr)) {
  507. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  508. #ifdef XML_NS
  509. case BT_COLON:
  510. if (hadColon) {
  511. *nextTokPtr = ptr;
  512. return XML_TOK_INVALID;
  513. }
  514. hadColon = 1;
  515. ptr += MINBPC(enc);
  516. REQUIRE_CHAR(enc, ptr, end);
  517. switch (BYTE_TYPE(enc, ptr)) {
  518. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  519. default:
  520. *nextTokPtr = ptr;
  521. return XML_TOK_INVALID;
  522. }
  523. break;
  524. #endif
  525. case BT_S: case BT_CR: case BT_LF:
  526. for (;;) {
  527. int t;
  528. ptr += MINBPC(enc);
  529. REQUIRE_CHAR(enc, ptr, end);
  530. t = BYTE_TYPE(enc, ptr);
  531. if (t == BT_EQUALS)
  532. break;
  533. switch (t) {
  534. case BT_S:
  535. case BT_LF:
  536. case BT_CR:
  537. break;
  538. default:
  539. *nextTokPtr = ptr;
  540. return XML_TOK_INVALID;
  541. }
  542. }
  543. /* fall through */
  544. case BT_EQUALS:
  545. {
  546. int open;
  547. #ifdef XML_NS
  548. hadColon = 0;
  549. #endif
  550. for (;;) {
  551. ptr += MINBPC(enc);
  552. REQUIRE_CHAR(enc, ptr, end);
  553. open = BYTE_TYPE(enc, ptr);
  554. if (open == BT_QUOT || open == BT_APOS)
  555. break;
  556. switch (open) {
  557. case BT_S:
  558. case BT_LF:
  559. case BT_CR:
  560. break;
  561. default:
  562. *nextTokPtr = ptr;
  563. return XML_TOK_INVALID;
  564. }
  565. }
  566. ptr += MINBPC(enc);
  567. /* in attribute value */
  568. for (;;) {
  569. int t;
  570. REQUIRE_CHAR(enc, ptr, end);
  571. t = BYTE_TYPE(enc, ptr);
  572. if (t == open)
  573. break;
  574. switch (t) {
  575. INVALID_CASES(ptr, nextTokPtr)
  576. case BT_AMP:
  577. {
  578. int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
  579. if (tok <= 0) {
  580. if (tok == XML_TOK_INVALID)
  581. *nextTokPtr = ptr;
  582. return tok;
  583. }
  584. break;
  585. }
  586. case BT_LT:
  587. *nextTokPtr = ptr;
  588. return XML_TOK_INVALID;
  589. default:
  590. ptr += MINBPC(enc);
  591. break;
  592. }
  593. }
  594. ptr += MINBPC(enc);
  595. REQUIRE_CHAR(enc, ptr, end);
  596. switch (BYTE_TYPE(enc, ptr)) {
  597. case BT_S:
  598. case BT_CR:
  599. case BT_LF:
  600. break;
  601. case BT_SOL:
  602. goto sol;
  603. case BT_GT:
  604. goto gt;
  605. default:
  606. *nextTokPtr = ptr;
  607. return XML_TOK_INVALID;
  608. }
  609. /* ptr points to closing quote */
  610. for (;;) {
  611. ptr += MINBPC(enc);
  612. REQUIRE_CHAR(enc, ptr, end);
  613. switch (BYTE_TYPE(enc, ptr)) {
  614. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  615. case BT_S: case BT_CR: case BT_LF:
  616. continue;
  617. case BT_GT:
  618. gt:
  619. *nextTokPtr = ptr + MINBPC(enc);
  620. return XML_TOK_START_TAG_WITH_ATTS;
  621. case BT_SOL:
  622. sol:
  623. ptr += MINBPC(enc);
  624. REQUIRE_CHAR(enc, ptr, end);
  625. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  626. *nextTokPtr = ptr;
  627. return XML_TOK_INVALID;
  628. }
  629. *nextTokPtr = ptr + MINBPC(enc);
  630. return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
  631. default:
  632. *nextTokPtr = ptr;
  633. return XML_TOK_INVALID;
  634. }
  635. break;
  636. }
  637. break;
  638. }
  639. default:
  640. *nextTokPtr = ptr;
  641. return XML_TOK_INVALID;
  642. }
  643. }
  644. return XML_TOK_PARTIAL;
  645. }
  646. /* ptr points to character following "<" */
  647. static int PTRCALL
  648. PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
  649. const char **nextTokPtr)
  650. {
  651. #ifdef XML_NS
  652. int hadColon;
  653. #endif
  654. REQUIRE_CHAR(enc, ptr, end);
  655. switch (BYTE_TYPE(enc, ptr)) {
  656. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  657. case BT_EXCL:
  658. ptr += MINBPC(enc);
  659. REQUIRE_CHAR(enc, ptr, end);
  660. switch (BYTE_TYPE(enc, ptr)) {
  661. case BT_MINUS:
  662. return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  663. case BT_LSQB:
  664. return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
  665. end, nextTokPtr);
  666. }
  667. *nextTokPtr = ptr;
  668. return XML_TOK_INVALID;
  669. case BT_QUEST:
  670. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  671. case BT_SOL:
  672. return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  673. default:
  674. *nextTokPtr = ptr;
  675. return XML_TOK_INVALID;
  676. }
  677. #ifdef XML_NS
  678. hadColon = 0;
  679. #endif
  680. /* we have a start-tag */
  681. while (HAS_CHAR(enc, ptr, end)) {
  682. switch (BYTE_TYPE(enc, ptr)) {
  683. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  684. #ifdef XML_NS
  685. case BT_COLON:
  686. if (hadColon) {
  687. *nextTokPtr = ptr;
  688. return XML_TOK_INVALID;
  689. }
  690. hadColon = 1;
  691. ptr += MINBPC(enc);
  692. REQUIRE_CHAR(enc, ptr, end);
  693. switch (BYTE_TYPE(enc, ptr)) {
  694. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  695. default:
  696. *nextTokPtr = ptr;
  697. return XML_TOK_INVALID;
  698. }
  699. break;
  700. #endif
  701. case BT_S: case BT_CR: case BT_LF:
  702. {
  703. ptr += MINBPC(enc);
  704. while (HAS_CHAR(enc, ptr, end)) {
  705. switch (BYTE_TYPE(enc, ptr)) {
  706. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  707. case BT_GT:
  708. goto gt;
  709. case BT_SOL:
  710. goto sol;
  711. case BT_S: case BT_CR: case BT_LF:
  712. ptr += MINBPC(enc);
  713. continue;
  714. default:
  715. *nextTokPtr = ptr;
  716. return XML_TOK_INVALID;
  717. }
  718. return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
  719. }
  720. return XML_TOK_PARTIAL;
  721. }
  722. case BT_GT:
  723. gt:
  724. *nextTokPtr = ptr + MINBPC(enc);
  725. return XML_TOK_START_TAG_NO_ATTS;
  726. case BT_SOL:
  727. sol:
  728. ptr += MINBPC(enc);
  729. REQUIRE_CHAR(enc, ptr, end);
  730. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  731. *nextTokPtr = ptr;
  732. return XML_TOK_INVALID;
  733. }
  734. *nextTokPtr = ptr + MINBPC(enc);
  735. return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
  736. default:
  737. *nextTokPtr = ptr;
  738. return XML_TOK_INVALID;
  739. }
  740. }
  741. return XML_TOK_PARTIAL;
  742. }
  743. static int PTRCALL
  744. PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
  745. const char **nextTokPtr)
  746. {
  747. if (ptr >= end)
  748. return XML_TOK_NONE;
  749. if (MINBPC(enc) > 1) {
  750. size_t n = end - ptr;
  751. if (n & (MINBPC(enc) - 1)) {
  752. n &= ~(MINBPC(enc) - 1);
  753. if (n == 0)
  754. return XML_TOK_PARTIAL;
  755. end = ptr + n;
  756. }
  757. }
  758. switch (BYTE_TYPE(enc, ptr)) {
  759. case BT_LT:
  760. return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  761. case BT_AMP:
  762. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  763. case BT_CR:
  764. ptr += MINBPC(enc);
  765. if (! HAS_CHAR(enc, ptr, end))
  766. return XML_TOK_TRAILING_CR;
  767. if (BYTE_TYPE(enc, ptr) == BT_LF)
  768. ptr += MINBPC(enc);
  769. *nextTokPtr = ptr;
  770. return XML_TOK_DATA_NEWLINE;
  771. case BT_LF:
  772. *nextTokPtr = ptr + MINBPC(enc);
  773. return XML_TOK_DATA_NEWLINE;
  774. case BT_RSQB:
  775. ptr += MINBPC(enc);
  776. if (! HAS_CHAR(enc, ptr, end))
  777. return XML_TOK_TRAILING_RSQB;
  778. if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
  779. break;
  780. ptr += MINBPC(enc);
  781. if (! HAS_CHAR(enc, ptr, end))
  782. return XML_TOK_TRAILING_RSQB;
  783. if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  784. ptr -= MINBPC(enc);
  785. break;
  786. }
  787. *nextTokPtr = ptr;
  788. return XML_TOK_INVALID;
  789. INVALID_CASES(ptr, nextTokPtr)
  790. default:
  791. ptr += MINBPC(enc);
  792. break;
  793. }
  794. while (HAS_CHAR(enc, ptr, end)) {
  795. switch (BYTE_TYPE(enc, ptr)) {
  796. #define LEAD_CASE(n) \
  797. case BT_LEAD ## n: \
  798. if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
  799. *nextTokPtr = ptr; \
  800. return XML_TOK_DATA_CHARS; \
  801. } \
  802. ptr += n; \
  803. break;
  804. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  805. #undef LEAD_CASE
  806. case BT_RSQB:
  807. if (HAS_CHARS(enc, ptr, end, 2)) {
  808. if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
  809. ptr += MINBPC(enc);
  810. break;
  811. }
  812. if (HAS_CHARS(enc, ptr, end, 3)) {
  813. if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
  814. ptr += MINBPC(enc);
  815. break;
  816. }
  817. *nextTokPtr = ptr + 2*MINBPC(enc);
  818. return XML_TOK_INVALID;
  819. }
  820. }
  821. /* fall through */
  822. case BT_AMP:
  823. case BT_LT:
  824. case BT_NONXML:
  825. case BT_MALFORM:
  826. case BT_TRAIL:
  827. case BT_CR:
  828. case BT_LF:
  829. *nextTokPtr = ptr;
  830. return XML_TOK_DATA_CHARS;
  831. default:
  832. ptr += MINBPC(enc);
  833. break;
  834. }
  835. }
  836. *nextTokPtr = ptr;
  837. return XML_TOK_DATA_CHARS;
  838. }
  839. /* ptr points to character following "%" */
  840. static int PTRCALL
  841. PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
  842. const char **nextTokPtr)
  843. {
  844. REQUIRE_CHAR(enc, ptr, end);
  845. switch (BYTE_TYPE(enc, ptr)) {
  846. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  847. case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
  848. *nextTokPtr = ptr;
  849. return XML_TOK_PERCENT;
  850. default:
  851. *nextTokPtr = ptr;
  852. return XML_TOK_INVALID;
  853. }
  854. while (HAS_CHAR(enc, ptr, end)) {
  855. switch (BYTE_TYPE(enc, ptr)) {
  856. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  857. case BT_SEMI:
  858. *nextTokPtr = ptr + MINBPC(enc);
  859. return XML_TOK_PARAM_ENTITY_REF;
  860. default:
  861. *nextTokPtr = ptr;
  862. return XML_TOK_INVALID;
  863. }
  864. }
  865. return XML_TOK_PARTIAL;
  866. }
  867. static int PTRCALL
  868. PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
  869. const char **nextTokPtr)
  870. {
  871. REQUIRE_CHAR(enc, ptr, end);
  872. switch (BYTE_TYPE(enc, ptr)) {
  873. CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
  874. default:
  875. *nextTokPtr = ptr;
  876. return XML_TOK_INVALID;
  877. }
  878. while (HAS_CHAR(enc, ptr, end)) {
  879. switch (BYTE_TYPE(enc, ptr)) {
  880. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  881. case BT_CR: case BT_LF: case BT_S:
  882. case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
  883. *nextTokPtr = ptr;
  884. return XML_TOK_POUND_NAME;
  885. default:
  886. *nextTokPtr = ptr;
  887. return XML_TOK_INVALID;
  888. }
  889. }
  890. return -XML_TOK_POUND_NAME;
  891. }
  892. static int PTRCALL
  893. PREFIX(scanLit)(int open, const ENCODING *enc,
  894. const char *ptr, const char *end,
  895. const char **nextTokPtr)
  896. {
  897. while (HAS_CHAR(enc, ptr, end)) {
  898. int t = BYTE_TYPE(enc, ptr);
  899. switch (t) {
  900. INVALID_CASES(ptr, nextTokPtr)
  901. case BT_QUOT:
  902. case BT_APOS:
  903. ptr += MINBPC(enc);
  904. if (t != open)
  905. break;
  906. if (! HAS_CHAR(enc, ptr, end))
  907. return -XML_TOK_LITERAL;
  908. *nextTokPtr = ptr;
  909. switch (BYTE_TYPE(enc, ptr)) {
  910. case BT_S: case BT_CR: case BT_LF:
  911. case BT_GT: case BT_PERCNT: case BT_LSQB:
  912. return XML_TOK_LITERAL;
  913. default:
  914. return XML_TOK_INVALID;
  915. }
  916. default:
  917. ptr += MINBPC(enc);
  918. break;
  919. }
  920. }
  921. return XML_TOK_PARTIAL;
  922. }
  923. static int PTRCALL
  924. PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
  925. const char **nextTokPtr)
  926. {
  927. int tok;
  928. if (ptr >= end)
  929. return XML_TOK_NONE;
  930. if (MINBPC(enc) > 1) {
  931. size_t n = end - ptr;
  932. if (n & (MINBPC(enc) - 1)) {
  933. n &= ~(MINBPC(enc) - 1);
  934. if (n == 0)
  935. return XML_TOK_PARTIAL;
  936. end = ptr + n;
  937. }
  938. }
  939. switch (BYTE_TYPE(enc, ptr)) {
  940. case BT_QUOT:
  941. return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  942. case BT_APOS:
  943. return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  944. case BT_LT:
  945. {
  946. ptr += MINBPC(enc);
  947. REQUIRE_CHAR(enc, ptr, end);
  948. switch (BYTE_TYPE(enc, ptr)) {
  949. case BT_EXCL:
  950. return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  951. case BT_QUEST:
  952. return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  953. case BT_NMSTRT:
  954. case BT_HEX:
  955. case BT_NONASCII:
  956. case BT_LEAD2:
  957. case BT_LEAD3:
  958. case BT_LEAD4:
  959. *nextTokPtr = ptr - MINBPC(enc);
  960. return XML_TOK_INSTANCE_START;
  961. }
  962. *nextTokPtr = ptr;
  963. return XML_TOK_INVALID;
  964. }
  965. case BT_CR:
  966. if (ptr + MINBPC(enc) == end) {
  967. *nextTokPtr = end;
  968. /* indicate that this might be part of a CR/LF pair */
  969. return -XML_TOK_PROLOG_S;
  970. }
  971. /* fall through */
  972. case BT_S: case BT_LF:
  973. for (;;) {
  974. ptr += MINBPC(enc);
  975. if (! HAS_CHAR(enc, ptr, end))
  976. break;
  977. switch (BYTE_TYPE(enc, ptr)) {
  978. case BT_S: case BT_LF:
  979. break;
  980. case BT_CR:
  981. /* don't split CR/LF pair */
  982. if (ptr + MINBPC(enc) != end)
  983. break;
  984. /* fall through */
  985. default:
  986. *nextTokPtr = ptr;
  987. return XML_TOK_PROLOG_S;
  988. }
  989. }
  990. *nextTokPtr = ptr;
  991. return XML_TOK_PROLOG_S;
  992. case BT_PERCNT:
  993. return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  994. case BT_COMMA:
  995. *nextTokPtr = ptr + MINBPC(enc);
  996. return XML_TOK_COMMA;
  997. case BT_LSQB:
  998. *nextTokPtr = ptr + MINBPC(enc);
  999. return XML_TOK_OPEN_BRACKET;
  1000. case BT_RSQB:
  1001. ptr += MINBPC(enc);
  1002. if (! HAS_CHAR(enc, ptr, end))
  1003. return -XML_TOK_CLOSE_BRACKET;
  1004. if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1005. REQUIRE_CHARS(enc, ptr, end, 2);
  1006. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
  1007. *nextTokPtr = ptr + 2*MINBPC(enc);
  1008. return XML_TOK_COND_SECT_CLOSE;
  1009. }
  1010. }
  1011. *nextTokPtr = ptr;
  1012. return XML_TOK_CLOSE_BRACKET;
  1013. case BT_LPAR:
  1014. *nextTokPtr = ptr + MINBPC(enc);
  1015. return XML_TOK_OPEN_PAREN;
  1016. case BT_RPAR:
  1017. ptr += MINBPC(enc);
  1018. if (! HAS_CHAR(enc, ptr, end))
  1019. return -XML_TOK_CLOSE_PAREN;
  1020. switch (BYTE_TYPE(enc, ptr)) {
  1021. case BT_AST:
  1022. *nextTokPtr = ptr + MINBPC(enc);
  1023. return XML_TOK_CLOSE_PAREN_ASTERISK;
  1024. case BT_QUEST:
  1025. *nextTokPtr = ptr + MINBPC(enc);
  1026. return XML_TOK_CLOSE_PAREN_QUESTION;
  1027. case BT_PLUS:
  1028. *nextTokPtr = ptr + MINBPC(enc);
  1029. return XML_TOK_CLOSE_PAREN_PLUS;
  1030. case BT_CR: case BT_LF: case BT_S:
  1031. case BT_GT: case BT_COMMA: case BT_VERBAR:
  1032. case BT_RPAR:
  1033. *nextTokPtr = ptr;
  1034. return XML_TOK_CLOSE_PAREN;
  1035. }
  1036. *nextTokPtr = ptr;
  1037. return XML_TOK_INVALID;
  1038. case BT_VERBAR:
  1039. *nextTokPtr = ptr + MINBPC(enc);
  1040. return XML_TOK_OR;
  1041. case BT_GT:
  1042. *nextTokPtr = ptr + MINBPC(enc);
  1043. return XML_TOK_DECL_CLOSE;
  1044. case BT_NUM:
  1045. return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1046. #define LEAD_CASE(n) \
  1047. case BT_LEAD ## n: \
  1048. if (end - ptr < n) \
  1049. return XML_TOK_PARTIAL_CHAR; \
  1050. if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
  1051. ptr += n; \
  1052. tok = XML_TOK_NAME; \
  1053. break; \
  1054. } \
  1055. if (IS_NAME_CHAR(enc, ptr, n)) { \
  1056. ptr += n; \
  1057. tok = XML_TOK_NMTOKEN; \
  1058. break; \
  1059. } \
  1060. *nextTokPtr = ptr; \
  1061. return XML_TOK_INVALID;
  1062. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1063. #undef LEAD_CASE
  1064. case BT_NMSTRT:
  1065. case BT_HEX:
  1066. tok = XML_TOK_NAME;
  1067. ptr += MINBPC(enc);
  1068. break;
  1069. case BT_DIGIT:
  1070. case BT_NAME:
  1071. case BT_MINUS:
  1072. #ifdef XML_NS
  1073. case BT_COLON:
  1074. #endif
  1075. tok = XML_TOK_NMTOKEN;
  1076. ptr += MINBPC(enc);
  1077. break;
  1078. case BT_NONASCII:
  1079. if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
  1080. ptr += MINBPC(enc);
  1081. tok = XML_TOK_NAME;
  1082. break;
  1083. }
  1084. if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
  1085. ptr += MINBPC(enc);
  1086. tok = XML_TOK_NMTOKEN;
  1087. break;
  1088. }
  1089. /* fall through */
  1090. default:
  1091. *nextTokPtr = ptr;
  1092. return XML_TOK_INVALID;
  1093. }
  1094. while (HAS_CHAR(enc, ptr, end)) {
  1095. switch (BYTE_TYPE(enc, ptr)) {
  1096. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1097. case BT_GT: case BT_RPAR: case BT_COMMA:
  1098. case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
  1099. case BT_S: case BT_CR: case BT_LF:
  1100. *nextTokPtr = ptr;
  1101. return tok;
  1102. #ifdef XML_NS
  1103. case BT_COLON:
  1104. ptr += MINBPC(enc);
  1105. switch (tok) {
  1106. case XML_TOK_NAME:
  1107. REQUIRE_CHAR(enc, ptr, end);
  1108. tok = XML_TOK_PREFIXED_NAME;
  1109. switch (BYTE_TYPE(enc, ptr)) {
  1110. CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
  1111. default:
  1112. tok = XML_TOK_NMTOKEN;
  1113. break;
  1114. }
  1115. break;
  1116. case XML_TOK_PREFIXED_NAME:
  1117. tok = XML_TOK_NMTOKEN;
  1118. break;
  1119. }
  1120. break;
  1121. #endif
  1122. case BT_PLUS:
  1123. if (tok == XML_TOK_NMTOKEN) {
  1124. *nextTokPtr = ptr;
  1125. return XML_TOK_INVALID;
  1126. }
  1127. *nextTokPtr = ptr + MINBPC(enc);
  1128. return XML_TOK_NAME_PLUS;
  1129. case BT_AST:
  1130. if (tok == XML_TOK_NMTOKEN) {
  1131. *nextTokPtr = ptr;
  1132. return XML_TOK_INVALID;
  1133. }
  1134. *nextTokPtr = ptr + MINBPC(enc);
  1135. return XML_TOK_NAME_ASTERISK;
  1136. case BT_QUEST:
  1137. if (tok == XML_TOK_NMTOKEN) {
  1138. *nextTokPtr = ptr;
  1139. return XML_TOK_INVALID;
  1140. }
  1141. *nextTokPtr = ptr + MINBPC(enc);
  1142. return XML_TOK_NAME_QUESTION;
  1143. default:
  1144. *nextTokPtr = ptr;
  1145. return XML_TOK_INVALID;
  1146. }
  1147. }
  1148. return -tok;
  1149. }
  1150. static int PTRCALL
  1151. PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
  1152. const char *end, const char **nextTokPtr)
  1153. {
  1154. const char *start;
  1155. if (ptr >= end)
  1156. return XML_TOK_NONE;
  1157. else if (! HAS_CHAR(enc, ptr, end))
  1158. return XML_TOK_PARTIAL;
  1159. start = ptr;
  1160. while (HAS_CHAR(enc, ptr, end)) {
  1161. switch (BYTE_TYPE(enc, ptr)) {
  1162. #define LEAD_CASE(n) \
  1163. case BT_LEAD ## n: ptr += n; break;
  1164. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1165. #undef LEAD_CASE
  1166. case BT_AMP:
  1167. if (ptr == start)
  1168. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1169. *nextTokPtr = ptr;
  1170. return XML_TOK_DATA_CHARS;
  1171. case BT_LT:
  1172. /* this is for inside entity references */
  1173. *nextTokPtr = ptr;
  1174. return XML_TOK_INVALID;
  1175. case BT_LF:
  1176. if (ptr == start) {
  1177. *nextTokPtr = ptr + MINBPC(enc);
  1178. return XML_TOK_DATA_NEWLINE;
  1179. }
  1180. *nextTokPtr = ptr;
  1181. return XML_TOK_DATA_CHARS;
  1182. case BT_CR:
  1183. if (ptr == start) {
  1184. ptr += MINBPC(enc);
  1185. if (! HAS_CHAR(enc, ptr, end))
  1186. return XML_TOK_TRAILING_CR;
  1187. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1188. ptr += MINBPC(enc);
  1189. *nextTokPtr = ptr;
  1190. return XML_TOK_DATA_NEWLINE;
  1191. }
  1192. *nextTokPtr = ptr;
  1193. return XML_TOK_DATA_CHARS;
  1194. case BT_S:
  1195. if (ptr == start) {
  1196. *nextTokPtr = ptr + MINBPC(enc);
  1197. return XML_TOK_ATTRIBUTE_VALUE_S;
  1198. }
  1199. *nextTokPtr = ptr;
  1200. return XML_TOK_DATA_CHARS;
  1201. default:
  1202. ptr += MINBPC(enc);
  1203. break;
  1204. }
  1205. }
  1206. *nextTokPtr = ptr;
  1207. return XML_TOK_DATA_CHARS;
  1208. }
  1209. static int PTRCALL
  1210. PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
  1211. const char *end, const char **nextTokPtr)
  1212. {
  1213. const char *start;
  1214. if (ptr >= end)
  1215. return XML_TOK_NONE;
  1216. else if (! HAS_CHAR(enc, ptr, end))
  1217. return XML_TOK_PARTIAL;
  1218. start = ptr;
  1219. while (HAS_CHAR(enc, ptr, end)) {
  1220. switch (BYTE_TYPE(enc, ptr)) {
  1221. #define LEAD_CASE(n) \
  1222. case BT_LEAD ## n: ptr += n; break;
  1223. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1224. #undef LEAD_CASE
  1225. case BT_AMP:
  1226. if (ptr == start)
  1227. return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  1228. *nextTokPtr = ptr;
  1229. return XML_TOK_DATA_CHARS;
  1230. case BT_PERCNT:
  1231. if (ptr == start) {
  1232. int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
  1233. end, nextTokPtr);
  1234. return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
  1235. }
  1236. *nextTokPtr = ptr;
  1237. return XML_TOK_DATA_CHARS;
  1238. case BT_LF:
  1239. if (ptr == start) {
  1240. *nextTokPtr = ptr + MINBPC(enc);
  1241. return XML_TOK_DATA_NEWLINE;
  1242. }
  1243. *nextTokPtr = ptr;
  1244. return XML_TOK_DATA_CHARS;
  1245. case BT_CR:
  1246. if (ptr == start) {
  1247. ptr += MINBPC(enc);
  1248. if (! HAS_CHAR(enc, ptr, end))
  1249. return XML_TOK_TRAILING_CR;
  1250. if (BYTE_TYPE(enc, ptr) == BT_LF)
  1251. ptr += MINBPC(enc);
  1252. *nextTokPtr = ptr;
  1253. return XML_TOK_DATA_NEWLINE;
  1254. }
  1255. *nextTokPtr = ptr;
  1256. return XML_TOK_DATA_CHARS;
  1257. default:
  1258. ptr += MINBPC(enc);
  1259. break;
  1260. }
  1261. }
  1262. *nextTokPtr = ptr;
  1263. return XML_TOK_DATA_CHARS;
  1264. }
  1265. #ifdef XML_DTD
  1266. static int PTRCALL
  1267. PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
  1268. const char *end, const char **nextTokPtr)
  1269. {
  1270. int level = 0;
  1271. if (MINBPC(enc) > 1) {
  1272. size_t n = end - ptr;
  1273. if (n & (MINBPC(enc) - 1)) {
  1274. n &= ~(MINBPC(enc) - 1);
  1275. end = ptr + n;
  1276. }
  1277. }
  1278. while (HAS_CHAR(enc, ptr, end)) {
  1279. switch (BYTE_TYPE(enc, ptr)) {
  1280. INVALID_CASES(ptr, nextTokPtr)
  1281. case BT_LT:
  1282. ptr += MINBPC(enc);
  1283. REQUIRE_CHAR(enc, ptr, end);
  1284. if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
  1285. ptr += MINBPC(enc);
  1286. REQUIRE_CHAR(enc, ptr, end);
  1287. if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
  1288. ++level;
  1289. ptr += MINBPC(enc);
  1290. }
  1291. }
  1292. break;
  1293. case BT_RSQB:
  1294. ptr += MINBPC(enc);
  1295. REQUIRE_CHAR(enc, ptr, end);
  1296. if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
  1297. ptr += MINBPC(enc);
  1298. REQUIRE_CHAR(enc, ptr, end);
  1299. if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
  1300. ptr += MINBPC(enc);
  1301. if (level == 0) {
  1302. *nextTokPtr = ptr;
  1303. return XML_TOK_IGNORE_SECT;
  1304. }
  1305. --level;
  1306. }
  1307. }
  1308. break;
  1309. default:
  1310. ptr += MINBPC(enc);
  1311. break;
  1312. }
  1313. }
  1314. return XML_TOK_PARTIAL;
  1315. }
  1316. #endif /* XML_DTD */
  1317. static int PTRCALL
  1318. PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
  1319. const char **badPtr)
  1320. {
  1321. ptr += MINBPC(enc);
  1322. end -= MINBPC(enc);
  1323. for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
  1324. switch (BYTE_TYPE(enc, ptr)) {
  1325. case BT_DIGIT:
  1326. case BT_HEX:
  1327. case BT_MINUS:
  1328. case BT_APOS:
  1329. case BT_LPAR:
  1330. case BT_RPAR:
  1331. case BT_PLUS:
  1332. case BT_COMMA:
  1333. case BT_SOL:
  1334. case BT_EQUALS:
  1335. case BT_QUEST:
  1336. case BT_CR:
  1337. case BT_LF:
  1338. case BT_SEMI:
  1339. case BT_EXCL:
  1340. case BT_AST:
  1341. case BT_PERCNT:
  1342. case BT_NUM:
  1343. #ifdef XML_NS
  1344. case BT_COLON:
  1345. #endif
  1346. break;
  1347. case BT_S:
  1348. if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
  1349. *badPtr = ptr;
  1350. return 0;
  1351. }
  1352. break;
  1353. case BT_NAME:
  1354. case BT_NMSTRT:
  1355. if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
  1356. break;
  1357. default:
  1358. switch (BYTE_TO_ASCII(enc, ptr)) {
  1359. case 0x24: /* $ */
  1360. case 0x40: /* @ */
  1361. break;
  1362. default:
  1363. *badPtr = ptr;
  1364. return 0;
  1365. }
  1366. break;
  1367. }
  1368. }
  1369. return 1;
  1370. }
  1371. /* This must only be called for a well-formed start-tag or empty
  1372. element tag. Returns the number of attributes. Pointers to the
  1373. first attsMax attributes are stored in atts.
  1374. */
  1375. static int PTRCALL
  1376. PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
  1377. int attsMax, ATTRIBUTE *atts)
  1378. {
  1379. enum { other, inName, inValue } state = inName;
  1380. int nAtts = 0;
  1381. int open = 0; /* defined when state == inValue;
  1382. initialization just to shut up compilers */
  1383. for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
  1384. switch (BYTE_TYPE(enc, ptr)) {
  1385. #define START_NAME \
  1386. if (state == other) { \
  1387. if (nAtts < attsMax) { \
  1388. atts[nAtts].name = ptr; \
  1389. atts[nAtts].normalized = 1; \
  1390. } \
  1391. state = inName; \
  1392. }
  1393. #define LEAD_CASE(n) \
  1394. case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
  1395. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1396. #undef LEAD_CASE
  1397. case BT_NONASCII:
  1398. case BT_NMSTRT:
  1399. case BT_HEX:
  1400. START_NAME
  1401. break;
  1402. #undef START_NAME
  1403. case BT_QUOT:
  1404. if (state != inValue) {
  1405. if (nAtts < attsMax)
  1406. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1407. state = inValue;
  1408. open = BT_QUOT;
  1409. }
  1410. else if (open == BT_QUOT) {
  1411. state = other;
  1412. if (nAtts < attsMax)
  1413. atts[nAtts].valueEnd = ptr;
  1414. nAtts++;
  1415. }
  1416. break;
  1417. case BT_APOS:
  1418. if (state != inValue) {
  1419. if (nAtts < attsMax)
  1420. atts[nAtts].valuePtr = ptr + MINBPC(enc);
  1421. state = inValue;
  1422. open = BT_APOS;
  1423. }
  1424. else if (open == BT_APOS) {
  1425. state = other;
  1426. if (nAtts < attsMax)
  1427. atts[nAtts].valueEnd = ptr;
  1428. nAtts++;
  1429. }
  1430. break;
  1431. case BT_AMP:
  1432. if (nAtts < attsMax)
  1433. atts[nAtts].normalized = 0;
  1434. break;
  1435. case BT_S:
  1436. if (state == inName)
  1437. state = other;
  1438. else if (state == inValue
  1439. && nAtts < attsMax
  1440. && atts[nAtts].normalized
  1441. && (ptr == atts[nAtts].valuePtr
  1442. || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
  1443. || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
  1444. || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
  1445. atts[nAtts].normalized = 0;
  1446. break;
  1447. case BT_CR: case BT_LF:
  1448. /* This case ensures that the first attribute name is counted
  1449. Apart from that we could just change state on the quote. */
  1450. if (state == inName)
  1451. state = other;
  1452. else if (state == inValue && nAtts < attsMax)
  1453. atts[nAtts].normalized = 0;
  1454. break;
  1455. case BT_GT:
  1456. case BT_SOL:
  1457. if (state != inValue)
  1458. return nAtts;
  1459. break;
  1460. default:
  1461. break;
  1462. }
  1463. }
  1464. /* not reached */
  1465. }
  1466. static int PTRFASTCALL
  1467. PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr)
  1468. {
  1469. int result = 0;
  1470. /* skip &# */
  1471. ptr += 2*MINBPC(enc);
  1472. if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
  1473. for (ptr += MINBPC(enc);
  1474. !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
  1475. ptr += MINBPC(enc)) {
  1476. int c = BYTE_TO_ASCII(enc, ptr);
  1477. switch (c) {
  1478. case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
  1479. case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
  1480. result <<= 4;
  1481. result |= (c - ASCII_0);
  1482. break;
  1483. case ASCII_A: case ASCII_B: case ASCII_C:
  1484. case ASCII_D: case ASCII_E: case ASCII_F:
  1485. result <<= 4;
  1486. result += 10 + (c - ASCII_A);
  1487. break;
  1488. case ASCII_a: case ASCII_b: case ASCII_c:
  1489. case ASCII_d: case ASCII_e: case ASCII_f:
  1490. result <<= 4;
  1491. result += 10 + (c - ASCII_a);
  1492. break;
  1493. }
  1494. if (result >= 0x110000)
  1495. return -1;
  1496. }
  1497. }
  1498. else {
  1499. for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
  1500. int c = BYTE_TO_ASCII(enc, ptr);
  1501. result *= 10;
  1502. result += (c - ASCII_0);
  1503. if (result >= 0x110000)
  1504. return -1;
  1505. }
  1506. }
  1507. return checkCharRefNumber(result);
  1508. }
  1509. static int PTRCALL
  1510. PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr,
  1511. const char *end)
  1512. {
  1513. switch ((end - ptr)/MINBPC(enc)) {
  1514. case 2:
  1515. if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
  1516. switch (BYTE_TO_ASCII(enc, ptr)) {
  1517. case ASCII_l:
  1518. return ASCII_LT;
  1519. case ASCII_g:
  1520. return ASCII_GT;
  1521. }
  1522. }
  1523. break;
  1524. case 3:
  1525. if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
  1526. ptr += MINBPC(enc);
  1527. if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
  1528. ptr += MINBPC(enc);
  1529. if (CHAR_MATCHES(enc, ptr, ASCII_p))
  1530. return ASCII_AMP;
  1531. }
  1532. }
  1533. break;
  1534. case 4:
  1535. switch (BYTE_TO_ASCII(enc, ptr)) {
  1536. case ASCII_q:
  1537. ptr += MINBPC(enc);
  1538. if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
  1539. ptr += MINBPC(enc);
  1540. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1541. ptr += MINBPC(enc);
  1542. if (CHAR_MATCHES(enc, ptr, ASCII_t))
  1543. return ASCII_QUOT;
  1544. }
  1545. }
  1546. break;
  1547. case ASCII_a:
  1548. ptr += MINBPC(enc);
  1549. if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
  1550. ptr += MINBPC(enc);
  1551. if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
  1552. ptr += MINBPC(enc);
  1553. if (CHAR_MATCHES(enc, ptr, ASCII_s))
  1554. return ASCII_APOS;
  1555. }
  1556. }
  1557. break;
  1558. }
  1559. }
  1560. return 0;
  1561. }
  1562. static int PTRCALL
  1563. PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
  1564. {
  1565. for (;;) {
  1566. switch (BYTE_TYPE(enc, ptr1)) {
  1567. #define LEAD_CASE(n) \
  1568. case BT_LEAD ## n: \
  1569. if (*ptr1++ != *ptr2++) \
  1570. return 0;
  1571. LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
  1572. #undef LEAD_CASE
  1573. /* fall through */
  1574. if (*ptr1++ != *ptr2++)
  1575. return 0;
  1576. break;
  1577. case BT_NONASCII:
  1578. case BT_NMSTRT:
  1579. #ifdef XML_NS
  1580. case BT_COLON:
  1581. #endif
  1582. case BT_HEX:
  1583. case BT_DIGIT:
  1584. case BT_NAME:
  1585. case BT_MINUS:
  1586. if (*ptr2++ != *ptr1++)
  1587. return 0;
  1588. if (MINBPC(enc) > 1) {
  1589. if (*ptr2++ != *ptr1++)
  1590. return 0;
  1591. if (MINBPC(enc) > 2) {
  1592. if (*ptr2++ != *ptr1++)
  1593. return 0;
  1594. if (MINBPC(enc) > 3) {
  1595. if (*ptr2++ != *ptr1++)
  1596. return 0;
  1597. }
  1598. }
  1599. }
  1600. break;
  1601. default:
  1602. if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
  1603. return 1;
  1604. switch (BYTE_TYPE(enc, ptr2)) {
  1605. case BT_LEAD2:
  1606. case BT_LEAD3:
  1607. case BT_LEAD4:
  1608. case BT_NONASCII:
  1609. case BT_NMSTRT:
  1610. #ifdef XML_NS
  1611. case BT_COLON:
  1612. #endif
  1613. case BT_HEX:
  1614. case BT_DIGIT:
  1615. case BT_NAME:
  1616. case BT_MINUS:
  1617. return 0;
  1618. default:
  1619. return 1;
  1620. }
  1621. }
  1622. }
  1623. /* not reached */
  1624. }
  1625. static int PTRCALL
  1626. PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1,
  1627. const char *end1, const char *ptr2)
  1628. {
  1629. for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
  1630. if (end1 - ptr1 < MINBPC(enc))
  1631. return 0;
  1632. if (!CHAR_MATCHES(enc, ptr1, *ptr2))
  1633. return 0;
  1634. }
  1635. return ptr1 == end1;
  1636. }
  1637. static int PTRFASTCALL
  1638. PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
  1639. {
  1640. const char *start = ptr;
  1641. for (;;) {
  1642. switch (BYTE_TYPE(enc, ptr)) {
  1643. #define LEAD_CASE(n) \
  1644. case BT_LEAD ## n: ptr += n; break;
  1645. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1646. #undef LEAD_CASE
  1647. case BT_NONASCII:
  1648. case BT_NMSTRT:
  1649. #ifdef XML_NS
  1650. case BT_COLON:
  1651. #endif
  1652. case BT_HEX:
  1653. case BT_DIGIT:
  1654. case BT_NAME:
  1655. case BT_MINUS:
  1656. ptr += MINBPC(enc);
  1657. break;
  1658. default:
  1659. return (int)(ptr - start);
  1660. }
  1661. }
  1662. }
  1663. static const char * PTRFASTCALL
  1664. PREFIX(skipS)(const ENCODING *enc, const char *ptr)
  1665. {
  1666. for (;;) {
  1667. switch (BYTE_TYPE(enc, ptr)) {
  1668. case BT_LF:
  1669. case BT_CR:
  1670. case BT_S:
  1671. ptr += MINBPC(enc);
  1672. break;
  1673. default:
  1674. return ptr;
  1675. }
  1676. }
  1677. }
  1678. static void PTRCALL
  1679. PREFIX(updatePosition)(const ENCODING *enc,
  1680. const char *ptr,
  1681. const char *end,
  1682. POSITION *pos)
  1683. {
  1684. while (HAS_CHAR(enc, ptr, end)) {
  1685. switch (BYTE_TYPE(enc, ptr)) {
  1686. #define LEAD_CASE(n) \
  1687. case BT_LEAD ## n: \
  1688. ptr += n; \
  1689. break;
  1690. LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
  1691. #undef LEAD_CASE
  1692. case BT_LF:
  1693. pos->columnNumber = (XML_Size)-1;
  1694. pos->lineNumber++;
  1695. ptr += MINBPC(enc);
  1696. break;
  1697. case BT_CR:
  1698. pos->lineNumber++;
  1699. ptr += MINBPC(enc);
  1700. if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
  1701. ptr += MINBPC(enc);
  1702. pos->columnNumber = (XML_Size)-1;
  1703. break;
  1704. default:
  1705. ptr += MINBPC(enc);
  1706. break;
  1707. }
  1708. pos->columnNumber++;
  1709. }
  1710. }
  1711. #undef DO_LEAD_CASE
  1712. #undef MULTIBYTE_CASES
  1713. #undef INVALID_CASES
  1714. #undef CHECK_NAME_CASE
  1715. #undef CHECK_NAME_CASES
  1716. #undef CHECK_NMSTRT_CASE
  1717. #undef CHECK_NMSTRT_CASES
  1718. #endif /* XML_TOK_IMPL_C */