sre_lib.h 62 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848
  1. /*
  2. * Secret Labs' Regular Expression Engine
  3. *
  4. * regular expression matching engine
  5. *
  6. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
  7. *
  8. * See the sre.c file for information on usage and redistribution.
  9. */
  10. /* String matching engine */
  11. /* This file is included three times, with different character settings */
  12. LOCAL(int)
  13. SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
  14. {
  15. /* check if pointer is at given position */
  16. Py_ssize_t thisp, thatp;
  17. switch (at) {
  18. case SRE_AT_BEGINNING:
  19. case SRE_AT_BEGINNING_STRING:
  20. return ((void*) ptr == state->beginning);
  21. case SRE_AT_BEGINNING_LINE:
  22. return ((void*) ptr == state->beginning ||
  23. SRE_IS_LINEBREAK((int) ptr[-1]));
  24. case SRE_AT_END:
  25. return (((SRE_CHAR *)state->end - ptr == 1 &&
  26. SRE_IS_LINEBREAK((int) ptr[0])) ||
  27. ((void*) ptr == state->end));
  28. case SRE_AT_END_LINE:
  29. return ((void*) ptr == state->end ||
  30. SRE_IS_LINEBREAK((int) ptr[0]));
  31. case SRE_AT_END_STRING:
  32. return ((void*) ptr == state->end);
  33. case SRE_AT_BOUNDARY:
  34. if (state->beginning == state->end)
  35. return 0;
  36. thatp = ((void*) ptr > state->beginning) ?
  37. SRE_IS_WORD((int) ptr[-1]) : 0;
  38. thisp = ((void*) ptr < state->end) ?
  39. SRE_IS_WORD((int) ptr[0]) : 0;
  40. return thisp != thatp;
  41. case SRE_AT_NON_BOUNDARY:
  42. if (state->beginning == state->end)
  43. return 0;
  44. thatp = ((void*) ptr > state->beginning) ?
  45. SRE_IS_WORD((int) ptr[-1]) : 0;
  46. thisp = ((void*) ptr < state->end) ?
  47. SRE_IS_WORD((int) ptr[0]) : 0;
  48. return thisp == thatp;
  49. case SRE_AT_LOC_BOUNDARY:
  50. if (state->beginning == state->end)
  51. return 0;
  52. thatp = ((void*) ptr > state->beginning) ?
  53. SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
  54. thisp = ((void*) ptr < state->end) ?
  55. SRE_LOC_IS_WORD((int) ptr[0]) : 0;
  56. return thisp != thatp;
  57. case SRE_AT_LOC_NON_BOUNDARY:
  58. if (state->beginning == state->end)
  59. return 0;
  60. thatp = ((void*) ptr > state->beginning) ?
  61. SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
  62. thisp = ((void*) ptr < state->end) ?
  63. SRE_LOC_IS_WORD((int) ptr[0]) : 0;
  64. return thisp == thatp;
  65. case SRE_AT_UNI_BOUNDARY:
  66. if (state->beginning == state->end)
  67. return 0;
  68. thatp = ((void*) ptr > state->beginning) ?
  69. SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
  70. thisp = ((void*) ptr < state->end) ?
  71. SRE_UNI_IS_WORD((int) ptr[0]) : 0;
  72. return thisp != thatp;
  73. case SRE_AT_UNI_NON_BOUNDARY:
  74. if (state->beginning == state->end)
  75. return 0;
  76. thatp = ((void*) ptr > state->beginning) ?
  77. SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
  78. thisp = ((void*) ptr < state->end) ?
  79. SRE_UNI_IS_WORD((int) ptr[0]) : 0;
  80. return thisp == thatp;
  81. }
  82. return 0;
  83. }
  84. LOCAL(int)
  85. SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
  86. {
  87. /* check if character is a member of the given set */
  88. int ok = 1;
  89. for (;;) {
  90. switch (*set++) {
  91. case SRE_OP_FAILURE:
  92. return !ok;
  93. case SRE_OP_LITERAL:
  94. /* <LITERAL> <code> */
  95. if (ch == set[0])
  96. return ok;
  97. set++;
  98. break;
  99. case SRE_OP_CATEGORY:
  100. /* <CATEGORY> <code> */
  101. if (sre_category(set[0], (int) ch))
  102. return ok;
  103. set++;
  104. break;
  105. case SRE_OP_CHARSET:
  106. /* <CHARSET> <bitmap> */
  107. if (ch < 256 &&
  108. (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
  109. return ok;
  110. set += 256/SRE_CODE_BITS;
  111. break;
  112. case SRE_OP_RANGE:
  113. /* <RANGE> <lower> <upper> */
  114. if (set[0] <= ch && ch <= set[1])
  115. return ok;
  116. set += 2;
  117. break;
  118. case SRE_OP_RANGE_UNI_IGNORE:
  119. /* <RANGE_UNI_IGNORE> <lower> <upper> */
  120. {
  121. SRE_CODE uch;
  122. /* ch is already lower cased */
  123. if (set[0] <= ch && ch <= set[1])
  124. return ok;
  125. uch = sre_upper_unicode(ch);
  126. if (set[0] <= uch && uch <= set[1])
  127. return ok;
  128. set += 2;
  129. break;
  130. }
  131. case SRE_OP_NEGATE:
  132. ok = !ok;
  133. break;
  134. case SRE_OP_BIGCHARSET:
  135. /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
  136. {
  137. Py_ssize_t count, block;
  138. count = *(set++);
  139. if (ch < 0x10000u)
  140. block = ((unsigned char*)set)[ch >> 8];
  141. else
  142. block = -1;
  143. set += 256/sizeof(SRE_CODE);
  144. if (block >=0 &&
  145. (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
  146. (1u << (ch & (SRE_CODE_BITS-1)))))
  147. return ok;
  148. set += count * (256/SRE_CODE_BITS);
  149. break;
  150. }
  151. default:
  152. /* internal error -- there's not much we can do about it
  153. here, so let's just pretend it didn't match... */
  154. return 0;
  155. }
  156. }
  157. }
  158. LOCAL(int)
  159. SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
  160. {
  161. SRE_CODE lo, up;
  162. lo = sre_lower_locale(ch);
  163. if (SRE(charset)(state, set, lo))
  164. return 1;
  165. up = sre_upper_locale(ch);
  166. return up != lo && SRE(charset)(state, set, up);
  167. }
  168. LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
  169. LOCAL(Py_ssize_t)
  170. SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
  171. {
  172. SRE_CODE chr;
  173. SRE_CHAR c;
  174. const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
  175. const SRE_CHAR* end = (const SRE_CHAR *)state->end;
  176. Py_ssize_t i;
  177. /* adjust end */
  178. if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
  179. end = ptr + maxcount;
  180. switch (pattern[0]) {
  181. case SRE_OP_IN:
  182. /* repeated set */
  183. TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
  184. while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
  185. ptr++;
  186. break;
  187. case SRE_OP_ANY:
  188. /* repeated dot wildcard. */
  189. TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
  190. while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
  191. ptr++;
  192. break;
  193. case SRE_OP_ANY_ALL:
  194. /* repeated dot wildcard. skip to the end of the target
  195. string, and backtrack from there */
  196. TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
  197. ptr = end;
  198. break;
  199. case SRE_OP_LITERAL:
  200. /* repeated literal */
  201. chr = pattern[1];
  202. TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
  203. c = (SRE_CHAR) chr;
  204. #if SIZEOF_SRE_CHAR < 4
  205. if ((SRE_CODE) c != chr)
  206. ; /* literal can't match: doesn't fit in char width */
  207. else
  208. #endif
  209. while (ptr < end && *ptr == c)
  210. ptr++;
  211. break;
  212. case SRE_OP_LITERAL_IGNORE:
  213. /* repeated literal */
  214. chr = pattern[1];
  215. TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
  216. while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
  217. ptr++;
  218. break;
  219. case SRE_OP_LITERAL_UNI_IGNORE:
  220. /* repeated literal */
  221. chr = pattern[1];
  222. TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
  223. while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
  224. ptr++;
  225. break;
  226. case SRE_OP_LITERAL_LOC_IGNORE:
  227. /* repeated literal */
  228. chr = pattern[1];
  229. TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
  230. while (ptr < end && char_loc_ignore(chr, *ptr))
  231. ptr++;
  232. break;
  233. case SRE_OP_NOT_LITERAL:
  234. /* repeated non-literal */
  235. chr = pattern[1];
  236. TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
  237. c = (SRE_CHAR) chr;
  238. #if SIZEOF_SRE_CHAR < 4
  239. if ((SRE_CODE) c != chr)
  240. ptr = end; /* literal can't match: doesn't fit in char width */
  241. else
  242. #endif
  243. while (ptr < end && *ptr != c)
  244. ptr++;
  245. break;
  246. case SRE_OP_NOT_LITERAL_IGNORE:
  247. /* repeated non-literal */
  248. chr = pattern[1];
  249. TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
  250. while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
  251. ptr++;
  252. break;
  253. case SRE_OP_NOT_LITERAL_UNI_IGNORE:
  254. /* repeated non-literal */
  255. chr = pattern[1];
  256. TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
  257. while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
  258. ptr++;
  259. break;
  260. case SRE_OP_NOT_LITERAL_LOC_IGNORE:
  261. /* repeated non-literal */
  262. chr = pattern[1];
  263. TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
  264. while (ptr < end && !char_loc_ignore(chr, *ptr))
  265. ptr++;
  266. break;
  267. default:
  268. /* repeated single character pattern */
  269. TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
  270. while ((SRE_CHAR*) state->ptr < end) {
  271. i = SRE(match)(state, pattern, 0);
  272. if (i < 0)
  273. return i;
  274. if (!i)
  275. break;
  276. }
  277. TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
  278. (SRE_CHAR*) state->ptr - ptr));
  279. return (SRE_CHAR*) state->ptr - ptr;
  280. }
  281. TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
  282. ptr - (SRE_CHAR*) state->ptr));
  283. return ptr - (SRE_CHAR*) state->ptr;
  284. }
  285. /* The macros below should be used to protect recursive SRE(match)()
  286. * calls that *failed* and do *not* return immediately (IOW, those
  287. * that will backtrack). Explaining:
  288. *
  289. * - Recursive SRE(match)() returned true: that's usually a success
  290. * (besides atypical cases like ASSERT_NOT), therefore there's no
  291. * reason to restore lastmark;
  292. *
  293. * - Recursive SRE(match)() returned false but the current SRE(match)()
  294. * is returning to the caller: If the current SRE(match)() is the
  295. * top function of the recursion, returning false will be a matching
  296. * failure, and it doesn't matter where lastmark is pointing to.
  297. * If it's *not* the top function, it will be a recursive SRE(match)()
  298. * failure by itself, and the calling SRE(match)() will have to deal
  299. * with the failure by the same rules explained here (it will restore
  300. * lastmark by itself if necessary);
  301. *
  302. * - Recursive SRE(match)() returned false, and will continue the
  303. * outside 'for' loop: must be protected when breaking, since the next
  304. * OP could potentially depend on lastmark;
  305. *
  306. * - Recursive SRE(match)() returned false, and will be called again
  307. * inside a local for/while loop: must be protected between each
  308. * loop iteration, since the recursive SRE(match)() could do anything,
  309. * and could potentially depend on lastmark.
  310. *
  311. * For more information, check the discussion at SF patch #712900.
  312. */
  313. #define LASTMARK_SAVE() \
  314. do { \
  315. ctx->lastmark = state->lastmark; \
  316. ctx->lastindex = state->lastindex; \
  317. } while (0)
  318. #define LASTMARK_RESTORE() \
  319. do { \
  320. state->lastmark = ctx->lastmark; \
  321. state->lastindex = ctx->lastindex; \
  322. } while (0)
  323. #define RETURN_ERROR(i) do { return i; } while(0)
  324. #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
  325. #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
  326. #define RETURN_ON_ERROR(i) \
  327. do { if (i < 0) RETURN_ERROR(i); } while (0)
  328. #define RETURN_ON_SUCCESS(i) \
  329. do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
  330. #define RETURN_ON_FAILURE(i) \
  331. do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
  332. #define DATA_STACK_ALLOC(state, type, ptr) \
  333. do { \
  334. alloc_pos = state->data_stack_base; \
  335. TRACE(("allocating %s in %zd (%zd)\n", \
  336. Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
  337. if (sizeof(type) > state->data_stack_size - alloc_pos) { \
  338. int j = data_stack_grow(state, sizeof(type)); \
  339. if (j < 0) return j; \
  340. if (ctx_pos != -1) \
  341. DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
  342. } \
  343. ptr = (type*)(state->data_stack+alloc_pos); \
  344. state->data_stack_base += sizeof(type); \
  345. } while (0)
  346. #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
  347. do { \
  348. TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
  349. ptr = (type*)(state->data_stack+pos); \
  350. } while (0)
  351. #define DATA_STACK_PUSH(state, data, size) \
  352. do { \
  353. TRACE(("copy data in %p to %zd (%zd)\n", \
  354. data, state->data_stack_base, size)); \
  355. if (size > state->data_stack_size - state->data_stack_base) { \
  356. int j = data_stack_grow(state, size); \
  357. if (j < 0) return j; \
  358. if (ctx_pos != -1) \
  359. DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
  360. } \
  361. memcpy(state->data_stack+state->data_stack_base, data, size); \
  362. state->data_stack_base += size; \
  363. } while (0)
  364. /* We add an explicit cast to memcpy here because MSVC has a bug when
  365. compiling C code where it believes that `const void**` cannot be
  366. safely casted to `void*`, see bpo-39943 for details. */
  367. #define DATA_STACK_POP(state, data, size, discard) \
  368. do { \
  369. TRACE(("copy data to %p from %zd (%zd)\n", \
  370. data, state->data_stack_base-size, size)); \
  371. memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
  372. if (discard) \
  373. state->data_stack_base -= size; \
  374. } while (0)
  375. #define DATA_STACK_POP_DISCARD(state, size) \
  376. do { \
  377. TRACE(("discard data from %zd (%zd)\n", \
  378. state->data_stack_base-size, size)); \
  379. state->data_stack_base -= size; \
  380. } while(0)
  381. #define DATA_PUSH(x) \
  382. DATA_STACK_PUSH(state, (x), sizeof(*(x)))
  383. #define DATA_POP(x) \
  384. DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
  385. #define DATA_POP_DISCARD(x) \
  386. DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
  387. #define DATA_ALLOC(t,p) \
  388. DATA_STACK_ALLOC(state, t, p)
  389. #define DATA_LOOKUP_AT(t,p,pos) \
  390. DATA_STACK_LOOKUP_AT(state,t,p,pos)
  391. #define MARK_PUSH(lastmark) \
  392. do if (lastmark >= 0) { \
  393. size_t _marks_size = (lastmark+1) * sizeof(void*); \
  394. DATA_STACK_PUSH(state, state->mark, _marks_size); \
  395. } while (0)
  396. #define MARK_POP(lastmark) \
  397. do if (lastmark >= 0) { \
  398. size_t _marks_size = (lastmark+1) * sizeof(void*); \
  399. DATA_STACK_POP(state, state->mark, _marks_size, 1); \
  400. } while (0)
  401. #define MARK_POP_KEEP(lastmark) \
  402. do if (lastmark >= 0) { \
  403. size_t _marks_size = (lastmark+1) * sizeof(void*); \
  404. DATA_STACK_POP(state, state->mark, _marks_size, 0); \
  405. } while (0)
  406. #define MARK_POP_DISCARD(lastmark) \
  407. do if (lastmark >= 0) { \
  408. size_t _marks_size = (lastmark+1) * sizeof(void*); \
  409. DATA_STACK_POP_DISCARD(state, _marks_size); \
  410. } while (0)
  411. #define JUMP_NONE 0
  412. #define JUMP_MAX_UNTIL_1 1
  413. #define JUMP_MAX_UNTIL_2 2
  414. #define JUMP_MAX_UNTIL_3 3
  415. #define JUMP_MIN_UNTIL_1 4
  416. #define JUMP_MIN_UNTIL_2 5
  417. #define JUMP_MIN_UNTIL_3 6
  418. #define JUMP_REPEAT 7
  419. #define JUMP_REPEAT_ONE_1 8
  420. #define JUMP_REPEAT_ONE_2 9
  421. #define JUMP_MIN_REPEAT_ONE 10
  422. #define JUMP_BRANCH 11
  423. #define JUMP_ASSERT 12
  424. #define JUMP_ASSERT_NOT 13
  425. #define JUMP_POSS_REPEAT_1 14
  426. #define JUMP_POSS_REPEAT_2 15
  427. #define JUMP_ATOMIC_GROUP 16
  428. #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
  429. ctx->pattern = pattern; \
  430. ctx->ptr = ptr; \
  431. DATA_ALLOC(SRE(match_context), nextctx); \
  432. nextctx->pattern = nextpattern; \
  433. nextctx->toplevel = toplevel_; \
  434. nextctx->jump = jumpvalue; \
  435. nextctx->last_ctx_pos = ctx_pos; \
  436. pattern = nextpattern; \
  437. ctx_pos = alloc_pos; \
  438. ctx = nextctx; \
  439. goto entrance; \
  440. jumplabel: \
  441. pattern = ctx->pattern; \
  442. ptr = ctx->ptr;
  443. #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
  444. DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
  445. #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
  446. DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
  447. typedef struct {
  448. Py_ssize_t count;
  449. union {
  450. SRE_CODE chr;
  451. SRE_REPEAT* rep;
  452. } u;
  453. int lastmark;
  454. int lastindex;
  455. const SRE_CODE* pattern;
  456. const SRE_CHAR* ptr;
  457. int toplevel;
  458. int jump;
  459. Py_ssize_t last_ctx_pos;
  460. } SRE(match_context);
  461. #define _MAYBE_CHECK_SIGNALS \
  462. do { \
  463. if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
  464. RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
  465. } \
  466. } while (0)
  467. #ifdef Py_DEBUG
  468. # define MAYBE_CHECK_SIGNALS \
  469. do { \
  470. _MAYBE_CHECK_SIGNALS; \
  471. if (state->fail_after_count >= 0) { \
  472. if (state->fail_after_count-- == 0) { \
  473. PyErr_SetNone(state->fail_after_exc); \
  474. RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
  475. } \
  476. } \
  477. } while (0)
  478. #else
  479. # define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
  480. #endif /* Py_DEBUG */
  481. #ifdef HAVE_COMPUTED_GOTOS
  482. #ifndef USE_COMPUTED_GOTOS
  483. #define USE_COMPUTED_GOTOS 1
  484. #endif
  485. #elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
  486. #error "Computed gotos are not supported on this compiler."
  487. #else
  488. #undef USE_COMPUTED_GOTOS
  489. #define USE_COMPUTED_GOTOS 0
  490. #endif
  491. #if USE_COMPUTED_GOTOS
  492. #define TARGET(OP) TARGET_ ## OP
  493. #define DISPATCH \
  494. do { \
  495. MAYBE_CHECK_SIGNALS; \
  496. goto *sre_targets[*pattern++]; \
  497. } while (0)
  498. #else
  499. #define TARGET(OP) case OP
  500. #define DISPATCH goto dispatch
  501. #endif
  502. /* check if string matches the given pattern. returns <0 for
  503. error, 0 for failure, and 1 for success */
  504. LOCAL(Py_ssize_t)
  505. SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
  506. {
  507. const SRE_CHAR* end = (const SRE_CHAR *)state->end;
  508. Py_ssize_t alloc_pos, ctx_pos = -1;
  509. Py_ssize_t ret = 0;
  510. int jump;
  511. unsigned int sigcount = state->sigcount;
  512. SRE(match_context)* ctx;
  513. SRE(match_context)* nextctx;
  514. TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
  515. DATA_ALLOC(SRE(match_context), ctx);
  516. ctx->last_ctx_pos = -1;
  517. ctx->jump = JUMP_NONE;
  518. ctx->toplevel = toplevel;
  519. ctx_pos = alloc_pos;
  520. #if USE_COMPUTED_GOTOS
  521. #include "sre_targets.h"
  522. #endif
  523. entrance:
  524. ; // Fashion statement.
  525. const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
  526. if (pattern[0] == SRE_OP_INFO) {
  527. /* optimization info block */
  528. /* <INFO> <1=skip> <2=flags> <3=min> ... */
  529. if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
  530. TRACE(("reject (got %tu chars, need %zu)\n",
  531. end - ptr, (size_t) pattern[3]));
  532. RETURN_FAILURE;
  533. }
  534. pattern += pattern[1] + 1;
  535. }
  536. #if USE_COMPUTED_GOTOS
  537. DISPATCH;
  538. #else
  539. dispatch:
  540. MAYBE_CHECK_SIGNALS;
  541. switch (*pattern++)
  542. #endif
  543. {
  544. TARGET(SRE_OP_MARK):
  545. /* set mark */
  546. /* <MARK> <gid> */
  547. TRACE(("|%p|%p|MARK %d\n", pattern,
  548. ptr, pattern[0]));
  549. {
  550. int i = pattern[0];
  551. if (i & 1)
  552. state->lastindex = i/2 + 1;
  553. if (i > state->lastmark) {
  554. /* state->lastmark is the highest valid index in the
  555. state->mark array. If it is increased by more than 1,
  556. the intervening marks must be set to NULL to signal
  557. that these marks have not been encountered. */
  558. int j = state->lastmark + 1;
  559. while (j < i)
  560. state->mark[j++] = NULL;
  561. state->lastmark = i;
  562. }
  563. state->mark[i] = ptr;
  564. }
  565. pattern++;
  566. DISPATCH;
  567. TARGET(SRE_OP_LITERAL):
  568. /* match literal string */
  569. /* <LITERAL> <code> */
  570. TRACE(("|%p|%p|LITERAL %d\n", pattern,
  571. ptr, *pattern));
  572. if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
  573. RETURN_FAILURE;
  574. pattern++;
  575. ptr++;
  576. DISPATCH;
  577. TARGET(SRE_OP_NOT_LITERAL):
  578. /* match anything that is not literal character */
  579. /* <NOT_LITERAL> <code> */
  580. TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
  581. ptr, *pattern));
  582. if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
  583. RETURN_FAILURE;
  584. pattern++;
  585. ptr++;
  586. DISPATCH;
  587. TARGET(SRE_OP_SUCCESS):
  588. /* end of pattern */
  589. TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
  590. if (ctx->toplevel &&
  591. ((state->match_all && ptr != state->end) ||
  592. (state->must_advance && ptr == state->start)))
  593. {
  594. RETURN_FAILURE;
  595. }
  596. state->ptr = ptr;
  597. RETURN_SUCCESS;
  598. TARGET(SRE_OP_AT):
  599. /* match at given position */
  600. /* <AT> <code> */
  601. TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
  602. if (!SRE(at)(state, ptr, *pattern))
  603. RETURN_FAILURE;
  604. pattern++;
  605. DISPATCH;
  606. TARGET(SRE_OP_CATEGORY):
  607. /* match at given category */
  608. /* <CATEGORY> <code> */
  609. TRACE(("|%p|%p|CATEGORY %d\n", pattern,
  610. ptr, *pattern));
  611. if (ptr >= end || !sre_category(pattern[0], ptr[0]))
  612. RETURN_FAILURE;
  613. pattern++;
  614. ptr++;
  615. DISPATCH;
  616. TARGET(SRE_OP_ANY):
  617. /* match anything (except a newline) */
  618. /* <ANY> */
  619. TRACE(("|%p|%p|ANY\n", pattern, ptr));
  620. if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
  621. RETURN_FAILURE;
  622. ptr++;
  623. DISPATCH;
  624. TARGET(SRE_OP_ANY_ALL):
  625. /* match anything */
  626. /* <ANY_ALL> */
  627. TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
  628. if (ptr >= end)
  629. RETURN_FAILURE;
  630. ptr++;
  631. DISPATCH;
  632. TARGET(SRE_OP_IN):
  633. /* match set member (or non_member) */
  634. /* <IN> <skip> <set> */
  635. TRACE(("|%p|%p|IN\n", pattern, ptr));
  636. if (ptr >= end ||
  637. !SRE(charset)(state, pattern + 1, *ptr))
  638. RETURN_FAILURE;
  639. pattern += pattern[0];
  640. ptr++;
  641. DISPATCH;
  642. TARGET(SRE_OP_LITERAL_IGNORE):
  643. TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
  644. pattern, ptr, pattern[0]));
  645. if (ptr >= end ||
  646. sre_lower_ascii(*ptr) != *pattern)
  647. RETURN_FAILURE;
  648. pattern++;
  649. ptr++;
  650. DISPATCH;
  651. TARGET(SRE_OP_LITERAL_UNI_IGNORE):
  652. TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
  653. pattern, ptr, pattern[0]));
  654. if (ptr >= end ||
  655. sre_lower_unicode(*ptr) != *pattern)
  656. RETURN_FAILURE;
  657. pattern++;
  658. ptr++;
  659. DISPATCH;
  660. TARGET(SRE_OP_LITERAL_LOC_IGNORE):
  661. TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
  662. pattern, ptr, pattern[0]));
  663. if (ptr >= end
  664. || !char_loc_ignore(*pattern, *ptr))
  665. RETURN_FAILURE;
  666. pattern++;
  667. ptr++;
  668. DISPATCH;
  669. TARGET(SRE_OP_NOT_LITERAL_IGNORE):
  670. TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
  671. pattern, ptr, *pattern));
  672. if (ptr >= end ||
  673. sre_lower_ascii(*ptr) == *pattern)
  674. RETURN_FAILURE;
  675. pattern++;
  676. ptr++;
  677. DISPATCH;
  678. TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
  679. TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
  680. pattern, ptr, *pattern));
  681. if (ptr >= end ||
  682. sre_lower_unicode(*ptr) == *pattern)
  683. RETURN_FAILURE;
  684. pattern++;
  685. ptr++;
  686. DISPATCH;
  687. TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
  688. TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
  689. pattern, ptr, *pattern));
  690. if (ptr >= end
  691. || char_loc_ignore(*pattern, *ptr))
  692. RETURN_FAILURE;
  693. pattern++;
  694. ptr++;
  695. DISPATCH;
  696. TARGET(SRE_OP_IN_IGNORE):
  697. TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
  698. if (ptr >= end
  699. || !SRE(charset)(state, pattern+1,
  700. (SRE_CODE)sre_lower_ascii(*ptr)))
  701. RETURN_FAILURE;
  702. pattern += pattern[0];
  703. ptr++;
  704. DISPATCH;
  705. TARGET(SRE_OP_IN_UNI_IGNORE):
  706. TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
  707. if (ptr >= end
  708. || !SRE(charset)(state, pattern+1,
  709. (SRE_CODE)sre_lower_unicode(*ptr)))
  710. RETURN_FAILURE;
  711. pattern += pattern[0];
  712. ptr++;
  713. DISPATCH;
  714. TARGET(SRE_OP_IN_LOC_IGNORE):
  715. TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
  716. if (ptr >= end
  717. || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
  718. RETURN_FAILURE;
  719. pattern += pattern[0];
  720. ptr++;
  721. DISPATCH;
  722. TARGET(SRE_OP_JUMP):
  723. TARGET(SRE_OP_INFO):
  724. /* jump forward */
  725. /* <JUMP> <offset> */
  726. TRACE(("|%p|%p|JUMP %d\n", pattern,
  727. ptr, pattern[0]));
  728. pattern += pattern[0];
  729. DISPATCH;
  730. TARGET(SRE_OP_BRANCH):
  731. /* alternation */
  732. /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
  733. TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
  734. LASTMARK_SAVE();
  735. if (state->repeat)
  736. MARK_PUSH(ctx->lastmark);
  737. for (; pattern[0]; pattern += pattern[0]) {
  738. if (pattern[1] == SRE_OP_LITERAL &&
  739. (ptr >= end ||
  740. (SRE_CODE) *ptr != pattern[2]))
  741. continue;
  742. if (pattern[1] == SRE_OP_IN &&
  743. (ptr >= end ||
  744. !SRE(charset)(state, pattern + 3,
  745. (SRE_CODE) *ptr)))
  746. continue;
  747. state->ptr = ptr;
  748. DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
  749. if (ret) {
  750. if (state->repeat)
  751. MARK_POP_DISCARD(ctx->lastmark);
  752. RETURN_ON_ERROR(ret);
  753. RETURN_SUCCESS;
  754. }
  755. if (state->repeat)
  756. MARK_POP_KEEP(ctx->lastmark);
  757. LASTMARK_RESTORE();
  758. }
  759. if (state->repeat)
  760. MARK_POP_DISCARD(ctx->lastmark);
  761. RETURN_FAILURE;
  762. TARGET(SRE_OP_REPEAT_ONE):
  763. /* match repeated sequence (maximizing regexp) */
  764. /* this operator only works if the repeated item is
  765. exactly one character wide, and we're not already
  766. collecting backtracking points. for other cases,
  767. use the MAX_REPEAT operator */
  768. /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
  769. TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
  770. pattern[1], pattern[2]));
  771. if ((Py_ssize_t) pattern[1] > end - ptr)
  772. RETURN_FAILURE; /* cannot match */
  773. state->ptr = ptr;
  774. ret = SRE(count)(state, pattern+3, pattern[2]);
  775. RETURN_ON_ERROR(ret);
  776. DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
  777. ctx->count = ret;
  778. ptr += ctx->count;
  779. /* when we arrive here, count contains the number of
  780. matches, and ptr points to the tail of the target
  781. string. check if the rest of the pattern matches,
  782. and backtrack if not. */
  783. if (ctx->count < (Py_ssize_t) pattern[1])
  784. RETURN_FAILURE;
  785. if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
  786. ptr == state->end &&
  787. !(ctx->toplevel && state->must_advance && ptr == state->start))
  788. {
  789. /* tail is empty. we're finished */
  790. state->ptr = ptr;
  791. RETURN_SUCCESS;
  792. }
  793. LASTMARK_SAVE();
  794. if (state->repeat)
  795. MARK_PUSH(ctx->lastmark);
  796. if (pattern[pattern[0]] == SRE_OP_LITERAL) {
  797. /* tail starts with a literal. skip positions where
  798. the rest of the pattern cannot possibly match */
  799. ctx->u.chr = pattern[pattern[0]+1];
  800. for (;;) {
  801. while (ctx->count >= (Py_ssize_t) pattern[1] &&
  802. (ptr >= end || *ptr != ctx->u.chr)) {
  803. ptr--;
  804. ctx->count--;
  805. }
  806. if (ctx->count < (Py_ssize_t) pattern[1])
  807. break;
  808. state->ptr = ptr;
  809. DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
  810. pattern+pattern[0]);
  811. if (ret) {
  812. if (state->repeat)
  813. MARK_POP_DISCARD(ctx->lastmark);
  814. RETURN_ON_ERROR(ret);
  815. RETURN_SUCCESS;
  816. }
  817. if (state->repeat)
  818. MARK_POP_KEEP(ctx->lastmark);
  819. LASTMARK_RESTORE();
  820. ptr--;
  821. ctx->count--;
  822. }
  823. if (state->repeat)
  824. MARK_POP_DISCARD(ctx->lastmark);
  825. } else {
  826. /* general case */
  827. while (ctx->count >= (Py_ssize_t) pattern[1]) {
  828. state->ptr = ptr;
  829. DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
  830. pattern+pattern[0]);
  831. if (ret) {
  832. if (state->repeat)
  833. MARK_POP_DISCARD(ctx->lastmark);
  834. RETURN_ON_ERROR(ret);
  835. RETURN_SUCCESS;
  836. }
  837. if (state->repeat)
  838. MARK_POP_KEEP(ctx->lastmark);
  839. LASTMARK_RESTORE();
  840. ptr--;
  841. ctx->count--;
  842. }
  843. if (state->repeat)
  844. MARK_POP_DISCARD(ctx->lastmark);
  845. }
  846. RETURN_FAILURE;
  847. TARGET(SRE_OP_MIN_REPEAT_ONE):
  848. /* match repeated sequence (minimizing regexp) */
  849. /* this operator only works if the repeated item is
  850. exactly one character wide, and we're not already
  851. collecting backtracking points. for other cases,
  852. use the MIN_REPEAT operator */
  853. /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
  854. TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
  855. pattern[1], pattern[2]));
  856. if ((Py_ssize_t) pattern[1] > end - ptr)
  857. RETURN_FAILURE; /* cannot match */
  858. state->ptr = ptr;
  859. if (pattern[1] == 0)
  860. ctx->count = 0;
  861. else {
  862. /* count using pattern min as the maximum */
  863. ret = SRE(count)(state, pattern+3, pattern[1]);
  864. RETURN_ON_ERROR(ret);
  865. DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
  866. if (ret < (Py_ssize_t) pattern[1])
  867. /* didn't match minimum number of times */
  868. RETURN_FAILURE;
  869. /* advance past minimum matches of repeat */
  870. ctx->count = ret;
  871. ptr += ctx->count;
  872. }
  873. if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
  874. !(ctx->toplevel &&
  875. ((state->match_all && ptr != state->end) ||
  876. (state->must_advance && ptr == state->start))))
  877. {
  878. /* tail is empty. we're finished */
  879. state->ptr = ptr;
  880. RETURN_SUCCESS;
  881. } else {
  882. /* general case */
  883. LASTMARK_SAVE();
  884. if (state->repeat)
  885. MARK_PUSH(ctx->lastmark);
  886. while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
  887. || ctx->count <= (Py_ssize_t)pattern[2]) {
  888. state->ptr = ptr;
  889. DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
  890. pattern+pattern[0]);
  891. if (ret) {
  892. if (state->repeat)
  893. MARK_POP_DISCARD(ctx->lastmark);
  894. RETURN_ON_ERROR(ret);
  895. RETURN_SUCCESS;
  896. }
  897. if (state->repeat)
  898. MARK_POP_KEEP(ctx->lastmark);
  899. LASTMARK_RESTORE();
  900. state->ptr = ptr;
  901. ret = SRE(count)(state, pattern+3, 1);
  902. RETURN_ON_ERROR(ret);
  903. DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
  904. if (ret == 0)
  905. break;
  906. assert(ret == 1);
  907. ptr++;
  908. ctx->count++;
  909. }
  910. if (state->repeat)
  911. MARK_POP_DISCARD(ctx->lastmark);
  912. }
  913. RETURN_FAILURE;
  914. TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
  915. /* match repeated sequence (maximizing regexp) without
  916. backtracking */
  917. /* this operator only works if the repeated item is
  918. exactly one character wide, and we're not already
  919. collecting backtracking points. for other cases,
  920. use the MAX_REPEAT operator */
  921. /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
  922. tail */
  923. TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
  924. ptr, pattern[1], pattern[2]));
  925. if (ptr + pattern[1] > end) {
  926. RETURN_FAILURE; /* cannot match */
  927. }
  928. state->ptr = ptr;
  929. ret = SRE(count)(state, pattern + 3, pattern[2]);
  930. RETURN_ON_ERROR(ret);
  931. DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
  932. ctx->count = ret;
  933. ptr += ctx->count;
  934. /* when we arrive here, count contains the number of
  935. matches, and ptr points to the tail of the target
  936. string. check if the rest of the pattern matches,
  937. and fail if not. */
  938. /* Test for not enough repetitions in match */
  939. if (ctx->count < (Py_ssize_t) pattern[1]) {
  940. RETURN_FAILURE;
  941. }
  942. /* Update the pattern to point to the next op code */
  943. pattern += pattern[0];
  944. /* Let the tail be evaluated separately and consider this
  945. match successful. */
  946. if (*pattern == SRE_OP_SUCCESS &&
  947. ptr == state->end &&
  948. !(ctx->toplevel && state->must_advance && ptr == state->start))
  949. {
  950. /* tail is empty. we're finished */
  951. state->ptr = ptr;
  952. RETURN_SUCCESS;
  953. }
  954. /* Attempt to match the rest of the string */
  955. DISPATCH;
  956. TARGET(SRE_OP_REPEAT):
  957. /* create repeat context. all the hard work is done
  958. by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
  959. /* <REPEAT> <skip> <1=min> <2=max>
  960. <3=repeat_index> item <UNTIL> tail */
  961. TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
  962. pattern[1], pattern[2]));
  963. /* install new repeat context */
  964. ctx->u.rep = repeat_pool_malloc(state);
  965. if (!ctx->u.rep) {
  966. RETURN_ERROR(SRE_ERROR_MEMORY);
  967. }
  968. ctx->u.rep->count = -1;
  969. ctx->u.rep->pattern = pattern;
  970. ctx->u.rep->prev = state->repeat;
  971. ctx->u.rep->last_ptr = NULL;
  972. state->repeat = ctx->u.rep;
  973. state->ptr = ptr;
  974. DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
  975. state->repeat = ctx->u.rep->prev;
  976. repeat_pool_free(state, ctx->u.rep);
  977. if (ret) {
  978. RETURN_ON_ERROR(ret);
  979. RETURN_SUCCESS;
  980. }
  981. RETURN_FAILURE;
  982. TARGET(SRE_OP_MAX_UNTIL):
  983. /* maximizing repeat */
  984. /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
  985. /* FIXME: we probably need to deal with zero-width
  986. matches in here... */
  987. ctx->u.rep = state->repeat;
  988. if (!ctx->u.rep)
  989. RETURN_ERROR(SRE_ERROR_STATE);
  990. state->ptr = ptr;
  991. ctx->count = ctx->u.rep->count+1;
  992. TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
  993. ptr, ctx->count));
  994. if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
  995. /* not enough matches */
  996. ctx->u.rep->count = ctx->count;
  997. DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
  998. ctx->u.rep->pattern+3);
  999. if (ret) {
  1000. RETURN_ON_ERROR(ret);
  1001. RETURN_SUCCESS;
  1002. }
  1003. ctx->u.rep->count = ctx->count-1;
  1004. state->ptr = ptr;
  1005. RETURN_FAILURE;
  1006. }
  1007. if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
  1008. ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
  1009. state->ptr != ctx->u.rep->last_ptr) {
  1010. /* we may have enough matches, but if we can
  1011. match another item, do so */
  1012. ctx->u.rep->count = ctx->count;
  1013. LASTMARK_SAVE();
  1014. MARK_PUSH(ctx->lastmark);
  1015. /* zero-width match protection */
  1016. DATA_PUSH(&ctx->u.rep->last_ptr);
  1017. ctx->u.rep->last_ptr = state->ptr;
  1018. DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
  1019. ctx->u.rep->pattern+3);
  1020. DATA_POP(&ctx->u.rep->last_ptr);
  1021. if (ret) {
  1022. MARK_POP_DISCARD(ctx->lastmark);
  1023. RETURN_ON_ERROR(ret);
  1024. RETURN_SUCCESS;
  1025. }
  1026. MARK_POP(ctx->lastmark);
  1027. LASTMARK_RESTORE();
  1028. ctx->u.rep->count = ctx->count-1;
  1029. state->ptr = ptr;
  1030. }
  1031. /* cannot match more repeated items here. make sure the
  1032. tail matches */
  1033. state->repeat = ctx->u.rep->prev;
  1034. DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
  1035. state->repeat = ctx->u.rep; // restore repeat before return
  1036. RETURN_ON_SUCCESS(ret);
  1037. state->ptr = ptr;
  1038. RETURN_FAILURE;
  1039. TARGET(SRE_OP_MIN_UNTIL):
  1040. /* minimizing repeat */
  1041. /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
  1042. ctx->u.rep = state->repeat;
  1043. if (!ctx->u.rep)
  1044. RETURN_ERROR(SRE_ERROR_STATE);
  1045. state->ptr = ptr;
  1046. ctx->count = ctx->u.rep->count+1;
  1047. TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
  1048. ptr, ctx->count, ctx->u.rep->pattern));
  1049. if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
  1050. /* not enough matches */
  1051. ctx->u.rep->count = ctx->count;
  1052. DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
  1053. ctx->u.rep->pattern+3);
  1054. if (ret) {
  1055. RETURN_ON_ERROR(ret);
  1056. RETURN_SUCCESS;
  1057. }
  1058. ctx->u.rep->count = ctx->count-1;
  1059. state->ptr = ptr;
  1060. RETURN_FAILURE;
  1061. }
  1062. /* see if the tail matches */
  1063. state->repeat = ctx->u.rep->prev;
  1064. LASTMARK_SAVE();
  1065. if (state->repeat)
  1066. MARK_PUSH(ctx->lastmark);
  1067. DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
  1068. SRE_REPEAT *repeat_of_tail = state->repeat;
  1069. state->repeat = ctx->u.rep; // restore repeat before return
  1070. if (ret) {
  1071. if (repeat_of_tail)
  1072. MARK_POP_DISCARD(ctx->lastmark);
  1073. RETURN_ON_ERROR(ret);
  1074. RETURN_SUCCESS;
  1075. }
  1076. if (repeat_of_tail)
  1077. MARK_POP(ctx->lastmark);
  1078. LASTMARK_RESTORE();
  1079. state->ptr = ptr;
  1080. if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
  1081. && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
  1082. state->ptr == ctx->u.rep->last_ptr)
  1083. RETURN_FAILURE;
  1084. ctx->u.rep->count = ctx->count;
  1085. /* zero-width match protection */
  1086. DATA_PUSH(&ctx->u.rep->last_ptr);
  1087. ctx->u.rep->last_ptr = state->ptr;
  1088. DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
  1089. ctx->u.rep->pattern+3);
  1090. DATA_POP(&ctx->u.rep->last_ptr);
  1091. if (ret) {
  1092. RETURN_ON_ERROR(ret);
  1093. RETURN_SUCCESS;
  1094. }
  1095. ctx->u.rep->count = ctx->count-1;
  1096. state->ptr = ptr;
  1097. RETURN_FAILURE;
  1098. TARGET(SRE_OP_POSSESSIVE_REPEAT):
  1099. /* create possessive repeat contexts. */
  1100. /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
  1101. <SUCCESS> tail */
  1102. TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
  1103. ptr, pattern[1], pattern[2]));
  1104. /* Set the global Input pointer to this context's Input
  1105. pointer */
  1106. state->ptr = ptr;
  1107. /* Set state->repeat to non-NULL */
  1108. ctx->u.rep = repeat_pool_malloc(state);
  1109. if (!ctx->u.rep) {
  1110. RETURN_ERROR(SRE_ERROR_MEMORY);
  1111. }
  1112. ctx->u.rep->count = -1;
  1113. ctx->u.rep->pattern = NULL;
  1114. ctx->u.rep->prev = state->repeat;
  1115. ctx->u.rep->last_ptr = NULL;
  1116. state->repeat = ctx->u.rep;
  1117. /* Initialize Count to 0 */
  1118. ctx->count = 0;
  1119. /* Check for minimum required matches. */
  1120. while (ctx->count < (Py_ssize_t)pattern[1]) {
  1121. /* not enough matches */
  1122. DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
  1123. &pattern[3]);
  1124. if (ret) {
  1125. RETURN_ON_ERROR(ret);
  1126. ctx->count++;
  1127. }
  1128. else {
  1129. state->ptr = ptr;
  1130. /* Restore state->repeat */
  1131. state->repeat = ctx->u.rep->prev;
  1132. repeat_pool_free(state, ctx->u.rep);
  1133. RETURN_FAILURE;
  1134. }
  1135. }
  1136. /* Clear the context's Input stream pointer so that it
  1137. doesn't match the global state so that the while loop can
  1138. be entered. */
  1139. ptr = NULL;
  1140. /* Keep trying to parse the <pattern> sub-pattern until the
  1141. end is reached, creating a new context each time. */
  1142. while ((ctx->count < (Py_ssize_t)pattern[2] ||
  1143. (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
  1144. state->ptr != ptr) {
  1145. /* Save the Capture Group Marker state into the current
  1146. Context and back up the current highest number
  1147. Capture Group marker. */
  1148. LASTMARK_SAVE();
  1149. MARK_PUSH(ctx->lastmark);
  1150. /* zero-width match protection */
  1151. /* Set the context's Input Stream pointer to be the
  1152. current Input Stream pointer from the global
  1153. state. When the loop reaches the next iteration,
  1154. the context will then store the last known good
  1155. position with the global state holding the Input
  1156. Input Stream position that has been updated with
  1157. the most recent match. Thus, if state's Input
  1158. stream remains the same as the one stored in the
  1159. current Context, we know we have successfully
  1160. matched an empty string and that all subsequent
  1161. matches will also be the empty string until the
  1162. maximum number of matches are counted, and because
  1163. of this, we could immediately stop at that point and
  1164. consider this match successful. */
  1165. ptr = state->ptr;
  1166. /* We have not reached the maximin matches, so try to
  1167. match once more. */
  1168. DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
  1169. &pattern[3]);
  1170. /* Check to see if the last attempted match
  1171. succeeded. */
  1172. if (ret) {
  1173. /* Drop the saved highest number Capture Group
  1174. marker saved above and use the newly updated
  1175. value. */
  1176. MARK_POP_DISCARD(ctx->lastmark);
  1177. RETURN_ON_ERROR(ret);
  1178. /* Success, increment the count. */
  1179. ctx->count++;
  1180. }
  1181. /* Last attempted match failed. */
  1182. else {
  1183. /* Restore the previously saved highest number
  1184. Capture Group marker since the last iteration
  1185. did not match, then restore that to the global
  1186. state. */
  1187. MARK_POP(ctx->lastmark);
  1188. LASTMARK_RESTORE();
  1189. /* Restore the global Input Stream pointer
  1190. since it can change after jumps. */
  1191. state->ptr = ptr;
  1192. /* We have sufficient matches, so exit loop. */
  1193. break;
  1194. }
  1195. }
  1196. /* Restore state->repeat */
  1197. state->repeat = ctx->u.rep->prev;
  1198. repeat_pool_free(state, ctx->u.rep);
  1199. /* Evaluate Tail */
  1200. /* Jump to end of pattern indicated by skip, and then skip
  1201. the SUCCESS op code that follows it. */
  1202. pattern += pattern[0] + 1;
  1203. ptr = state->ptr;
  1204. DISPATCH;
  1205. TARGET(SRE_OP_ATOMIC_GROUP):
  1206. /* Atomic Group Sub Pattern */
  1207. /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
  1208. TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
  1209. /* Set the global Input pointer to this context's Input
  1210. pointer */
  1211. state->ptr = ptr;
  1212. /* Evaluate the Atomic Group in a new context, terminating
  1213. when the end of the group, represented by a SUCCESS op
  1214. code, is reached. */
  1215. /* Group Pattern begins at an offset of 1 code. */
  1216. DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
  1217. &pattern[1]);
  1218. /* Test Exit Condition */
  1219. RETURN_ON_ERROR(ret);
  1220. if (ret == 0) {
  1221. /* Atomic Group failed to Match. */
  1222. state->ptr = ptr;
  1223. RETURN_FAILURE;
  1224. }
  1225. /* Evaluate Tail */
  1226. /* Jump to end of pattern indicated by skip, and then skip
  1227. the SUCCESS op code that follows it. */
  1228. pattern += pattern[0];
  1229. ptr = state->ptr;
  1230. DISPATCH;
  1231. TARGET(SRE_OP_GROUPREF):
  1232. /* match backreference */
  1233. TRACE(("|%p|%p|GROUPREF %d\n", pattern,
  1234. ptr, pattern[0]));
  1235. {
  1236. int groupref = pattern[0] * 2;
  1237. if (groupref >= state->lastmark) {
  1238. RETURN_FAILURE;
  1239. } else {
  1240. SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
  1241. SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
  1242. if (!p || !e || e < p)
  1243. RETURN_FAILURE;
  1244. while (p < e) {
  1245. if (ptr >= end || *ptr != *p)
  1246. RETURN_FAILURE;
  1247. p++;
  1248. ptr++;
  1249. }
  1250. }
  1251. }
  1252. pattern++;
  1253. DISPATCH;
  1254. TARGET(SRE_OP_GROUPREF_IGNORE):
  1255. /* match backreference */
  1256. TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
  1257. ptr, pattern[0]));
  1258. {
  1259. int groupref = pattern[0] * 2;
  1260. if (groupref >= state->lastmark) {
  1261. RETURN_FAILURE;
  1262. } else {
  1263. SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
  1264. SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
  1265. if (!p || !e || e < p)
  1266. RETURN_FAILURE;
  1267. while (p < e) {
  1268. if (ptr >= end ||
  1269. sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
  1270. RETURN_FAILURE;
  1271. p++;
  1272. ptr++;
  1273. }
  1274. }
  1275. }
  1276. pattern++;
  1277. DISPATCH;
  1278. TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
  1279. /* match backreference */
  1280. TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
  1281. ptr, pattern[0]));
  1282. {
  1283. int groupref = pattern[0] * 2;
  1284. if (groupref >= state->lastmark) {
  1285. RETURN_FAILURE;
  1286. } else {
  1287. SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
  1288. SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
  1289. if (!p || !e || e < p)
  1290. RETURN_FAILURE;
  1291. while (p < e) {
  1292. if (ptr >= end ||
  1293. sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
  1294. RETURN_FAILURE;
  1295. p++;
  1296. ptr++;
  1297. }
  1298. }
  1299. }
  1300. pattern++;
  1301. DISPATCH;
  1302. TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
  1303. /* match backreference */
  1304. TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
  1305. ptr, pattern[0]));
  1306. {
  1307. int groupref = pattern[0] * 2;
  1308. if (groupref >= state->lastmark) {
  1309. RETURN_FAILURE;
  1310. } else {
  1311. SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
  1312. SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
  1313. if (!p || !e || e < p)
  1314. RETURN_FAILURE;
  1315. while (p < e) {
  1316. if (ptr >= end ||
  1317. sre_lower_locale(*ptr) != sre_lower_locale(*p))
  1318. RETURN_FAILURE;
  1319. p++;
  1320. ptr++;
  1321. }
  1322. }
  1323. }
  1324. pattern++;
  1325. DISPATCH;
  1326. TARGET(SRE_OP_GROUPREF_EXISTS):
  1327. TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
  1328. ptr, pattern[0]));
  1329. /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
  1330. {
  1331. int groupref = pattern[0] * 2;
  1332. if (groupref >= state->lastmark) {
  1333. pattern += pattern[1];
  1334. DISPATCH;
  1335. } else {
  1336. SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
  1337. SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
  1338. if (!p || !e || e < p) {
  1339. pattern += pattern[1];
  1340. DISPATCH;
  1341. }
  1342. }
  1343. }
  1344. pattern += 2;
  1345. DISPATCH;
  1346. TARGET(SRE_OP_ASSERT):
  1347. /* assert subpattern */
  1348. /* <ASSERT> <skip> <back> <pattern> */
  1349. TRACE(("|%p|%p|ASSERT %d\n", pattern,
  1350. ptr, pattern[1]));
  1351. if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
  1352. RETURN_FAILURE;
  1353. state->ptr = ptr - pattern[1];
  1354. DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
  1355. RETURN_ON_FAILURE(ret);
  1356. pattern += pattern[0];
  1357. DISPATCH;
  1358. TARGET(SRE_OP_ASSERT_NOT):
  1359. /* assert not subpattern */
  1360. /* <ASSERT_NOT> <skip> <back> <pattern> */
  1361. TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
  1362. ptr, pattern[1]));
  1363. if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
  1364. state->ptr = ptr - pattern[1];
  1365. LASTMARK_SAVE();
  1366. if (state->repeat)
  1367. MARK_PUSH(ctx->lastmark);
  1368. DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
  1369. if (ret) {
  1370. if (state->repeat)
  1371. MARK_POP_DISCARD(ctx->lastmark);
  1372. RETURN_ON_ERROR(ret);
  1373. RETURN_FAILURE;
  1374. }
  1375. if (state->repeat)
  1376. MARK_POP(ctx->lastmark);
  1377. LASTMARK_RESTORE();
  1378. }
  1379. pattern += pattern[0];
  1380. DISPATCH;
  1381. TARGET(SRE_OP_FAILURE):
  1382. /* immediate failure */
  1383. TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
  1384. RETURN_FAILURE;
  1385. #if !USE_COMPUTED_GOTOS
  1386. default:
  1387. #endif
  1388. // Also any unused opcodes:
  1389. TARGET(SRE_OP_RANGE_UNI_IGNORE):
  1390. TARGET(SRE_OP_SUBPATTERN):
  1391. TARGET(SRE_OP_RANGE):
  1392. TARGET(SRE_OP_NEGATE):
  1393. TARGET(SRE_OP_BIGCHARSET):
  1394. TARGET(SRE_OP_CHARSET):
  1395. TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
  1396. pattern[-1]));
  1397. RETURN_ERROR(SRE_ERROR_ILLEGAL);
  1398. }
  1399. exit:
  1400. ctx_pos = ctx->last_ctx_pos;
  1401. jump = ctx->jump;
  1402. DATA_POP_DISCARD(ctx);
  1403. if (ctx_pos == -1) {
  1404. state->sigcount = sigcount;
  1405. return ret;
  1406. }
  1407. DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
  1408. switch (jump) {
  1409. case JUMP_MAX_UNTIL_2:
  1410. TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
  1411. goto jump_max_until_2;
  1412. case JUMP_MAX_UNTIL_3:
  1413. TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
  1414. goto jump_max_until_3;
  1415. case JUMP_MIN_UNTIL_2:
  1416. TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
  1417. goto jump_min_until_2;
  1418. case JUMP_MIN_UNTIL_3:
  1419. TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
  1420. goto jump_min_until_3;
  1421. case JUMP_BRANCH:
  1422. TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
  1423. goto jump_branch;
  1424. case JUMP_MAX_UNTIL_1:
  1425. TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
  1426. goto jump_max_until_1;
  1427. case JUMP_MIN_UNTIL_1:
  1428. TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
  1429. goto jump_min_until_1;
  1430. case JUMP_POSS_REPEAT_1:
  1431. TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
  1432. goto jump_poss_repeat_1;
  1433. case JUMP_POSS_REPEAT_2:
  1434. TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
  1435. goto jump_poss_repeat_2;
  1436. case JUMP_REPEAT:
  1437. TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
  1438. goto jump_repeat;
  1439. case JUMP_REPEAT_ONE_1:
  1440. TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
  1441. goto jump_repeat_one_1;
  1442. case JUMP_REPEAT_ONE_2:
  1443. TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
  1444. goto jump_repeat_one_2;
  1445. case JUMP_MIN_REPEAT_ONE:
  1446. TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
  1447. goto jump_min_repeat_one;
  1448. case JUMP_ATOMIC_GROUP:
  1449. TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
  1450. goto jump_atomic_group;
  1451. case JUMP_ASSERT:
  1452. TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
  1453. goto jump_assert;
  1454. case JUMP_ASSERT_NOT:
  1455. TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
  1456. goto jump_assert_not;
  1457. case JUMP_NONE:
  1458. TRACE(("|%p|%p|RETURN %zd\n", pattern,
  1459. ptr, ret));
  1460. break;
  1461. }
  1462. return ret; /* should never get here */
  1463. }
  1464. /* need to reset capturing groups between two SRE(match) callings in loops */
  1465. #define RESET_CAPTURE_GROUP() \
  1466. do { state->lastmark = state->lastindex = -1; } while (0)
  1467. LOCAL(Py_ssize_t)
  1468. SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
  1469. {
  1470. SRE_CHAR* ptr = (SRE_CHAR *)state->start;
  1471. SRE_CHAR* end = (SRE_CHAR *)state->end;
  1472. Py_ssize_t status = 0;
  1473. Py_ssize_t prefix_len = 0;
  1474. Py_ssize_t prefix_skip = 0;
  1475. SRE_CODE* prefix = NULL;
  1476. SRE_CODE* charset = NULL;
  1477. SRE_CODE* overlap = NULL;
  1478. int flags = 0;
  1479. if (ptr > end)
  1480. return 0;
  1481. if (pattern[0] == SRE_OP_INFO) {
  1482. /* optimization info block */
  1483. /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
  1484. flags = pattern[2];
  1485. if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
  1486. TRACE(("reject (got %tu chars, need %zu)\n",
  1487. end - ptr, (size_t) pattern[3]));
  1488. return 0;
  1489. }
  1490. if (pattern[3] > 1) {
  1491. /* adjust end point (but make sure we leave at least one
  1492. character in there, so literal search will work) */
  1493. end -= pattern[3] - 1;
  1494. if (end <= ptr)
  1495. end = ptr;
  1496. }
  1497. if (flags & SRE_INFO_PREFIX) {
  1498. /* pattern starts with a known prefix */
  1499. /* <length> <skip> <prefix data> <overlap data> */
  1500. prefix_len = pattern[5];
  1501. prefix_skip = pattern[6];
  1502. prefix = pattern + 7;
  1503. overlap = prefix + prefix_len - 1;
  1504. } else if (flags & SRE_INFO_CHARSET)
  1505. /* pattern starts with a character from a known set */
  1506. /* <charset> */
  1507. charset = pattern + 5;
  1508. pattern += 1 + pattern[1];
  1509. }
  1510. TRACE(("prefix = %p %zd %zd\n",
  1511. prefix, prefix_len, prefix_skip));
  1512. TRACE(("charset = %p\n", charset));
  1513. if (prefix_len == 1) {
  1514. /* pattern starts with a literal character */
  1515. SRE_CHAR c = (SRE_CHAR) prefix[0];
  1516. #if SIZEOF_SRE_CHAR < 4
  1517. if ((SRE_CODE) c != prefix[0])
  1518. return 0; /* literal can't match: doesn't fit in char width */
  1519. #endif
  1520. end = (SRE_CHAR *)state->end;
  1521. state->must_advance = 0;
  1522. while (ptr < end) {
  1523. while (*ptr != c) {
  1524. if (++ptr >= end)
  1525. return 0;
  1526. }
  1527. TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
  1528. state->start = ptr;
  1529. state->ptr = ptr + prefix_skip;
  1530. if (flags & SRE_INFO_LITERAL)
  1531. return 1; /* we got all of it */
  1532. status = SRE(match)(state, pattern + 2*prefix_skip, 0);
  1533. if (status != 0)
  1534. return status;
  1535. ++ptr;
  1536. RESET_CAPTURE_GROUP();
  1537. }
  1538. return 0;
  1539. }
  1540. if (prefix_len > 1) {
  1541. /* pattern starts with a known prefix. use the overlap
  1542. table to skip forward as fast as we possibly can */
  1543. Py_ssize_t i = 0;
  1544. end = (SRE_CHAR *)state->end;
  1545. if (prefix_len > end - ptr)
  1546. return 0;
  1547. #if SIZEOF_SRE_CHAR < 4
  1548. for (i = 0; i < prefix_len; i++)
  1549. if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
  1550. return 0; /* literal can't match: doesn't fit in char width */
  1551. #endif
  1552. while (ptr < end) {
  1553. SRE_CHAR c = (SRE_CHAR) prefix[0];
  1554. while (*ptr++ != c) {
  1555. if (ptr >= end)
  1556. return 0;
  1557. }
  1558. if (ptr >= end)
  1559. return 0;
  1560. i = 1;
  1561. state->must_advance = 0;
  1562. do {
  1563. if (*ptr == (SRE_CHAR) prefix[i]) {
  1564. if (++i != prefix_len) {
  1565. if (++ptr >= end)
  1566. return 0;
  1567. continue;
  1568. }
  1569. /* found a potential match */
  1570. TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
  1571. state->start = ptr - (prefix_len - 1);
  1572. state->ptr = ptr - (prefix_len - prefix_skip - 1);
  1573. if (flags & SRE_INFO_LITERAL)
  1574. return 1; /* we got all of it */
  1575. status = SRE(match)(state, pattern + 2*prefix_skip, 0);
  1576. if (status != 0)
  1577. return status;
  1578. /* close but no cigar -- try again */
  1579. if (++ptr >= end)
  1580. return 0;
  1581. RESET_CAPTURE_GROUP();
  1582. }
  1583. i = overlap[i];
  1584. } while (i != 0);
  1585. }
  1586. return 0;
  1587. }
  1588. if (charset) {
  1589. /* pattern starts with a character from a known set */
  1590. end = (SRE_CHAR *)state->end;
  1591. state->must_advance = 0;
  1592. for (;;) {
  1593. while (ptr < end && !SRE(charset)(state, charset, *ptr))
  1594. ptr++;
  1595. if (ptr >= end)
  1596. return 0;
  1597. TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
  1598. state->start = ptr;
  1599. state->ptr = ptr;
  1600. status = SRE(match)(state, pattern, 0);
  1601. if (status != 0)
  1602. break;
  1603. ptr++;
  1604. RESET_CAPTURE_GROUP();
  1605. }
  1606. } else {
  1607. /* general case */
  1608. assert(ptr <= end);
  1609. TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
  1610. state->start = state->ptr = ptr;
  1611. status = SRE(match)(state, pattern, 1);
  1612. state->must_advance = 0;
  1613. if (status == 0 && pattern[0] == SRE_OP_AT &&
  1614. (pattern[1] == SRE_AT_BEGINNING ||
  1615. pattern[1] == SRE_AT_BEGINNING_STRING))
  1616. {
  1617. state->start = state->ptr = ptr = end;
  1618. return 0;
  1619. }
  1620. while (status == 0 && ptr < end) {
  1621. ptr++;
  1622. RESET_CAPTURE_GROUP();
  1623. TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
  1624. state->start = state->ptr = ptr;
  1625. status = SRE(match)(state, pattern, 0);
  1626. }
  1627. }
  1628. return status;
  1629. }
  1630. #undef SRE_CHAR
  1631. #undef SIZEOF_SRE_CHAR
  1632. #undef SRE
  1633. /* vim:ts=4:sw=4:et
  1634. */