pcre_jit_compile.c 356 KB


  1. /*************************************************
  2. * Perl-Compatible Regular Expressions *
  3. *************************************************/
  4. /* PCRE is a library of functions to support regular expressions whose syntax
  5. and semantics are as close as possible to those of the Perl 5 language.
  6. Written by Philip Hazel
  7. Copyright (c) 1997-2013 University of Cambridge
  8. The machine code generator part (this module) was written by Zoltan Herczeg
  9. Copyright (c) 2010-2013
  10. -----------------------------------------------------------------------------
  11. Redistribution and use in source and binary forms, with or without
  12. modification, are permitted provided that the following conditions are met:
  13. * Redistributions of source code must retain the above copyright notice,
  14. this list of conditions and the following disclaimer.
  15. * Redistributions in binary form must reproduce the above copyright
  16. notice, this list of conditions and the following disclaimer in the
  17. documentation and/or other materials provided with the distribution.
  18. * Neither the name of the University of Cambridge nor the names of its
  19. contributors may be used to endorse or promote products derived from
  20. this software without specific prior written permission.
  21. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  25. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31. POSSIBILITY OF SUCH DAMAGE.
  32. -----------------------------------------------------------------------------
  33. */
  34. #ifdef HAVE_CONFIG_H
  35. #include "pcre_config.h"
  36. #endif
  37. #include "pcre_internal.h"
  38. #if defined SUPPORT_JIT
  39. /* All-in-one: Since we use the JIT compiler only from here,
  40. we just include it. This way we don't need to touch the build
  41. system files. */
  42. #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
  43. #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
  44. #define SLJIT_CONFIG_AUTO 1
  45. #define SLJIT_CONFIG_STATIC 1
  46. #define SLJIT_VERBOSE 0
  47. #define SLJIT_DEBUG 0
  48. #include "sljit/sljitLir.c"
  49. #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
  50. #error Unsupported architecture
  51. #endif
  /* Debugging switches. Both are disabled unless the corresponding
     #define below is uncommented. */

  /* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
       1 - Use unoptimized capturing brackets.
       2 - Enable capture_last_ptr (includes option 1). */
  /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

  /* DEBUG_FORCE_CONTROL_HEAD:
       1 - Always have a control head. */
  /* #define DEBUG_FORCE_CONTROL_HEAD 1 */

  /* Size of the regex stack that is allocated on the real machine stack.
     This is fast, but the size is limited. */
  #define MACHINE_STACK_SIZE 32768

  /* Growth rate for the stack allocated by the OS. Should be a multiple
     of the page size. */
  #define STACK_GROWTH_RATE 8192

  /* In sljit debug builds, enable checking that the allocation could
     destroy temporaries. */
  #if defined(SLJIT_DEBUG) && SLJIT_DEBUG
  #define DESTROY_REGISTERS 1
  #endif
  68. /*
  69. Short summary about the backtracking mechanism employed by the jit code generator:
  70. The code generator follows the recursive nature of the PERL compatible regular
  71. expressions. The basic blocks of regular expressions are condition checkers
  72. which execute different commands depending on the result of the condition check.
  73. The relationship between the operators can be horizontal (concatenation) and
  74. vertical (sub-expression) (See struct backtrack_common for more details).
  75. 'ab' - 'a' and 'b' regexps are concatenated
  76. 'a+' - 'a' is the sub-expression of the '+' operator
  77. The condition checkers are boolean (true/false) checkers. Machine code is generated
  78. for the checker itself and for the actions depending on the result of the checker.
  79. The 'true' case is called the matching path (expected path), and the other is called
  80. the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
  81. branches on the matching path.
  82. Greedy star operator (*) :
  83. Matching path: match happens.
  84. Backtrack path: match failed.
  85. Non-greedy star operator (*?) :
  86. Matching path: no need to perform a match.
  87. Backtrack path: match is required.
  88. The following example shows the code generated for a capturing bracket
  89. with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
  90. suppose we have the following regular expression:
  91. A(B|C)D
  92. The generated code will be the following:
  93. A matching path
  94. '(' matching path (pushing arguments to the stack)
  95. B matching path
  96. ')' matching path (pushing arguments to the stack)
  97. D matching path
  98. return with successful match
  99. D backtrack path
  100. ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
  101. B backtrack path
  102. C expected path
  103. jump to D matching path
  104. C backtrack path
  105. A backtrack path
  106. Notice that the order of the backtrack code paths is the opposite of the fast
  107. code paths. In this way the topmost value on the stack always belongs
  108. to the current backtrack code path. The backtrack path must check
  109. whether there is a next alternative. If so, it needs to jump back to
  110. the matching path eventually. Otherwise it needs to clear out its own stack
  111. frame and continue the execution on the backtrack code paths.
  112. */
  113. /*
  114. Saved stack frames:
  115. Atomic blocks and asserts require reloading the values of private data
  116. when the backtrack mechanism is performed. Because of OP_RECURSE, the data
  117. are not necessarily known at compile time, thus we need a dynamic restore
  118. mechanism.
  119. The stack frames are stored in a chain list, and have the following format:
  120. ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
  121. Thus we can restore the private data to a particular point in the stack.
  122. */
  123. typedef struct jit_arguments {
  124. /* Pointers first. */
  125. struct sljit_stack *stack;
  126. const pcre_uchar *str;
  127. const pcre_uchar *begin;
  128. const pcre_uchar *end;
  129. int *offsets;
  130. pcre_uchar *mark_ptr;
  131. void *callout_data;
  132. /* Everything else after. */
  133. sljit_u32 limit_match;
  134. int real_offset_count;
  135. int offset_count;
  136. sljit_u8 notbol;
  137. sljit_u8 noteol;
  138. sljit_u8 notempty;
  139. sljit_u8 notempty_atstart;
  140. } jit_arguments;
  141. typedef struct executable_functions {
  142. void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  143. void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  144. sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  145. PUBL(jit_callback) callback;
  146. void *userdata;
  147. sljit_u32 top_bracket;
  148. sljit_u32 limit_match;
  149. } executable_functions;
  /* Node of a singly linked list of sljit jumps. */
  typedef struct jump_list {
    struct sljit_jump *jump;
    struct jump_list *next;
  } jump_list;
  /* Node of a singly linked list of stubs; each node pairs a 'start'
     jump with a 'quit' label. */
  typedef struct stub_list {
    struct sljit_jump *start;
    struct sljit_label *quit;
    struct stub_list *next;
  } stub_list;
  159. typedef struct label_addr_list {
  160. struct sljit_label *label;
  161. sljit_uw *update_addr;
  162. struct label_addr_list *next;
  163. } label_addr_list;
  /* Negative sentinel values used where a frame size is expected. */
  enum frame_types {
    no_stack = -2,
    no_frame = -1
  };
  /* Kinds of control entries. */
  enum control_types {
    type_mark      = 0,
    type_then_trap = 1
  };
  172. typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
  173. /* The following structure is the key data type for the recursive
  174. code generator. It is allocated by compile_matchingpath, and contains
  175. the arguments for compile_backtrackingpath. Must be the first member
  176. of its descendants. */
  177. typedef struct backtrack_common {
  178. /* Concatenation stack. */
  179. struct backtrack_common *prev;
  180. jump_list *nextbacktracks;
  181. /* Internal stack (for component operators). */
  182. struct backtrack_common *top;
  183. jump_list *topbacktracks;
  184. /* Opcode pointer. */
  185. pcre_uchar *cc;
  186. } backtrack_common;
  187. typedef struct assert_backtrack {
  188. backtrack_common common;
  189. jump_list *condfailed;
  190. /* Less than 0 if a frame is not needed. */
  191. int framesize;
  192. /* Points to our private memory word on the stack. */
  193. int private_data_ptr;
  194. /* For iterators. */
  195. struct sljit_label *matchingpath;
  196. } assert_backtrack;
  197. typedef struct bracket_backtrack {
  198. backtrack_common common;
  199. /* Where to coninue if an alternative is successfully matched. */
  200. struct sljit_label *alternative_matchingpath;
  201. /* For rmin and rmax iterators. */
  202. struct sljit_label *recursive_matchingpath;
  203. /* For greedy ? operator. */
  204. struct sljit_label *zero_matchingpath;
  205. /* Contains the branches of a failed condition. */
  206. union {
  207. /* Both for OP_COND, OP_SCOND. */
  208. jump_list *condfailed;
  209. assert_backtrack *assert;
  210. /* For OP_ONCE. Less than 0 if not needed. */
  211. int framesize;
  212. } u;
  213. /* Points to our private memory word on the stack. */
  214. int private_data_ptr;
  215. } bracket_backtrack;
  216. typedef struct bracketpos_backtrack {
  217. backtrack_common common;
  218. /* Points to our private memory word on the stack. */
  219. int private_data_ptr;
  220. /* Reverting stack is needed. */
  221. int framesize;
  222. /* Allocated stack size. */
  223. int stacksize;
  224. } bracketpos_backtrack;
  225. typedef struct braminzero_backtrack {
  226. backtrack_common common;
  227. struct sljit_label *matchingpath;
  228. } braminzero_backtrack;
  229. typedef struct char_iterator_backtrack {
  230. backtrack_common common;
  231. /* Next iteration. */
  232. struct sljit_label *matchingpath;
  233. union {
  234. jump_list *backtracks;
  235. struct {
  236. unsigned int othercasebit;
  237. pcre_uchar chr;
  238. BOOL enabled;
  239. } charpos;
  240. } u;
  241. } char_iterator_backtrack;
  242. typedef struct ref_iterator_backtrack {
  243. backtrack_common common;
  244. /* Next iteration. */
  245. struct sljit_label *matchingpath;
  246. } ref_iterator_backtrack;
  247. typedef struct recurse_entry {
  248. struct recurse_entry *next;
  249. /* Contains the function entry. */
  250. struct sljit_label *entry;
  251. /* Collects the calls until the function is not created. */
  252. jump_list *calls;
  253. /* Points to the starting opcode. */
  254. sljit_sw start;
  255. } recurse_entry;
  256. typedef struct recurse_backtrack {
  257. backtrack_common common;
  258. BOOL inlined_pattern;
  259. } recurse_backtrack;
  260. #define OP_THEN_TRAP OP_TABLE_LENGTH
  261. typedef struct then_trap_backtrack {
  262. backtrack_common common;
  263. /* If then_trap is not NULL, this structure contains the real
  264. then_trap for the backtracking path. */
  265. struct then_trap_backtrack *then_trap;
  266. /* Points to the starting opcode. */
  267. sljit_sw start;
  268. /* Exit point for the then opcodes of this alternative. */
  269. jump_list *quit;
  270. /* Frame size of the current alternative. */
  271. int framesize;
  272. } then_trap_backtrack;
  273. #define MAX_RANGE_SIZE 4
  274. typedef struct compiler_common {
  275. /* The sljit ceneric compiler. */
  276. struct sljit_compiler *compiler;
  277. /* First byte code. */
  278. pcre_uchar *start;
  279. /* Maps private data offset to each opcode. */
  280. sljit_s32 *private_data_ptrs;
  281. /* Chain list of read-only data ptrs. */
  282. void *read_only_data_head;
  283. /* Tells whether the capturing bracket is optimized. */
  284. sljit_u8 *optimized_cbracket;
  285. /* Tells whether the starting offset is a target of then. */
  286. sljit_u8 *then_offsets;
  287. /* Current position where a THEN must jump. */
  288. then_trap_backtrack *then_trap;
  289. /* Starting offset of private data for capturing brackets. */
  290. sljit_s32 cbra_ptr;
  291. /* Output vector starting point. Must be divisible by 2. */
  292. sljit_s32 ovector_start;
  293. /* Points to the starting character of the current match. */
  294. sljit_s32 start_ptr;
  295. /* Last known position of the requested byte. */
  296. sljit_s32 req_char_ptr;
  297. /* Head of the last recursion. */
  298. sljit_s32 recursive_head_ptr;
  299. /* First inspected character for partial matching.
  300. (Needed for avoiding zero length partial matches.) */
  301. sljit_s32 start_used_ptr;
  302. /* Starting pointer for partial soft matches. */
  303. sljit_s32 hit_start;
  304. /* Pointer of the match end position. */
  305. sljit_s32 match_end_ptr;
  306. /* Points to the marked string. */
  307. sljit_s32 mark_ptr;
  308. /* Recursive control verb management chain. */
  309. sljit_s32 control_head_ptr;
  310. /* Points to the last matched capture block index. */
  311. sljit_s32 capture_last_ptr;
  312. /* Fast forward skipping byte code pointer. */
  313. pcre_uchar *fast_forward_bc_ptr;
  314. /* Locals used by fast fail optimization. */
  315. sljit_s32 fast_fail_start_ptr;
  316. sljit_s32 fast_fail_end_ptr;
  317. /* Flipped and lower case tables. */
  318. const sljit_u8 *fcc;
  319. sljit_sw lcc;
  320. /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  321. int mode;
  322. /* TRUE, when minlength is greater than 0. */
  323. BOOL might_be_empty;
  324. /* \K is found in the pattern. */
  325. BOOL has_set_som;
  326. /* (*SKIP:arg) is found in the pattern. */
  327. BOOL has_skip_arg;
  328. /* (*THEN) is found in the pattern. */
  329. BOOL has_then;
  330. /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  331. BOOL has_skip_in_assert_back;
  332. /* Currently in recurse or negative assert. */
  333. BOOL local_exit;
  334. /* Currently in a positive assert. */
  335. BOOL positive_assert;
  336. /* Newline control. */
  337. int nltype;
  338. sljit_u32 nlmax;
  339. sljit_u32 nlmin;
  340. int newline;
  341. int bsr_nltype;
  342. sljit_u32 bsr_nlmax;
  343. sljit_u32 bsr_nlmin;
  344. /* Dollar endonly. */
  345. int endonly;
  346. /* Tables. */
  347. sljit_sw ctypes;
  348. /* Named capturing brackets. */
  349. pcre_uchar *name_table;
  350. sljit_sw name_count;
  351. sljit_sw name_entry_size;
  352. /* Labels and jump lists. */
  353. struct sljit_label *partialmatchlabel;
  354. struct sljit_label *quit_label;
  355. struct sljit_label *forced_quit_label;
  356. struct sljit_label *accept_label;
  357. struct sljit_label *ff_newline_shortcut;
  358. stub_list *stubs;
  359. label_addr_list *label_addrs;
  360. recurse_entry *entries;
  361. recurse_entry *currententry;
  362. jump_list *partialmatch;
  363. jump_list *quit;
  364. jump_list *positive_assert_quit;
  365. jump_list *forced_quit;
  366. jump_list *accept;
  367. jump_list *calllimit;
  368. jump_list *stackalloc;
  369. jump_list *revertframes;
  370. jump_list *wordboundary;
  371. jump_list *anynewline;
  372. jump_list *hspace;
  373. jump_list *vspace;
  374. jump_list *casefulcmp;
  375. jump_list *caselesscmp;
  376. jump_list *reset_match;
  377. BOOL jscript_compat;
  378. #ifdef SUPPORT_UTF
  379. BOOL utf;
  380. #ifdef SUPPORT_UCP
  381. BOOL use_ucp;
  382. jump_list *getucd;
  383. #endif
  384. #ifdef COMPILE_PCRE8
  385. jump_list *utfreadchar;
  386. jump_list *utfreadchar16;
  387. jump_list *utfreadtype8;
  388. #endif
  389. #endif /* SUPPORT_UTF */
  390. } compiler_common;
  /* Context for byte_sequence_compare. The character buffers exist only
     when SLJIT_UNALIGNED is defined and non-zero; the shape of their
     'asuchars' view depends on the compiled character width. */
  typedef struct compare_context {
    int length;
    int sourcereg;
  #if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
    int ucharptr;
    union {
      sljit_s32 asint;
      sljit_u16 asushort;
  #if defined(COMPILE_PCRE8)
      sljit_u8 asbyte;
      sljit_u8 asuchars[4];
  #elif defined(COMPILE_PCRE16)
      sljit_u16 asuchars[2];
  #elif defined(COMPILE_PCRE32)
      sljit_u32 asuchars[1];
  #endif
    } c;
    union {
      sljit_s32 asint;
      sljit_u16 asushort;
  #if defined(COMPILE_PCRE8)
      sljit_u8 asbyte;
      sljit_u8 asuchars[4];
  #elif defined(COMPILE_PCRE16)
      sljit_u16 asuchars[2];
  #elif defined(COMPILE_PCRE32)
      sljit_u32 asuchars[1];
  #endif
    } oc;
  #endif
  } compare_context;
  /* Undefine sljit macros (CMP is redefined by this file). */
  #undef CMP

  /* Byte offset of the i-th word; used for accessing the elements of
     the stack. */
  #define STACK(i) ((i) * (int)sizeof(sljit_sw))

  /* Only R2 and R3 are accepted as the preferred shift register. */
  #if defined(SLJIT_PREF_SHIFT_REG)
  #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
  /* Nothing. */
  #elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
  #define SHIFT_REG_IS_R3
  #else
  #error "Unsupported shift register"
  #endif
  #endif

  /* Register aliases. TMP2 and TMP3 swap places when the preferred shift
     register is R3. */
  #define TMP1          SLJIT_R0
  #ifndef SHIFT_REG_IS_R3
  #define TMP2          SLJIT_R2
  #define TMP3          SLJIT_R3
  #else
  #define TMP2          SLJIT_R3
  #define TMP3          SLJIT_R2
  #endif
  #define STR_PTR       SLJIT_S0
  #define STR_END       SLJIT_S1
  #define STACK_TOP     SLJIT_R1
  #define STACK_LIMIT   SLJIT_S2
  #define COUNT_MATCH   SLJIT_S3
  #define ARGUMENTS     SLJIT_S4
  #define RETURN_ADDR   SLJIT_R4
  451. /* Local space layout. */
  452. /* These two locals can be used by the current opcode. */
  453. #define LOCALS0 (0 * sizeof(sljit_sw))
  454. #define LOCALS1 (1 * sizeof(sljit_sw))
  455. /* Two local variables for possessive quantifiers (char1 cannot use them). */
  456. #define POSSESSIVE0 (2 * sizeof(sljit_sw))
  457. #define POSSESSIVE1 (3 * sizeof(sljit_sw))
  458. /* Max limit of recursions. */
  459. #define LIMIT_MATCH (4 * sizeof(sljit_sw))
  460. /* The output vector is stored on the stack, and contains pointers
  461. to characters. The vector data is divided into two groups: the first
  462. group contains the start / end character pointers, and the second is
  463. the start pointers when the end of the capturing group has not yet reached. */
  464. #define OVECTOR_START (common->ovector_start)
  465. #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
  466. #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
  467. #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
  468. #if defined COMPILE_PCRE8
  469. #define MOV_UCHAR SLJIT_MOV_U8
  470. #elif defined COMPILE_PCRE16
  471. #define MOV_UCHAR SLJIT_MOV_U16
  472. #elif defined COMPILE_PCRE32
  473. #define MOV_UCHAR SLJIT_MOV_U32
  474. #else
  475. #error Unsupported compiling mode
  476. #endif
  /* Shortcuts for the sljit emitter calls. Except for DEFINE_COMPILER,
     SET_LABEL and the two constants at the end, they expect a local
     variable named 'compiler'; DEFINE_COMPILER declares that variable
     from 'common'. */
  #define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler
  #define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
  #define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
  #define LABEL() sljit_emit_label(compiler)
  #define JUMP(type) sljit_emit_jump(compiler, (type))
  #define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
  #define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
  #define SET_LABEL(jump, label) sljit_set_label((jump), (label))
  #define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
  #define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
  #define OP_FLAGS(op, dst, dstw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
  #define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

  #define READ_CHAR_MAX 0x7fffffff
  #define INVALID_UTF_CHAR 888
  504. static pcre_uchar *bracketend(pcre_uchar *cc)
  505. {
  506. SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
  507. do cc += GET(cc, 1); while (*cc == OP_ALT);
  508. SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  509. cc += 1 + LINK_SIZE;
  510. return cc;
  511. }
  512. static int no_alternatives(pcre_uchar *cc)
  513. {
  514. int count = 0;
  515. SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
  516. do
  517. {
  518. cc += GET(cc, 1);
  519. count++;
  520. }
  521. while (*cc == OP_ALT);
  522. SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
  523. return count;
  524. }
  525. /* Functions which might need modification for all new supported opcodes:
  526. next_opcode
  527. check_opcode_types
  528. set_private_data_ptrs
  529. get_framesize
  530. init_frame
  531. get_private_data_copy_length
  532. copy_private_data
  533. compile_matchingpath
  534. compile_backtrackingpath
  535. */
  536. static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
  537. {
  538. SLJIT_UNUSED_ARG(common);
  539. switch(*cc)
  540. {
  541. case OP_SOD:
  542. case OP_SOM:
  543. case OP_SET_SOM:
  544. case OP_NOT_WORD_BOUNDARY:
  545. case OP_WORD_BOUNDARY:
  546. case OP_NOT_DIGIT:
  547. case OP_DIGIT:
  548. case OP_NOT_WHITESPACE:
  549. case OP_WHITESPACE:
  550. case OP_NOT_WORDCHAR:
  551. case OP_WORDCHAR:
  552. case OP_ANY:
  553. case OP_ALLANY:
  554. case OP_NOTPROP:
  555. case OP_PROP:
  556. case OP_ANYNL:
  557. case OP_NOT_HSPACE:
  558. case OP_HSPACE:
  559. case OP_NOT_VSPACE:
  560. case OP_VSPACE:
  561. case OP_EXTUNI:
  562. case OP_EODN:
  563. case OP_EOD:
  564. case OP_CIRC:
  565. case OP_CIRCM:
  566. case OP_DOLL:
  567. case OP_DOLLM:
  568. case OP_CRSTAR:
  569. case OP_CRMINSTAR:
  570. case OP_CRPLUS:
  571. case OP_CRMINPLUS:
  572. case OP_CRQUERY:
  573. case OP_CRMINQUERY:
  574. case OP_CRRANGE:
  575. case OP_CRMINRANGE:
  576. case OP_CRPOSSTAR:
  577. case OP_CRPOSPLUS:
  578. case OP_CRPOSQUERY:
  579. case OP_CRPOSRANGE:
  580. case OP_CLASS:
  581. case OP_NCLASS:
  582. case OP_REF:
  583. case OP_REFI:
  584. case OP_DNREF:
  585. case OP_DNREFI:
  586. case OP_RECURSE:
  587. case OP_CALLOUT:
  588. case OP_ALT:
  589. case OP_KET:
  590. case OP_KETRMAX:
  591. case OP_KETRMIN:
  592. case OP_KETRPOS:
  593. case OP_REVERSE:
  594. case OP_ASSERT:
  595. case OP_ASSERT_NOT:
  596. case OP_ASSERTBACK:
  597. case OP_ASSERTBACK_NOT:
  598. case OP_ONCE:
  599. case OP_ONCE_NC:
  600. case OP_BRA:
  601. case OP_BRAPOS:
  602. case OP_CBRA:
  603. case OP_CBRAPOS:
  604. case OP_COND:
  605. case OP_SBRA:
  606. case OP_SBRAPOS:
  607. case OP_SCBRA:
  608. case OP_SCBRAPOS:
  609. case OP_SCOND:
  610. case OP_CREF:
  611. case OP_DNCREF:
  612. case OP_RREF:
  613. case OP_DNRREF:
  614. case OP_DEF:
  615. case OP_BRAZERO:
  616. case OP_BRAMINZERO:
  617. case OP_BRAPOSZERO:
  618. case OP_PRUNE:
  619. case OP_SKIP:
  620. case OP_THEN:
  621. case OP_COMMIT:
  622. case OP_FAIL:
  623. case OP_ACCEPT:
  624. case OP_ASSERT_ACCEPT:
  625. case OP_CLOSE:
  626. case OP_SKIPZERO:
  627. return cc + PRIV(OP_lengths)[*cc];
  628. case OP_CHAR:
  629. case OP_CHARI:
  630. case OP_NOT:
  631. case OP_NOTI:
  632. case OP_STAR:
  633. case OP_MINSTAR:
  634. case OP_PLUS:
  635. case OP_MINPLUS:
  636. case OP_QUERY:
  637. case OP_MINQUERY:
  638. case OP_UPTO:
  639. case OP_MINUPTO:
  640. case OP_EXACT:
  641. case OP_POSSTAR:
  642. case OP_POSPLUS:
  643. case OP_POSQUERY:
  644. case OP_POSUPTO:
  645. case OP_STARI:
  646. case OP_MINSTARI:
  647. case OP_PLUSI:
  648. case OP_MINPLUSI:
  649. case OP_QUERYI:
  650. case OP_MINQUERYI:
  651. case OP_UPTOI:
  652. case OP_MINUPTOI:
  653. case OP_EXACTI:
  654. case OP_POSSTARI:
  655. case OP_POSPLUSI:
  656. case OP_POSQUERYI:
  657. case OP_POSUPTOI:
  658. case OP_NOTSTAR:
  659. case OP_NOTMINSTAR:
  660. case OP_NOTPLUS:
  661. case OP_NOTMINPLUS:
  662. case OP_NOTQUERY:
  663. case OP_NOTMINQUERY:
  664. case OP_NOTUPTO:
  665. case OP_NOTMINUPTO:
  666. case OP_NOTEXACT:
  667. case OP_NOTPOSSTAR:
  668. case OP_NOTPOSPLUS:
  669. case OP_NOTPOSQUERY:
  670. case OP_NOTPOSUPTO:
  671. case OP_NOTSTARI:
  672. case OP_NOTMINSTARI:
  673. case OP_NOTPLUSI:
  674. case OP_NOTMINPLUSI:
  675. case OP_NOTQUERYI:
  676. case OP_NOTMINQUERYI:
  677. case OP_NOTUPTOI:
  678. case OP_NOTMINUPTOI:
  679. case OP_NOTEXACTI:
  680. case OP_NOTPOSSTARI:
  681. case OP_NOTPOSPLUSI:
  682. case OP_NOTPOSQUERYI:
  683. case OP_NOTPOSUPTOI:
  684. cc += PRIV(OP_lengths)[*cc];
  685. #ifdef SUPPORT_UTF
  686. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  687. #endif
  688. return cc;
  689. /* Special cases. */
  690. case OP_TYPESTAR:
  691. case OP_TYPEMINSTAR:
  692. case OP_TYPEPLUS:
  693. case OP_TYPEMINPLUS:
  694. case OP_TYPEQUERY:
  695. case OP_TYPEMINQUERY:
  696. case OP_TYPEUPTO:
  697. case OP_TYPEMINUPTO:
  698. case OP_TYPEEXACT:
  699. case OP_TYPEPOSSTAR:
  700. case OP_TYPEPOSPLUS:
  701. case OP_TYPEPOSQUERY:
  702. case OP_TYPEPOSUPTO:
  703. return cc + PRIV(OP_lengths)[*cc] - 1;
  704. case OP_ANYBYTE:
  705. #ifdef SUPPORT_UTF
  706. if (common->utf) return NULL;
  707. #endif
  708. return cc + 1;
  709. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  710. case OP_XCLASS:
  711. return cc + GET(cc, 1);
  712. #endif
  713. case OP_MARK:
  714. case OP_PRUNE_ARG:
  715. case OP_SKIP_ARG:
  716. case OP_THEN_ARG:
  717. return cc + 1 + 2 + cc[1];
  718. default:
  719. /* All opcodes are supported now! */
  720. SLJIT_UNREACHABLE();
  721. return NULL;
  722. }
  723. }
  724. static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
  725. {
  726. int count;
  727. pcre_uchar *slot;
  728. pcre_uchar *assert_back_end = cc - 1;
  729. /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
  730. while (cc < ccend)
  731. {
  732. switch(*cc)
  733. {
  734. case OP_SET_SOM:
  735. common->has_set_som = TRUE;
  736. common->might_be_empty = TRUE;
  737. cc += 1;
  738. break;
  739. case OP_REF:
  740. case OP_REFI:
  741. common->optimized_cbracket[GET2(cc, 1)] = 0;
  742. cc += 1 + IMM2_SIZE;
  743. break;
  744. case OP_CBRAPOS:
  745. case OP_SCBRAPOS:
  746. common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
  747. cc += 1 + LINK_SIZE + IMM2_SIZE;
  748. break;
  749. case OP_COND:
  750. case OP_SCOND:
  751. /* Only AUTO_CALLOUT can insert this opcode. We do
  752. not intend to support this case. */
  753. if (cc[1 + LINK_SIZE] == OP_CALLOUT)
  754. return FALSE;
  755. cc += 1 + LINK_SIZE;
  756. break;
  757. case OP_CREF:
  758. common->optimized_cbracket[GET2(cc, 1)] = 0;
  759. cc += 1 + IMM2_SIZE;
  760. break;
  761. case OP_DNREF:
  762. case OP_DNREFI:
  763. case OP_DNCREF:
  764. count = GET2(cc, 1 + IMM2_SIZE);
  765. slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
  766. while (count-- > 0)
  767. {
  768. common->optimized_cbracket[GET2(slot, 0)] = 0;
  769. slot += common->name_entry_size;
  770. }
  771. cc += 1 + 2 * IMM2_SIZE;
  772. break;
  773. case OP_RECURSE:
  774. /* Set its value only once. */
  775. if (common->recursive_head_ptr == 0)
  776. {
  777. common->recursive_head_ptr = common->ovector_start;
  778. common->ovector_start += sizeof(sljit_sw);
  779. }
  780. cc += 1 + LINK_SIZE;
  781. break;
  782. case OP_CALLOUT:
  783. if (common->capture_last_ptr == 0)
  784. {
  785. common->capture_last_ptr = common->ovector_start;
  786. common->ovector_start += sizeof(sljit_sw);
  787. }
  788. cc += 2 + 2 * LINK_SIZE;
  789. break;
  790. case OP_ASSERTBACK:
  791. slot = bracketend(cc);
  792. if (slot > assert_back_end)
  793. assert_back_end = slot;
  794. cc += 1 + LINK_SIZE;
  795. break;
  796. case OP_THEN_ARG:
  797. common->has_then = TRUE;
  798. common->control_head_ptr = 1;
  799. /* Fall through. */
  800. case OP_PRUNE_ARG:
  801. case OP_MARK:
  802. if (common->mark_ptr == 0)
  803. {
  804. common->mark_ptr = common->ovector_start;
  805. common->ovector_start += sizeof(sljit_sw);
  806. }
  807. cc += 1 + 2 + cc[1];
  808. break;
  809. case OP_THEN:
  810. common->has_then = TRUE;
  811. common->control_head_ptr = 1;
  812. cc += 1;
  813. break;
  814. case OP_SKIP:
  815. if (cc < assert_back_end)
  816. common->has_skip_in_assert_back = TRUE;
  817. cc += 1;
  818. break;
  819. case OP_SKIP_ARG:
  820. common->control_head_ptr = 1;
  821. common->has_skip_arg = TRUE;
  822. if (cc < assert_back_end)
  823. common->has_skip_in_assert_back = TRUE;
  824. cc += 1 + 2 + cc[1];
  825. break;
  826. default:
  827. cc = next_opcode(common, cc);
  828. if (cc == NULL)
  829. return FALSE;
  830. break;
  831. }
  832. }
  833. return TRUE;
  834. }
  835. static BOOL is_accelerated_repeat(pcre_uchar *cc)
  836. {
  837. switch(*cc)
  838. {
  839. case OP_TYPESTAR:
  840. case OP_TYPEMINSTAR:
  841. case OP_TYPEPLUS:
  842. case OP_TYPEMINPLUS:
  843. case OP_TYPEPOSSTAR:
  844. case OP_TYPEPOSPLUS:
  845. return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
  846. case OP_STAR:
  847. case OP_MINSTAR:
  848. case OP_PLUS:
  849. case OP_MINPLUS:
  850. case OP_POSSTAR:
  851. case OP_POSPLUS:
  852. case OP_STARI:
  853. case OP_MINSTARI:
  854. case OP_PLUSI:
  855. case OP_MINPLUSI:
  856. case OP_POSSTARI:
  857. case OP_POSPLUSI:
  858. case OP_NOTSTAR:
  859. case OP_NOTMINSTAR:
  860. case OP_NOTPLUS:
  861. case OP_NOTMINPLUS:
  862. case OP_NOTPOSSTAR:
  863. case OP_NOTPOSPLUS:
  864. case OP_NOTSTARI:
  865. case OP_NOTMINSTARI:
  866. case OP_NOTPLUSI:
  867. case OP_NOTMINPLUSI:
  868. case OP_NOTPOSSTARI:
  869. case OP_NOTPOSPLUSI:
  870. return TRUE;
  871. case OP_CLASS:
  872. case OP_NCLASS:
  873. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  874. case OP_XCLASS:
  875. cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
  876. #else
  877. cc += (1 + (32 / sizeof(pcre_uchar)));
  878. #endif
  879. switch(*cc)
  880. {
  881. case OP_CRSTAR:
  882. case OP_CRMINSTAR:
  883. case OP_CRPLUS:
  884. case OP_CRMINPLUS:
  885. case OP_CRPOSSTAR:
  886. case OP_CRPOSPLUS:
  887. return TRUE;
  888. }
  889. break;
  890. }
  891. return FALSE;
  892. }
  893. static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
  894. {
  895. pcre_uchar *cc = common->start;
  896. pcre_uchar *end;
  897. /* Skip not repeated brackets. */
  898. while (TRUE)
  899. {
  900. switch(*cc)
  901. {
  902. case OP_SOD:
  903. case OP_SOM:
  904. case OP_SET_SOM:
  905. case OP_NOT_WORD_BOUNDARY:
  906. case OP_WORD_BOUNDARY:
  907. case OP_EODN:
  908. case OP_EOD:
  909. case OP_CIRC:
  910. case OP_CIRCM:
  911. case OP_DOLL:
  912. case OP_DOLLM:
  913. /* Zero width assertions. */
  914. cc++;
  915. continue;
  916. }
  917. if (*cc != OP_BRA && *cc != OP_CBRA)
  918. break;
  919. end = cc + GET(cc, 1);
  920. if (*end != OP_KET || PRIVATE_DATA(end) != 0)
  921. return FALSE;
  922. if (*cc == OP_CBRA)
  923. {
  924. if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  925. return FALSE;
  926. cc += IMM2_SIZE;
  927. }
  928. cc += 1 + LINK_SIZE;
  929. }
  930. if (is_accelerated_repeat(cc))
  931. {
  932. common->fast_forward_bc_ptr = cc;
  933. common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
  934. *private_data_start += sizeof(sljit_sw);
  935. return TRUE;
  936. }
  937. return FALSE;
  938. }
  939. static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
  940. {
  941. pcre_uchar *next_alt;
  942. SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
  943. if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  944. return;
  945. next_alt = bracketend(cc) - (1 + LINK_SIZE);
  946. if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
  947. return;
  948. do
  949. {
  950. next_alt = cc + GET(cc, 1);
  951. cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
  952. while (TRUE)
  953. {
  954. switch(*cc)
  955. {
  956. case OP_SOD:
  957. case OP_SOM:
  958. case OP_SET_SOM:
  959. case OP_NOT_WORD_BOUNDARY:
  960. case OP_WORD_BOUNDARY:
  961. case OP_EODN:
  962. case OP_EOD:
  963. case OP_CIRC:
  964. case OP_CIRCM:
  965. case OP_DOLL:
  966. case OP_DOLLM:
  967. /* Zero width assertions. */
  968. cc++;
  969. continue;
  970. }
  971. break;
  972. }
  973. if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
  974. detect_fast_fail(common, cc, private_data_start, depth - 1);
  975. if (is_accelerated_repeat(cc))
  976. {
  977. common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
  978. if (common->fast_fail_start_ptr == 0)
  979. common->fast_fail_start_ptr = *private_data_start;
  980. *private_data_start += sizeof(sljit_sw);
  981. common->fast_fail_end_ptr = *private_data_start;
  982. if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
  983. return;
  984. }
  985. cc = next_alt;
  986. }
  987. while (*cc == OP_ALT);
  988. }
  989. static int get_class_iterator_size(pcre_uchar *cc)
  990. {
  991. sljit_u32 min;
  992. sljit_u32 max;
  993. switch(*cc)
  994. {
  995. case OP_CRSTAR:
  996. case OP_CRPLUS:
  997. return 2;
  998. case OP_CRMINSTAR:
  999. case OP_CRMINPLUS:
  1000. case OP_CRQUERY:
  1001. case OP_CRMINQUERY:
  1002. return 1;
  1003. case OP_CRRANGE:
  1004. case OP_CRMINRANGE:
  1005. min = GET2(cc, 1);
  1006. max = GET2(cc, 1 + IMM2_SIZE);
  1007. if (max == 0)
  1008. return (*cc == OP_CRRANGE) ? 2 : 1;
  1009. max -= min;
  1010. if (max > 2)
  1011. max = 2;
  1012. return max;
  1013. default:
  1014. return 0;
  1015. }
  1016. }
  /* Detects a bracket that is followed by byte-identical copies of itself and
     records the repetition in common->private_data_ptrs so the copies can be
     handled as one repeat.
     begin points to the opening opcode of the first bracket. Returns TRUE when
     a repeat was recorded by this call (or an earlier one), FALSE otherwise.
     The three words written start at the private_data_ptrs entry of the
     position right after a closing OP_KET opcode:
       [0] distance from that bracket's end to the end of the repeated run,
       [1] OP_EXACT, OP_UPTO or OP_MINUPTO,
       [2] the repeat count. */
  1017. static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
  1018. {
  1019. pcre_uchar *end = bracketend(begin);
  1020. pcre_uchar *next;
  1021. pcre_uchar *next_end;
  1022. pcre_uchar *max_end;
  1023. pcre_uchar type;
  /* Length of one copy, in code units. */
  1024. sljit_sw length = end - begin;
  1025. int min, max, i;
  1026. /* Detect fixed iterations first. */
  /* Only brackets closed by a plain OP_KET are considered. */
  1027. if (end[-(1 + LINK_SIZE)] != OP_KET)
  1028. return FALSE;
  1029. /* Already detected repeat. */
  1030. if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  1031. return TRUE;
  1032. next = end;
  1033. min = 1;
  /* Count the consecutive brackets that have the same length and content
     as the first one. */
  1034. while (1)
  1035. {
  1036. if (*next != *begin)
  1037. break;
  1038. next_end = bracketend(next);
  1039. if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
  1040. break;
  1041. next = next_end;
  1042. min++;
  1043. }
  /* Exactly two copies are not recorded as a repeat. */
  1044. if (min == 2)
  1045. return FALSE;
  1046. max = 0;
  1047. max_end = next;
  /* Look for optional copies: a chain of (BRAZERO or BRAMINZERO) + OP_BRA
     wrappers, each starting with another copy of the bracket. */
  1048. if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  1049. {
  1050. type = *next;
  1051. while (1)
  1052. {
  1053. if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
  1054. break;
  1055. next_end = bracketend(next + 2 + LINK_SIZE);
  1056. if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
  1057. break;
  1058. next = next_end;
  1059. max++;
  1060. }
  /* The innermost optional copy has no OP_BRA wrapper: the zero opcode is
     followed directly by the bracket. */
  1061. if (next[0] == type && next[1] == *begin && max >= 1)
  1062. {
  1063. next_end = bracketend(next + 1);
  1064. if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
  1065. {
  /* Every wrapper opened above must be closed by an OP_KET. */
  1066. for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
  1067. if (*next_end != OP_KET)
  1068. break;
  1069. if (i == max)
  1070. {
  1071. common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
  1072. common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
  1073. /* +2 the original and the last. */
  1074. common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
  1075. if (min == 1)
  1076. return TRUE;
  /* The last fixed copy now belongs to the bounded repeat, so the fixed
     run is one shorter and ends one bracket earlier. */
  1077. min--;
  1078. max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
  1079. }
  1080. }
  1081. }
  1082. }
  /* Three or more fixed copies are recorded as an exact repeat. */
  1083. if (min >= 3)
  1084. {
  1085. common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  1086. common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  1087. common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  1088. return TRUE;
  1089. }
  1090. return FALSE;
  1091. }
  /* Groups of case labels shared by the opcode walkers below. Each macro
     expands to a run of "case OP_...:" labels and is meant to be followed
     directly by the statements handling that group. */
  /* Single character iterators (minimizing star/plus and all query forms)
     for which one private data word is reserved or counted. */
  1092. #define CASE_ITERATOR_PRIVATE_DATA_1 \
  1093. case OP_MINSTAR: \
  1094. case OP_MINPLUS: \
  1095. case OP_QUERY: \
  1096. case OP_MINQUERY: \
  1097. case OP_MINSTARI: \
  1098. case OP_MINPLUSI: \
  1099. case OP_QUERYI: \
  1100. case OP_MINQUERYI: \
  1101. case OP_NOTMINSTAR: \
  1102. case OP_NOTMINPLUS: \
  1103. case OP_NOTQUERY: \
  1104. case OP_NOTMINQUERY: \
  1105. case OP_NOTMINSTARI: \
  1106. case OP_NOTMINPLUSI: \
  1107. case OP_NOTQUERYI: \
  1108. case OP_NOTMINQUERYI:
  /* Greedy star/plus single character iterators: two private data words. */
  1109. #define CASE_ITERATOR_PRIVATE_DATA_2A \
  1110. case OP_STAR: \
  1111. case OP_PLUS: \
  1112. case OP_STARI: \
  1113. case OP_PLUSI: \
  1114. case OP_NOTSTAR: \
  1115. case OP_NOTPLUS: \
  1116. case OP_NOTSTARI: \
  1117. case OP_NOTPLUSI:
  /* Bounded (UPTO) single character iterators: two private data words; the
     opcode also carries an IMM2_SIZE count before the character. */
  1118. #define CASE_ITERATOR_PRIVATE_DATA_2B \
  1119. case OP_UPTO: \
  1120. case OP_MINUPTO: \
  1121. case OP_UPTOI: \
  1122. case OP_MINUPTOI: \
  1123. case OP_NOTUPTO: \
  1124. case OP_NOTMINUPTO: \
  1125. case OP_NOTUPTOI: \
  1126. case OP_NOTMINUPTOI:
  /* Character type iterators handled with one private data word. */
  1127. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  1128. case OP_TYPEMINSTAR: \
  1129. case OP_TYPEMINPLUS: \
  1130. case OP_TYPEQUERY: \
  1131. case OP_TYPEMINQUERY:
  /* Greedy star/plus character type iterators: two private data words. */
  1132. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  1133. case OP_TYPESTAR: \
  1134. case OP_TYPEPLUS:
  /* Bounded (UPTO) character type iterators: two private data words; the
     opcode also carries an IMM2_SIZE count. */
  1135. #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  1136. case OP_TYPEUPTO: \
  1137. case OP_TYPEMINUPTO:
  1138. static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
  1139. {
  1140. pcre_uchar *cc = common->start;
  1141. pcre_uchar *alternative;
  1142. pcre_uchar *end = NULL;
  1143. int private_data_ptr = *private_data_start;
  1144. int space, size, bracketlen;
  1145. BOOL repeat_check = TRUE;
  1146. while (cc < ccend)
  1147. {
  1148. space = 0;
  1149. size = 0;
  1150. bracketlen = 0;
  1151. if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
  1152. break;
  1153. if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
  1154. {
  1155. if (detect_repeat(common, cc))
  1156. {
  1157. /* These brackets are converted to repeats, so no global
  1158. based single character repeat is allowed. */
  1159. if (cc >= end)
  1160. end = bracketend(cc);
  1161. }
  1162. }
  1163. repeat_check = TRUE;
  1164. switch(*cc)
  1165. {
  1166. case OP_KET:
  1167. if (common->private_data_ptrs[cc + 1 - common->start] != 0)
  1168. {
  1169. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1170. private_data_ptr += sizeof(sljit_sw);
  1171. cc += common->private_data_ptrs[cc + 1 - common->start];
  1172. }
  1173. cc += 1 + LINK_SIZE;
  1174. break;
  1175. case OP_ASSERT:
  1176. case OP_ASSERT_NOT:
  1177. case OP_ASSERTBACK:
  1178. case OP_ASSERTBACK_NOT:
  1179. case OP_ONCE:
  1180. case OP_ONCE_NC:
  1181. case OP_BRAPOS:
  1182. case OP_SBRA:
  1183. case OP_SBRAPOS:
  1184. case OP_SCOND:
  1185. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1186. private_data_ptr += sizeof(sljit_sw);
  1187. bracketlen = 1 + LINK_SIZE;
  1188. break;
  1189. case OP_CBRAPOS:
  1190. case OP_SCBRAPOS:
  1191. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1192. private_data_ptr += sizeof(sljit_sw);
  1193. bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
  1194. break;
  1195. case OP_COND:
  1196. /* Might be a hidden SCOND. */
  1197. alternative = cc + GET(cc, 1);
  1198. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  1199. {
  1200. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1201. private_data_ptr += sizeof(sljit_sw);
  1202. }
  1203. bracketlen = 1 + LINK_SIZE;
  1204. break;
  1205. case OP_BRA:
  1206. bracketlen = 1 + LINK_SIZE;
  1207. break;
  1208. case OP_CBRA:
  1209. case OP_SCBRA:
  1210. bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
  1211. break;
  1212. case OP_BRAZERO:
  1213. case OP_BRAMINZERO:
  1214. case OP_BRAPOSZERO:
  1215. repeat_check = FALSE;
  1216. size = 1;
  1217. break;
  1218. CASE_ITERATOR_PRIVATE_DATA_1
  1219. space = 1;
  1220. size = -2;
  1221. break;
  1222. CASE_ITERATOR_PRIVATE_DATA_2A
  1223. space = 2;
  1224. size = -2;
  1225. break;
  1226. CASE_ITERATOR_PRIVATE_DATA_2B
  1227. space = 2;
  1228. size = -(2 + IMM2_SIZE);
  1229. break;
  1230. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  1231. space = 1;
  1232. size = 1;
  1233. break;
  1234. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  1235. if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
  1236. space = 2;
  1237. size = 1;
  1238. break;
  1239. case OP_TYPEUPTO:
  1240. if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
  1241. space = 2;
  1242. size = 1 + IMM2_SIZE;
  1243. break;
  1244. case OP_TYPEMINUPTO:
  1245. space = 2;
  1246. size = 1 + IMM2_SIZE;
  1247. break;
  1248. case OP_CLASS:
  1249. case OP_NCLASS:
  1250. space = get_class_iterator_size(cc + size);
  1251. size = 1 + 32 / sizeof(pcre_uchar);
  1252. break;
  1253. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  1254. case OP_XCLASS:
  1255. space = get_class_iterator_size(cc + size);
  1256. size = GET(cc, 1);
  1257. break;
  1258. #endif
  1259. default:
  1260. cc = next_opcode(common, cc);
  1261. SLJIT_ASSERT(cc != NULL);
  1262. break;
  1263. }
  1264. /* Character iterators, which are not inside a repeated bracket,
  1265. gets a private slot instead of allocating it on the stack. */
  1266. if (space > 0 && cc >= end)
  1267. {
  1268. common->private_data_ptrs[cc - common->start] = private_data_ptr;
  1269. private_data_ptr += sizeof(sljit_sw) * space;
  1270. }
  1271. if (size != 0)
  1272. {
  1273. if (size < 0)
  1274. {
  1275. cc += -size;
  1276. #ifdef SUPPORT_UTF
  1277. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1278. #endif
  1279. }
  1280. else
  1281. cc += size;
  1282. }
  1283. if (bracketlen > 0)
  1284. {
  1285. if (cc >= end)
  1286. {
  1287. end = bracketend(cc);
  1288. if (end[-1 - LINK_SIZE] == OP_KET)
  1289. end = NULL;
  1290. }
  1291. cc += bracketlen;
  1292. }
  1293. }
  1294. *private_data_start = private_data_ptr;
  1295. }
  1296. /* Returns with a frame_types (always < 0) if no need for frame. */
  1297. static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
  1298. {
  1299. int length = 0;
  1300. int possessive = 0;
  1301. BOOL stack_restore = FALSE;
  1302. BOOL setsom_found = recursive;
  1303. BOOL setmark_found = recursive;
  1304. /* The last capture is a local variable even for recursions. */
  1305. BOOL capture_last_found = FALSE;
  1306. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  1307. SLJIT_ASSERT(common->control_head_ptr != 0);
  1308. *needs_control_head = TRUE;
  1309. #else
  1310. *needs_control_head = FALSE;
  1311. #endif
  1312. if (ccend == NULL)
  1313. {
  1314. ccend = bracketend(cc) - (1 + LINK_SIZE);
  1315. if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
  1316. {
  1317. possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
  1318. /* This is correct regardless of common->capture_last_ptr. */
  1319. capture_last_found = TRUE;
  1320. }
  1321. cc = next_opcode(common, cc);
  1322. }
  1323. SLJIT_ASSERT(cc != NULL);
  1324. while (cc < ccend)
  1325. switch(*cc)
  1326. {
  1327. case OP_SET_SOM:
  1328. SLJIT_ASSERT(common->has_set_som);
  1329. stack_restore = TRUE;
  1330. if (!setsom_found)
  1331. {
  1332. length += 2;
  1333. setsom_found = TRUE;
  1334. }
  1335. cc += 1;
  1336. break;
  1337. case OP_MARK:
  1338. case OP_PRUNE_ARG:
  1339. case OP_THEN_ARG:
  1340. SLJIT_ASSERT(common->mark_ptr != 0);
  1341. stack_restore = TRUE;
  1342. if (!setmark_found)
  1343. {
  1344. length += 2;
  1345. setmark_found = TRUE;
  1346. }
  1347. if (common->control_head_ptr != 0)
  1348. *needs_control_head = TRUE;
  1349. cc += 1 + 2 + cc[1];
  1350. break;
  1351. case OP_RECURSE:
  1352. stack_restore = TRUE;
  1353. if (common->has_set_som && !setsom_found)
  1354. {
  1355. length += 2;
  1356. setsom_found = TRUE;
  1357. }
  1358. if (common->mark_ptr != 0 && !setmark_found)
  1359. {
  1360. length += 2;
  1361. setmark_found = TRUE;
  1362. }
  1363. if (common->capture_last_ptr != 0 && !capture_last_found)
  1364. {
  1365. length += 2;
  1366. capture_last_found = TRUE;
  1367. }
  1368. cc += 1 + LINK_SIZE;
  1369. break;
  1370. case OP_CBRA:
  1371. case OP_CBRAPOS:
  1372. case OP_SCBRA:
  1373. case OP_SCBRAPOS:
  1374. stack_restore = TRUE;
  1375. if (common->capture_last_ptr != 0 && !capture_last_found)
  1376. {
  1377. length += 2;
  1378. capture_last_found = TRUE;
  1379. }
  1380. length += 3;
  1381. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1382. break;
  1383. case OP_THEN:
  1384. stack_restore = TRUE;
  1385. if (common->control_head_ptr != 0)
  1386. *needs_control_head = TRUE;
  1387. cc ++;
  1388. break;
  1389. default:
  1390. stack_restore = TRUE;
  1391. /* Fall through. */
  1392. case OP_NOT_WORD_BOUNDARY:
  1393. case OP_WORD_BOUNDARY:
  1394. case OP_NOT_DIGIT:
  1395. case OP_DIGIT:
  1396. case OP_NOT_WHITESPACE:
  1397. case OP_WHITESPACE:
  1398. case OP_NOT_WORDCHAR:
  1399. case OP_WORDCHAR:
  1400. case OP_ANY:
  1401. case OP_ALLANY:
  1402. case OP_ANYBYTE:
  1403. case OP_NOTPROP:
  1404. case OP_PROP:
  1405. case OP_ANYNL:
  1406. case OP_NOT_HSPACE:
  1407. case OP_HSPACE:
  1408. case OP_NOT_VSPACE:
  1409. case OP_VSPACE:
  1410. case OP_EXTUNI:
  1411. case OP_EODN:
  1412. case OP_EOD:
  1413. case OP_CIRC:
  1414. case OP_CIRCM:
  1415. case OP_DOLL:
  1416. case OP_DOLLM:
  1417. case OP_CHAR:
  1418. case OP_CHARI:
  1419. case OP_NOT:
  1420. case OP_NOTI:
  1421. case OP_EXACT:
  1422. case OP_POSSTAR:
  1423. case OP_POSPLUS:
  1424. case OP_POSQUERY:
  1425. case OP_POSUPTO:
  1426. case OP_EXACTI:
  1427. case OP_POSSTARI:
  1428. case OP_POSPLUSI:
  1429. case OP_POSQUERYI:
  1430. case OP_POSUPTOI:
  1431. case OP_NOTEXACT:
  1432. case OP_NOTPOSSTAR:
  1433. case OP_NOTPOSPLUS:
  1434. case OP_NOTPOSQUERY:
  1435. case OP_NOTPOSUPTO:
  1436. case OP_NOTEXACTI:
  1437. case OP_NOTPOSSTARI:
  1438. case OP_NOTPOSPLUSI:
  1439. case OP_NOTPOSQUERYI:
  1440. case OP_NOTPOSUPTOI:
  1441. case OP_TYPEEXACT:
  1442. case OP_TYPEPOSSTAR:
  1443. case OP_TYPEPOSPLUS:
  1444. case OP_TYPEPOSQUERY:
  1445. case OP_TYPEPOSUPTO:
  1446. case OP_CLASS:
  1447. case OP_NCLASS:
  1448. case OP_XCLASS:
  1449. case OP_CALLOUT:
  1450. cc = next_opcode(common, cc);
  1451. SLJIT_ASSERT(cc != NULL);
  1452. break;
  1453. }
  1454. /* Possessive quantifiers can use a special case. */
  1455. if (SLJIT_UNLIKELY(possessive == length))
  1456. return stack_restore ? no_frame : no_stack;
  1457. if (length > 0)
  1458. return length + 1;
  1459. return stack_restore ? no_frame : no_stack;
  1460. }
  /* Emits the code that fills a frame on the stack for the given code range.
     The opcode walk handles the same opcode groups as get_framesize(): each
     saved local contributes two words (a tag holding the negated local offset,
     then the current value) and each capturing bracket three words (the
     OVECTOR offset, then both ovector values). A zero word terminates the
     frame. Words are written at STACK_TOP + stackpos with stackpos decreasing
     by sizeof(sljit_sw) after every store.
     cc / ccend: range to scan; when ccend is NULL, cc points to a bracket
       opening and its body is scanned.
     stackpos / stacktop: frame start and end positions (converted with
       STACK()); the final assertion checks that exactly the expected number
       of words was written.
     recursive: when TRUE the start-of-match and mark values are treated as
       already saved. */
  1461. static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
  1462. {
  1463. DEFINE_COMPILER;
  1464. BOOL setsom_found = recursive;
  1465. BOOL setmark_found = recursive;
  1466. /* The last capture is a local variable even for recursions. */
  1467. BOOL capture_last_found = FALSE;
  1468. int offset;
  1469. /* >= 1 + shortest item size (2) */
  1470. SLJIT_UNUSED_ARG(stacktop);
  1471. SLJIT_ASSERT(stackpos >= stacktop + 2);
  1472. stackpos = STACK(stackpos);
  1473. if (ccend == NULL)
  1474. {
  1475. ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* For a non-recursive CBRAPOS / SCBRAPOS the opening opcode itself is
     left in the range, so the capture case below handles it. */
  1476. if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
  1477. cc = next_opcode(common, cc);
  1478. }
  1479. SLJIT_ASSERT(cc != NULL);
  1480. while (cc < ccend)
  1481. switch(*cc)
  1482. {
  1483. case OP_SET_SOM:
  1484. SLJIT_ASSERT(common->has_set_som);
  /* Save OVECTOR(0) once: tag word first, then the value. */
  1485. if (!setsom_found)
  1486. {
  1487. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  1488. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
  1489. stackpos -= (int)sizeof(sljit_sw);
  1490. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1491. stackpos -= (int)sizeof(sljit_sw);
  1492. setsom_found = TRUE;
  1493. }
  1494. cc += 1;
  1495. break;
  1496. case OP_MARK:
  1497. case OP_PRUNE_ARG:
  1498. case OP_THEN_ARG:
  1499. SLJIT_ASSERT(common->mark_ptr != 0);
  /* Save the mark local once. */
  1500. if (!setmark_found)
  1501. {
  1502. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  1503. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
  1504. stackpos -= (int)sizeof(sljit_sw);
  1505. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1506. stackpos -= (int)sizeof(sljit_sw);
  1507. setmark_found = TRUE;
  1508. }
  1509. cc += 1 + 2 + cc[1];
  1510. break;
  1511. case OP_RECURSE:
  /* A recursion saves every local that is in use and not yet saved:
     start of match, mark and last capture. */
  1512. if (common->has_set_som && !setsom_found)
  1513. {
  1514. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  1515. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
  1516. stackpos -= (int)sizeof(sljit_sw);
  1517. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1518. stackpos -= (int)sizeof(sljit_sw);
  1519. setsom_found = TRUE;
  1520. }
  1521. if (common->mark_ptr != 0 && !setmark_found)
  1522. {
  1523. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  1524. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
  1525. stackpos -= (int)sizeof(sljit_sw);
  1526. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1527. stackpos -= (int)sizeof(sljit_sw);
  1528. setmark_found = TRUE;
  1529. }
  1530. if (common->capture_last_ptr != 0 && !capture_last_found)
  1531. {
  1532. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  1533. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
  1534. stackpos -= (int)sizeof(sljit_sw);
  1535. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1536. stackpos -= (int)sizeof(sljit_sw);
  1537. capture_last_found = TRUE;
  1538. }
  1539. cc += 1 + LINK_SIZE;
  1540. break;
  1541. case OP_CBRA:
  1542. case OP_CBRAPOS:
  1543. case OP_SCBRA:
  1544. case OP_SCBRAPOS:
  1545. if (common->capture_last_ptr != 0 && !capture_last_found)
  1546. {
  1547. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  1548. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
  1549. stackpos -= (int)sizeof(sljit_sw);
  1550. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1551. stackpos -= (int)sizeof(sljit_sw);
  1552. capture_last_found = TRUE;
  1553. }
  /* Capture pair: the (non-negated) OVECTOR offset is the tag, followed
     by the two ovector words of this capture number. */
  1554. offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
  1555. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
  1556. stackpos -= (int)sizeof(sljit_sw);
  1557. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  1558. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  1559. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
  1560. stackpos -= (int)sizeof(sljit_sw);
  1561. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
  1562. stackpos -= (int)sizeof(sljit_sw);
  1563. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1564. break;
  1565. default:
  1566. cc = next_opcode(common, cc);
  1567. SLJIT_ASSERT(cc != NULL);
  1568. break;
  1569. }
  /* Zero word closing the frame. */
  1570. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
  1571. SLJIT_ASSERT(stackpos == STACK(stacktop));
  1572. }
  1573. static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
  1574. {
  1575. int private_data_length = needs_control_head ? 3 : 2;
  1576. int size;
  1577. pcre_uchar *alternative;
  1578. /* Calculate the sum of the private machine words. */
  1579. while (cc < ccend)
  1580. {
  1581. size = 0;
  1582. switch(*cc)
  1583. {
  1584. case OP_KET:
  1585. if (PRIVATE_DATA(cc) != 0)
  1586. {
  1587. private_data_length++;
  1588. SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
  1589. cc += PRIVATE_DATA(cc + 1);
  1590. }
  1591. cc += 1 + LINK_SIZE;
  1592. break;
  1593. case OP_ASSERT:
  1594. case OP_ASSERT_NOT:
  1595. case OP_ASSERTBACK:
  1596. case OP_ASSERTBACK_NOT:
  1597. case OP_ONCE:
  1598. case OP_ONCE_NC:
  1599. case OP_BRAPOS:
  1600. case OP_SBRA:
  1601. case OP_SBRAPOS:
  1602. case OP_SCOND:
  1603. private_data_length++;
  1604. SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
  1605. cc += 1 + LINK_SIZE;
  1606. break;
  1607. case OP_CBRA:
  1608. case OP_SCBRA:
  1609. if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  1610. private_data_length++;
  1611. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1612. break;
  1613. case OP_CBRAPOS:
  1614. case OP_SCBRAPOS:
  1615. private_data_length += 2;
  1616. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1617. break;
  1618. case OP_COND:
  1619. /* Might be a hidden SCOND. */
  1620. alternative = cc + GET(cc, 1);
  1621. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  1622. private_data_length++;
  1623. cc += 1 + LINK_SIZE;
  1624. break;
  1625. CASE_ITERATOR_PRIVATE_DATA_1
  1626. if (PRIVATE_DATA(cc))
  1627. private_data_length++;
  1628. cc += 2;
  1629. #ifdef SUPPORT_UTF
  1630. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1631. #endif
  1632. break;
  1633. CASE_ITERATOR_PRIVATE_DATA_2A
  1634. if (PRIVATE_DATA(cc))
  1635. private_data_length += 2;
  1636. cc += 2;
  1637. #ifdef SUPPORT_UTF
  1638. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1639. #endif
  1640. break;
  1641. CASE_ITERATOR_PRIVATE_DATA_2B
  1642. if (PRIVATE_DATA(cc))
  1643. private_data_length += 2;
  1644. cc += 2 + IMM2_SIZE;
  1645. #ifdef SUPPORT_UTF
  1646. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1647. #endif
  1648. break;
  1649. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  1650. if (PRIVATE_DATA(cc))
  1651. private_data_length++;
  1652. cc += 1;
  1653. break;
  1654. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  1655. if (PRIVATE_DATA(cc))
  1656. private_data_length += 2;
  1657. cc += 1;
  1658. break;
  1659. CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  1660. if (PRIVATE_DATA(cc))
  1661. private_data_length += 2;
  1662. cc += 1 + IMM2_SIZE;
  1663. break;
  1664. case OP_CLASS:
  1665. case OP_NCLASS:
  1666. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  1667. case OP_XCLASS:
  1668. size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
  1669. #else
  1670. size = 1 + 32 / (int)sizeof(pcre_uchar);
  1671. #endif
  1672. if (PRIVATE_DATA(cc))
  1673. private_data_length += get_class_iterator_size(cc + size);
  1674. cc += size;
  1675. break;
  1676. default:
  1677. cc = next_opcode(common, cc);
  1678. SLJIT_ASSERT(cc != NULL);
  1679. break;
  1680. }
  1681. }
  1682. SLJIT_ASSERT(cc == ccend);
  1683. return private_data_length;
  1684. }
  /* Emits code that copies the private data words used by the opcodes in
     [cc, ccend) between the local area (addressed through SLJIT_SP) and the
     stack (addressed through STACK_TOP).
     save == TRUE:  locals are stored to the stack; after the last opcode the
       recursive head (and the control head when needs_control_head is set)
       are saved as well.
     save == FALSE: the words are loaded back from the stack into the locals;
       the head words are left out of the restored range.
     stackptr / stacktop delimit the stack area (converted with STACK()).
     The opcode walk visits the same opcode groups as
     get_private_data_copy_length(). Values travel through TMP1 and TMP2
     alternately: a register is written out (save) or refilled (restore) only
     when its next turn comes, which is why the empty/next flags are tracked. */
  1685. static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  1686. BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
  1687. {
  1688. DEFINE_COMPILER;
  /* Local offsets produced by the current opcode (at most two). */
  1689. int srcw[2];
  1690. int count, size;
  /* tmp1next: TMP1 is the register to use next; tmpNempty: the register
     currently holds no pending value. */
  1691. BOOL tmp1next = TRUE;
  1692. BOOL tmp1empty = TRUE;
  1693. BOOL tmp2empty = TRUE;
  1694. pcre_uchar *alternative;
  1695. enum {
  1696. loop,
  1697. end
  1698. } status;
  1699. status = loop;
  1700. stackptr = STACK(stackptr);
  1701. stacktop = STACK(stacktop - 1);
  1702. if (!save)
  1703. {
  /* The head words are not restored here; preload up to two values so the
     loop below always finds the next register filled. */
  1704. stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  1705. if (stackptr < stacktop)
  1706. {
  1707. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
  1708. stackptr += sizeof(sljit_sw);
  1709. tmp1empty = FALSE;
  1710. }
  1711. if (stackptr < stacktop)
  1712. {
  1713. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
  1714. stackptr += sizeof(sljit_sw);
  1715. tmp2empty = FALSE;
  1716. }
  1717. /* The tmp1next must be TRUE in either way. */
  1718. }
  1719. SLJIT_ASSERT(common->recursive_head_ptr != 0);
  1720. do
  1721. {
  1722. count = 0;
  1723. if (cc >= ccend)
  1724. {
  /* End of the range: a restore is finished, a save still has to store
     the head word(s) in one final pass. */
  1725. if (!save)
  1726. break;
  1727. count = 1;
  1728. srcw[0] = common->recursive_head_ptr;
  1729. if (needs_control_head)
  1730. {
  1731. SLJIT_ASSERT(common->control_head_ptr != 0);
  1732. count = 2;
  1733. srcw[0] = common->control_head_ptr;
  1734. srcw[1] = common->recursive_head_ptr;
  1735. }
  1736. status = end;
  1737. }
  /* Otherwise collect the local offsets owned by the current opcode into
     srcw[] / count and step to the next opcode. */
  1738. else switch(*cc)
  1739. {
  1740. case OP_KET:
  1741. if (PRIVATE_DATA(cc) != 0)
  1742. {
  1743. count = 1;
  1744. srcw[0] = PRIVATE_DATA(cc);
  1745. SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
  1746. cc += PRIVATE_DATA(cc + 1);
  1747. }
  1748. cc += 1 + LINK_SIZE;
  1749. break;
  1750. case OP_ASSERT:
  1751. case OP_ASSERT_NOT:
  1752. case OP_ASSERTBACK:
  1753. case OP_ASSERTBACK_NOT:
  1754. case OP_ONCE:
  1755. case OP_ONCE_NC:
  1756. case OP_BRAPOS:
  1757. case OP_SBRA:
  1758. case OP_SBRAPOS:
  1759. case OP_SCOND:
  1760. count = 1;
  1761. srcw[0] = PRIVATE_DATA(cc);
  1762. SLJIT_ASSERT(srcw[0] != 0);
  1763. cc += 1 + LINK_SIZE;
  1764. break;
  1765. case OP_CBRA:
  1766. case OP_SCBRA:
  1767. if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  1768. {
  1769. count = 1;
  1770. srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
  1771. }
  1772. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1773. break;
  1774. case OP_CBRAPOS:
  1775. case OP_SCBRAPOS:
  1776. count = 2;
  1777. srcw[0] = PRIVATE_DATA(cc);
  1778. srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
  1779. SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
  1780. cc += 1 + LINK_SIZE + IMM2_SIZE;
  1781. break;
  1782. case OP_COND:
  1783. /* Might be a hidden SCOND. */
  1784. alternative = cc + GET(cc, 1);
  1785. if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
  1786. {
  1787. count = 1;
  1788. srcw[0] = PRIVATE_DATA(cc);
  1789. SLJIT_ASSERT(srcw[0] != 0);
  1790. }
  1791. cc += 1 + LINK_SIZE;
  1792. break;
  1793. CASE_ITERATOR_PRIVATE_DATA_1
  1794. if (PRIVATE_DATA(cc))
  1795. {
  1796. count = 1;
  1797. srcw[0] = PRIVATE_DATA(cc);
  1798. }
  1799. cc += 2;
  1800. #ifdef SUPPORT_UTF
  1801. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1802. #endif
  1803. break;
  1804. CASE_ITERATOR_PRIVATE_DATA_2A
  1805. if (PRIVATE_DATA(cc))
  1806. {
  1807. count = 2;
  1808. srcw[0] = PRIVATE_DATA(cc);
  1809. srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
  1810. }
  1811. cc += 2;
  1812. #ifdef SUPPORT_UTF
  1813. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1814. #endif
  1815. break;
  1816. CASE_ITERATOR_PRIVATE_DATA_2B
  1817. if (PRIVATE_DATA(cc))
  1818. {
  1819. count = 2;
  1820. srcw[0] = PRIVATE_DATA(cc);
  1821. srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
  1822. }
  1823. cc += 2 + IMM2_SIZE;
  1824. #ifdef SUPPORT_UTF
  1825. if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
  1826. #endif
  1827. break;
  1828. CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  1829. if (PRIVATE_DATA(cc))
  1830. {
  1831. count = 1;
  1832. srcw[0] = PRIVATE_DATA(cc);
  1833. }
  1834. cc += 1;
  1835. break;
  1836. CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  1837. if (PRIVATE_DATA(cc))
  1838. {
  1839. count = 2;
  1840. srcw[0] = PRIVATE_DATA(cc);
  1841. srcw[1] = srcw[0] + sizeof(sljit_sw);
  1842. }
  1843. cc += 1;
  1844. break;
  1845. CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  1846. if (PRIVATE_DATA(cc))
  1847. {
  1848. count = 2;
  1849. srcw[0] = PRIVATE_DATA(cc);
  1850. srcw[1] = srcw[0] + sizeof(sljit_sw);
  1851. }
  1852. cc += 1 + IMM2_SIZE;
  1853. break;
  1854. case OP_CLASS:
  1855. case OP_NCLASS:
  1856. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  1857. case OP_XCLASS:
  1858. size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
  1859. #else
  1860. size = 1 + 32 / (int)sizeof(pcre_uchar);
  1861. #endif
  /* The repeat opcode after the class data decides how many words the
     class iterator owns. */
  1862. if (PRIVATE_DATA(cc))
  1863. switch(get_class_iterator_size(cc + size))
  1864. {
  1865. case 1:
  1866. count = 1;
  1867. srcw[0] = PRIVATE_DATA(cc);
  1868. break;
  1869. case 2:
  1870. count = 2;
  1871. srcw[0] = PRIVATE_DATA(cc);
  1872. srcw[1] = srcw[0] + sizeof(sljit_sw);
  1873. break;
  1874. default:
  1875. SLJIT_UNREACHABLE();
  1876. break;
  1877. }
  1878. cc += size;
  1879. break;
  1880. default:
  1881. cc = next_opcode(common, cc);
  1882. SLJIT_ASSERT(cc != NULL);
  1883. break;
  1884. }
  /* Transfer the collected words, highest srcw index first. */
  1885. while (count > 0)
  1886. {
  1887. count--;
  1888. if (save)
  1889. {
  /* Save: flush the value still pending in the chosen register to the
     stack, then load the next local into it. */
  1890. if (tmp1next)
  1891. {
  1892. if (!tmp1empty)
  1893. {
  1894. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
  1895. stackptr += sizeof(sljit_sw);
  1896. }
  1897. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
  1898. tmp1empty = FALSE;
  1899. tmp1next = FALSE;
  1900. }
  1901. else
  1902. {
  1903. if (!tmp2empty)
  1904. {
  1905. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
  1906. stackptr += sizeof(sljit_sw);
  1907. }
  1908. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
  1909. tmp2empty = FALSE;
  1910. tmp1next = TRUE;
  1911. }
  1912. }
  1913. else
  1914. {
  /* Restore: write the preloaded register into the local, then refill
     the register from the stack while words remain. */
  1915. if (tmp1next)
  1916. {
  1917. SLJIT_ASSERT(!tmp1empty);
  1918. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
  1919. tmp1empty = stackptr >= stacktop;
  1920. if (!tmp1empty)
  1921. {
  1922. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
  1923. stackptr += sizeof(sljit_sw);
  1924. }
  1925. tmp1next = FALSE;
  1926. }
  1927. else
  1928. {
  1929. SLJIT_ASSERT(!tmp2empty);
  1930. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
  1931. tmp2empty = stackptr >= stacktop;
  1932. if (!tmp2empty)
  1933. {
  1934. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
  1935. stackptr += sizeof(sljit_sw);
  1936. }
  1937. tmp1next = TRUE;
  1938. }
  1939. }
  1940. }
  1941. }
  1942. while (status != end);
  /* Save: flush whatever is still pending, the register that was loaded
     earlier first. */
  1943. if (save)
  1944. {
  1945. if (tmp1next)
  1946. {
  1947. if (!tmp1empty)
  1948. {
  1949. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
  1950. stackptr += sizeof(sljit_sw);
  1951. }
  1952. if (!tmp2empty)
  1953. {
  1954. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
  1955. stackptr += sizeof(sljit_sw);
  1956. }
  1957. }
  1958. else
  1959. {
  1960. if (!tmp2empty)
  1961. {
  1962. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
  1963. stackptr += sizeof(sljit_sw);
  1964. }
  1965. if (!tmp1empty)
  1966. {
  1967. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
  1968. stackptr += sizeof(sljit_sw);
  1969. }
  1970. }
  1971. }
  /* The whole range was walked and the stack area was used exactly. */
  1972. SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
  1973. }
  1974. static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
  1975. {
  1976. pcre_uchar *end = bracketend(cc);
  1977. BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
  1978. /* Assert captures then. */
  1979. if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  1980. current_offset = NULL;
  1981. /* Conditional block does not. */
  1982. if (*cc == OP_COND || *cc == OP_SCOND)
  1983. has_alternatives = FALSE;
  1984. cc = next_opcode(common, cc);
  1985. if (has_alternatives)
  1986. current_offset = common->then_offsets + (cc - common->start);
  1987. while (cc < end)
  1988. {
  1989. if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
  1990. cc = set_then_offsets(common, cc, current_offset);
  1991. else
  1992. {
  1993. if (*cc == OP_ALT && has_alternatives)
  1994. current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
  1995. if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
  1996. *current_offset = 1;
  1997. cc = next_opcode(common, cc);
  1998. }
  1999. }
  2000. return end;
  2001. }
  2002. #undef CASE_ITERATOR_PRIVATE_DATA_1
  2003. #undef CASE_ITERATOR_PRIVATE_DATA_2A
  2004. #undef CASE_ITERATOR_PRIVATE_DATA_2B
  2005. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
  2006. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
  2007. #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
  2008. static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
  2009. {
  2010. return (value & (value - 1)) == 0;
  2011. }
  2012. static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
  2013. {
  2014. while (list)
  2015. {
  2016. /* sljit_set_label is clever enough to do nothing
  2017. if either the jump or the label is NULL. */
  2018. SET_LABEL(list->jump, label);
  2019. list = list->next;
  2020. }
  2021. }
  2022. static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
  2023. {
  2024. jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
  2025. if (list_item)
  2026. {
  2027. list_item->next = *list;
  2028. list_item->jump = jump;
  2029. *list = list_item;
  2030. }
  2031. }
  2032. static void add_stub(compiler_common *common, struct sljit_jump *start)
  2033. {
  2034. DEFINE_COMPILER;
  2035. stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
  2036. if (list_item)
  2037. {
  2038. list_item->start = start;
  2039. list_item->quit = LABEL();
  2040. list_item->next = common->stubs;
  2041. common->stubs = list_item;
  2042. }
  2043. }
  2044. static void flush_stubs(compiler_common *common)
  2045. {
  2046. DEFINE_COMPILER;
  2047. stub_list *list_item = common->stubs;
  2048. while (list_item)
  2049. {
  2050. JUMPHERE(list_item->start);
  2051. add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  2052. JUMPTO(SLJIT_JUMP, list_item->quit);
  2053. list_item = list_item->next;
  2054. }
  2055. common->stubs = NULL;
  2056. }
  2057. static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
  2058. {
  2059. DEFINE_COMPILER;
  2060. label_addr_list *label_addr;
  2061. label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
  2062. if (label_addr == NULL)
  2063. return;
  2064. label_addr->label = LABEL();
  2065. label_addr->update_addr = update_addr;
  2066. label_addr->next = common->label_addrs;
  2067. common->label_addrs = label_addr;
  2068. }
  2069. static SLJIT_INLINE void count_match(compiler_common *common)
  2070. {
  2071. DEFINE_COMPILER;
  2072. OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
  2073. add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
  2074. }
  2075. static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
  2076. {
  2077. /* May destroy all locals and registers except TMP2. */
  2078. DEFINE_COMPILER;
  2079. SLJIT_ASSERT(size > 0);
  2080. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
  2081. #ifdef DESTROY_REGISTERS
  2082. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
  2083. OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  2084. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  2085. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
  2086. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
  2087. #endif
  2088. add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
  2089. }
  2090. static SLJIT_INLINE void free_stack(compiler_common *common, int size)
  2091. {
  2092. DEFINE_COMPILER;
  2093. SLJIT_ASSERT(size > 0);
  2094. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
  2095. }
  2096. static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
  2097. {
  2098. DEFINE_COMPILER;
  2099. sljit_uw *result;
  2100. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  2101. return NULL;
  2102. result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
  2103. if (SLJIT_UNLIKELY(result == NULL))
  2104. {
  2105. sljit_set_compiler_memory_error(compiler);
  2106. return NULL;
  2107. }
  2108. *(void**)result = common->read_only_data_head;
  2109. common->read_only_data_head = (void *)result;
  2110. return result + 1;
  2111. }
  2112. static void free_read_only_data(void *current, void *allocator_data)
  2113. {
  2114. void *next;
  2115. SLJIT_UNUSED_ARG(allocator_data);
  2116. while (current != NULL)
  2117. {
  2118. next = *(void**)current;
  2119. SLJIT_FREE(current, allocator_data);
  2120. current = next;
  2121. }
  2122. }
  2123. static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
  2124. {
  2125. DEFINE_COMPILER;
  2126. struct sljit_label *loop;
  2127. int i;
  2128. /* At this point we can freely use all temporary registers. */
  2129. SLJIT_ASSERT(length > 1);
  2130. /* TMP1 returns with begin - 1. */
  2131. OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
  2132. if (length < 8)
  2133. {
  2134. for (i = 1; i < length; i++)
  2135. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  2136. }
  2137. else
  2138. {
  2139. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  2140. {
  2141. GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  2142. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  2143. loop = LABEL();
  2144. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  2145. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  2146. JUMPTO(SLJIT_NOT_ZERO, loop);
  2147. }
  2148. else
  2149. {
  2150. GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
  2151. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  2152. loop = LABEL();
  2153. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  2154. OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  2155. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  2156. JUMPTO(SLJIT_NOT_ZERO, loop);
  2157. }
  2158. }
  2159. }
  2160. static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
  2161. {
  2162. DEFINE_COMPILER;
  2163. sljit_s32 i;
  2164. SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
  2165. OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2166. for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
  2167. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
  2168. }
  2169. static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
  2170. {
  2171. DEFINE_COMPILER;
  2172. struct sljit_label *loop;
  2173. int i;
  2174. SLJIT_ASSERT(length > 1);
  2175. /* OVECTOR(1) contains the "string begin - 1" constant. */
  2176. if (length > 2)
  2177. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  2178. if (length < 8)
  2179. {
  2180. for (i = 2; i < length; i++)
  2181. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  2182. }
  2183. else
  2184. {
  2185. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  2186. {
  2187. GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  2188. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  2189. loop = LABEL();
  2190. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  2191. OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  2192. JUMPTO(SLJIT_NOT_ZERO, loop);
  2193. }
  2194. else
  2195. {
  2196. GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
  2197. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  2198. loop = LABEL();
  2199. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  2200. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
  2201. OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  2202. JUMPTO(SLJIT_NOT_ZERO, loop);
  2203. }
  2204. }
  2205. OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
  2206. if (common->mark_ptr != 0)
  2207. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  2208. if (common->control_head_ptr != 0)
  2209. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  2210. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
  2211. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  2212. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
  2213. }
  2214. static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
  2215. {
  2216. while (current != NULL)
  2217. {
  2218. switch (current[1])
  2219. {
  2220. case type_then_trap:
  2221. break;
  2222. case type_mark:
  2223. if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
  2224. return current[3];
  2225. break;
  2226. default:
  2227. SLJIT_UNREACHABLE();
  2228. break;
  2229. }
  2230. SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  2231. current = (sljit_sw*)current[0];
  2232. }
  2233. return 0;
  2234. }
  2235. static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
  2236. {
  2237. DEFINE_COMPILER;
  2238. struct sljit_label *loop;
  2239. struct sljit_jump *early_quit;
  2240. BOOL has_pre;
  2241. /* At this point we can freely use all registers. */
  2242. OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  2243. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
  2244. OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  2245. if (common->mark_ptr != 0)
  2246. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  2247. OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
  2248. if (common->mark_ptr != 0)
  2249. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  2250. OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
  2251. OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
  2252. has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
  2253. GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
  2254. /* Unlikely, but possible */
  2255. early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
  2256. loop = LABEL();
  2257. if (has_pre)
  2258. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
  2259. else
  2260. {
  2261. OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  2262. OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  2263. }
  2264. OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
  2265. OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
  2266. /* Copy the integer value to the output buffer */
  2267. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  2268. OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
  2269. #endif
  2270. OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
  2271. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2272. JUMPTO(SLJIT_NOT_ZERO, loop);
  2273. JUMPHERE(early_quit);
  2274. /* Calculate the return value, which is the maximum ovector value. */
  2275. if (topbracket > 1)
  2276. {
  2277. if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
  2278. {
  2279. GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  2280. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
  2281. /* OVECTOR(0) is never equal to SLJIT_S2. */
  2282. loop = LABEL();
  2283. sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
  2284. OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2285. CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  2286. }
  2287. else
  2288. {
  2289. GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
  2290. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
  2291. /* OVECTOR(0) is never equal to SLJIT_S2. */
  2292. loop = LABEL();
  2293. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  2294. OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
  2295. OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  2296. CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  2297. }
  2298. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
  2299. }
  2300. else
  2301. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  2302. }
  2303. static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
  2304. {
  2305. DEFINE_COMPILER;
  2306. struct sljit_jump *jump;
  2307. SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
  2308. SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  2309. && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
  2310. OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  2311. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
  2312. OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
  2313. CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
  2314. /* Store match begin and end. */
  2315. OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
  2316. OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
  2317. jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
  2318. OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
  2319. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  2320. OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
  2321. #endif
  2322. OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
  2323. JUMPHERE(jump);
  2324. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
  2325. OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
  2326. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  2327. OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
  2328. #endif
  2329. OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
  2330. OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
  2331. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  2332. OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
  2333. #endif
  2334. OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
  2335. JUMPTO(SLJIT_JUMP, quit);
  2336. }
  2337. static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
  2338. {
  2339. /* May destroy TMP1. */
  2340. DEFINE_COMPILER;
  2341. struct sljit_jump *jump;
  2342. if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  2343. {
  2344. /* The value of -1 must be kept for start_used_ptr! */
  2345. OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  2346. /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  2347. is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  2348. jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  2349. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  2350. JUMPHERE(jump);
  2351. }
  2352. else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
  2353. {
  2354. jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  2355. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  2356. JUMPHERE(jump);
  2357. }
  2358. }
  2359. static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
  2360. {
  2361. /* Detects if the character has an othercase. */
  2362. unsigned int c;
  2363. #ifdef SUPPORT_UTF
  2364. if (common->utf)
  2365. {
  2366. GETCHAR(c, cc);
  2367. if (c > 127)
  2368. {
  2369. #ifdef SUPPORT_UCP
  2370. return c != UCD_OTHERCASE(c);
  2371. #else
  2372. return FALSE;
  2373. #endif
  2374. }
  2375. #ifndef COMPILE_PCRE8
  2376. return common->fcc[c] != c;
  2377. #endif
  2378. }
  2379. else
  2380. #endif
  2381. c = *cc;
  2382. return MAX_255(c) ? common->fcc[c] != c : FALSE;
  2383. }
  2384. static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
  2385. {
  2386. /* Returns with the othercase. */
  2387. #ifdef SUPPORT_UTF
  2388. if (common->utf && c > 127)
  2389. {
  2390. #ifdef SUPPORT_UCP
  2391. return UCD_OTHERCASE(c);
  2392. #else
  2393. return c;
  2394. #endif
  2395. }
  2396. #endif
  2397. return TABLE_GET(c, common->fcc, c);
  2398. }
  2399. static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
  2400. {
  2401. /* Detects if the character and its othercase has only 1 bit difference. */
  2402. unsigned int c, oc, bit;
  2403. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  2404. int n;
  2405. #endif
  2406. #ifdef SUPPORT_UTF
  2407. if (common->utf)
  2408. {
  2409. GETCHAR(c, cc);
  2410. if (c <= 127)
  2411. oc = common->fcc[c];
  2412. else
  2413. {
  2414. #ifdef SUPPORT_UCP
  2415. oc = UCD_OTHERCASE(c);
  2416. #else
  2417. oc = c;
  2418. #endif
  2419. }
  2420. }
  2421. else
  2422. {
  2423. c = *cc;
  2424. oc = TABLE_GET(c, common->fcc, c);
  2425. }
  2426. #else
  2427. c = *cc;
  2428. oc = TABLE_GET(c, common->fcc, c);
  2429. #endif
  2430. SLJIT_ASSERT(c != oc);
  2431. bit = c ^ oc;
  2432. /* Optimized for English alphabet. */
  2433. if (c <= 127 && bit == 0x20)
  2434. return (0 << 8) | 0x20;
  2435. /* Since c != oc, they must have at least 1 bit difference. */
  2436. if (!is_powerof2(bit))
  2437. return 0;
  2438. #if defined COMPILE_PCRE8
  2439. #ifdef SUPPORT_UTF
  2440. if (common->utf && c > 127)
  2441. {
  2442. n = GET_EXTRALEN(*cc);
  2443. while ((bit & 0x3f) == 0)
  2444. {
  2445. n--;
  2446. bit >>= 6;
  2447. }
  2448. return (n << 8) | bit;
  2449. }
  2450. #endif /* SUPPORT_UTF */
  2451. return (0 << 8) | bit;
  2452. #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  2453. #ifdef SUPPORT_UTF
  2454. if (common->utf && c > 65535)
  2455. {
  2456. if (bit >= (1 << 10))
  2457. bit >>= 10;
  2458. else
  2459. return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  2460. }
  2461. #endif /* SUPPORT_UTF */
  2462. return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
  2463. #endif /* COMPILE_PCRE[8|16|32] */
  2464. }
  2465. static void check_partial(compiler_common *common, BOOL force)
  2466. {
  2467. /* Checks whether a partial matching is occurred. Does not modify registers. */
  2468. DEFINE_COMPILER;
  2469. struct sljit_jump *jump = NULL;
  2470. SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
  2471. if (common->mode == JIT_COMPILE)
  2472. return;
  2473. if (!force)
  2474. jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  2475. else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  2476. jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  2477. if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  2478. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  2479. else
  2480. {
  2481. if (common->partialmatchlabel != NULL)
  2482. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  2483. else
  2484. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  2485. }
  2486. if (jump != NULL)
  2487. JUMPHERE(jump);
  2488. }
  2489. static void check_str_end(compiler_common *common, jump_list **end_reached)
  2490. {
  2491. /* Does not affect registers. Usually used in a tight spot. */
  2492. DEFINE_COMPILER;
  2493. struct sljit_jump *jump;
  2494. if (common->mode == JIT_COMPILE)
  2495. {
  2496. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  2497. return;
  2498. }
  2499. jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  2500. if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  2501. {
  2502. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  2503. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  2504. add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  2505. }
  2506. else
  2507. {
  2508. add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  2509. if (common->partialmatchlabel != NULL)
  2510. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  2511. else
  2512. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  2513. }
  2514. JUMPHERE(jump);
  2515. }
  2516. static void detect_partial_match(compiler_common *common, jump_list **backtracks)
  2517. {
  2518. DEFINE_COMPILER;
  2519. struct sljit_jump *jump;
  2520. if (common->mode == JIT_COMPILE)
  2521. {
  2522. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  2523. return;
  2524. }
  2525. /* Partial matching mode. */
  2526. jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  2527. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  2528. if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  2529. {
  2530. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  2531. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  2532. }
  2533. else
  2534. {
  2535. if (common->partialmatchlabel != NULL)
  2536. JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  2537. else
  2538. add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  2539. }
  2540. JUMPHERE(jump);
  2541. }
  2542. static void peek_char(compiler_common *common, sljit_u32 max)
  2543. {
  2544. /* Reads the character into TMP1, keeps STR_PTR.
  2545. Does not check STR_END. TMP2 Destroyed. */
  2546. DEFINE_COMPILER;
  2547. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  2548. struct sljit_jump *jump;
  2549. #endif
  2550. SLJIT_UNUSED_ARG(max);
  2551. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  2552. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  2553. if (common->utf)
  2554. {
  2555. if (max < 128) return;
  2556. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  2557. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2558. add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  2559. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  2560. JUMPHERE(jump);
  2561. }
  2562. #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
  2563. #if defined SUPPORT_UTF && defined COMPILE_PCRE16
  2564. if (common->utf)
  2565. {
  2566. if (max < 0xd800) return;
  2567. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  2568. jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  2569. /* TMP2 contains the high surrogate. */
  2570. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  2571. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  2572. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  2573. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  2574. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2575. JUMPHERE(jump);
  2576. }
  2577. #endif
  2578. }
  2579. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  2580. static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
  2581. {
  2582. /* Tells whether the character codes below 128 are enough
  2583. to determine a match. */
  2584. const sljit_u8 value = nclass ? 0xff : 0;
  2585. const sljit_u8 *end = bitset + 32;
  2586. bitset += 16;
  2587. do
  2588. {
  2589. if (*bitset++ != value)
  2590. return FALSE;
  2591. }
  2592. while (bitset < end);
  2593. return TRUE;
  2594. }
  2595. static void read_char7_type(compiler_common *common, BOOL full_read)
  2596. {
  2597. /* Reads the precise character type of a character into TMP1, if the character
  2598. is less than 128. Otherwise it returns with zero. Does not check STR_END. The
  2599. full_read argument tells whether characters above max are accepted or not. */
  2600. DEFINE_COMPILER;
  2601. struct sljit_jump *jump;
  2602. SLJIT_ASSERT(common->utf);
  2603. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  2604. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2605. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  2606. if (full_read)
  2607. {
  2608. jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  2609. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  2610. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  2611. JUMPHERE(jump);
  2612. }
  2613. }
  2614. #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
/* Emits code which reads one character and steps over its first code unit.

  common          compiler state
  min, max        the range the caller is interested in (min <= max)
  update_str_ptr  when set, the UTF paths below also step over the remaining
                  code units of characters which are not fully decoded

In the 8 bit UTF path the utf8_table4 entry of the lead byte is used as the
number of code units still to be skipped (presumably the count of
continuation bytes - confirm against the table definition). */
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

/* The first code unit is always read and stepped over. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Only single byte characters are interesting and the rest of the
  character need not be skipped: nothing else to do. */
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only lead bytes 0xf0 .. 0xf7 are decoded (three more bytes are
    merged in, 6 bits each); other lead bytes take jump2. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    /* Without update_str_ptr, STR_PTR is advanced on the decoded path only. */
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    /* With update_str_ptr, the table value is added on both paths. */
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only lead bytes 0xe0 .. 0xef are decoded (two more bytes are
    merged in); other lead bytes take jump2. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: a shared helper is called. The helper is chosen by
    whether max is below 0x10000. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* update_str_ptr is set here (see the early return above): the
    character is not decoded, only the table value is added to STR_PTR. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: the lead byte and the next byte are merged
    as if the character had two bytes. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Full decoding: a unit in 0xd800 .. 0xdbff is combined with the unit
    which follows it, and that unit is stepped over as well. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* The pair is not decoded: TMP1 is replaced by 0x10000, which is
  above max on this path. */
  if (max >= 0xd800)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
  2735. static SLJIT_INLINE void read_char(compiler_common *common)
  2736. {
  2737. read_char_range(common, 0, READ_CHAR_MAX, TRUE);
  2738. }
  2739. static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
  2740. {
  2741. /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
  2742. DEFINE_COMPILER;
  2743. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  2744. struct sljit_jump *jump;
  2745. #endif
  2746. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  2747. struct sljit_jump *jump2;
  2748. #endif
  2749. SLJIT_UNUSED_ARG(update_str_ptr);
  2750. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
  2751. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2752. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  2753. if (common->utf)
  2754. {
  2755. /* This can be an extra read in some situations, but hopefully
  2756. it is needed in most cases. */
  2757. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  2758. jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  2759. if (!update_str_ptr)
  2760. {
  2761. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  2762. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2763. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2764. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  2765. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  2766. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
  2767. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  2768. jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
  2769. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  2770. JUMPHERE(jump2);
  2771. }
  2772. else
  2773. add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  2774. JUMPHERE(jump);
  2775. return;
  2776. }
  2777. #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
  2778. #if !defined COMPILE_PCRE8
  2779. /* The ctypes array contains only 256 values. */
  2780. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  2781. jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
  2782. #endif
  2783. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  2784. #if !defined COMPILE_PCRE8
  2785. JUMPHERE(jump);
  2786. #endif
  2787. #if defined SUPPORT_UTF && defined COMPILE_PCRE16
  2788. if (common->utf && update_str_ptr)
  2789. {
  2790. /* Skip low surrogate if necessary. */
  2791. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  2792. jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  2793. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2794. JUMPHERE(jump);
  2795. }
  2796. #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
  2797. }
  2798. static void skip_char_back(compiler_common *common)
  2799. {
  2800. /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
  2801. DEFINE_COMPILER;
  2802. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  2803. #if defined COMPILE_PCRE8
  2804. struct sljit_label *label;
  2805. if (common->utf)
  2806. {
  2807. label = LABEL();
  2808. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  2809. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2810. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  2811. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  2812. return;
  2813. }
  2814. #elif defined COMPILE_PCRE16
  2815. if (common->utf)
  2816. {
  2817. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  2818. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2819. /* Skip low surrogate if necessary. */
  2820. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  2821. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  2822. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  2823. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  2824. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  2825. return;
  2826. }
  2827. #endif /* COMPILE_PCRE[8|16] */
  2828. #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
  2829. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2830. }
  2831. static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
  2832. {
  2833. /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
  2834. DEFINE_COMPILER;
  2835. struct sljit_jump *jump;
  2836. if (nltype == NLTYPE_ANY)
  2837. {
  2838. add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  2839. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  2840. add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  2841. }
  2842. else if (nltype == NLTYPE_ANYCRLF)
  2843. {
  2844. if (jumpifmatch)
  2845. {
  2846. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  2847. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  2848. }
  2849. else
  2850. {
  2851. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  2852. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  2853. JUMPHERE(jump);
  2854. }
  2855. }
  2856. else
  2857. {
  2858. SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  2859. add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  2860. }
  2861. }
  2862. #ifdef SUPPORT_UTF
  2863. #if defined COMPILE_PCRE8
  2864. static void do_utfreadchar(compiler_common *common)
  2865. {
  2866. /* Fast decoding a UTF-8 character. TMP1 contains the first byte
  2867. of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
  2868. DEFINE_COMPILER;
  2869. struct sljit_jump *jump;
  2870. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  2871. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  2872. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  2873. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  2874. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2875. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2876. /* Searching for the first zero. */
  2877. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  2878. jump = JUMP(SLJIT_NOT_ZERO);
  2879. /* Two byte sequence. */
  2880. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2881. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
  2882. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2883. JUMPHERE(jump);
  2884. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  2885. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
  2886. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  2887. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2888. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2889. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  2890. jump = JUMP(SLJIT_NOT_ZERO);
  2891. /* Three byte sequence. */
  2892. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  2893. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
  2894. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2895. /* Four byte sequence. */
  2896. JUMPHERE(jump);
  2897. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
  2898. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
  2899. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  2900. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
  2901. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2902. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2903. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
  2904. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2905. }
  2906. static void do_utfreadchar16(compiler_common *common)
  2907. {
  2908. /* Fast decoding a UTF-8 character. TMP1 contains the first byte
  2909. of the character (>= 0xc0). Return value in TMP1. */
  2910. DEFINE_COMPILER;
  2911. struct sljit_jump *jump;
  2912. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  2913. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  2914. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  2915. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  2916. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2917. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2918. /* Searching for the first zero. */
  2919. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  2920. jump = JUMP(SLJIT_NOT_ZERO);
  2921. /* Two byte sequence. */
  2922. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2923. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2924. JUMPHERE(jump);
  2925. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
  2926. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  2927. /* This code runs only in 8 bit mode. No need to shift the value. */
  2928. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  2929. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  2930. OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
  2931. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
  2932. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
  2933. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  2934. /* Three byte sequence. */
  2935. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  2936. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2937. }
  2938. static void do_utfreadtype8(compiler_common *common)
  2939. {
  2940. /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
  2941. of the character (>= 0xc0). Return value in TMP1. */
  2942. DEFINE_COMPILER;
  2943. struct sljit_jump *jump;
  2944. struct sljit_jump *compare;
  2945. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  2946. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
  2947. jump = JUMP(SLJIT_NOT_ZERO);
  2948. /* Two byte sequence. */
  2949. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  2950. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  2951. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
  2952. /* The upper 5 bits are known at this point. */
  2953. compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
  2954. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
  2955. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
  2956. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
  2957. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  2958. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2959. JUMPHERE(compare);
  2960. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  2961. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2962. /* We only have types for characters less than 256. */
  2963. JUMPHERE(jump);
  2964. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  2965. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  2966. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  2967. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  2968. }
  2969. #endif /* COMPILE_PCRE8 */
  2970. #endif /* SUPPORT_UTF */
  2971. #ifdef SUPPORT_UCP
  2972. /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
  2973. #define UCD_BLOCK_MASK 127
  2974. #define UCD_BLOCK_SHIFT 7
  2975. static void do_getucd(compiler_common *common)
  2976. {
  2977. /* Search the UCD record for the character comes in TMP1.
  2978. Returns chartype in TMP1 and UCD offset in TMP2. */
  2979. DEFINE_COMPILER;
  2980. #ifdef COMPILE_PCRE32
  2981. struct sljit_jump *jump;
  2982. #endif
  2983. #if defined SLJIT_DEBUG && SLJIT_DEBUG
  2984. /* dummy_ucd_record */
  2985. const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
  2986. SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
  2987. SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
  2988. #endif
  2989. SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
  2990. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  2991. #ifdef COMPILE_PCRE32
  2992. if (!common->utf)
  2993. {
  2994. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
  2995. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  2996. JUMPHERE(jump);
  2997. }
  2998. #endif
  2999. OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  3000. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  3001. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  3002. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  3003. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  3004. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  3005. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  3006. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  3007. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
  3008. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  3009. }
  3010. #endif
  3011. static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
  3012. {
  3013. DEFINE_COMPILER;
  3014. struct sljit_label *mainloop;
  3015. struct sljit_label *newlinelabel = NULL;
  3016. struct sljit_jump *start;
  3017. struct sljit_jump *end = NULL;
  3018. struct sljit_jump *end2 = NULL;
  3019. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3020. struct sljit_jump *singlechar;
  3021. #endif
  3022. jump_list *newline = NULL;
  3023. BOOL newlinecheck = FALSE;
  3024. BOOL readuchar = FALSE;
  3025. if (!(hascrorlf || (common->match_end_ptr != 0)) &&
  3026. (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  3027. newlinecheck = TRUE;
  3028. if (common->match_end_ptr != 0)
  3029. {
  3030. /* Search for the end of the first line. */
  3031. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  3032. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  3033. {
  3034. mainloop = LABEL();
  3035. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3036. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3037. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  3038. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  3039. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
  3040. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
  3041. JUMPHERE(end);
  3042. OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3043. }
  3044. else
  3045. {
  3046. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3047. mainloop = LABEL();
  3048. /* Continual stores does not cause data dependency. */
  3049. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
  3050. read_char_range(common, common->nlmin, common->nlmax, TRUE);
  3051. check_newlinechar(common, common->nltype, &newline, TRUE);
  3052. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
  3053. JUMPHERE(end);
  3054. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
  3055. set_jumps(newline, LABEL());
  3056. }
  3057. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  3058. }
  3059. start = JUMP(SLJIT_JUMP);
  3060. if (newlinecheck)
  3061. {
  3062. newlinelabel = LABEL();
  3063. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3064. end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3065. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  3066. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  3067. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  3068. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  3069. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  3070. #endif
  3071. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3072. end2 = JUMP(SLJIT_JUMP);
  3073. }
  3074. mainloop = LABEL();
  3075. /* Increasing the STR_PTR here requires one less jump in the most common case. */
  3076. #ifdef SUPPORT_UTF
  3077. if (common->utf) readuchar = TRUE;
  3078. #endif
  3079. if (newlinecheck) readuchar = TRUE;
  3080. if (readuchar)
  3081. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  3082. if (newlinecheck)
  3083. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
  3084. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3085. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3086. #if defined COMPILE_PCRE8
  3087. if (common->utf)
  3088. {
  3089. singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  3090. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  3091. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3092. JUMPHERE(singlechar);
  3093. }
  3094. #elif defined COMPILE_PCRE16
  3095. if (common->utf)
  3096. {
  3097. singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  3098. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  3099. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  3100. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  3101. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  3102. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3103. JUMPHERE(singlechar);
  3104. }
  3105. #endif /* COMPILE_PCRE[8|16] */
  3106. #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
  3107. JUMPHERE(start);
  3108. if (newlinecheck)
  3109. {
  3110. JUMPHERE(end);
  3111. JUMPHERE(end2);
  3112. }
  3113. return mainloop;
  3114. }
  3115. #define MAX_N_CHARS 16
  3116. #define MAX_DIFF_CHARS 6
  3117. static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
  3118. {
  3119. pcre_uchar i, len;
  3120. len = chars[0];
  3121. if (len == 255)
  3122. return;
  3123. if (len == 0)
  3124. {
  3125. chars[0] = 1;
  3126. chars[1] = chr;
  3127. return;
  3128. }
  3129. for (i = len; i > 0; i--)
  3130. if (chars[i] == chr)
  3131. return;
  3132. if (len >= MAX_DIFF_CHARS - 1)
  3133. {
  3134. chars[0] = 255;
  3135. return;
  3136. }
  3137. len++;
  3138. chars[len] = chr;
  3139. chars[0] = len;
  3140. }
  3141. static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
  3142. {
  3143. /* Recursive function, which scans prefix literals. */
  3144. BOOL last, any, class, caseless;
  3145. int len, repeat, len_save, consumed = 0;
  3146. sljit_u32 chr; /* Any unicode character. */
  3147. sljit_u8 *bytes, *bytes_end, byte;
  3148. pcre_uchar *alternative, *cc_save, *oc;
  3149. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  3150. pcre_uchar othercase[8];
  3151. #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
  3152. pcre_uchar othercase[2];
  3153. #else
  3154. pcre_uchar othercase[1];
  3155. #endif
  3156. repeat = 1;
  3157. while (TRUE)
  3158. {
  3159. if (*rec_count == 0)
  3160. return 0;
  3161. (*rec_count)--;
  3162. last = TRUE;
  3163. any = FALSE;
  3164. class = FALSE;
  3165. caseless = FALSE;
  3166. switch (*cc)
  3167. {
  3168. case OP_CHARI:
  3169. caseless = TRUE;
  3170. case OP_CHAR:
  3171. last = FALSE;
  3172. cc++;
  3173. break;
  3174. case OP_SOD:
  3175. case OP_SOM:
  3176. case OP_SET_SOM:
  3177. case OP_NOT_WORD_BOUNDARY:
  3178. case OP_WORD_BOUNDARY:
  3179. case OP_EODN:
  3180. case OP_EOD:
  3181. case OP_CIRC:
  3182. case OP_CIRCM:
  3183. case OP_DOLL:
  3184. case OP_DOLLM:
  3185. /* Zero width assertions. */
  3186. cc++;
  3187. continue;
  3188. case OP_ASSERT:
  3189. case OP_ASSERT_NOT:
  3190. case OP_ASSERTBACK:
  3191. case OP_ASSERTBACK_NOT:
  3192. cc = bracketend(cc);
  3193. continue;
  3194. case OP_PLUSI:
  3195. case OP_MINPLUSI:
  3196. case OP_POSPLUSI:
  3197. caseless = TRUE;
  3198. case OP_PLUS:
  3199. case OP_MINPLUS:
  3200. case OP_POSPLUS:
  3201. cc++;
  3202. break;
  3203. case OP_EXACTI:
  3204. caseless = TRUE;
  3205. case OP_EXACT:
  3206. repeat = GET2(cc, 1);
  3207. last = FALSE;
  3208. cc += 1 + IMM2_SIZE;
  3209. break;
  3210. case OP_QUERYI:
  3211. case OP_MINQUERYI:
  3212. case OP_POSQUERYI:
  3213. caseless = TRUE;
  3214. case OP_QUERY:
  3215. case OP_MINQUERY:
  3216. case OP_POSQUERY:
  3217. len = 1;
  3218. cc++;
  3219. #ifdef SUPPORT_UTF
  3220. if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
  3221. #endif
  3222. max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
  3223. if (max_chars == 0)
  3224. return consumed;
  3225. last = FALSE;
  3226. break;
  3227. case OP_KET:
  3228. cc += 1 + LINK_SIZE;
  3229. continue;
  3230. case OP_ALT:
  3231. cc += GET(cc, 1);
  3232. continue;
  3233. case OP_ONCE:
  3234. case OP_ONCE_NC:
  3235. case OP_BRA:
  3236. case OP_BRAPOS:
  3237. case OP_CBRA:
  3238. case OP_CBRAPOS:
  3239. alternative = cc + GET(cc, 1);
  3240. while (*alternative == OP_ALT)
  3241. {
  3242. max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
  3243. if (max_chars == 0)
  3244. return consumed;
  3245. alternative += GET(alternative, 1);
  3246. }
  3247. if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
  3248. cc += IMM2_SIZE;
  3249. cc += 1 + LINK_SIZE;
  3250. continue;
  3251. case OP_CLASS:
  3252. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  3253. if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
  3254. return consumed;
  3255. #endif
  3256. class = TRUE;
  3257. break;
  3258. case OP_NCLASS:
  3259. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3260. if (common->utf) return consumed;
  3261. #endif
  3262. class = TRUE;
  3263. break;
  3264. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  3265. case OP_XCLASS:
  3266. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3267. if (common->utf) return consumed;
  3268. #endif
  3269. any = TRUE;
  3270. cc += GET(cc, 1);
  3271. break;
  3272. #endif
  3273. case OP_DIGIT:
  3274. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  3275. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
  3276. return consumed;
  3277. #endif
  3278. any = TRUE;
  3279. cc++;
  3280. break;
  3281. case OP_WHITESPACE:
  3282. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  3283. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
  3284. return consumed;
  3285. #endif
  3286. any = TRUE;
  3287. cc++;
  3288. break;
  3289. case OP_WORDCHAR:
  3290. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  3291. if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
  3292. return consumed;
  3293. #endif
  3294. any = TRUE;
  3295. cc++;
  3296. break;
  3297. case OP_NOT:
  3298. case OP_NOTI:
  3299. cc++;
  3300. /* Fall through. */
  3301. case OP_NOT_DIGIT:
  3302. case OP_NOT_WHITESPACE:
  3303. case OP_NOT_WORDCHAR:
  3304. case OP_ANY:
  3305. case OP_ALLANY:
  3306. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3307. if (common->utf) return consumed;
  3308. #endif
  3309. any = TRUE;
  3310. cc++;
  3311. break;
  3312. #ifdef SUPPORT_UTF
  3313. case OP_NOTPROP:
  3314. case OP_PROP:
  3315. #ifndef COMPILE_PCRE32
  3316. if (common->utf) return consumed;
  3317. #endif
  3318. any = TRUE;
  3319. cc += 1 + 2;
  3320. break;
  3321. #endif
  3322. case OP_TYPEEXACT:
  3323. repeat = GET2(cc, 1);
  3324. cc += 1 + IMM2_SIZE;
  3325. continue;
  3326. case OP_NOTEXACT:
  3327. case OP_NOTEXACTI:
  3328. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3329. if (common->utf) return consumed;
  3330. #endif
  3331. any = TRUE;
  3332. repeat = GET2(cc, 1);
  3333. cc += 1 + IMM2_SIZE + 1;
  3334. break;
  3335. default:
  3336. return consumed;
  3337. }
  3338. if (any)
  3339. {
  3340. do
  3341. {
  3342. chars[0] = 255;
  3343. consumed++;
  3344. if (--max_chars == 0)
  3345. return consumed;
  3346. chars += MAX_DIFF_CHARS;
  3347. }
  3348. while (--repeat > 0);
  3349. repeat = 1;
  3350. continue;
  3351. }
  3352. if (class)
  3353. {
  3354. bytes = (sljit_u8*) (cc + 1);
  3355. cc += 1 + 32 / sizeof(pcre_uchar);
  3356. switch (*cc)
  3357. {
  3358. case OP_CRSTAR:
  3359. case OP_CRMINSTAR:
  3360. case OP_CRPOSSTAR:
  3361. case OP_CRQUERY:
  3362. case OP_CRMINQUERY:
  3363. case OP_CRPOSQUERY:
  3364. max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
  3365. if (max_chars == 0)
  3366. return consumed;
  3367. break;
  3368. default:
  3369. case OP_CRPLUS:
  3370. case OP_CRMINPLUS:
  3371. case OP_CRPOSPLUS:
  3372. break;
  3373. case OP_CRRANGE:
  3374. case OP_CRMINRANGE:
  3375. case OP_CRPOSRANGE:
  3376. repeat = GET2(cc, 1);
  3377. if (repeat <= 0)
  3378. return consumed;
  3379. break;
  3380. }
  3381. do
  3382. {
  3383. if (bytes[31] & 0x80)
  3384. chars[0] = 255;
  3385. else if (chars[0] != 255)
  3386. {
  3387. bytes_end = bytes + 32;
  3388. chr = 0;
  3389. do
  3390. {
  3391. byte = *bytes++;
  3392. SLJIT_ASSERT((chr & 0x7) == 0);
  3393. if (byte == 0)
  3394. chr += 8;
  3395. else
  3396. {
  3397. do
  3398. {
  3399. if ((byte & 0x1) != 0)
  3400. add_prefix_char(chr, chars);
  3401. byte >>= 1;
  3402. chr++;
  3403. }
  3404. while (byte != 0);
  3405. chr = (chr + 7) & ~7;
  3406. }
  3407. }
  3408. while (chars[0] != 255 && bytes < bytes_end);
  3409. bytes = bytes_end - 32;
  3410. }
  3411. consumed++;
  3412. if (--max_chars == 0)
  3413. return consumed;
  3414. chars += MAX_DIFF_CHARS;
  3415. }
  3416. while (--repeat > 0);
  3417. switch (*cc)
  3418. {
  3419. case OP_CRSTAR:
  3420. case OP_CRMINSTAR:
  3421. case OP_CRPOSSTAR:
  3422. return consumed;
  3423. case OP_CRQUERY:
  3424. case OP_CRMINQUERY:
  3425. case OP_CRPOSQUERY:
  3426. cc++;
  3427. break;
  3428. case OP_CRRANGE:
  3429. case OP_CRMINRANGE:
  3430. case OP_CRPOSRANGE:
  3431. if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
  3432. return consumed;
  3433. cc += 1 + 2 * IMM2_SIZE;
  3434. break;
  3435. }
  3436. repeat = 1;
  3437. continue;
  3438. }
  3439. len = 1;
  3440. #ifdef SUPPORT_UTF
  3441. if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
  3442. #endif
  3443. if (caseless && char_has_othercase(common, cc))
  3444. {
  3445. #ifdef SUPPORT_UTF
  3446. if (common->utf)
  3447. {
  3448. GETCHAR(chr, cc);
  3449. if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
  3450. return consumed;
  3451. }
  3452. else
  3453. #endif
  3454. {
  3455. chr = *cc;
  3456. othercase[0] = TABLE_GET(chr, common->fcc, chr);
  3457. }
  3458. }
  3459. else
  3460. {
  3461. caseless = FALSE;
  3462. othercase[0] = 0; /* Stops compiler warning - PH */
  3463. }
  3464. len_save = len;
  3465. cc_save = cc;
  3466. while (TRUE)
  3467. {
  3468. oc = othercase;
  3469. do
  3470. {
  3471. chr = *cc;
  3472. add_prefix_char(*cc, chars);
  3473. if (caseless)
  3474. add_prefix_char(*oc, chars);
  3475. len--;
  3476. consumed++;
  3477. if (--max_chars == 0)
  3478. return consumed;
  3479. chars += MAX_DIFF_CHARS;
  3480. cc++;
  3481. oc++;
  3482. }
  3483. while (len > 0);
  3484. if (--repeat == 0)
  3485. break;
  3486. len = len_save;
  3487. cc = cc_save;
  3488. }
  3489. repeat = 1;
  3490. if (last)
  3491. return consumed;
  3492. }
  3493. }
  3494. #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
  3495. static sljit_s32 character_to_int32(pcre_uchar chr)
  3496. {
  3497. sljit_s32 value = (sljit_s32)chr;
  3498. #if defined COMPILE_PCRE8
  3499. #define SSE2_COMPARE_TYPE_INDEX 0
  3500. return ((unsigned int)value << 24) | ((unsigned int)value << 16) | ((unsigned int)value << 8) | (unsigned int)value;
  3501. #elif defined COMPILE_PCRE16
  3502. #define SSE2_COMPARE_TYPE_INDEX 1
  3503. return ((unsigned int)value << 16) | value;
  3504. #elif defined COMPILE_PCRE32
  3505. #define SSE2_COMPARE_TYPE_INDEX 2
  3506. return value;
  3507. #else
  3508. #error "Unsupported unit width"
  3509. #endif
  3510. }
  3511. static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
  3512. {
  3513. DEFINE_COMPILER;
  3514. struct sljit_label *start;
  3515. struct sljit_jump *quit[3];
  3516. struct sljit_jump *nomatch;
  3517. sljit_u8 instruction[8];
  3518. sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
  3519. sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
  3520. sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
  3521. BOOL load_twice = FALSE;
  3522. pcre_uchar bit;
  3523. bit = char1 ^ char2;
  3524. if (!is_powerof2(bit))
  3525. bit = 0;
  3526. if ((char1 != char2) && bit == 0)
  3527. load_twice = TRUE;
  3528. quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3529. /* First part (unaligned start) */
  3530. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
  3531. SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
  3532. /* MOVD xmm, r/m32 */
  3533. instruction[0] = 0x66;
  3534. instruction[1] = 0x0f;
  3535. instruction[2] = 0x6e;
  3536. instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
  3537. sljit_emit_op_custom(compiler, instruction, 4);
  3538. if (char1 != char2)
  3539. {
  3540. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
  3541. /* MOVD xmm, r/m32 */
  3542. instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
  3543. sljit_emit_op_custom(compiler, instruction, 4);
  3544. }
  3545. /* PSHUFD xmm1, xmm2/m128, imm8 */
  3546. instruction[2] = 0x70;
  3547. instruction[3] = 0xc0 | (2 << 3) | 2;
  3548. instruction[4] = 0;
  3549. sljit_emit_op_custom(compiler, instruction, 5);
  3550. if (char1 != char2)
  3551. {
  3552. /* PSHUFD xmm1, xmm2/m128, imm8 */
  3553. instruction[3] = 0xc0 | (3 << 3) | 3;
  3554. instruction[4] = 0;
  3555. sljit_emit_op_custom(compiler, instruction, 5);
  3556. }
  3557. OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
  3558. OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
  3559. /* MOVDQA xmm1, xmm2/m128 */
  3560. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  3561. if (str_ptr_ind < 8)
  3562. {
  3563. instruction[2] = 0x6f;
  3564. instruction[3] = (0 << 3) | str_ptr_ind;
  3565. sljit_emit_op_custom(compiler, instruction, 4);
  3566. if (load_twice)
  3567. {
  3568. instruction[3] = (1 << 3) | str_ptr_ind;
  3569. sljit_emit_op_custom(compiler, instruction, 4);
  3570. }
  3571. }
  3572. else
  3573. {
  3574. instruction[1] = 0x41;
  3575. instruction[2] = 0x0f;
  3576. instruction[3] = 0x6f;
  3577. instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  3578. sljit_emit_op_custom(compiler, instruction, 5);
  3579. if (load_twice)
  3580. {
  3581. instruction[4] = (1 << 3) | str_ptr_ind;
  3582. sljit_emit_op_custom(compiler, instruction, 5);
  3583. }
  3584. instruction[1] = 0x0f;
  3585. }
  3586. #else
  3587. instruction[2] = 0x6f;
  3588. instruction[3] = (0 << 3) | str_ptr_ind;
  3589. sljit_emit_op_custom(compiler, instruction, 4);
  3590. if (load_twice)
  3591. {
  3592. instruction[3] = (1 << 3) | str_ptr_ind;
  3593. sljit_emit_op_custom(compiler, instruction, 4);
  3594. }
  3595. #endif
  3596. if (bit != 0)
  3597. {
  3598. /* POR xmm1, xmm2/m128 */
  3599. instruction[2] = 0xeb;
  3600. instruction[3] = 0xc0 | (0 << 3) | 3;
  3601. sljit_emit_op_custom(compiler, instruction, 4);
  3602. }
  3603. /* PCMPEQB/W/D xmm1, xmm2/m128 */
  3604. instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  3605. instruction[3] = 0xc0 | (0 << 3) | 2;
  3606. sljit_emit_op_custom(compiler, instruction, 4);
  3607. if (load_twice)
  3608. {
  3609. instruction[3] = 0xc0 | (1 << 3) | 3;
  3610. sljit_emit_op_custom(compiler, instruction, 4);
  3611. }
  3612. /* PMOVMSKB reg, xmm */
  3613. instruction[2] = 0xd7;
  3614. instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
  3615. sljit_emit_op_custom(compiler, instruction, 4);
  3616. if (load_twice)
  3617. {
  3618. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
  3619. instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  3620. sljit_emit_op_custom(compiler, instruction, 4);
  3621. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3622. OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
  3623. }
  3624. OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
  3625. /* BSF r32, r/m32 */
  3626. instruction[0] = 0x0f;
  3627. instruction[1] = 0xbc;
  3628. instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
  3629. sljit_emit_op_custom(compiler, instruction, 3);
  3630. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  3631. nomatch = JUMP(SLJIT_ZERO);
  3632. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  3633. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3634. quit[1] = JUMP(SLJIT_JUMP);
  3635. JUMPHERE(nomatch);
  3636. start = LABEL();
  3637. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
  3638. quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3639. /* Second part (aligned) */
  3640. instruction[0] = 0x66;
  3641. instruction[1] = 0x0f;
  3642. /* MOVDQA xmm1, xmm2/m128 */
  3643. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  3644. if (str_ptr_ind < 8)
  3645. {
  3646. instruction[2] = 0x6f;
  3647. instruction[3] = (0 << 3) | str_ptr_ind;
  3648. sljit_emit_op_custom(compiler, instruction, 4);
  3649. if (load_twice)
  3650. {
  3651. instruction[3] = (1 << 3) | str_ptr_ind;
  3652. sljit_emit_op_custom(compiler, instruction, 4);
  3653. }
  3654. }
  3655. else
  3656. {
  3657. instruction[1] = 0x41;
  3658. instruction[2] = 0x0f;
  3659. instruction[3] = 0x6f;
  3660. instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
  3661. sljit_emit_op_custom(compiler, instruction, 5);
  3662. if (load_twice)
  3663. {
  3664. instruction[4] = (1 << 3) | str_ptr_ind;
  3665. sljit_emit_op_custom(compiler, instruction, 5);
  3666. }
  3667. instruction[1] = 0x0f;
  3668. }
  3669. #else
  3670. instruction[2] = 0x6f;
  3671. instruction[3] = (0 << 3) | str_ptr_ind;
  3672. sljit_emit_op_custom(compiler, instruction, 4);
  3673. if (load_twice)
  3674. {
  3675. instruction[3] = (1 << 3) | str_ptr_ind;
  3676. sljit_emit_op_custom(compiler, instruction, 4);
  3677. }
  3678. #endif
  3679. if (bit != 0)
  3680. {
  3681. /* POR xmm1, xmm2/m128 */
  3682. instruction[2] = 0xeb;
  3683. instruction[3] = 0xc0 | (0 << 3) | 3;
  3684. sljit_emit_op_custom(compiler, instruction, 4);
  3685. }
  3686. /* PCMPEQB/W/D xmm1, xmm2/m128 */
  3687. instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  3688. instruction[3] = 0xc0 | (0 << 3) | 2;
  3689. sljit_emit_op_custom(compiler, instruction, 4);
  3690. if (load_twice)
  3691. {
  3692. instruction[3] = 0xc0 | (1 << 3) | 3;
  3693. sljit_emit_op_custom(compiler, instruction, 4);
  3694. }
  3695. /* PMOVMSKB reg, xmm */
  3696. instruction[2] = 0xd7;
  3697. instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
  3698. sljit_emit_op_custom(compiler, instruction, 4);
  3699. if (load_twice)
  3700. {
  3701. instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
  3702. sljit_emit_op_custom(compiler, instruction, 4);
  3703. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  3704. }
  3705. /* BSF r32, r/m32 */
  3706. instruction[0] = 0x0f;
  3707. instruction[1] = 0xbc;
  3708. instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
  3709. sljit_emit_op_custom(compiler, instruction, 3);
  3710. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  3711. JUMPTO(SLJIT_ZERO, start);
  3712. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3713. start = LABEL();
  3714. SET_LABEL(quit[0], start);
  3715. SET_LABEL(quit[1], start);
  3716. SET_LABEL(quit[2], start);
  3717. }
  3718. #undef SSE2_COMPARE_TYPE_INDEX
  3719. #endif
  3720. static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
  3721. {
  3722. DEFINE_COMPILER;
  3723. struct sljit_label *start;
  3724. struct sljit_jump *quit;
  3725. struct sljit_jump *found;
  3726. pcre_uchar mask;
  3727. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3728. struct sljit_label *utf_start = NULL;
  3729. struct sljit_jump *utf_quit = NULL;
  3730. #endif
  3731. BOOL has_match_end = (common->match_end_ptr != 0);
  3732. if (offset > 0)
  3733. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  3734. if (has_match_end)
  3735. {
  3736. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  3737. OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
  3738. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
  3739. sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
  3740. }
  3741. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3742. if (common->utf && offset > 0)
  3743. utf_start = LABEL();
  3744. #endif
  3745. #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
  3746. /* SSE2 accelerated first character search. */
  3747. if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
  3748. {
  3749. fast_forward_first_char2_sse2(common, char1, char2);
  3750. SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
  3751. if (common->mode == JIT_COMPILE)
  3752. {
  3753. /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
  3754. SLJIT_ASSERT(common->forced_quit_label == NULL);
  3755. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
  3756. add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  3757. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3758. if (common->utf && offset > 0)
  3759. {
  3760. SLJIT_ASSERT(common->mode == JIT_COMPILE);
  3761. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  3762. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3763. #if defined COMPILE_PCRE8
  3764. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  3765. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
  3766. #elif defined COMPILE_PCRE16
  3767. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  3768. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
  3769. #else
  3770. #error "Unknown code width"
  3771. #endif
  3772. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3773. }
  3774. #endif
  3775. if (offset > 0)
  3776. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  3777. }
  3778. else
  3779. {
  3780. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  3781. if (has_match_end)
  3782. {
  3783. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  3784. sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
  3785. }
  3786. else
  3787. sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
  3788. }
  3789. if (has_match_end)
  3790. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  3791. return;
  3792. }
  3793. #endif
  3794. quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3795. start = LABEL();
  3796. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  3797. if (char1 == char2)
  3798. found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  3799. else
  3800. {
  3801. mask = char1 ^ char2;
  3802. if (is_powerof2(mask))
  3803. {
  3804. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
  3805. found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
  3806. }
  3807. else
  3808. {
  3809. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
  3810. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  3811. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
  3812. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  3813. found = JUMP(SLJIT_NOT_ZERO);
  3814. }
  3815. }
  3816. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3817. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
  3818. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3819. if (common->utf && offset > 0)
  3820. utf_quit = JUMP(SLJIT_JUMP);
  3821. #endif
  3822. JUMPHERE(found);
  3823. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  3824. if (common->utf && offset > 0)
  3825. {
  3826. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
  3827. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3828. #if defined COMPILE_PCRE8
  3829. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  3830. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
  3831. #elif defined COMPILE_PCRE16
  3832. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  3833. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
  3834. #else
  3835. #error "Unknown code width"
  3836. #endif
  3837. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3838. JUMPHERE(utf_quit);
  3839. }
  3840. #endif
  3841. JUMPHERE(quit);
  3842. if (has_match_end)
  3843. {
  3844. quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  3845. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  3846. if (offset > 0)
  3847. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  3848. JUMPHERE(quit);
  3849. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  3850. }
  3851. if (offset > 0)
  3852. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  3853. }
  3854. static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
  3855. {
  3856. DEFINE_COMPILER;
  3857. struct sljit_label *start;
  3858. struct sljit_jump *quit;
  3859. struct sljit_jump *match;
  3860. /* bytes[0] represent the number of characters between 0
  3861. and MAX_N_BYTES - 1, 255 represents any character. */
  3862. pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
  3863. sljit_s32 offset;
  3864. pcre_uchar mask;
  3865. pcre_uchar *char_set, *char_set_end;
  3866. int i, max, from;
  3867. int range_right = -1, range_len;
  3868. sljit_u8 *update_table = NULL;
  3869. BOOL in_range;
  3870. sljit_u32 rec_count;
  3871. for (i = 0; i < MAX_N_CHARS; i++)
  3872. chars[i * MAX_DIFF_CHARS] = 0;
  3873. rec_count = 10000;
  3874. max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
  3875. if (max < 1)
  3876. return FALSE;
  3877. in_range = FALSE;
  3878. /* Prevent compiler "uninitialized" warning */
  3879. from = 0;
  3880. range_len = 4 /* minimum length */ - 1;
  3881. for (i = 0; i <= max; i++)
  3882. {
  3883. if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
  3884. {
  3885. range_len = i - from;
  3886. range_right = i - 1;
  3887. }
  3888. if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
  3889. {
  3890. SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
  3891. if (!in_range)
  3892. {
  3893. in_range = TRUE;
  3894. from = i;
  3895. }
  3896. }
  3897. else
  3898. in_range = FALSE;
  3899. }
  3900. if (range_right >= 0)
  3901. {
  3902. update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  3903. if (update_table == NULL)
  3904. return TRUE;
  3905. memset(update_table, IN_UCHARS(range_len), 256);
  3906. for (i = 0; i < range_len; i++)
  3907. {
  3908. char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
  3909. SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
  3910. char_set_end = char_set + char_set[0];
  3911. char_set++;
  3912. while (char_set <= char_set_end)
  3913. {
  3914. if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
  3915. update_table[(*char_set) & 0xff] = IN_UCHARS(i);
  3916. char_set++;
  3917. }
  3918. }
  3919. }
  3920. offset = -1;
  3921. /* Scan forward. */
  3922. for (i = 0; i < max; i++)
  3923. {
  3924. if (offset == -1)
  3925. {
  3926. if (chars[i * MAX_DIFF_CHARS] <= 2)
  3927. offset = i;
  3928. }
  3929. else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
  3930. {
  3931. if (chars[i * MAX_DIFF_CHARS] == 1)
  3932. offset = i;
  3933. else
  3934. {
  3935. mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
  3936. if (!is_powerof2(mask))
  3937. {
  3938. mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
  3939. if (is_powerof2(mask))
  3940. offset = i;
  3941. }
  3942. }
  3943. }
  3944. }
  3945. if (range_right < 0)
  3946. {
  3947. if (offset < 0)
  3948. return FALSE;
  3949. SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
  3950. /* Works regardless the value is 1 or 2. */
  3951. mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
  3952. fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
  3953. return TRUE;
  3954. }
  3955. if (range_right == offset)
  3956. offset = -1;
  3957. SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
  3958. max -= 1;
  3959. SLJIT_ASSERT(max > 0);
  3960. if (common->match_end_ptr != 0)
  3961. {
  3962. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  3963. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  3964. OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  3965. quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
  3966. OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  3967. JUMPHERE(quit);
  3968. }
  3969. else
  3970. OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  3971. SLJIT_ASSERT(range_right >= 0);
  3972. #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  3973. OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
  3974. #endif
  3975. start = LABEL();
  3976. quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  3977. #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  3978. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
  3979. #else
  3980. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
  3981. #endif
  3982. #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  3983. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
  3984. #else
  3985. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
  3986. #endif
  3987. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  3988. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
  3989. if (offset >= 0)
  3990. {
  3991. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  3992. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  3993. if (chars[offset * MAX_DIFF_CHARS] == 1)
  3994. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
  3995. else
  3996. {
  3997. mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
  3998. if (is_powerof2(mask))
  3999. {
  4000. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
  4001. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
  4002. }
  4003. else
  4004. {
  4005. match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
  4006. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
  4007. JUMPHERE(match);
  4008. }
  4009. }
  4010. }
  4011. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  4012. if (common->utf && offset != 0)
  4013. {
  4014. if (offset < 0)
  4015. {
  4016. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4017. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4018. }
  4019. else
  4020. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  4021. #if defined COMPILE_PCRE8
  4022. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  4023. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
  4024. #elif defined COMPILE_PCRE16
  4025. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  4026. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
  4027. #else
  4028. #error "Unknown code width"
  4029. #endif
  4030. if (offset < 0)
  4031. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4032. }
  4033. #endif
  4034. if (offset >= 0)
  4035. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4036. JUMPHERE(quit);
  4037. if (common->match_end_ptr != 0)
  4038. {
  4039. if (range_right >= 0)
  4040. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  4041. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  4042. if (range_right >= 0)
  4043. {
  4044. quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
  4045. OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  4046. JUMPHERE(quit);
  4047. }
  4048. }
  4049. else
  4050. OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  4051. return TRUE;
  4052. }
  4053. #undef MAX_N_CHARS
  4054. #undef MAX_DIFF_CHARS
  4055. static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
  4056. {
  4057. pcre_uchar oc;
  4058. oc = first_char;
  4059. if (caseless)
  4060. {
  4061. oc = TABLE_GET(first_char, common->fcc, first_char);
  4062. #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
  4063. if (first_char > 127 && common->utf)
  4064. oc = UCD_OTHERCASE(first_char);
  4065. #endif
  4066. }
  4067. fast_forward_first_char2(common, first_char, oc, 0);
  4068. }
  4069. static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
  4070. {
  4071. DEFINE_COMPILER;
  4072. struct sljit_label *loop;
  4073. struct sljit_jump *lastchar;
  4074. struct sljit_jump *firstchar;
  4075. struct sljit_jump *quit;
  4076. struct sljit_jump *foundcr = NULL;
  4077. struct sljit_jump *notfoundnl;
  4078. jump_list *newline = NULL;
  4079. if (common->match_end_ptr != 0)
  4080. {
  4081. OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  4082. OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  4083. }
  4084. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  4085. {
  4086. lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4087. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  4088. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  4089. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  4090. firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  4091. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  4092. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  4093. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
  4094. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4095. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
  4096. #endif
  4097. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  4098. loop = LABEL();
  4099. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4100. quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4101. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  4102. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  4103. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
  4104. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
  4105. JUMPHERE(quit);
  4106. JUMPHERE(firstchar);
  4107. JUMPHERE(lastchar);
  4108. if (common->match_end_ptr != 0)
  4109. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  4110. return;
  4111. }
  4112. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  4113. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  4114. firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  4115. skip_char_back(common);
  4116. loop = LABEL();
  4117. common->ff_newline_shortcut = loop;
  4118. read_char_range(common, common->nlmin, common->nlmax, TRUE);
  4119. lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4120. if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  4121. foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  4122. check_newlinechar(common, common->nltype, &newline, FALSE);
  4123. set_jumps(newline, loop);
  4124. if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  4125. {
  4126. quit = JUMP(SLJIT_JUMP);
  4127. JUMPHERE(foundcr);
  4128. notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4129. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4130. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  4131. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  4132. #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4133. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  4134. #endif
  4135. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4136. JUMPHERE(notfoundnl);
  4137. JUMPHERE(quit);
  4138. }
  4139. JUMPHERE(lastchar);
  4140. JUMPHERE(firstchar);
  4141. if (common->match_end_ptr != 0)
  4142. OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  4143. }
  4144. static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
  4145. static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
  4146. {
  4147. DEFINE_COMPILER;
  4148. struct sljit_label *start;
  4149. struct sljit_jump *quit;
  4150. struct sljit_jump *found = NULL;
  4151. jump_list *matches = NULL;
  4152. #ifndef COMPILE_PCRE8
  4153. struct sljit_jump *jump;
  4154. #endif
  4155. if (common->match_end_ptr != 0)
  4156. {
  4157. OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  4158. OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  4159. }
  4160. start = LABEL();
  4161. quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  4162. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  4163. #ifdef SUPPORT_UTF
  4164. if (common->utf)
  4165. OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  4166. #endif
  4167. if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
  4168. {
  4169. #ifndef COMPILE_PCRE8
  4170. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
  4171. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  4172. JUMPHERE(jump);
  4173. #endif
  4174. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  4175. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  4176. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  4177. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  4178. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  4179. found = JUMP(SLJIT_NOT_ZERO);
  4180. }
  4181. #ifdef SUPPORT_UTF
  4182. if (common->utf)
  4183. OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  4184. #endif
  4185. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4186. #ifdef SUPPORT_UTF
  4187. #if defined COMPILE_PCRE8
  4188. if (common->utf)
  4189. {
  4190. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
  4191. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  4192. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4193. }
  4194. #elif defined COMPILE_PCRE16
  4195. if (common->utf)
  4196. {
  4197. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
  4198. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  4199. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  4200. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  4201. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  4202. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  4203. }
  4204. #endif /* COMPILE_PCRE[8|16] */
  4205. #endif /* SUPPORT_UTF */
  4206. JUMPTO(SLJIT_JUMP, start);
  4207. if (found != NULL)
  4208. JUMPHERE(found);
  4209. if (matches != NULL)
  4210. set_jumps(matches, LABEL());
  4211. JUMPHERE(quit);
  4212. if (common->match_end_ptr != 0)
  4213. OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
  4214. }
  4215. static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
  4216. {
  4217. DEFINE_COMPILER;
  4218. struct sljit_label *loop;
  4219. struct sljit_jump *toolong;
  4220. struct sljit_jump *alreadyfound;
  4221. struct sljit_jump *found;
  4222. struct sljit_jump *foundoc = NULL;
  4223. struct sljit_jump *notfound;
  4224. sljit_u32 oc, bit;
  4225. SLJIT_ASSERT(common->req_char_ptr != 0);
  4226. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
  4227. OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
  4228. toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
  4229. alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
  4230. if (has_firstchar)
  4231. OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4232. else
  4233. OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
  4234. loop = LABEL();
  4235. notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
  4236. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
  4237. oc = req_char;
  4238. if (caseless)
  4239. {
  4240. oc = TABLE_GET(req_char, common->fcc, req_char);
  4241. #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
  4242. if (req_char > 127 && common->utf)
  4243. oc = UCD_OTHERCASE(req_char);
  4244. #endif
  4245. }
  4246. if (req_char == oc)
  4247. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  4248. else
  4249. {
  4250. bit = req_char ^ oc;
  4251. if (is_powerof2(bit))
  4252. {
  4253. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
  4254. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
  4255. }
  4256. else
  4257. {
  4258. found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  4259. foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
  4260. }
  4261. }
  4262. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  4263. JUMPTO(SLJIT_JUMP, loop);
  4264. JUMPHERE(found);
  4265. if (foundoc)
  4266. JUMPHERE(foundoc);
  4267. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
  4268. JUMPHERE(alreadyfound);
  4269. JUMPHERE(toolong);
  4270. return notfound;
  4271. }
  4272. static void do_revertframes(compiler_common *common)
  4273. {
  4274. DEFINE_COMPILER;
  4275. struct sljit_jump *jump;
  4276. struct sljit_label *mainloop;
  4277. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4278. OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
  4279. GET_LOCAL_BASE(TMP1, 0, 0);
  4280. /* Drop frames until we reach STACK_TOP. */
  4281. mainloop = LABEL();
  4282. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
  4283. jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
  4284. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  4285. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
  4286. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
  4287. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  4288. JUMPTO(SLJIT_JUMP, mainloop);
  4289. JUMPHERE(jump);
  4290. jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
  4291. /* End of reverting values. */
  4292. OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
  4293. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  4294. JUMPHERE(jump);
  4295. OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
  4296. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  4297. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
  4298. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  4299. JUMPTO(SLJIT_JUMP, mainloop);
  4300. }
  4301. static void check_wordboundary(compiler_common *common)
  4302. {
  4303. DEFINE_COMPILER;
  4304. struct sljit_jump *skipread;
  4305. jump_list *skipread_list = NULL;
  4306. #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
  4307. struct sljit_jump *jump;
  4308. #endif
  4309. SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
  4310. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4311. /* Get type of the previous char, and put it to LOCALS1. */
  4312. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  4313. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  4314. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
  4315. skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
  4316. skip_char_back(common);
  4317. check_start_used_ptr(common);
  4318. read_char(common);
  4319. /* Testing char type. */
  4320. #ifdef SUPPORT_UCP
  4321. if (common->use_ucp)
  4322. {
  4323. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  4324. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  4325. add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  4326. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  4327. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  4328. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  4329. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  4330. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  4331. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  4332. JUMPHERE(jump);
  4333. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  4334. }
  4335. else
  4336. #endif
  4337. {
  4338. #ifndef COMPILE_PCRE8
  4339. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  4340. #elif defined SUPPORT_UTF
  4341. /* Here LOCALS1 has already been zeroed. */
  4342. jump = NULL;
  4343. if (common->utf)
  4344. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  4345. #endif /* COMPILE_PCRE8 */
  4346. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  4347. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  4348. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  4349. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
  4350. #ifndef COMPILE_PCRE8
  4351. JUMPHERE(jump);
  4352. #elif defined SUPPORT_UTF
  4353. if (jump != NULL)
  4354. JUMPHERE(jump);
  4355. #endif /* COMPILE_PCRE8 */
  4356. }
  4357. JUMPHERE(skipread);
  4358. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  4359. check_str_end(common, &skipread_list);
  4360. peek_char(common, READ_CHAR_MAX);
  4361. /* Testing char type. This is a code duplication. */
  4362. #ifdef SUPPORT_UCP
  4363. if (common->use_ucp)
  4364. {
  4365. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  4366. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  4367. add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  4368. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  4369. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  4370. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  4371. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  4372. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  4373. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  4374. JUMPHERE(jump);
  4375. }
  4376. else
  4377. #endif
  4378. {
  4379. #ifndef COMPILE_PCRE8
  4380. /* TMP2 may be destroyed by peek_char. */
  4381. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  4382. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  4383. #elif defined SUPPORT_UTF
  4384. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  4385. jump = NULL;
  4386. if (common->utf)
  4387. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  4388. #endif
  4389. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  4390. OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  4391. OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  4392. #ifndef COMPILE_PCRE8
  4393. JUMPHERE(jump);
  4394. #elif defined SUPPORT_UTF
  4395. if (jump != NULL)
  4396. JUMPHERE(jump);
  4397. #endif /* COMPILE_PCRE8 */
  4398. }
  4399. set_jumps(skipread_list, LABEL());
  4400. OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  4401. sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4402. }
  4403. static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
  4404. {
  4405. /* May destroy TMP1. */
  4406. DEFINE_COMPILER;
  4407. int ranges[MAX_RANGE_SIZE];
  4408. sljit_u8 bit, cbit, all;
  4409. int i, byte, length = 0;
  4410. bit = bits[0] & 0x1;
  4411. /* All bits will be zero or one (since bit is zero or one). */
  4412. all = -bit;
  4413. for (i = 0; i < 256; )
  4414. {
  4415. byte = i >> 3;
  4416. if ((i & 0x7) == 0 && bits[byte] == all)
  4417. i += 8;
  4418. else
  4419. {
  4420. cbit = (bits[byte] >> (i & 0x7)) & 0x1;
  4421. if (cbit != bit)
  4422. {
  4423. if (length >= MAX_RANGE_SIZE)
  4424. return FALSE;
  4425. ranges[length] = i;
  4426. length++;
  4427. bit = cbit;
  4428. all = -cbit;
  4429. }
  4430. i++;
  4431. }
  4432. }
  4433. if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
  4434. {
  4435. if (length >= MAX_RANGE_SIZE)
  4436. return FALSE;
  4437. ranges[length] = 256;
  4438. length++;
  4439. }
  4440. if (length < 0 || length > 4)
  4441. return FALSE;
  4442. bit = bits[0] & 0x1;
  4443. if (invert) bit ^= 0x1;
  4444. /* No character is accepted. */
  4445. if (length == 0 && bit == 0)
  4446. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  4447. switch(length)
  4448. {
  4449. case 0:
  4450. /* When bit != 0, all characters are accepted. */
  4451. return TRUE;
  4452. case 1:
  4453. add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  4454. return TRUE;
  4455. case 2:
  4456. if (ranges[0] + 1 != ranges[1])
  4457. {
  4458. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  4459. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  4460. }
  4461. else
  4462. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  4463. return TRUE;
  4464. case 3:
  4465. if (bit != 0)
  4466. {
  4467. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
  4468. if (ranges[0] + 1 != ranges[1])
  4469. {
  4470. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  4471. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  4472. }
  4473. else
  4474. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  4475. return TRUE;
  4476. }
  4477. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  4478. if (ranges[1] + 1 != ranges[2])
  4479. {
  4480. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
  4481. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
  4482. }
  4483. else
  4484. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  4485. return TRUE;
  4486. case 4:
  4487. if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
  4488. && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
  4489. && (ranges[1] & (ranges[2] - ranges[0])) == 0
  4490. && is_powerof2(ranges[2] - ranges[0]))
  4491. {
  4492. SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
  4493. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
  4494. if (ranges[2] + 1 != ranges[3])
  4495. {
  4496. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
  4497. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
  4498. }
  4499. else
  4500. add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
  4501. return TRUE;
  4502. }
  4503. if (bit != 0)
  4504. {
  4505. i = 0;
  4506. if (ranges[0] + 1 != ranges[1])
  4507. {
  4508. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  4509. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  4510. i = ranges[0];
  4511. }
  4512. else
  4513. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  4514. if (ranges[2] + 1 != ranges[3])
  4515. {
  4516. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
  4517. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
  4518. }
  4519. else
  4520. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
  4521. return TRUE;
  4522. }
  4523. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  4524. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  4525. if (ranges[1] + 1 != ranges[2])
  4526. {
  4527. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
  4528. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
  4529. }
  4530. else
  4531. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  4532. return TRUE;
  4533. default:
  4534. SLJIT_UNREACHABLE();
  4535. return FALSE;
  4536. }
  4537. }
  4538. static void check_anynewline(compiler_common *common)
  4539. {
  4540. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  4541. DEFINE_COMPILER;
  4542. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4543. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  4544. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  4545. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  4546. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
  4547. #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4548. #ifdef COMPILE_PCRE8
  4549. if (common->utf)
  4550. {
  4551. #endif
  4552. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4553. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  4554. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  4555. #ifdef COMPILE_PCRE8
  4556. }
  4557. #endif
  4558. #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  4559. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  4560. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  4561. }
  4562. static void check_hspace(compiler_common *common)
  4563. {
  4564. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  4565. DEFINE_COMPILER;
  4566. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4567. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
  4568. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  4569. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
  4570. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4571. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
  4572. #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4573. #ifdef COMPILE_PCRE8
  4574. if (common->utf)
  4575. {
  4576. #endif
  4577. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4578. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  4579. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4580. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  4581. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4582. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  4583. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  4584. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  4585. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  4586. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4587. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  4588. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4589. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  4590. #ifdef COMPILE_PCRE8
  4591. }
  4592. #endif
  4593. #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  4594. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  4595. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  4596. }
  4597. static void check_vspace(compiler_common *common)
  4598. {
  4599. /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
  4600. DEFINE_COMPILER;
  4601. sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
  4602. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  4603. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  4604. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  4605. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
  4606. #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4607. #ifdef COMPILE_PCRE8
  4608. if (common->utf)
  4609. {
  4610. #endif
  4611. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  4612. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  4613. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  4614. #ifdef COMPILE_PCRE8
  4615. }
  4616. #endif
  4617. #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  4618. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  4619. sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
  4620. }
  4621. static void do_casefulcmp(compiler_common *common)
  4622. {
  4623. DEFINE_COMPILER;
  4624. struct sljit_jump *jump;
  4625. struct sljit_label *label;
  4626. int char1_reg;
  4627. int char2_reg;
  4628. if (sljit_get_register_index(TMP3) < 0)
  4629. {
  4630. char1_reg = STR_END;
  4631. char2_reg = STACK_TOP;
  4632. }
  4633. else
  4634. {
  4635. char1_reg = TMP3;
  4636. char2_reg = RETURN_ADDR;
  4637. }
  4638. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4639. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  4640. if (char1_reg == STR_END)
  4641. {
  4642. OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  4643. OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  4644. }
  4645. if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  4646. {
  4647. label = LABEL();
  4648. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  4649. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4650. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  4651. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4652. JUMPTO(SLJIT_NOT_ZERO, label);
  4653. JUMPHERE(jump);
  4654. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4655. }
  4656. else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  4657. {
  4658. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  4659. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4660. label = LABEL();
  4661. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  4662. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4663. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  4664. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4665. JUMPTO(SLJIT_NOT_ZERO, label);
  4666. JUMPHERE(jump);
  4667. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4668. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4669. }
  4670. else
  4671. {
  4672. label = LABEL();
  4673. OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  4674. OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  4675. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  4676. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4677. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  4678. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4679. JUMPTO(SLJIT_NOT_ZERO, label);
  4680. JUMPHERE(jump);
  4681. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4682. }
  4683. if (char1_reg == STR_END)
  4684. {
  4685. OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  4686. OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  4687. }
  4688. sljit_emit_fast_return(compiler, TMP1, 0);
  4689. }
  4690. static void do_caselesscmp(compiler_common *common)
  4691. {
  4692. DEFINE_COMPILER;
  4693. struct sljit_jump *jump;
  4694. struct sljit_label *label;
  4695. int char1_reg = STR_END;
  4696. int char2_reg;
  4697. int lcc_table;
  4698. int opt_type = 0;
  4699. if (sljit_get_register_index(TMP3) < 0)
  4700. {
  4701. char2_reg = STACK_TOP;
  4702. lcc_table = STACK_LIMIT;
  4703. }
  4704. else
  4705. {
  4706. char2_reg = RETURN_ADDR;
  4707. lcc_table = TMP3;
  4708. }
  4709. if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  4710. opt_type = 1;
  4711. else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  4712. opt_type = 2;
  4713. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4714. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  4715. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
  4716. if (char2_reg == STACK_TOP)
  4717. {
  4718. OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  4719. OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  4720. }
  4721. OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
  4722. if (opt_type == 1)
  4723. {
  4724. label = LABEL();
  4725. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  4726. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4727. }
  4728. else if (opt_type == 2)
  4729. {
  4730. OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  4731. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4732. label = LABEL();
  4733. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  4734. sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  4735. }
  4736. else
  4737. {
  4738. label = LABEL();
  4739. OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  4740. OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  4741. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  4742. }
  4743. #ifndef COMPILE_PCRE8
  4744. jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
  4745. #endif
  4746. OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
  4747. #ifndef COMPILE_PCRE8
  4748. JUMPHERE(jump);
  4749. jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
  4750. #endif
  4751. OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
  4752. #ifndef COMPILE_PCRE8
  4753. JUMPHERE(jump);
  4754. #endif
  4755. if (opt_type == 0)
  4756. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4757. jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  4758. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  4759. JUMPTO(SLJIT_NOT_ZERO, label);
  4760. JUMPHERE(jump);
  4761. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  4762. if (opt_type == 2)
  4763. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  4764. if (char2_reg == STACK_TOP)
  4765. {
  4766. OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  4767. OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  4768. }
  4769. OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  4770. sljit_emit_fast_return(compiler, TMP1, 0);
  4771. }
  #if defined SUPPORT_UTF && defined SUPPORT_UCP

  /* Caseless comparison of two UTF strings using the Unicode property tables.
  This is an ordinary C function because doing the same work in generated code
  would be ineffective.

  Characters are decoded pairwise from src1 and src2. A pair matches when the
  characters are equal, when the src1 character is the "other case" of the
  src2 character, or when it is listed in the caseless set of the src2
  character.

  Arguments:
    src1, end1  first string and its end
    src2, end2  second string and its end

  Returns: the position reached in src2 when all of src1 has been matched,
           NULL on the first mismatch,
           (pcre_uchar*)1 when src2 is exhausted before src1. */
  static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
  {
  sljit_u32 first, second;
  const ucd_record *props;
  const sljit_u32 *set;

  while (src1 < end1)
    {
    if (src2 >= end2)
      return (pcre_uchar*)1;

    GETCHARINC(first, src1);
    GETCHARINC(second, src2);
    props = GET_UCD(second);

    if (first == second || first == second + props->other_case)
      continue;

    /* Walk the caseless set until "first" is found; reaching a larger entry
    means it is not a member. */
    set = PRIV(ucd_caseless_sets) + props->caseset;
    while (first != *set)
      {
      if (first < *set)
        return NULL;
      set++;
      }
    }

  return src2;
  }

  #endif /* SUPPORT_UTF && SUPPORT_UCP */
  4799. static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
  4800. compare_context *context, jump_list **backtracks)
  4801. {
  4802. DEFINE_COMPILER;
  4803. unsigned int othercasebit = 0;
  4804. pcre_uchar *othercasechar = NULL;
  4805. #ifdef SUPPORT_UTF
  4806. int utflength;
  4807. #endif
  4808. if (caseless && char_has_othercase(common, cc))
  4809. {
  4810. othercasebit = char_get_othercase_bit(common, cc);
  4811. SLJIT_ASSERT(othercasebit);
  4812. /* Extracting bit difference info. */
  4813. #if defined COMPILE_PCRE8
  4814. othercasechar = cc + (othercasebit >> 8);
  4815. othercasebit &= 0xff;
  4816. #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  4817. /* Note that this code only handles characters in the BMP. If there
  4818. ever are characters outside the BMP whose othercase differs in only one
  4819. bit from itself (there currently are none), this code will need to be
  4820. revised for COMPILE_PCRE32. */
  4821. othercasechar = cc + (othercasebit >> 9);
  4822. if ((othercasebit & 0x100) != 0)
  4823. othercasebit = (othercasebit & 0xff) << 8;
  4824. else
  4825. othercasebit &= 0xff;
  4826. #endif /* COMPILE_PCRE[8|16|32] */
  4827. }
  4828. if (context->sourcereg == -1)
  4829. {
  4830. #if defined COMPILE_PCRE8
  4831. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  4832. if (context->length >= 4)
  4833. OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4834. else if (context->length >= 2)
  4835. OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4836. else
  4837. #endif
  4838. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4839. #elif defined COMPILE_PCRE16
  4840. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  4841. if (context->length >= 4)
  4842. OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4843. else
  4844. #endif
  4845. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4846. #elif defined COMPILE_PCRE32
  4847. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4848. #endif /* COMPILE_PCRE[8|16|32] */
  4849. context->sourcereg = TMP2;
  4850. }
  4851. #ifdef SUPPORT_UTF
  4852. utflength = 1;
  4853. if (common->utf && HAS_EXTRALEN(*cc))
  4854. utflength += GET_EXTRALEN(*cc);
  4855. do
  4856. {
  4857. #endif
  4858. context->length -= IN_UCHARS(1);
  4859. #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
  4860. /* Unaligned read is supported. */
  4861. if (othercasebit != 0 && othercasechar == cc)
  4862. {
  4863. context->c.asuchars[context->ucharptr] = *cc | othercasebit;
  4864. context->oc.asuchars[context->ucharptr] = othercasebit;
  4865. }
  4866. else
  4867. {
  4868. context->c.asuchars[context->ucharptr] = *cc;
  4869. context->oc.asuchars[context->ucharptr] = 0;
  4870. }
  4871. context->ucharptr++;
  4872. #if defined COMPILE_PCRE8
  4873. if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
  4874. #else
  4875. if (context->ucharptr >= 2 || context->length == 0)
  4876. #endif
  4877. {
  4878. if (context->length >= 4)
  4879. OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4880. else if (context->length >= 2)
  4881. OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4882. #if defined COMPILE_PCRE8
  4883. else if (context->length >= 1)
  4884. OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4885. #endif /* COMPILE_PCRE8 */
  4886. context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
  4887. switch(context->ucharptr)
  4888. {
  4889. case 4 / sizeof(pcre_uchar):
  4890. if (context->oc.asint != 0)
  4891. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
  4892. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
  4893. break;
  4894. case 2 / sizeof(pcre_uchar):
  4895. if (context->oc.asushort != 0)
  4896. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
  4897. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
  4898. break;
  4899. #ifdef COMPILE_PCRE8
  4900. case 1:
  4901. if (context->oc.asbyte != 0)
  4902. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
  4903. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
  4904. break;
  4905. #endif
  4906. default:
  4907. SLJIT_UNREACHABLE();
  4908. break;
  4909. }
  4910. context->ucharptr = 0;
  4911. }
  4912. #else
  4913. /* Unaligned read is unsupported or in 32 bit mode. */
  4914. if (context->length >= 1)
  4915. OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
  4916. context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
  4917. if (othercasebit != 0 && othercasechar == cc)
  4918. {
  4919. OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
  4920. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
  4921. }
  4922. else
  4923. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
  4924. #endif
  4925. cc++;
  4926. #ifdef SUPPORT_UTF
  4927. utflength--;
  4928. }
  4929. while (utflength > 0);
  4930. #endif
  4931. return cc;
  4932. }
  4933. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* Both macros keep a register biased by a known constant so that a following
  compare can use a small immediate. They emit an ADD or SUB only when the
  requested bias differs from the current one and then record the new bias.
  Each expands to more than one statement and evaluates "value" several times,
  so use them only as a full statement with a side-effect free argument.

  SET_TYPE_OFFSET adjusts typereg / typeoffset. */
  #define SET_TYPE_OFFSET(value) \
    if ((value) < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    else if ((value) > typeoffset) \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    typeoffset = (value);

  /* SET_CHAR_OFFSET adjusts TMP1 / charoffset. */
  #define SET_CHAR_OFFSET(value) \
    if ((value) < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
    else if ((value) > charoffset) \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    charoffset = (value);
  4952. static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
  4953. static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
  4954. {
  4955. DEFINE_COMPILER;
  4956. jump_list *found = NULL;
  4957. jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
  4958. sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
  4959. struct sljit_jump *jump = NULL;
  4960. pcre_uchar *ccbegin;
  4961. int compares, invertcmp, numberofcmps;
  4962. #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
  4963. BOOL utf = common->utf;
  4964. #endif
  4965. #ifdef SUPPORT_UCP
  4966. BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
  4967. BOOL charsaved = FALSE;
  4968. int typereg = TMP1;
  4969. const sljit_u32 *other_cases;
  4970. sljit_uw typeoffset;
  4971. #endif
  4972. /* Scanning the necessary info. */
  4973. cc++;
  4974. ccbegin = cc;
  4975. compares = 0;
  4976. if (cc[-1] & XCL_MAP)
  4977. {
  4978. min = 0;
  4979. cc += 32 / sizeof(pcre_uchar);
  4980. }
  4981. while (*cc != XCL_END)
  4982. {
  4983. compares++;
  4984. if (*cc == XCL_SINGLE)
  4985. {
  4986. cc ++;
  4987. GETCHARINCTEST(c, cc);
  4988. if (c > max) max = c;
  4989. if (c < min) min = c;
  4990. #ifdef SUPPORT_UCP
  4991. needschar = TRUE;
  4992. #endif
  4993. }
  4994. else if (*cc == XCL_RANGE)
  4995. {
  4996. cc ++;
  4997. GETCHARINCTEST(c, cc);
  4998. if (c < min) min = c;
  4999. GETCHARINCTEST(c, cc);
  5000. if (c > max) max = c;
  5001. #ifdef SUPPORT_UCP
  5002. needschar = TRUE;
  5003. #endif
  5004. }
  5005. #ifdef SUPPORT_UCP
  5006. else
  5007. {
  5008. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  5009. cc++;
  5010. if (*cc == PT_CLIST)
  5011. {
  5012. other_cases = PRIV(ucd_caseless_sets) + cc[1];
  5013. while (*other_cases != NOTACHAR)
  5014. {
  5015. if (*other_cases > max) max = *other_cases;
  5016. if (*other_cases < min) min = *other_cases;
  5017. other_cases++;
  5018. }
  5019. }
  5020. else
  5021. {
  5022. max = READ_CHAR_MAX;
  5023. min = 0;
  5024. }
  5025. switch(*cc)
  5026. {
  5027. case PT_ANY:
  5028. /* Any either accepts everything or ignored. */
  5029. if (cc[-1] == XCL_PROP)
  5030. {
  5031. compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
  5032. if (list == backtracks)
  5033. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5034. return;
  5035. }
  5036. break;
  5037. case PT_LAMP:
  5038. case PT_GC:
  5039. case PT_PC:
  5040. case PT_ALNUM:
  5041. needstype = TRUE;
  5042. break;
  5043. case PT_SC:
  5044. needsscript = TRUE;
  5045. break;
  5046. case PT_SPACE:
  5047. case PT_PXSPACE:
  5048. case PT_WORD:
  5049. case PT_PXGRAPH:
  5050. case PT_PXPRINT:
  5051. case PT_PXPUNCT:
  5052. needstype = TRUE;
  5053. needschar = TRUE;
  5054. break;
  5055. case PT_CLIST:
  5056. case PT_UCNC:
  5057. needschar = TRUE;
  5058. break;
  5059. default:
  5060. SLJIT_UNREACHABLE();
  5061. break;
  5062. }
  5063. cc += 2;
  5064. }
  5065. #endif
  5066. }
  5067. SLJIT_ASSERT(compares > 0);
  5068. /* We are not necessary in utf mode even in 8 bit mode. */
  5069. cc = ccbegin;
  5070. read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
  5071. if ((cc[-1] & XCL_HASPROP) == 0)
  5072. {
  5073. if ((cc[-1] & XCL_MAP) != 0)
  5074. {
  5075. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5076. if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
  5077. {
  5078. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  5079. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  5080. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  5081. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  5082. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  5083. add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
  5084. }
  5085. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5086. JUMPHERE(jump);
  5087. cc += 32 / sizeof(pcre_uchar);
  5088. }
  5089. else
  5090. {
  5091. OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
  5092. add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
  5093. }
  5094. }
  5095. else if ((cc[-1] & XCL_MAP) != 0)
  5096. {
  5097. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  5098. #ifdef SUPPORT_UCP
  5099. charsaved = TRUE;
  5100. #endif
  5101. if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
  5102. {
  5103. #ifdef COMPILE_PCRE8
  5104. jump = NULL;
  5105. if (common->utf)
  5106. #endif
  5107. jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5108. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  5109. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  5110. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  5111. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  5112. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  5113. add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
  5114. #ifdef COMPILE_PCRE8
  5115. if (common->utf)
  5116. #endif
  5117. JUMPHERE(jump);
  5118. }
  5119. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  5120. cc += 32 / sizeof(pcre_uchar);
  5121. }
  5122. #ifdef SUPPORT_UCP
  5123. if (needstype || needsscript)
  5124. {
  5125. if (needschar && !charsaved)
  5126. OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  5127. #ifdef COMPILE_PCRE32
  5128. if (!common->utf)
  5129. {
  5130. jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
  5131. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  5132. JUMPHERE(jump);
  5133. }
  5134. #endif
  5135. OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  5136. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  5137. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  5138. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  5139. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  5140. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  5141. OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  5142. /* Before anything else, we deal with scripts. */
  5143. if (needsscript)
  5144. {
  5145. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
  5146. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
  5147. ccbegin = cc;
  5148. while (*cc != XCL_END)
  5149. {
  5150. if (*cc == XCL_SINGLE)
  5151. {
  5152. cc ++;
  5153. GETCHARINCTEST(c, cc);
  5154. }
  5155. else if (*cc == XCL_RANGE)
  5156. {
  5157. cc ++;
  5158. GETCHARINCTEST(c, cc);
  5159. GETCHARINCTEST(c, cc);
  5160. }
  5161. else
  5162. {
  5163. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  5164. cc++;
  5165. if (*cc == PT_SC)
  5166. {
  5167. compares--;
  5168. invertcmp = (compares == 0 && list != backtracks);
  5169. if (cc[-1] == XCL_NOTPROP)
  5170. invertcmp ^= 0x1;
  5171. jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
  5172. add_jump(compiler, compares > 0 ? list : backtracks, jump);
  5173. }
  5174. cc += 2;
  5175. }
  5176. }
  5177. cc = ccbegin;
  5178. }
  5179. if (needschar)
  5180. {
  5181. OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  5182. }
  5183. if (needstype)
  5184. {
  5185. if (!needschar)
  5186. {
  5187. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  5188. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
  5189. }
  5190. else
  5191. {
  5192. OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  5193. OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
  5194. typereg = RETURN_ADDR;
  5195. }
  5196. }
  5197. }
  5198. #endif
  5199. /* Generating code. */
  5200. charoffset = 0;
  5201. numberofcmps = 0;
  5202. #ifdef SUPPORT_UCP
  5203. typeoffset = 0;
  5204. #endif
  5205. while (*cc != XCL_END)
  5206. {
  5207. compares--;
  5208. invertcmp = (compares == 0 && list != backtracks);
  5209. jump = NULL;
  5210. if (*cc == XCL_SINGLE)
  5211. {
  5212. cc ++;
  5213. GETCHARINCTEST(c, cc);
  5214. if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
  5215. {
  5216. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5217. OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5218. numberofcmps++;
  5219. }
  5220. else if (numberofcmps > 0)
  5221. {
  5222. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5223. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  5224. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5225. numberofcmps = 0;
  5226. }
  5227. else
  5228. {
  5229. jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5230. numberofcmps = 0;
  5231. }
  5232. }
  5233. else if (*cc == XCL_RANGE)
  5234. {
  5235. cc ++;
  5236. GETCHARINCTEST(c, cc);
  5237. SET_CHAR_OFFSET(c);
  5238. GETCHARINCTEST(c, cc);
  5239. if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
  5240. {
  5241. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5242. OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  5243. numberofcmps++;
  5244. }
  5245. else if (numberofcmps > 0)
  5246. {
  5247. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5248. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  5249. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5250. numberofcmps = 0;
  5251. }
  5252. else
  5253. {
  5254. jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
  5255. numberofcmps = 0;
  5256. }
  5257. }
  5258. #ifdef SUPPORT_UCP
  5259. else
  5260. {
  5261. SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
  5262. if (*cc == XCL_NOTPROP)
  5263. invertcmp ^= 0x1;
  5264. cc++;
  5265. switch(*cc)
  5266. {
  5267. case PT_ANY:
  5268. if (!invertcmp)
  5269. jump = JUMP(SLJIT_JUMP);
  5270. break;
  5271. case PT_LAMP:
  5272. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
  5273. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5274. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
  5275. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5276. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
  5277. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  5278. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5279. break;
  5280. case PT_GC:
  5281. c = PRIV(ucp_typerange)[(int)cc[1] * 2];
  5282. SET_TYPE_OFFSET(c);
  5283. jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
  5284. break;
  5285. case PT_PC:
  5286. jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
  5287. break;
  5288. case PT_SC:
  5289. compares++;
  5290. /* Do nothing. */
  5291. break;
  5292. case PT_SPACE:
  5293. case PT_PXSPACE:
  5294. SET_CHAR_OFFSET(9);
  5295. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
  5296. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5297. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
  5298. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5299. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
  5300. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5301. SET_TYPE_OFFSET(ucp_Zl);
  5302. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
  5303. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  5304. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5305. break;
  5306. case PT_WORD:
  5307. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
  5308. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5309. /* Fall through. */
  5310. case PT_ALNUM:
  5311. SET_TYPE_OFFSET(ucp_Ll);
  5312. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  5313. OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  5314. SET_TYPE_OFFSET(ucp_Nd);
  5315. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  5316. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  5317. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5318. break;
  5319. case PT_CLIST:
  5320. other_cases = PRIV(ucd_caseless_sets) + cc[1];
  5321. /* At least three characters are required.
  5322. Otherwise this case would be handled by the normal code path. */
  5323. SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
  5324. SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
  5325. /* Optimizing character pairs, if their difference is power of 2. */
  5326. if (is_powerof2(other_cases[1] ^ other_cases[0]))
  5327. {
  5328. if (charoffset == 0)
  5329. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  5330. else
  5331. {
  5332. OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
  5333. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  5334. }
  5335. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
  5336. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5337. other_cases += 2;
  5338. }
  5339. else if (is_powerof2(other_cases[2] ^ other_cases[1]))
  5340. {
  5341. if (charoffset == 0)
  5342. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
  5343. else
  5344. {
  5345. OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
  5346. OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
  5347. }
  5348. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
  5349. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5350. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
  5351. OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  5352. other_cases += 3;
  5353. }
  5354. else
  5355. {
  5356. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
  5357. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5358. }
  5359. while (*other_cases != NOTACHAR)
  5360. {
  5361. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
  5362. OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
  5363. }
  5364. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5365. break;
  5366. case PT_UCNC:
  5367. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
  5368. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  5369. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
  5370. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5371. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
  5372. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5373. SET_CHAR_OFFSET(0xa0);
  5374. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
  5375. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  5376. SET_CHAR_OFFSET(0);
  5377. OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
  5378. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
  5379. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5380. break;
  5381. case PT_PXGRAPH:
  5382. /* C and Z groups are the farthest two groups. */
  5383. SET_TYPE_OFFSET(ucp_Ll);
  5384. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
  5385. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
  5386. jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
  5387. /* In case of ucp_Cf, we overwrite the result. */
  5388. SET_CHAR_OFFSET(0x2066);
  5389. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
  5390. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5391. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
  5392. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5393. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
  5394. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5395. JUMPHERE(jump);
  5396. jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
  5397. break;
  5398. case PT_PXPRINT:
  5399. /* C and Z groups are the farthest two groups. */
  5400. SET_TYPE_OFFSET(ucp_Ll);
  5401. OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
  5402. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
  5403. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
  5404. OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
  5405. jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
  5406. /* In case of ucp_Cf, we overwrite the result. */
  5407. SET_CHAR_OFFSET(0x2066);
  5408. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
  5409. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5410. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
  5411. OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  5412. JUMPHERE(jump);
  5413. jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
  5414. break;
  5415. case PT_PXPUNCT:
  5416. SET_TYPE_OFFSET(ucp_Sc);
  5417. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
  5418. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  5419. SET_CHAR_OFFSET(0);
  5420. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
  5421. OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
  5422. SET_TYPE_OFFSET(ucp_Pc);
  5423. OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
  5424. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
  5425. jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
  5426. break;
  5427. default:
  5428. SLJIT_UNREACHABLE();
  5429. break;
  5430. }
  5431. cc += 2;
  5432. }
  5433. #endif
  5434. if (jump != NULL)
  5435. add_jump(compiler, compares > 0 ? list : backtracks, jump);
  5436. }
  5437. if (found != NULL)
  5438. set_jumps(found, LABEL());
  5439. }
  5440. #undef SET_TYPE_OFFSET
  5441. #undef SET_CHAR_OFFSET
  5442. #endif
  /* Emits sljit code for one simple assertion opcode.

     common     - compiler state. This function reads nltype, newline, nlmin,
                  nlmax, mode, endonly and (under SUPPORT_UTF) utf, and adds
                  fast-call jumps to the wordboundary / anynewline lists.
     type       - the opcode to compile; one of the cases of the switch below.
     cc         - position in the compiled pattern. It is only read (through
                  GET) for OP_REVERSE.
     backtracks - jump list that collects the exits taken when a check emitted
                  here does not hold.

     Returns cc unchanged for every opcode except OP_REVERSE, which returns
     cc + LINK_SIZE. An opcode without a case reaches SLJIT_UNREACHABLE(). */
  5443. static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
  5444. {
  5445. DEFINE_COMPILER;
  5446. int length;
  /* Forward jumps that are resolved with JUMPHERE inside a single case; the
     slots are reused from one case to the next. */
  5447. struct sljit_jump *jump[4];
  5448. #ifdef SUPPORT_UTF
  5449. struct sljit_label *label;
  5450. #endif /* SUPPORT_UTF */
  5451. switch(type)
  5452. {
  /* Backtrack unless STR_PTR equals the 'begin' field of jit_arguments. */
  5453. case OP_SOD:
  5454. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5455. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5456. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  5457. return cc;
  /* Backtrack unless STR_PTR equals the 'str' field of jit_arguments. */
  5458. case OP_SOM:
  5459. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5460. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  5461. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  5462. return cc;
  /* Both opcodes fast-call the routine whose call sites are gathered in
     common->wordboundary and then branch on the zero flag:
     OP_NOT_WORD_BOUNDARY backtracks on NOT_ZERO, OP_WORD_BOUNDARY on ZERO.
     NOTE(review): sljit_set_current_flags presumably tells sljit that the
     called routine left a valid zero flag - confirm against the sljit API. */
  5463. case OP_NOT_WORD_BOUNDARY:
  5464. case OP_WORD_BOUNDARY:
  5465. add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  5466. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  5467. add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  5468. return cc;
  5469. case OP_EODN:
  5470. /* Requires rather complex checks. */
  /* jump[0]: STR_PTR >= STR_END skips all newline checks and lands directly
     on the final check_partial() call. */
  5471. jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* Fixed newline whose value exceeds 255: it is compared below as two
     units, (newline >> 8) & 0xff followed by newline & 0xff. */
  5472. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  5473. {
  /* TMP2 = STR_PTR + IN_UCHARS(2), TMP1 = unit at STR_PTR. */
  5474. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  5475. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  /* In JIT_COMPILE mode exactly two units must remain before STR_END. */
  5476. if (common->mode == JIT_COMPILE)
  5477. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
  5478. else
  5479. {
  /* Any other mode: jump[1] goes straight to the two-unit comparison when
     exactly two units remain. Otherwise
       TMP2 = (TMP2 < STR_END) | (TMP1 != first newline unit)
     and a non-zero result backtracks. Falling through therefore means TMP2
     is past STR_END and the first unit matched: check_partial(common, TRUE)
     is emitted, followed by an unconditional backtrack.
     NOTE(review): the JUMP(SLJIT_NOT_EQUAL) after the flag-setting OR relies
     on NOT_EQUAL testing the same flag as NOT_ZERO - confirm in sljit. */
  5480. jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
  5481. OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
  5482. OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
  5483. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  5484. OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
  5485. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
  5486. check_partial(common, TRUE);
  5487. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5488. JUMPHERE(jump[1]);
  5489. }
  /* TMP2 = unit at STR_PTR + IN_UCHARS(1); both units must match. */
  5490. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  5491. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  5492. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  5493. }
  /* Fixed newline with a value of at most 255: STR_PTR + IN_UCHARS(1) must
     equal STR_END and the unit at STR_PTR must equal common->newline. */
  5494. else if (common->nltype == NLTYPE_FIXED)
  5495. {
  5496. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5497. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  5498. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
  5499. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  5500. }
  /* Remaining newline types. */
  5501. else
  5502. {
  /* jump[1]: the unit at STR_PTR is not CHAR_CR. */
  5503. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  5504. jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* CHAR_CR seen. Compare STR_PTR + IN_UCHARS(2) with STR_END once and
     branch three ways on the result: greater -> jump[2] (accepted, joins
     the end of this block), less -> backtrack, equal -> fall through. */
  5505. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  5506. OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
  5507. jump[2] = JUMP(SLJIT_GREATER);
  5508. add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
  5509. /* Equal. */
  /* The unit after the CR must be CHAR_NL (jump[3], accepted); anything
     else backtracks unconditionally. */
  5510. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  5511. jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  5512. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5513. JUMPHERE(jump[1]);
  /* Not a CR. For NLTYPE_ANYCRLF: backtrack if STR_PTR + IN_UCHARS(1) is
     still below STR_END, or if the unit (still in TMP1) is not CHAR_NL. */
  5514. if (common->nltype == NLTYPE_ANYCRLF)
  5515. {
  5516. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5517. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
  5518. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  5519. }
  5520. else
  5521. {
  /* Otherwise: save STR_PTR in the LOCALS1 stack slot, call
     read_char_range(), backtrack unless STR_PTR now equals STR_END, then
     fast-call the routine gathered in common->anynewline and backtrack on
     a ZERO flag. STR_PTR is restored from LOCALS1 afterwards.
     NOTE(review): assumes read_char_range advances STR_PTR past the
     character it reads - confirm in its definition. */
  5522. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
  5523. read_char_range(common, common->nlmin, common->nlmax, TRUE);
  5524. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
  5525. add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  5526. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  5527. add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  5528. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  5529. }
  5530. JUMPHERE(jump[2]);
  5531. JUMPHERE(jump[3]);
  5532. }
  /* All accepting paths of OP_EODN meet here. */
  5533. JUMPHERE(jump[0]);
  5534. check_partial(common, FALSE);
  5535. return cc;
  /* Backtrack while STR_PTR is below STR_END, then emit check_partial(). */
  5536. case OP_EOD:
  5537. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  5538. check_partial(common, FALSE);
  5539. return cc;
  /* Backtrack if the 'noteol' byte of jit_arguments is non-zero. After that,
     either reuse the OP_EODN code (common->endonly is false) or emit the
     same end-of-subject test as OP_EOD. */
  5540. case OP_DOLL:
  5541. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  5542. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  5543. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  5544. if (!common->endonly)
  5545. compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  5546. else
  5547. {
  5548. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  5549. check_partial(common, FALSE);
  5550. }
  5551. return cc;
  5552. case OP_DOLLM:
  /* jump[1]: STR_PTR is below STR_END, so a newline has to be checked.
     Otherwise only the 'noteol' byte is tested, check_partial() is emitted
     and jump[0] skips to the end of the case. */
  5553. jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  5554. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  5555. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  5556. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  5557. check_partial(common, FALSE);
  5558. jump[0] = JUMP(SLJIT_JUMP);
  5559. JUMPHERE(jump[1]);
  /* Fixed newline with a value above 255, compared as two units. */
  5560. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  5561. {
  5562. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  5563. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  /* In JIT_COMPILE mode, backtrack when STR_PTR + IN_UCHARS(2) runs past
     STR_END. */
  5564. if (common->mode == JIT_COMPILE)
  5565. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
  5566. else
  5567. {
  /* Any other mode: jump[1] continues with the full comparison when both
     units are available. If not, the first unit must match; then
     check_partial(common, TRUE) is emitted, followed by an unconditional
     backtrack. */
  5568. jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  5569. /* STR_PTR = STR_END - IN_UCHARS(1) */
  5570. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  5571. check_partial(common, TRUE);
  5572. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  5573. JUMPHERE(jump[1]);
  5574. }
  /* TMP2 = unit at STR_PTR + IN_UCHARS(1); both units must match. */
  5575. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  5576. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  5577. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  5578. }
  5579. else
  5580. {
  /* Every other newline type is delegated to peek_char() followed by
     check_newlinechar().
     NOTE(review): presumably peek_char leaves STR_PTR unchanged and the
     FALSE argument makes a non-newline character backtrack - confirm in
     the helper definitions. */
  5581. peek_char(common, common->nlmax);
  5582. check_newlinechar(common, common->nltype, backtracks, FALSE);
  5583. }
  5584. JUMPHERE(jump[0]);
  5585. return cc;
  /* Backtrack if STR_PTR is above the 'begin' field of jit_arguments, or if
     the 'notbol' byte is non-zero. TMP2 keeps the ARGUMENTS pointer until
     the second load overwrites it. */
  5586. case OP_CIRC:
  5587. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  5588. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  5589. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  5590. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  5591. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  5592. return cc;
  5593. case OP_CIRCM:
  /* TMP1 = 'begin'. jump[1]: STR_PTR is above 'begin', so the preceding
     text is inspected. Otherwise only 'notbol' is tested and jump[0] skips
     to the end of the case. */
  5594. OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  5595. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  5596. jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
  5597. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  5598. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  5599. jump[0] = JUMP(SLJIT_JUMP);
  5600. JUMPHERE(jump[1]);
  /* Backtrack when STR_PTR has reached STR_END. */
  5601. add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  /* Fixed newline with a value above 255: backtrack if
     STR_PTR - IN_UCHARS(2) lies before 'begin' (TMP1), then compare the
     units at offsets -2 and -1 with the two newline units. The load into
     TMP1 overwrites 'begin', which is no longer needed. */
  5602. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  5603. {
  5604. OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
  5605. add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
  5606. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  5607. OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  5608. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
  5609. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
  5610. }
  5611. else
  5612. {
  /* Every other newline type: skip_char_back(), read_char_range(), then
     check_newlinechar().
     NOTE(review): presumably the step back and the read cancel out, so
     STR_PTR ends where it started - confirm in the helper definitions. */
  5613. skip_char_back(common);
  5614. read_char_range(common, common->nlmin, common->nlmax, TRUE);
  5615. check_newlinechar(common, common->nltype, backtracks, FALSE);
  5616. }
  5617. JUMPHERE(jump[0]);
  5618. return cc;
  5619. case OP_REVERSE:
  /* 'length' is read from the pattern with GET(cc, 0). A length of zero
     emits no code. */
  5620. length = GET(cc, 0);
  5621. if (length == 0)
  5622. return cc + LINK_SIZE;
  5623. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  5624. #ifdef SUPPORT_UTF
  5625. if (common->utf)
  5626. {
  /* UTF: emit a loop with TMP3 = 'begin' and TMP2 = length. Each pass
     backtracks if STR_PTR <= begin, calls skip_char_back() and decrements
     TMP2, repeating until TMP2 reaches zero. */
  5627. OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5628. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
  5629. label = LABEL();
  5630. add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
  5631. skip_char_back(common);
  5632. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  5633. JUMPTO(SLJIT_NOT_ZERO, label);
  5634. }
  5635. else
  5636. #endif
  5637. {
  /* Non-UTF: subtract IN_UCHARS(length) from STR_PTR in one step and
     backtrack if the result lies before 'begin'. */
  5638. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  5639. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
  5640. add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
  5641. }
  5642. check_start_used_ptr(common);
  5643. return cc + LINK_SIZE;
  5644. }
  /* Every case above returns, so this is reached only for an opcode without
     a case. */
  5645. SLJIT_UNREACHABLE();
  5646. return cc;
  5647. }
  5648. static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
  5649. {
  5650. DEFINE_COMPILER;
  5651. int length;
  5652. unsigned int c, oc, bit;
  5653. compare_context context;
  5654. struct sljit_jump *jump[3];
  5655. jump_list *end_list;
  5656. #ifdef SUPPORT_UTF
  5657. struct sljit_label *label;
  5658. #ifdef SUPPORT_UCP
  5659. pcre_uchar propdata[5];
  5660. #endif
  5661. #endif /* SUPPORT_UTF */
  5662. switch(type)
  5663. {
  5664. case OP_NOT_DIGIT:
  5665. case OP_DIGIT:
  5666. /* Digits are usually 0-9, so it is worth to optimize them. */
  5667. if (check_str_ptr)
  5668. detect_partial_match(common, backtracks);
  5669. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  5670. if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
  5671. read_char7_type(common, type == OP_NOT_DIGIT);
  5672. else
  5673. #endif
  5674. read_char8_type(common, type == OP_NOT_DIGIT);
  5675. /* Flip the starting bit in the negative case. */
  5676. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  5677. add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  5678. return cc;
  5679. case OP_NOT_WHITESPACE:
  5680. case OP_WHITESPACE:
  5681. if (check_str_ptr)
  5682. detect_partial_match(common, backtracks);
  5683. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  5684. if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
  5685. read_char7_type(common, type == OP_NOT_WHITESPACE);
  5686. else
  5687. #endif
  5688. read_char8_type(common, type == OP_NOT_WHITESPACE);
  5689. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  5690. add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  5691. return cc;
  5692. case OP_NOT_WORDCHAR:
  5693. case OP_WORDCHAR:
  5694. if (check_str_ptr)
  5695. detect_partial_match(common, backtracks);
  5696. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  5697. if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
  5698. read_char7_type(common, type == OP_NOT_WORDCHAR);
  5699. else
  5700. #endif
  5701. read_char8_type(common, type == OP_NOT_WORDCHAR);
  5702. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  5703. add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  5704. return cc;
  5705. case OP_ANY:
  5706. if (check_str_ptr)
  5707. detect_partial_match(common, backtracks);
  5708. read_char_range(common, common->nlmin, common->nlmax, TRUE);
  5709. if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  5710. {
  5711. jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
  5712. end_list = NULL;
  5713. if (common->mode != JIT_PARTIAL_HARD_COMPILE)
  5714. add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  5715. else
  5716. check_str_end(common, &end_list);
  5717. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5718. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
  5719. set_jumps(end_list, LABEL());
  5720. JUMPHERE(jump[0]);
  5721. }
  5722. else
  5723. check_newlinechar(common, common->nltype, backtracks, TRUE);
  5724. return cc;
  5725. case OP_ALLANY:
  5726. if (check_str_ptr)
  5727. detect_partial_match(common, backtracks);
  5728. #ifdef SUPPORT_UTF
  5729. if (common->utf)
  5730. {
  5731. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5732. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5733. #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
  5734. #if defined COMPILE_PCRE8
  5735. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  5736. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  5737. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5738. #elif defined COMPILE_PCRE16
  5739. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  5740. OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  5741. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  5742. OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  5743. OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  5744. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5745. #endif
  5746. JUMPHERE(jump[0]);
  5747. #endif /* COMPILE_PCRE[8|16] */
  5748. return cc;
  5749. }
  5750. #endif
  5751. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5752. return cc;
  5753. case OP_ANYBYTE:
  5754. if (check_str_ptr)
  5755. detect_partial_match(common, backtracks);
  5756. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5757. return cc;
  5758. #ifdef SUPPORT_UTF
  5759. #ifdef SUPPORT_UCP
  5760. case OP_NOTPROP:
  5761. case OP_PROP:
  5762. propdata[0] = XCL_HASPROP;
  5763. propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  5764. propdata[2] = cc[0];
  5765. propdata[3] = cc[1];
  5766. propdata[4] = XCL_END;
  5767. if (check_str_ptr)
  5768. detect_partial_match(common, backtracks);
  5769. compile_xclass_matchingpath(common, propdata, backtracks);
  5770. return cc + 2;
  5771. #endif
  5772. #endif
  5773. case OP_ANYNL:
  5774. if (check_str_ptr)
  5775. detect_partial_match(common, backtracks);
  5776. read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  5777. jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  5778. /* We don't need to handle soft partial matching case. */
  5779. end_list = NULL;
  5780. if (common->mode != JIT_PARTIAL_HARD_COMPILE)
  5781. add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  5782. else
  5783. check_str_end(common, &end_list);
  5784. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5785. jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  5786. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5787. jump[2] = JUMP(SLJIT_JUMP);
  5788. JUMPHERE(jump[0]);
  5789. check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  5790. set_jumps(end_list, LABEL());
  5791. JUMPHERE(jump[1]);
  5792. JUMPHERE(jump[2]);
  5793. return cc;
  5794. case OP_NOT_HSPACE:
  5795. case OP_HSPACE:
  5796. if (check_str_ptr)
  5797. detect_partial_match(common, backtracks);
  5798. read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  5799. add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  5800. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  5801. add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  5802. return cc;
  5803. case OP_NOT_VSPACE:
  5804. case OP_VSPACE:
  5805. if (check_str_ptr)
  5806. detect_partial_match(common, backtracks);
  5807. read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  5808. add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  5809. sljit_set_current_flags(compiler, SLJIT_SET_Z);
  5810. add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  5811. return cc;
  5812. #ifdef SUPPORT_UCP
  5813. case OP_EXTUNI:
  5814. if (check_str_ptr)
  5815. detect_partial_match(common, backtracks);
  5816. read_char(common);
  5817. add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  5818. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  5819. /* Optimize register allocation: use a real register. */
  5820. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  5821. OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
  5822. label = LABEL();
  5823. jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  5824. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  5825. read_char(common);
  5826. add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  5827. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  5828. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
  5829. OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  5830. OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  5831. OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  5832. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  5833. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  5834. JUMPTO(SLJIT_NOT_ZERO, label);
  5835. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  5836. JUMPHERE(jump[0]);
  5837. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  5838. if (common->mode == JIT_PARTIAL_HARD_COMPILE)
  5839. {
  5840. jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  5841. /* Since we successfully read a char above, partial matching must occure. */
  5842. check_partial(common, TRUE);
  5843. JUMPHERE(jump[0]);
  5844. }
  5845. return cc;
  5846. #endif
  5847. case OP_CHAR:
  5848. case OP_CHARI:
  5849. length = 1;
  5850. #ifdef SUPPORT_UTF
  5851. if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
  5852. #endif
  5853. if (common->mode == JIT_COMPILE && check_str_ptr
  5854. && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
  5855. {
  5856. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
  5857. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
  5858. context.length = IN_UCHARS(length);
  5859. context.sourcereg = -1;
  5860. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  5861. context.ucharptr = 0;
  5862. #endif
  5863. return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
  5864. }
  5865. if (check_str_ptr)
  5866. detect_partial_match(common, backtracks);
  5867. #ifdef SUPPORT_UTF
  5868. if (common->utf)
  5869. {
  5870. GETCHAR(c, cc);
  5871. }
  5872. else
  5873. #endif
  5874. c = *cc;
  5875. if (type == OP_CHAR || !char_has_othercase(common, cc))
  5876. {
  5877. read_char_range(common, c, c, FALSE);
  5878. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  5879. return cc + length;
  5880. }
  5881. oc = char_othercase(common, c);
  5882. read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  5883. bit = c ^ oc;
  5884. if (is_powerof2(bit))
  5885. {
  5886. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
  5887. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
  5888. return cc + length;
  5889. }
  5890. jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
  5891. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  5892. JUMPHERE(jump[0]);
  5893. return cc + length;
  5894. case OP_NOT:
  5895. case OP_NOTI:
  5896. if (check_str_ptr)
  5897. detect_partial_match(common, backtracks);
  5898. length = 1;
  5899. #ifdef SUPPORT_UTF
  5900. if (common->utf)
  5901. {
  5902. #ifdef COMPILE_PCRE8
  5903. c = *cc;
  5904. if (c < 128)
  5905. {
  5906. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  5907. if (type == OP_NOT || !char_has_othercase(common, cc))
  5908. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  5909. else
  5910. {
  5911. /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
  5912. OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
  5913. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
  5914. }
  5915. /* Skip the variable-length character. */
  5916. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  5917. jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  5918. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  5919. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  5920. JUMPHERE(jump[0]);
  5921. return cc + 1;
  5922. }
  5923. else
  5924. #endif /* COMPILE_PCRE8 */
  5925. {
  5926. GETCHARLEN(c, cc, length);
  5927. }
  5928. }
  5929. else
  5930. #endif /* SUPPORT_UTF */
  5931. c = *cc;
  5932. if (type == OP_NOT || !char_has_othercase(common, cc))
  5933. {
  5934. read_char_range(common, c, c, TRUE);
  5935. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  5936. }
  5937. else
  5938. {
  5939. oc = char_othercase(common, c);
  5940. read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
  5941. bit = c ^ oc;
  5942. if (is_powerof2(bit))
  5943. {
  5944. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
  5945. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
  5946. }
  5947. else
  5948. {
  5949. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
  5950. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  5951. }
  5952. }
  5953. return cc + length;
  5954. case OP_CLASS:
  5955. case OP_NCLASS:
  5956. if (check_str_ptr)
  5957. detect_partial_match(common, backtracks);
  5958. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  5959. bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  5960. read_char_range(common, 0, bit, type == OP_NCLASS);
  5961. #else
  5962. read_char_range(common, 0, 255, type == OP_NCLASS);
  5963. #endif
  5964. if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
  5965. return cc + 32 / sizeof(pcre_uchar);
  5966. #if defined SUPPORT_UTF && defined COMPILE_PCRE8
  5967. jump[0] = NULL;
  5968. if (common->utf)
  5969. {
  5970. jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
  5971. if (type == OP_CLASS)
  5972. {
  5973. add_jump(compiler, backtracks, jump[0]);
  5974. jump[0] = NULL;
  5975. }
  5976. }
  5977. #elif !defined COMPILE_PCRE8
  5978. jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  5979. if (type == OP_CLASS)
  5980. {
  5981. add_jump(compiler, backtracks, jump[0]);
  5982. jump[0] = NULL;
  5983. }
  5984. #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
  5985. OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  5986. OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  5987. OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  5988. OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  5989. OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  5990. add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  5991. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  5992. if (jump[0] != NULL)
  5993. JUMPHERE(jump[0]);
  5994. #endif
  5995. return cc + 32 / sizeof(pcre_uchar);
  5996. #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  5997. case OP_XCLASS:
  5998. if (check_str_ptr)
  5999. detect_partial_match(common, backtracks);
  6000. compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  6001. return cc + GET(cc, 0) - 1;
  6002. #endif
  6003. }
  6004. SLJIT_UNREACHABLE();
  6005. return cc;
  6006. }
  6007. static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
  6008. {
  6009. /* This function consumes at least one input character. */
  6010. /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
  6011. DEFINE_COMPILER;
  6012. pcre_uchar *ccbegin = cc;
  6013. compare_context context;
  6014. int size;
  6015. context.length = 0;
  6016. do
  6017. {
  6018. if (cc >= ccend)
  6019. break;
  6020. if (*cc == OP_CHAR)
  6021. {
  6022. size = 1;
  6023. #ifdef SUPPORT_UTF
  6024. if (common->utf && HAS_EXTRALEN(cc[1]))
  6025. size += GET_EXTRALEN(cc[1]);
  6026. #endif
  6027. }
  6028. else if (*cc == OP_CHARI)
  6029. {
  6030. size = 1;
  6031. #ifdef SUPPORT_UTF
  6032. if (common->utf)
  6033. {
  6034. if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
  6035. size = 0;
  6036. else if (HAS_EXTRALEN(cc[1]))
  6037. size += GET_EXTRALEN(cc[1]);
  6038. }
  6039. else
  6040. #endif
  6041. if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
  6042. size = 0;
  6043. }
  6044. else
  6045. size = 0;
  6046. cc += 1 + size;
  6047. context.length += IN_UCHARS(size);
  6048. }
  6049. while (size > 0 && context.length <= 128);
  6050. cc = ccbegin;
  6051. if (context.length > 0)
  6052. {
  6053. /* We have a fixed-length byte sequence. */
  6054. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
  6055. add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
  6056. context.sourcereg = -1;
  6057. #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  6058. context.ucharptr = 0;
  6059. #endif
  6060. do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
  6061. return cc;
  6062. }
  6063. /* A non-fixed length character will be checked if length == 0. */
  6064. return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
  6065. }
  6066. /* Forward definitions. */
  6067. static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
  6068. static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
  6069. #define PUSH_BACKTRACK(size, ccstart, error) \
  6070. do \
  6071. { \
  6072. backtrack = sljit_alloc_memory(compiler, (size)); \
  6073. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
  6074. return error; \
  6075. memset(backtrack, 0, size); \
  6076. backtrack->prev = parent->top; \
  6077. backtrack->cc = (ccstart); \
  6078. parent->top = backtrack; \
  6079. } \
  6080. while (0)
  /* Variant of PUSH_BACKTRACK for functions returning void: allocates a
     zero-filled backtrack record of `size` bytes, stores `ccstart` in it and
     pushes it on top of parent's backtrack list. When the compiler reports an
     error after the allocation, the enclosing function simply returns.

     The macro relies on the local names `compiler`, `backtrack` and `parent`
     being in scope, and it evaluates `size` twice. */
  #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
    do \
      { \
      backtrack = sljit_alloc_memory(compiler, (size)); \
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
        return; \
      memset(backtrack, 0, (size)); \
      backtrack->cc = (ccstart); \
      backtrack->prev = parent->top; \
      parent->top = backtrack; \
      } \
    while (0)

  /* Views the local `backtrack` pointer as a pointer to the given record type. */
  #define BACKTRACK_AS(type) ((type *)backtrack)
  6094. static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
  6095. {
  6096. /* The OVECTOR offset goes to TMP2. */
  6097. DEFINE_COMPILER;
  6098. int count = GET2(cc, 1 + IMM2_SIZE);
  6099. pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
  6100. unsigned int offset;
  6101. jump_list *found = NULL;
  6102. SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
  6103. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  6104. count--;
  6105. while (count-- > 0)
  6106. {
  6107. offset = GET2(slot, 0) << 1;
  6108. GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  6109. add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  6110. slot += common->name_entry_size;
  6111. }
  6112. offset = GET2(slot, 0) << 1;
  6113. GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
  6114. if (backtracks != NULL && !common->jscript_compat)
  6115. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
  6116. set_jumps(found, LABEL());
  6117. }
  6118. static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
  6119. {
  6120. DEFINE_COMPILER;
  6121. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  6122. int offset = 0;
  6123. struct sljit_jump *jump = NULL;
  6124. struct sljit_jump *partial;
  6125. struct sljit_jump *nopartial;
  6126. if (ref)
  6127. {
  6128. offset = GET2(cc, 1) << 1;
  6129. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  6130. /* OVECTOR(1) contains the "string begin - 1" constant. */
  6131. if (withchecks && !common->jscript_compat)
  6132. add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  6133. }
  6134. else
  6135. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  6136. #if defined SUPPORT_UTF && defined SUPPORT_UCP
  6137. if (common->utf && *cc == OP_REFI)
  6138. {
  6139. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
  6140. if (ref)
  6141. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  6142. else
  6143. OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  6144. if (withchecks)
  6145. jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);
  6146. /* No free saved registers so save data on stack. */
  6147. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  6148. OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
  6149. OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
  6150. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  6151. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6152. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  6153. if (common->mode == JIT_COMPILE)
  6154. add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  6155. else
  6156. {
  6157. OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  6158. add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
  6159. nopartial = JUMP(SLJIT_NOT_EQUAL);
  6160. OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  6161. check_partial(common, FALSE);
  6162. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  6163. JUMPHERE(nopartial);
  6164. }
  6165. }
  6166. else
  6167. #endif /* SUPPORT_UTF && SUPPORT_UCP */
  6168. {
  6169. if (ref)
  6170. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  6171. else
  6172. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  6173. if (withchecks)
  6174. jump = JUMP(SLJIT_ZERO);
  6175. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  6176. partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  6177. if (common->mode == JIT_COMPILE)
  6178. add_jump(compiler, backtracks, partial);
  6179. add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  6180. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  6181. if (common->mode != JIT_COMPILE)
  6182. {
  6183. nopartial = JUMP(SLJIT_JUMP);
  6184. JUMPHERE(partial);
  6185. /* TMP2 -= STR_END - STR_PTR */
  6186. OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
  6187. OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
  6188. partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
  6189. OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
  6190. add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
  6191. add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  6192. JUMPHERE(partial);
  6193. check_partial(common, FALSE);
  6194. add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  6195. JUMPHERE(nopartial);
  6196. }
  6197. }
  6198. if (jump != NULL)
  6199. {
  6200. if (emptyfail)
  6201. add_jump(compiler, backtracks, jump);
  6202. else
  6203. JUMPHERE(jump);
  6204. }
  6205. }
  6206. static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  6207. {
  6208. DEFINE_COMPILER;
  6209. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  6210. backtrack_common *backtrack;
  6211. pcre_uchar type;
  6212. int offset = 0;
  6213. struct sljit_label *label;
  6214. struct sljit_jump *zerolength;
  6215. struct sljit_jump *jump = NULL;
  6216. pcre_uchar *ccbegin = cc;
  6217. int min = 0, max = 0;
  6218. BOOL minimize;
  6219. PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
  6220. if (ref)
  6221. offset = GET2(cc, 1) << 1;
  6222. else
  6223. cc += IMM2_SIZE;
  6224. type = cc[1 + IMM2_SIZE];
  6225. SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
  6226. minimize = (type & 0x1) != 0;
  6227. switch(type)
  6228. {
  6229. case OP_CRSTAR:
  6230. case OP_CRMINSTAR:
  6231. min = 0;
  6232. max = 0;
  6233. cc += 1 + IMM2_SIZE + 1;
  6234. break;
  6235. case OP_CRPLUS:
  6236. case OP_CRMINPLUS:
  6237. min = 1;
  6238. max = 0;
  6239. cc += 1 + IMM2_SIZE + 1;
  6240. break;
  6241. case OP_CRQUERY:
  6242. case OP_CRMINQUERY:
  6243. min = 0;
  6244. max = 1;
  6245. cc += 1 + IMM2_SIZE + 1;
  6246. break;
  6247. case OP_CRRANGE:
  6248. case OP_CRMINRANGE:
  6249. min = GET2(cc, 1 + IMM2_SIZE + 1);
  6250. max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  6251. cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  6252. break;
  6253. default:
  6254. SLJIT_UNREACHABLE();
  6255. break;
  6256. }
  6257. if (!minimize)
  6258. {
  6259. if (min == 0)
  6260. {
  6261. allocate_stack(common, 2);
  6262. if (ref)
  6263. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  6264. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6265. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  6266. /* Temporary release of STR_PTR. */
  6267. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  6268. /* Handles both invalid and empty cases. Since the minimum repeat,
  6269. is zero the invalid case is basically the same as an empty case. */
  6270. if (ref)
  6271. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  6272. else
  6273. {
  6274. compile_dnref_search(common, ccbegin, NULL);
  6275. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  6276. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
  6277. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  6278. }
  6279. /* Restore if not zero length. */
  6280. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  6281. }
  6282. else
  6283. {
  6284. allocate_stack(common, 1);
  6285. if (ref)
  6286. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  6287. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6288. if (ref)
  6289. {
  6290. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  6291. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  6292. }
  6293. else
  6294. {
  6295. compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
  6296. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  6297. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
  6298. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  6299. }
  6300. }
  6301. if (min > 1 || max > 1)
  6302. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
  6303. label = LABEL();
  6304. if (!ref)
  6305. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  6306. compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
  6307. if (min > 1 || max > 1)
  6308. {
  6309. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
  6310. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  6311. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
  6312. if (min > 1)
  6313. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
  6314. if (max > 1)
  6315. {
  6316. jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
  6317. allocate_stack(common, 1);
  6318. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6319. JUMPTO(SLJIT_JUMP, label);
  6320. JUMPHERE(jump);
  6321. }
  6322. }
  6323. if (max == 0)
  6324. {
  6325. /* Includes min > 1 case as well. */
  6326. allocate_stack(common, 1);
  6327. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6328. JUMPTO(SLJIT_JUMP, label);
  6329. }
  6330. JUMPHERE(zerolength);
  6331. BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
  6332. count_match(common);
  6333. return cc;
  6334. }
  6335. allocate_stack(common, ref ? 2 : 3);
  6336. if (ref)
  6337. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  6338. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6339. if (type != OP_CRMINSTAR)
  6340. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  6341. if (min == 0)
  6342. {
  6343. /* Handles both invalid and empty cases. Since the minimum repeat,
  6344. is zero the invalid case is basically the same as an empty case. */
  6345. if (ref)
  6346. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  6347. else
  6348. {
  6349. compile_dnref_search(common, ccbegin, NULL);
  6350. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  6351. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
  6352. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  6353. }
  6354. /* Length is non-zero, we can match real repeats. */
  6355. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6356. jump = JUMP(SLJIT_JUMP);
  6357. }
  6358. else
  6359. {
  6360. if (ref)
  6361. {
  6362. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  6363. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  6364. }
  6365. else
  6366. {
  6367. compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
  6368. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  6369. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
  6370. zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  6371. }
  6372. }
  6373. BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
  6374. if (max > 0)
  6375. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
  6376. if (!ref)
  6377. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  6378. compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
  6379. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6380. if (min > 1)
  6381. {
  6382. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  6383. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  6384. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  6385. CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  6386. }
  6387. else if (max > 0)
  6388. OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
  6389. if (jump != NULL)
  6390. JUMPHERE(jump);
  6391. JUMPHERE(zerolength);
  6392. count_match(common);
  6393. return cc;
  6394. }
  6395. static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  6396. {
  6397. DEFINE_COMPILER;
  6398. backtrack_common *backtrack;
  6399. recurse_entry *entry = common->entries;
  6400. recurse_entry *prev = NULL;
  6401. sljit_sw start = GET(cc, 1);
  6402. pcre_uchar *start_cc;
  6403. BOOL needs_control_head;
  6404. PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
  6405. /* Inlining simple patterns. */
  6406. if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
  6407. {
  6408. start_cc = common->start + start;
  6409. compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  6410. BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  6411. return cc + 1 + LINK_SIZE;
  6412. }
  6413. while (entry != NULL)
  6414. {
  6415. if (entry->start == start)
  6416. break;
  6417. prev = entry;
  6418. entry = entry->next;
  6419. }
  6420. if (entry == NULL)
  6421. {
  6422. entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  6423. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  6424. return NULL;
  6425. entry->next = NULL;
  6426. entry->entry = NULL;
  6427. entry->calls = NULL;
  6428. entry->start = start;
  6429. if (prev != NULL)
  6430. prev->next = entry;
  6431. else
  6432. common->entries = entry;
  6433. }
  6434. if (common->has_set_som && common->mark_ptr != 0)
  6435. {
  6436. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  6437. allocate_stack(common, 2);
  6438. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  6439. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  6440. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  6441. }
  6442. else if (common->has_set_som || common->mark_ptr != 0)
  6443. {
  6444. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
  6445. allocate_stack(common, 1);
  6446. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  6447. }
  6448. if (entry->entry == NULL)
  6449. add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
  6450. else
  6451. JUMPTO(SLJIT_FAST_CALL, entry->entry);
  6452. /* Leave if the match is failed. */
  6453. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  6454. return cc + 1 + LINK_SIZE;
  6455. }
  6456. static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
  6457. {
  6458. const pcre_uchar *begin = arguments->begin;
  6459. int *offset_vector = arguments->offsets;
  6460. int offset_count = arguments->offset_count;
  6461. int i;
  6462. if (PUBL(callout) == NULL)
  6463. return 0;
  6464. callout_block->version = 2;
  6465. callout_block->callout_data = arguments->callout_data;
  6466. /* Offsets in subject. */
  6467. callout_block->subject_length = arguments->end - arguments->begin;
  6468. callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
  6469. callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
  6470. #if defined COMPILE_PCRE8
  6471. callout_block->subject = (PCRE_SPTR)begin;
  6472. #elif defined COMPILE_PCRE16
  6473. callout_block->subject = (PCRE_SPTR16)begin;
  6474. #elif defined COMPILE_PCRE32
  6475. callout_block->subject = (PCRE_SPTR32)begin;
  6476. #endif
  6477. /* Convert and copy the JIT offset vector to the offset_vector array. */
  6478. callout_block->capture_top = 0;
  6479. callout_block->offset_vector = offset_vector;
  6480. for (i = 2; i < offset_count; i += 2)
  6481. {
  6482. offset_vector[i] = jit_ovector[i] - begin;
  6483. offset_vector[i + 1] = jit_ovector[i + 1] - begin;
  6484. if (jit_ovector[i] >= begin)
  6485. callout_block->capture_top = i;
  6486. }
  6487. callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
  6488. if (offset_count > 0)
  6489. offset_vector[0] = -1;
  6490. if (offset_count > 1)
  6491. offset_vector[1] = -1;
  6492. return (*PUBL(callout))(callout_block);
  6493. }
  6494. /* Aligning to 8 byte. */
  6495. #define CALLOUT_ARG_SIZE \
  6496. (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
  6497. #define CALLOUT_ARG_OFFSET(arg) \
  6498. SLJIT_OFFSETOF(PUBL(callout_block), arg)
  6499. static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  6500. {
  6501. DEFINE_COMPILER;
  6502. backtrack_common *backtrack;
  6503. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  6504. allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
  6505. SLJIT_ASSERT(common->capture_last_ptr != 0);
  6506. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  6507. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  6508. OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
  6509. OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
  6510. /* These pointer sized fields temporarly stores internal variables. */
  6511. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  6512. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
  6513. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
  6514. if (common->mark_ptr != 0)
  6515. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
  6516. OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
  6517. OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
  6518. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
  6519. /* Needed to save important temporary registers. */
  6520. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  6521. /* SLJIT_R0 = arguments */
  6522. OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
  6523. GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
  6524. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
  6525. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  6526. free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
  6527. /* Check return value. */
  6528. OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  6529. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32));
  6530. if (common->forced_quit_label == NULL)
  6531. add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */);
  6532. else
  6533. JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label);
  6534. return cc + 2 + 2 * LINK_SIZE;
  6535. }
  6536. #undef CALLOUT_ARG_SIZE
  6537. #undef CALLOUT_ARG_OFFSET
  6538. static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
  6539. {
  6540. while (TRUE)
  6541. {
  6542. switch (*cc)
  6543. {
  6544. case OP_NOT_WORD_BOUNDARY:
  6545. case OP_WORD_BOUNDARY:
  6546. case OP_CIRC:
  6547. case OP_CIRCM:
  6548. case OP_DOLL:
  6549. case OP_DOLLM:
  6550. case OP_CALLOUT:
  6551. case OP_ALT:
  6552. cc += PRIV(OP_lengths)[*cc];
  6553. break;
  6554. case OP_KET:
  6555. return FALSE;
  6556. default:
  6557. return TRUE;
  6558. }
  6559. }
  6560. }
  6561. static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
  6562. {
  6563. DEFINE_COMPILER;
  6564. int framesize;
  6565. int extrasize;
  6566. BOOL needs_control_head;
  6567. int private_data_ptr;
  6568. backtrack_common altbacktrack;
  6569. pcre_uchar *ccbegin;
  6570. pcre_uchar opcode;
  6571. pcre_uchar bra = OP_BRA;
  6572. jump_list *tmp = NULL;
  6573. jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
  6574. jump_list **found;
  6575. /* Saving previous accept variables. */
  6576. BOOL save_local_exit = common->local_exit;
  6577. BOOL save_positive_assert = common->positive_assert;
  6578. then_trap_backtrack *save_then_trap = common->then_trap;
  6579. struct sljit_label *save_quit_label = common->quit_label;
  6580. struct sljit_label *save_accept_label = common->accept_label;
  6581. jump_list *save_quit = common->quit;
  6582. jump_list *save_positive_assert_quit = common->positive_assert_quit;
  6583. jump_list *save_accept = common->accept;
  6584. struct sljit_jump *jump;
  6585. struct sljit_jump *brajump = NULL;
  6586. /* Assert captures then. */
  6587. common->then_trap = NULL;
  6588. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  6589. {
  6590. SLJIT_ASSERT(!conditional);
  6591. bra = *cc;
  6592. cc++;
  6593. }
  6594. private_data_ptr = PRIVATE_DATA(cc);
  6595. SLJIT_ASSERT(private_data_ptr != 0);
  6596. framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
  6597. backtrack->framesize = framesize;
  6598. backtrack->private_data_ptr = private_data_ptr;
  6599. opcode = *cc;
  6600. SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
  6601. found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
  6602. ccbegin = cc;
  6603. cc += GET(cc, 1);
  6604. if (bra == OP_BRAMINZERO)
  6605. {
  6606. /* This is a braminzero backtrack path. */
  6607. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6608. free_stack(common, 1);
  6609. brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  6610. }
  6611. if (framesize < 0)
  6612. {
  6613. extrasize = 1;
  6614. if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
  6615. extrasize = 0;
  6616. if (needs_control_head)
  6617. extrasize++;
  6618. if (framesize == no_frame)
  6619. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  6620. if (extrasize > 0)
  6621. allocate_stack(common, extrasize);
  6622. if (needs_control_head)
  6623. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  6624. if (extrasize > 0)
  6625. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6626. if (needs_control_head)
  6627. {
  6628. SLJIT_ASSERT(extrasize == 2);
  6629. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  6630. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  6631. }
  6632. }
  6633. else
  6634. {
  6635. extrasize = needs_control_head ? 3 : 2;
  6636. allocate_stack(common, framesize + extrasize);
  6637. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6638. OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  6639. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  6640. if (needs_control_head)
  6641. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  6642. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6643. if (needs_control_head)
  6644. {
  6645. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
  6646. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  6647. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  6648. }
  6649. else
  6650. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  6651. init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  6652. }
  6653. memset(&altbacktrack, 0, sizeof(backtrack_common));
  6654. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6655. {
  6656. /* Negative assert is stronger than positive assert. */
  6657. common->local_exit = TRUE;
  6658. common->quit_label = NULL;
  6659. common->quit = NULL;
  6660. common->positive_assert = FALSE;
  6661. }
  6662. else
  6663. common->positive_assert = TRUE;
  6664. common->positive_assert_quit = NULL;
  6665. while (1)
  6666. {
  6667. common->accept_label = NULL;
  6668. common->accept = NULL;
  6669. altbacktrack.top = NULL;
  6670. altbacktrack.topbacktracks = NULL;
  6671. if (*ccbegin == OP_ALT && extrasize > 0)
  6672. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6673. altbacktrack.cc = ccbegin;
  6674. compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  6675. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  6676. {
  6677. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6678. {
  6679. common->local_exit = save_local_exit;
  6680. common->quit_label = save_quit_label;
  6681. common->quit = save_quit;
  6682. }
  6683. common->positive_assert = save_positive_assert;
  6684. common->then_trap = save_then_trap;
  6685. common->accept_label = save_accept_label;
  6686. common->positive_assert_quit = save_positive_assert_quit;
  6687. common->accept = save_accept;
  6688. return NULL;
  6689. }
  6690. common->accept_label = LABEL();
  6691. if (common->accept != NULL)
  6692. set_jumps(common->accept, common->accept_label);
  6693. /* Reset stack. */
  6694. if (framesize < 0)
  6695. {
  6696. if (framesize == no_frame)
  6697. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6698. else if (extrasize > 0)
  6699. free_stack(common, extrasize);
  6700. if (needs_control_head)
  6701. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  6702. }
  6703. else
  6704. {
  6705. if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
  6706. {
  6707. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  6708. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
  6709. if (needs_control_head)
  6710. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  6711. }
  6712. else
  6713. {
  6714. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6715. if (needs_control_head)
  6716. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
  6717. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  6718. }
  6719. }
  6720. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6721. {
  6722. /* We know that STR_PTR was stored on the top of the stack. */
  6723. if (conditional)
  6724. {
  6725. if (extrasize > 0)
  6726. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
  6727. }
  6728. else if (bra == OP_BRAZERO)
  6729. {
  6730. if (framesize < 0)
  6731. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
  6732. else
  6733. {
  6734. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  6735. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
  6736. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  6737. }
  6738. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  6739. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6740. }
  6741. else if (framesize >= 0)
  6742. {
  6743. /* For OP_BRA and OP_BRAMINZERO. */
  6744. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  6745. }
  6746. }
  6747. add_jump(compiler, found, JUMP(SLJIT_JUMP));
  6748. compile_backtrackingpath(common, altbacktrack.top);
  6749. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  6750. {
  6751. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6752. {
  6753. common->local_exit = save_local_exit;
  6754. common->quit_label = save_quit_label;
  6755. common->quit = save_quit;
  6756. }
  6757. common->positive_assert = save_positive_assert;
  6758. common->then_trap = save_then_trap;
  6759. common->accept_label = save_accept_label;
  6760. common->positive_assert_quit = save_positive_assert_quit;
  6761. common->accept = save_accept;
  6762. return NULL;
  6763. }
  6764. set_jumps(altbacktrack.topbacktracks, LABEL());
  6765. if (*cc != OP_ALT)
  6766. break;
  6767. ccbegin = cc;
  6768. cc += GET(cc, 1);
  6769. }
  6770. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6771. {
  6772. SLJIT_ASSERT(common->positive_assert_quit == NULL);
  6773. /* Makes the check less complicated below. */
  6774. common->positive_assert_quit = common->quit;
  6775. }
  6776. /* None of them matched. */
  6777. if (common->positive_assert_quit != NULL)
  6778. {
  6779. jump = JUMP(SLJIT_JUMP);
  6780. set_jumps(common->positive_assert_quit, LABEL());
  6781. SLJIT_ASSERT(framesize != no_stack);
  6782. if (framesize < 0)
  6783. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  6784. else
  6785. {
  6786. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6787. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  6788. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  6789. }
  6790. JUMPHERE(jump);
  6791. }
  6792. if (needs_control_head)
  6793. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
  6794. if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  6795. {
  6796. /* Assert is failed. */
  6797. if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
  6798. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6799. if (framesize < 0)
  6800. {
  6801. /* The topmost item should be 0. */
  6802. if (bra == OP_BRAZERO)
  6803. {
  6804. if (extrasize == 2)
  6805. free_stack(common, 1);
  6806. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6807. }
  6808. else if (extrasize > 0)
  6809. free_stack(common, extrasize);
  6810. }
  6811. else
  6812. {
  6813. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
  6814. /* The topmost item should be 0. */
  6815. if (bra == OP_BRAZERO)
  6816. {
  6817. free_stack(common, framesize + extrasize - 1);
  6818. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6819. }
  6820. else
  6821. free_stack(common, framesize + extrasize);
  6822. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  6823. }
  6824. jump = JUMP(SLJIT_JUMP);
  6825. if (bra != OP_BRAZERO)
  6826. add_jump(compiler, target, jump);
  6827. /* Assert is successful. */
  6828. set_jumps(tmp, LABEL());
  6829. if (framesize < 0)
  6830. {
  6831. /* We know that STR_PTR was stored on the top of the stack. */
  6832. if (extrasize > 0)
  6833. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
  6834. /* Keep the STR_PTR on the top of the stack. */
  6835. if (bra == OP_BRAZERO)
  6836. {
  6837. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  6838. if (extrasize == 2)
  6839. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  6840. }
  6841. else if (bra == OP_BRAMINZERO)
  6842. {
  6843. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  6844. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6845. }
  6846. }
  6847. else
  6848. {
  6849. if (bra == OP_BRA)
  6850. {
  6851. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  6852. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
  6853. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
  6854. }
  6855. else
  6856. {
  6857. /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
  6858. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
  6859. if (extrasize == 2)
  6860. {
  6861. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6862. if (bra == OP_BRAMINZERO)
  6863. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6864. }
  6865. else
  6866. {
  6867. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
  6868. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
  6869. }
  6870. }
  6871. }
  6872. if (bra == OP_BRAZERO)
  6873. {
  6874. backtrack->matchingpath = LABEL();
  6875. SET_LABEL(jump, backtrack->matchingpath);
  6876. }
  6877. else if (bra == OP_BRAMINZERO)
  6878. {
  6879. JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  6880. JUMPHERE(brajump);
  6881. if (framesize >= 0)
  6882. {
  6883. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6884. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  6885. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
  6886. }
  6887. set_jumps(backtrack->common.topbacktracks, LABEL());
  6888. }
  6889. }
  6890. else
  6891. {
  6892. /* AssertNot is successful. */
  6893. if (framesize < 0)
  6894. {
  6895. if (extrasize > 0)
  6896. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6897. if (bra != OP_BRA)
  6898. {
  6899. if (extrasize == 2)
  6900. free_stack(common, 1);
  6901. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6902. }
  6903. else if (extrasize > 0)
  6904. free_stack(common, extrasize);
  6905. }
  6906. else
  6907. {
  6908. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6909. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
  6910. /* The topmost item should be 0. */
  6911. if (bra != OP_BRA)
  6912. {
  6913. free_stack(common, framesize + extrasize - 1);
  6914. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  6915. }
  6916. else
  6917. free_stack(common, framesize + extrasize);
  6918. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  6919. }
  6920. if (bra == OP_BRAZERO)
  6921. backtrack->matchingpath = LABEL();
  6922. else if (bra == OP_BRAMINZERO)
  6923. {
  6924. JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  6925. JUMPHERE(brajump);
  6926. }
  6927. if (bra != OP_BRA)
  6928. {
  6929. SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
  6930. set_jumps(backtrack->common.topbacktracks, LABEL());
  6931. backtrack->common.topbacktracks = NULL;
  6932. }
  6933. }
  6934. if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  6935. {
  6936. common->local_exit = save_local_exit;
  6937. common->quit_label = save_quit_label;
  6938. common->quit = save_quit;
  6939. }
  6940. common->positive_assert = save_positive_assert;
  6941. common->then_trap = save_then_trap;
  6942. common->accept_label = save_accept_label;
  6943. common->positive_assert_quit = save_positive_assert_quit;
  6944. common->accept = save_accept;
  6945. return cc + 1 + LINK_SIZE;
  6946. }
  6947. static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
  6948. {
  6949. DEFINE_COMPILER;
  6950. int stacksize;
  6951. if (framesize < 0)
  6952. {
  6953. if (framesize == no_frame)
  6954. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  6955. else
  6956. {
  6957. stacksize = needs_control_head ? 1 : 0;
  6958. if (ket != OP_KET || has_alternatives)
  6959. stacksize++;
  6960. if (stacksize > 0)
  6961. free_stack(common, stacksize);
  6962. }
  6963. if (needs_control_head)
  6964. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
  6965. /* TMP2 which is set here used by OP_KETRMAX below. */
  6966. if (ket == OP_KETRMAX)
  6967. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  6968. else if (ket == OP_KETRMIN)
  6969. {
  6970. /* Move the STR_PTR to the private_data_ptr. */
  6971. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
  6972. }
  6973. }
  6974. else
  6975. {
  6976. stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
  6977. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
  6978. if (needs_control_head)
  6979. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  6980. if (ket == OP_KETRMAX)
  6981. {
  6982. /* TMP2 which is set here used by OP_KETRMAX below. */
  6983. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  6984. }
  6985. }
  6986. if (needs_control_head)
  6987. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
  6988. }
  6989. static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
  6990. {
  6991. DEFINE_COMPILER;
  6992. if (common->capture_last_ptr != 0)
  6993. {
  6994. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  6995. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  6996. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  6997. stacksize++;
  6998. }
  6999. if (common->optimized_cbracket[offset >> 1] == 0)
  7000. {
  7001. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  7002. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  7003. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  7004. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7005. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
  7006. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  7007. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  7008. stacksize += 2;
  7009. }
  7010. return stacksize;
  7011. }
  7012. /*
  7013. Handling bracketed expressions is probably the most complex part.
  7014. Stack layout naming characters:
  7015. S - Push the current STR_PTR
  7016. 0 - Push a 0 (NULL)
  7017. A - Push the current STR_PTR. Needed for restoring the STR_PTR
  7018. before the next alternative. Not pushed if there are no alternatives.
  7019. M - Any values pushed by the current alternative. Can be empty, or anything.
  7020. C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
  7021. L - Push the previous local (pointed by localptr) to the stack
  7022. () - optional values stored on the stack
  7023. ()* - optional, can be stored multiple times
  7024. The following list shows the regular expression templates, their PCRE byte codes
  7025. and stack layout supported by pcre-sljit.
  7026. (?:) OP_BRA | OP_KET A M
  7027. () OP_CBRA | OP_KET C M
  7028. (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
  7029. OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
  7030. (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
  7031. OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
  7032. ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
  7033. OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
  7034. ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
  7035. OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
  7036. (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
  7037. (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
  7038. ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
  7039. ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
  7040. (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
  7041. OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
  7042. (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
  7043. OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
  7044. ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
  7045. OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
  7046. ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
  7047. OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
  7048. Stack layout naming characters:
  7049. A - Push the alternative index (starting from 0) on the stack.
  7050. Not pushed if there are no alternatives.
  7051. M - Any values pushed by the current alternative. Can be empty, or anything.
  7052. The next list shows the possible content of a bracket:
  7053. (|) OP_*BRA | OP_ALT ... M A
  7054. (?()|) OP_*COND | OP_ALT M A
  7055. (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
  7056. (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
  7057. Or nothing, if trace is unnecessary
  7058. */
  7059. static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  7060. {
  7061. DEFINE_COMPILER;
  7062. backtrack_common *backtrack;
  7063. pcre_uchar opcode;
  7064. int private_data_ptr = 0;
  7065. int offset = 0;
  7066. int i, stacksize;
  7067. int repeat_ptr = 0, repeat_length = 0;
  7068. int repeat_type = 0, repeat_count = 0;
  7069. pcre_uchar *ccbegin;
  7070. pcre_uchar *matchingpath;
  7071. pcre_uchar *slot;
  7072. pcre_uchar bra = OP_BRA;
  7073. pcre_uchar ket;
  7074. assert_backtrack *assert;
  7075. BOOL has_alternatives;
  7076. BOOL needs_control_head = FALSE;
  7077. struct sljit_jump *jump;
  7078. struct sljit_jump *skip;
  7079. struct sljit_label *rmax_label = NULL;
  7080. struct sljit_jump *braminzero = NULL;
  7081. PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
  7082. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  7083. {
  7084. bra = *cc;
  7085. cc++;
  7086. opcode = *cc;
  7087. }
  7088. opcode = *cc;
  7089. ccbegin = cc;
  7090. matchingpath = bracketend(cc) - 1 - LINK_SIZE;
  7091. ket = *matchingpath;
  7092. if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  7093. {
  7094. repeat_ptr = PRIVATE_DATA(matchingpath);
  7095. repeat_length = PRIVATE_DATA(matchingpath + 1);
  7096. repeat_type = PRIVATE_DATA(matchingpath + 2);
  7097. repeat_count = PRIVATE_DATA(matchingpath + 3);
  7098. SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  7099. if (repeat_type == OP_UPTO)
  7100. ket = OP_KETRMAX;
  7101. if (repeat_type == OP_MINUPTO)
  7102. ket = OP_KETRMIN;
  7103. }
  7104. if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  7105. {
  7106. /* Drop this bracket_backtrack. */
  7107. parent->top = backtrack->prev;
  7108. return matchingpath + 1 + LINK_SIZE + repeat_length;
  7109. }
  7110. matchingpath = ccbegin + 1 + LINK_SIZE;
  7111. SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
  7112. SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
  7113. cc += GET(cc, 1);
  7114. has_alternatives = *cc == OP_ALT;
  7115. if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  7116. has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  7117. if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  7118. opcode = OP_SCOND;
  7119. if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  7120. opcode = OP_ONCE;
  7121. if (opcode == OP_CBRA || opcode == OP_SCBRA)
  7122. {
  7123. /* Capturing brackets has a pre-allocated space. */
  7124. offset = GET2(ccbegin, 1 + LINK_SIZE);
  7125. if (common->optimized_cbracket[offset] == 0)
  7126. {
  7127. private_data_ptr = OVECTOR_PRIV(offset);
  7128. offset <<= 1;
  7129. }
  7130. else
  7131. {
  7132. offset <<= 1;
  7133. private_data_ptr = OVECTOR(offset);
  7134. }
  7135. BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  7136. matchingpath += IMM2_SIZE;
  7137. }
  7138. else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  7139. {
  7140. /* Other brackets simply allocate the next entry. */
  7141. private_data_ptr = PRIVATE_DATA(ccbegin);
  7142. SLJIT_ASSERT(private_data_ptr != 0);
  7143. BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  7144. if (opcode == OP_ONCE)
  7145. BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  7146. }
  7147. /* Instructions before the first alternative. */
  7148. stacksize = 0;
  7149. if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  7150. stacksize++;
  7151. if (bra == OP_BRAZERO)
  7152. stacksize++;
  7153. if (stacksize > 0)
  7154. allocate_stack(common, stacksize);
  7155. stacksize = 0;
  7156. if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  7157. {
  7158. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  7159. stacksize++;
  7160. }
  7161. if (bra == OP_BRAZERO)
  7162. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  7163. if (bra == OP_BRAMINZERO)
  7164. {
  7165. /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  7166. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  7167. if (ket != OP_KETRMIN)
  7168. {
  7169. free_stack(common, 1);
  7170. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  7171. }
  7172. else
  7173. {
  7174. if (opcode == OP_ONCE || opcode >= OP_SBRA)
  7175. {
  7176. jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  7177. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  7178. /* Nothing stored during the first run. */
  7179. skip = JUMP(SLJIT_JUMP);
  7180. JUMPHERE(jump);
  7181. /* Checking zero-length iteration. */
  7182. if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
  7183. {
  7184. /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
  7185. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7186. }
  7187. else
  7188. {
  7189. /* Except when the whole stack frame must be saved. */
  7190. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7191. braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
  7192. }
  7193. JUMPHERE(skip);
  7194. }
  7195. else
  7196. {
  7197. jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  7198. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  7199. JUMPHERE(jump);
  7200. }
  7201. }
  7202. }
  7203. if (repeat_type != 0)
  7204. {
  7205. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  7206. if (repeat_type == OP_EXACT)
  7207. rmax_label = LABEL();
  7208. }
  7209. if (ket == OP_KETRMIN)
  7210. BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  7211. if (ket == OP_KETRMAX)
  7212. {
  7213. rmax_label = LABEL();
  7214. if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
  7215. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  7216. }
  7217. /* Handling capturing brackets and alternatives. */
  7218. if (opcode == OP_ONCE)
  7219. {
  7220. stacksize = 0;
  7221. if (needs_control_head)
  7222. {
  7223. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  7224. stacksize++;
  7225. }
  7226. if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
  7227. {
  7228. /* Neither capturing brackets nor recursions are found in the block. */
  7229. if (ket == OP_KETRMIN)
  7230. {
  7231. stacksize += 2;
  7232. if (!needs_control_head)
  7233. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7234. }
  7235. else
  7236. {
  7237. if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
  7238. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  7239. if (ket == OP_KETRMAX || has_alternatives)
  7240. stacksize++;
  7241. }
  7242. if (stacksize > 0)
  7243. allocate_stack(common, stacksize);
  7244. stacksize = 0;
  7245. if (needs_control_head)
  7246. {
  7247. stacksize++;
  7248. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  7249. }
  7250. if (ket == OP_KETRMIN)
  7251. {
  7252. if (needs_control_head)
  7253. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7254. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  7255. if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
  7256. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
  7257. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
  7258. }
  7259. else if (ket == OP_KETRMAX || has_alternatives)
  7260. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  7261. }
  7262. else
  7263. {
  7264. if (ket != OP_KET || has_alternatives)
  7265. stacksize++;
  7266. stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
  7267. allocate_stack(common, stacksize);
  7268. if (needs_control_head)
  7269. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  7270. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7271. OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  7272. stacksize = needs_control_head ? 1 : 0;
  7273. if (ket != OP_KET || has_alternatives)
  7274. {
  7275. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  7276. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  7277. stacksize++;
  7278. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  7279. }
  7280. else
  7281. {
  7282. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  7283. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  7284. }
  7285. init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
  7286. }
  7287. }
  7288. else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  7289. {
  7290. /* Saving the previous values. */
  7291. if (common->optimized_cbracket[offset >> 1] != 0)
  7292. {
  7293. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
  7294. allocate_stack(common, 2);
  7295. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7296. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
  7297. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  7298. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  7299. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  7300. }
  7301. else
  7302. {
  7303. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7304. allocate_stack(common, 1);
  7305. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  7306. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  7307. }
  7308. }
  7309. else if (opcode == OP_SBRA || opcode == OP_SCOND)
  7310. {
  7311. /* Saving the previous value. */
  7312. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7313. allocate_stack(common, 1);
  7314. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  7315. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  7316. }
  7317. else if (has_alternatives)
  7318. {
  7319. /* Pushing the starting string pointer. */
  7320. allocate_stack(common, 1);
  7321. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  7322. }
  7323. /* Generating code for the first alternative. */
  7324. if (opcode == OP_COND || opcode == OP_SCOND)
  7325. {
  7326. if (*matchingpath == OP_CREF)
  7327. {
  7328. SLJIT_ASSERT(has_alternatives);
  7329. add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
  7330. CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  7331. matchingpath += 1 + IMM2_SIZE;
  7332. }
  7333. else if (*matchingpath == OP_DNCREF)
  7334. {
  7335. SLJIT_ASSERT(has_alternatives);
  7336. i = GET2(matchingpath, 1 + IMM2_SIZE);
  7337. slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
  7338. OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  7339. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  7340. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
  7341. slot += common->name_entry_size;
  7342. i--;
  7343. while (i-- > 0)
  7344. {
  7345. OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
  7346. OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
  7347. slot += common->name_entry_size;
  7348. }
  7349. OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  7350. add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
  7351. matchingpath += 1 + 2 * IMM2_SIZE;
  7352. }
  7353. else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
  7354. {
  7355. /* Never has other case. */
  7356. BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
  7357. SLJIT_ASSERT(!has_alternatives);
  7358. if (*matchingpath == OP_FAIL)
  7359. stacksize = 0;
  7360. else if (*matchingpath == OP_RREF)
  7361. {
  7362. stacksize = GET2(matchingpath, 1);
  7363. if (common->currententry == NULL)
  7364. stacksize = 0;
  7365. else if (stacksize == RREF_ANY)
  7366. stacksize = 1;
  7367. else if (common->currententry->start == 0)
  7368. stacksize = stacksize == 0;
  7369. else
  7370. stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
  7371. if (stacksize != 0)
  7372. matchingpath += 1 + IMM2_SIZE;
  7373. }
  7374. else
  7375. {
  7376. if (common->currententry == NULL || common->currententry->start == 0)
  7377. stacksize = 0;
  7378. else
  7379. {
  7380. stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
  7381. slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
  7382. i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
  7383. while (stacksize > 0)
  7384. {
  7385. if ((int)GET2(slot, 0) == i)
  7386. break;
  7387. slot += common->name_entry_size;
  7388. stacksize--;
  7389. }
  7390. }
  7391. if (stacksize != 0)
  7392. matchingpath += 1 + 2 * IMM2_SIZE;
  7393. }
  7394. /* The stacksize == 0 is a common "else" case. */
  7395. if (stacksize == 0)
  7396. {
  7397. if (*cc == OP_ALT)
  7398. {
  7399. matchingpath = cc + 1 + LINK_SIZE;
  7400. cc += GET(cc, 1);
  7401. }
  7402. else
  7403. matchingpath = cc;
  7404. }
  7405. }
  7406. else
  7407. {
  7408. SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
  7409. /* Similar code as PUSH_BACKTRACK macro. */
  7410. assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
  7411. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  7412. return NULL;
  7413. memset(assert, 0, sizeof(assert_backtrack));
  7414. assert->common.cc = matchingpath;
  7415. BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
  7416. matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
  7417. }
  7418. }
  7419. compile_matchingpath(common, matchingpath, cc, backtrack);
  7420. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  7421. return NULL;
  7422. if (opcode == OP_ONCE)
  7423. match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
  7424. stacksize = 0;
  7425. if (repeat_type == OP_MINUPTO)
  7426. {
  7427. /* We need to preserve the counter. TMP2 will be used below. */
  7428. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  7429. stacksize++;
  7430. }
  7431. if (ket != OP_KET || bra != OP_BRA)
  7432. stacksize++;
  7433. if (offset != 0)
  7434. {
  7435. if (common->capture_last_ptr != 0)
  7436. stacksize++;
  7437. if (common->optimized_cbracket[offset >> 1] == 0)
  7438. stacksize += 2;
  7439. }
  7440. if (has_alternatives && opcode != OP_ONCE)
  7441. stacksize++;
  7442. if (stacksize > 0)
  7443. allocate_stack(common, stacksize);
  7444. stacksize = 0;
  7445. if (repeat_type == OP_MINUPTO)
  7446. {
  7447. /* TMP2 was set above. */
  7448. OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  7449. stacksize++;
  7450. }
  7451. if (ket != OP_KET || bra != OP_BRA)
  7452. {
  7453. if (ket != OP_KET)
  7454. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  7455. else
  7456. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  7457. stacksize++;
  7458. }
  7459. if (offset != 0)
  7460. stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
  7461. if (has_alternatives)
  7462. {
  7463. if (opcode != OP_ONCE)
  7464. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  7465. if (ket != OP_KETRMAX)
  7466. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  7467. }
  7468. /* Must be after the matchingpath label. */
  7469. if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  7470. {
  7471. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  7472. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  7473. }
  7474. if (ket == OP_KETRMAX)
  7475. {
  7476. if (repeat_type != 0)
  7477. {
  7478. if (has_alternatives)
  7479. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  7480. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  7481. JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  7482. /* Drop STR_PTR for greedy plus quantifier. */
  7483. if (opcode != OP_ONCE)
  7484. free_stack(common, 1);
  7485. }
  7486. else if (opcode == OP_ONCE || opcode >= OP_SBRA)
  7487. {
  7488. if (has_alternatives)
  7489. BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  7490. /* Checking zero-length iteration. */
  7491. if (opcode != OP_ONCE)
  7492. {
  7493. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
  7494. /* Drop STR_PTR for greedy plus quantifier. */
  7495. if (bra != OP_BRAZERO)
  7496. free_stack(common, 1);
  7497. }
  7498. else
  7499. /* TMP2 must contain the starting STR_PTR. */
  7500. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
  7501. }
  7502. else
  7503. JUMPTO(SLJIT_JUMP, rmax_label);
  7504. BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  7505. }
  7506. if (repeat_type == OP_EXACT)
  7507. {
  7508. count_match(common);
  7509. OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  7510. JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  7511. }
  7512. else if (repeat_type == OP_UPTO)
  7513. {
  7514. /* We need to preserve the counter. */
  7515. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  7516. allocate_stack(common, 1);
  7517. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  7518. }
  7519. if (bra == OP_BRAZERO)
  7520. BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
  7521. if (bra == OP_BRAMINZERO)
  7522. {
  7523. /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  7524. JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  7525. if (braminzero != NULL)
  7526. {
  7527. JUMPHERE(braminzero);
  7528. /* We need to release the end pointer to perform the
  7529. backtrack for the zero-length iteration. When
  7530. framesize is < 0, OP_ONCE will do the release itself. */
  7531. if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
  7532. {
  7533. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7534. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  7535. }
  7536. else if (ket == OP_KETRMIN && opcode != OP_ONCE)
  7537. free_stack(common, 1);
  7538. }
  7539. /* Continue to the normal backtrack. */
  7540. }
  7541. if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  7542. count_match(common);
  7543. /* Skip the other alternatives. */
  7544. while (*cc == OP_ALT)
  7545. cc += GET(cc, 1);
  7546. cc += 1 + LINK_SIZE;
  7547. if (opcode == OP_ONCE)
  7548. {
  7549. /* We temporarily encode the needs_control_head in the lowest bit.
  7550. Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
  7551. the same value for small signed numbers (including negative numbers). */
  7552. BACKTRACK_AS(bracket_backtrack)->u.framesize = ((unsigned int)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  7553. }
  7554. return cc + repeat_length;
  7555. }
  /* Generates the matching path for the OP_BRAPOS / OP_SBRAPOS / OP_CBRAPOS /
     OP_SCBRAPOS bracket opcodes (optionally preceded by OP_BRAPOSZERO), whose
     alternatives are terminated by OP_KETRPOS.

     common - compiler state (capture bookkeeping, control head slot, ...).
     cc     - points at OP_BRAPOSZERO or at the bracket opcode itself.
     parent - backtrack node handed to PUSH_BACKTRACK.

     Returns the position after the closing OP_KETRPOS (cc + 1 + LINK_SIZE), or
     NULL when sljit_get_compiler_error() reports a failure while an
     alternative is being compiled. The chosen private_data_ptr, framesize and
     stacksize are recorded in the bracketpos_backtrack node. */
  7556. static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  7557. {
  7558. DEFINE_COMPILER;
  7559. backtrack_common *backtrack;
  7560. pcre_uchar opcode;
  7561. int private_data_ptr;
  7562. int cbraprivptr = 0;
  7563. BOOL needs_control_head;
  7564. int framesize;
  7565. int stacksize;
  7566. int offset = 0;
  7567. BOOL zero = FALSE;
  7568. pcre_uchar *ccbegin = NULL;
  7569. int stack; /* Also contains the offset of control head. */
  7570. struct sljit_label *loop = NULL;
  7571. struct jump_list *emptymatch = NULL;
  7572. PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
  /* An OP_BRAPOSZERO prefix is remembered in 'zero' and then skipped. */
  7573. if (*cc == OP_BRAPOSZERO)
  7574. {
  7575. zero = TRUE;
  7576. cc++;
  7577. }
  7578. opcode = *cc;
  7579. private_data_ptr = PRIVATE_DATA(cc);
  7580. SLJIT_ASSERT(private_data_ptr != 0);
  7581. BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
  /* Locate the body of the first alternative. The capturing variants also
     read the capture number that follows the link field. */
  7582. switch(opcode)
  7583. {
  7584. case OP_BRAPOS:
  7585. case OP_SBRAPOS:
  7586. ccbegin = cc + 1 + LINK_SIZE;
  7587. break;
  7588. case OP_CBRAPOS:
  7589. case OP_SCBRAPOS:
  7590. offset = GET2(cc, 1 + LINK_SIZE);
  7591. /* This case cannot be optimized in the same way as
  7592. normal capturing brackets. */
  7593. SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  7594. cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on 'offset' is twice the capture number; it is used below as
     OVECTOR(offset) / OVECTOR(offset + 1), and offset != 0 doubles as the
     "this is a capturing bracket" test. */
  7595. offset <<= 1;
  7596. ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  7597. break;
  7598. default:
  7599. SLJIT_UNREACHABLE();
  7600. break;
  7601. }
  7602. framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
  7603. BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
  /* framesize < 0: init_frame() is not called; only the values saved below
     are pushed onto the stack. */
  7604. if (framesize < 0)
  7605. {
  /* Slot count: two capture values (plus capture_last when it is tracked) or
     one saved STR_PTR, plus one for the control head when needed, plus one
     flag slot when the bracket is not preceded by OP_BRAPOSZERO. */
  7606. if (offset != 0)
  7607. {
  7608. stacksize = 2;
  7609. if (common->capture_last_ptr != 0)
  7610. stacksize++;
  7611. }
  7612. else
  7613. stacksize = 1;
  7614. if (needs_control_head)
  7615. stacksize++;
  7616. if (!zero)
  7617. stacksize++;
  7618. BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  7619. allocate_stack(common, stacksize);
  7620. if (framesize == no_frame)
  7621. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
  7622. stack = 0;
  7623. if (offset != 0)
  7624. {
  /* Save the current OVECTOR pair (and capture_last) into STACK(0..2). The
     loads and stores are interleaved; TMP2 is reloaded with the control head
     only after its OVECTOR value has been stored. */
  7625. stack = 2;
  7626. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  7627. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  7628. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  7629. if (common->capture_last_ptr != 0)
  7630. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  7631. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
  7632. if (needs_control_head)
  7633. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  7634. if (common->capture_last_ptr != 0)
  7635. {
  7636. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
  7637. stack = 3;
  7638. }
  7639. }
  7640. else
  7641. {
  /* Non-capturing: STACK(0) holds the current STR_PTR. */
  7642. if (needs_control_head)
  7643. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  7644. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  7645. stack = 1;
  7646. }
  /* The flag slot (initialised to 1) is placed after the control head slot;
     'stack' is then stepped back so it indexes the control head slot. */
  7647. if (needs_control_head)
  7648. stack++;
  7649. if (!zero)
  7650. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  7651. if (needs_control_head)
  7652. {
  7653. stack--;
  7654. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
  7655. }
  7656. }
  7657. else
  7658. {
  /* framesize >= 0: reserve the frame plus the extra slots, store the extra
     slots first (flag, control head, STR_PTR for non-capturing brackets, the
     previous private data value), then let init_frame() fill in the frame. */
  7659. stacksize = framesize + 1;
  7660. if (!zero)
  7661. stacksize++;
  7662. if (needs_control_head)
  7663. stacksize++;
  7664. if (offset == 0)
  7665. stacksize++;
  7666. BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  7667. allocate_stack(common, stacksize);
  7668. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7669. if (needs_control_head)
  7670. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  /* The private data slot now holds STACK_TOP + stacksize words; the code
     below recovers STACK_TOP from it by subtracting the same amount. */
  7671. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  7672. stack = 0;
  7673. if (!zero)
  7674. {
  7675. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
  7676. stack = 1;
  7677. }
  7678. if (needs_control_head)
  7679. {
  7680. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
  7681. stack++;
  7682. }
  7683. if (offset == 0)
  7684. {
  7685. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
  7686. stack++;
  7687. }
  7688. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  7689. init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step 'stack' back so that, when needs_control_head is set, it indexes the
     slot that received the control head above. */
  7690. stack -= 1 + (offset == 0);
  7691. }
  /* Capturing brackets keep the start of the current iteration in their
     private capture slot. */
  7692. if (offset != 0)
  7693. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  /* Every successful iteration jumps back to this label. */
  7694. loop = LABEL();
  /* One pass per alternative: emit its matching path, the code that records
     a successful iteration and jumps to 'loop', then its backtracking path
     followed by the code that restores STR_PTR. */
  7695. while (*cc != OP_KETRPOS)
  7696. {
  7697. backtrack->top = NULL;
  7698. backtrack->topbacktracks = NULL;
  7699. cc += GET(cc, 1);
  7700. compile_matchingpath(common, ccbegin, cc, backtrack);
  7701. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  7702. return NULL;
  7703. if (framesize < 0)
  7704. {
  7705. if (framesize == no_frame)
  7706. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7707. if (offset != 0)
  7708. {
  /* Publish the capture: start = saved iteration start (kept in TMP1 for the
     empty-match test below), end = STR_PTR; the next iteration starts here. */
  7709. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  7710. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  7711. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  7712. if (common->capture_last_ptr != 0)
  7713. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  7714. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  7715. }
  7716. else
  7717. {
  /* For OP_SBRAPOS the previous position is fetched into TMP1 before the slot
     is overwritten, so it can be compared with STR_PTR below. */
  7718. if (opcode == OP_SBRAPOS)
  7719. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  7720. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  7721. }
  7722. /* Even if the match is empty, we need to reset the control head. */
  7723. if (needs_control_head)
  7724. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
  /* OP_SBRAPOS / OP_SCBRAPOS leave the loop through 'emptymatch' when the
     iteration did not advance STR_PTR. */
  7725. if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
  7726. add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
  /* Clear the flag slot that was initialised to 1 during setup. */
  7727. if (!zero)
  7728. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
  7729. }
  7730. else
  7731. {
  7732. if (offset != 0)
  7733. {
  /* Recover STACK_TOP from the private data slot, then publish the capture as
     in the frameless case. */
  7734. OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  7735. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  7736. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  7737. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
  7738. if (common->capture_last_ptr != 0)
  7739. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  7740. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  7741. }
  7742. else
  7743. {
  /* TMP2 keeps the private data value; the saved STR_PTR slot is addressed
     relative to it. */
  7744. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7745. OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
  7746. if (opcode == OP_SBRAPOS)
  7747. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
  7748. OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
  7749. }
  7750. /* Even if the match is empty, we need to reset the control head. */
  7751. if (needs_control_head)
  7752. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
  7753. if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
  7754. add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
  7755. if (!zero)
  7756. {
  /* NOTE(review): this point is only reached through the enclosing else, i.e.
     with framesize >= 0, so the framesize < 0 arm below looks unreachable and
     only the STACK(0) store (the flag slot written during setup) applies. */
  7757. if (framesize < 0)
  7758. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
  7759. else
  7760. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  7761. }
  7762. }
  7763. JUMPTO(SLJIT_JUMP, loop);
  7764. flush_stubs(common);
  7765. compile_backtrackingpath(common, backtrack->top);
  7766. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  7767. return NULL;
  /* Failures of this alternative land here; STR_PTR is restored to the start
     of the current iteration before the next alternative (or the exit code)
     runs. */
  7768. set_jumps(backtrack->topbacktracks, LABEL());
  7769. if (framesize < 0)
  7770. {
  7771. if (offset != 0)
  7772. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  7773. else
  7774. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  7775. }
  7776. else
  7777. {
  7778. if (offset != 0)
  7779. {
  /* TMP2 is only needed by the flag test emitted after the loop, hence it is
     loaded for the last alternative only. */
  7780. /* Last alternative. */
  7781. if (*cc == OP_KETRPOS)
  7782. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7783. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
  7784. }
  7785. else
  7786. {
  7787. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  7788. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
  7789. }
  7790. }
  7791. if (*cc == OP_KETRPOS)
  7792. break;
  7793. ccbegin = cc + 1 + LINK_SIZE;
  7794. }
  7795. /* We don't have to restore the control head in case of a failed match. */
  7796. backtrack->topbacktracks = NULL;
  /* Without OP_BRAPOSZERO, a flag slot that is still non-zero (no iteration
     cleared it) sends control to this node's backtrack list. */
  7797. if (!zero)
  7798. {
  7799. if (framesize < 0)
  7800. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  7801. else /* TMP2 is set to [private_data_ptr] above. */
  7802. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  7803. }
  7804. /* None of them matched. */
  7805. set_jumps(emptymatch, LABEL());
  7806. count_match(common);
  7807. return cc + 1 + LINK_SIZE;
  7808. }
  7809. static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
  7810. {
  7811. int class_len;
  7812. *opcode = *cc;
  7813. *exact = 0;
  7814. if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  7815. {
  7816. cc++;
  7817. *type = OP_CHAR;
  7818. }
  7819. else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  7820. {
  7821. cc++;
  7822. *type = OP_CHARI;
  7823. *opcode -= OP_STARI - OP_STAR;
  7824. }
  7825. else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  7826. {
  7827. cc++;
  7828. *type = OP_NOT;
  7829. *opcode -= OP_NOTSTAR - OP_STAR;
  7830. }
  7831. else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  7832. {
  7833. cc++;
  7834. *type = OP_NOTI;
  7835. *opcode -= OP_NOTSTARI - OP_STAR;
  7836. }
  7837. else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  7838. {
  7839. cc++;
  7840. *opcode -= OP_TYPESTAR - OP_STAR;
  7841. *type = OP_END;
  7842. }
  7843. else
  7844. {
  7845. SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
  7846. *type = *opcode;
  7847. cc++;
  7848. class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
  7849. *opcode = cc[class_len - 1];
  7850. if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
  7851. {
  7852. *opcode -= OP_CRSTAR - OP_STAR;
  7853. *end = cc + class_len;
  7854. if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
  7855. {
  7856. *exact = 1;
  7857. *opcode -= OP_PLUS - OP_STAR;
  7858. }
  7859. }
  7860. else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
  7861. {
  7862. *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
  7863. *end = cc + class_len;
  7864. if (*opcode == OP_POSPLUS)
  7865. {
  7866. *exact = 1;
  7867. *opcode = OP_POSSTAR;
  7868. }
  7869. }
  7870. else
  7871. {
  7872. SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  7873. *max = GET2(cc, (class_len + IMM2_SIZE));
  7874. *exact = GET2(cc, class_len);
  7875. if (*max == 0)
  7876. {
  7877. if (*opcode == OP_CRPOSRANGE)
  7878. *opcode = OP_POSSTAR;
  7879. else
  7880. *opcode -= OP_CRRANGE - OP_STAR;
  7881. }
  7882. else
  7883. {
  7884. *max -= *exact;
  7885. if (*max == 0)
  7886. *opcode = OP_EXACT;
  7887. else if (*max == 1)
  7888. {
  7889. if (*opcode == OP_CRPOSRANGE)
  7890. *opcode = OP_POSQUERY;
  7891. else
  7892. *opcode -= OP_CRRANGE - OP_QUERY;
  7893. }
  7894. else
  7895. {
  7896. if (*opcode == OP_CRPOSRANGE)
  7897. *opcode = OP_POSUPTO;
  7898. else
  7899. *opcode -= OP_CRRANGE - OP_UPTO;
  7900. }
  7901. }
  7902. *end = cc + class_len + 2 * IMM2_SIZE;
  7903. }
  7904. return cc;
  7905. }
  7906. switch(*opcode)
  7907. {
  7908. case OP_EXACT:
  7909. *exact = GET2(cc, 0);
  7910. cc += IMM2_SIZE;
  7911. break;
  7912. case OP_PLUS:
  7913. case OP_MINPLUS:
  7914. *exact = 1;
  7915. *opcode -= OP_PLUS - OP_STAR;
  7916. break;
  7917. case OP_POSPLUS:
  7918. *exact = 1;
  7919. *opcode = OP_POSSTAR;
  7920. break;
  7921. case OP_UPTO:
  7922. case OP_MINUPTO:
  7923. case OP_POSUPTO:
  7924. *max = GET2(cc, 0);
  7925. cc += IMM2_SIZE;
  7926. break;
  7927. }
  7928. if (*type == OP_END)
  7929. {
  7930. *type = *cc;
  7931. *end = next_opcode(common, cc);
  7932. cc++;
  7933. return cc;
  7934. }
  7935. *end = cc + 1;
  7936. #ifdef SUPPORT_UTF
  7937. if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
  7938. #endif
  7939. return cc;
  7940. }
  /* Generates the matching path for a repeated single item (character,
     negated character, character type or character class).

     common - compiler state.
     cc     - points at the repeat opcode; it is decoded by
              get_iterator_parameters() into opcode / type / max / exact.
     parent - backtrack node handed to PUSH_BACKTRACK.

     The mandatory part (exact) is emitted first, then the code selected by
     the normalised opcode. Returns 'end', the position computed by
     get_iterator_parameters() (advanced by two more code units when a
     following OP_CHAR / OP_CHARI is consumed by the charpos path below). */
  7941. static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  7942. {
  7943. DEFINE_COMPILER;
  7944. backtrack_common *backtrack;
  7945. pcre_uchar opcode;
  7946. pcre_uchar type;
  7947. sljit_u32 max = 0, exact;
  7948. BOOL fast_fail;
  7949. sljit_s32 fast_str_ptr;
  7950. BOOL charpos_enabled;
  7951. pcre_uchar charpos_char;
  7952. unsigned int charpos_othercasebit;
  7953. pcre_uchar *end;
  7954. jump_list *no_match = NULL;
  7955. jump_list *no_char1_match = NULL;
  7956. struct sljit_jump *jump = NULL;
  7957. struct sljit_label *label;
  /* Iterator state is kept in the private data slots (addressed from
     SLJIT_SP) when private_data_ptr != 0, otherwise in stack slots
     STACK(0) / STACK(1) addressed from STACK_TOP. */
  7958. int private_data_ptr = PRIVATE_DATA(cc);
  7959. int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  7960. int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  7961. int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  7962. int tmp_base, tmp_offset;
  7963. PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
  /* fast_str_ptr is the private data value of the code unit after the repeat
     opcode; it is forced to 0 when no fast-fail range is configured, and the
     early-exit test is skipped for the fast_forward_bc_ptr item. */
  7964. fast_str_ptr = PRIVATE_DATA(cc + 1);
  7965. fast_fail = TRUE;
  7966. SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
  7967. if (cc == common->fast_forward_bc_ptr)
  7968. fast_fail = FALSE;
  7969. else if (common->fast_fail_start_ptr == 0)
  7970. fast_str_ptr = 0;
  7971. SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
  7972. || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
  7973. cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
  /* Loop counters / saved positions live in TMP3, except for OP_EXTUNI where
     the POSSESSIVE0 slot is used instead. */
  7974. if (type != OP_EXTUNI)
  7975. {
  7976. tmp_base = TMP3;
  7977. tmp_offset = 0;
  7978. }
  7979. else
  7980. {
  7981. tmp_base = SLJIT_MEM1(SLJIT_SP);
  7982. tmp_offset = POSSESSIVE0;
  7983. }
  /* Backtrack at once when STR_PTR is not beyond the value stored in the
     fast_str_ptr slot. */
  7984. if (fast_fail && fast_str_ptr != 0)
  7985. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
  7986. /* Handle fixed part first. */
  7987. if (exact > 1)
  7988. {
  7989. SLJIT_ASSERT(fast_str_ptr == 0);
  /* First form: one up-front comparison of STR_PTR + exact against STR_END,
     then a counted loop calling compile_char1_matchingpath() with FALSE as
     its last argument. Second form: the same counted loop with TRUE.
     (The last argument presumably toggles a per-character end-of-subject
     check -- confirm in compile_char1_matchingpath.) */
  7990. if (common->mode == JIT_COMPILE
  7991. #ifdef SUPPORT_UTF
  7992. && !common->utf
  7993. #endif
  7994. && type != OP_ANYNL && type != OP_EXTUNI)
  7995. {
  7996. OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
  7997. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
  7998. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
  7999. label = LABEL();
  8000. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
  8001. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8002. JUMPTO(SLJIT_NOT_ZERO, label);
  8003. }
  8004. else
  8005. {
  8006. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
  8007. label = LABEL();
  8008. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
  8009. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8010. JUMPTO(SLJIT_NOT_ZERO, label);
  8011. }
  8012. }
  8013. else if (exact == 1)
  8014. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
  /* Emit the variable part according to the normalised repeat opcode. */
  8015. switch(opcode)
  8016. {
  8017. case OP_STAR:
  8018. case OP_UPTO:
  8019. SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
  8020. if (type == OP_ANYNL || type == OP_EXTUNI)
  8021. {
  /* Each matched item pushes its end position onto the stack; the two slots
     pushed first hold the starting STR_PTR and a 0. For OP_UPTO the remaining
     count is kept in POSSESSIVE0. */
  8022. SLJIT_ASSERT(private_data_ptr == 0);
  8023. SLJIT_ASSERT(fast_str_ptr == 0);
  8024. allocate_stack(common, 2);
  8025. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8026. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
  8027. if (opcode == OP_UPTO)
  8028. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
  8029. label = LABEL();
  8030. compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  8031. if (opcode == OP_UPTO)
  8032. {
  8033. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
  8034. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  8035. jump = JUMP(SLJIT_ZERO);
  8036. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
  8037. }
  8038. /* We cannot use TMP3 because of this allocate_stack. */
  8039. allocate_stack(common, 1);
  8040. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8041. JUMPTO(SLJIT_JUMP, label);
  8042. if (jump != NULL)
  8043. JUMPHERE(jump);
  8044. }
  8045. else
  8046. {
  /* charpos path: when the repeated item is not itself a literal and is
     followed by OP_CHAR / OP_CHARI, that following character is consumed here
     and the loop records positions where it occurs. */
  8047. charpos_enabled = FALSE;
  8048. charpos_char = 0;
  8049. charpos_othercasebit = 0;
  8050. if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
  8051. {
  8052. charpos_enabled = TRUE;
  8053. #ifdef SUPPORT_UTF
  8054. charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
  8055. #endif
  /* Caseless following character: usable only when char_get_othercase_bit()
     returns a non-zero bit. */
  8056. if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
  8057. {
  8058. charpos_othercasebit = char_get_othercase_bit(common, end + 1);
  8059. if (charpos_othercasebit == 0)
  8060. charpos_enabled = FALSE;
  8061. }
  8062. if (charpos_enabled)
  8063. {
  8064. charpos_char = end[1];
  8065. /* Consume the OP_CHAR opcode. */
  8066. end += 2;
  8067. #if defined COMPILE_PCRE8
  8068. SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
  8069. #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  8070. SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
  8071. if ((charpos_othercasebit & 0x100) != 0)
  8072. charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
  8073. #endif
  8074. if (charpos_othercasebit != 0)
  8075. charpos_char |= charpos_othercasebit;
  8076. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
  8077. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
  8078. BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
  8079. }
  8080. }
  8081. if (charpos_enabled)
  8082. {
  8083. if (opcode == OP_UPTO)
  8084. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
  8085. /* Search the first instance of charpos_char. */
  /* The initial jump skips the item match so the first test is made at the
     current position. */
  8086. jump = JUMP(SLJIT_JUMP);
  8087. label = LABEL();
  8088. if (opcode == OP_UPTO)
  8089. {
  8090. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8091. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
  8092. }
  8093. compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
  8094. if (fast_str_ptr != 0)
  8095. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8096. JUMPHERE(jump);
  8097. detect_partial_match(common, &backtrack->topbacktracks);
  8098. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  8099. if (charpos_othercasebit != 0)
  8100. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
  8101. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
  /* First occurrence found: both state slots start out as this position. */
  8102. if (private_data_ptr == 0)
  8103. allocate_stack(common, 2);
  8104. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8105. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  8106. if (opcode == OP_UPTO)
  8107. {
  8108. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8109. add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
  8110. }
  8111. /* Search the last instance of charpos_char. */
  8112. label = LABEL();
  8113. compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
  8114. if (fast_str_ptr != 0)
  8115. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8116. detect_partial_match(common, &no_match);
  8117. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  8118. if (charpos_othercasebit != 0)
  8119. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
  /* offset0 is updated only at positions holding charpos_char. For OP_STAR a
     mismatch loops directly; for OP_UPTO the counter below must still be
     decremented, so the store is merely skipped. */
  8120. if (opcode == OP_STAR)
  8121. {
  8122. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
  8123. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8124. }
  8125. else
  8126. {
  8127. jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
  8128. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8129. JUMPHERE(jump);
  8130. }
  8131. if (opcode == OP_UPTO)
  8132. {
  8133. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8134. JUMPTO(SLJIT_NOT_ZERO, label);
  8135. }
  8136. else
  8137. JUMPTO(SLJIT_JUMP, label);
  /* Continue one code unit past the last recorded occurrence, i.e. after the
     consumed following character. */
  8138. set_jumps(no_match, LABEL());
  8139. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8140. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8141. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8142. }
  8143. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  8144. else if (common->utf)
  8145. {
  /* UTF (8/16 bit) path: offset0 is rewritten with STR_PTR after every
     successful item match; on failure STR_PTR is reloaded from it. */
  8146. if (private_data_ptr == 0)
  8147. allocate_stack(common, 2);
  8148. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8149. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  8150. if (opcode == OP_UPTO)
  8151. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  8152. label = LABEL();
  8153. compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  8154. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8155. if (opcode == OP_UPTO)
  8156. {
  8157. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8158. JUMPTO(SLJIT_NOT_ZERO, label);
  8159. }
  8160. else
  8161. JUMPTO(SLJIT_JUMP, label);
  8162. set_jumps(no_match, LABEL());
  8163. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8164. if (fast_str_ptr != 0)
  8165. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8166. }
  8167. #endif
  8168. else
  8169. {
  /* Default path: offset1 keeps the starting position. A failed item match
     (no_char1_match) steps STR_PTR back by one code unit; when the OP_UPTO
     counter runs out, the +1 followed by the -1 below leaves STR_PTR
     unchanged. The final position is stored in offset0. */
  8170. if (private_data_ptr == 0)
  8171. allocate_stack(common, 2);
  8172. OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
  8173. if (opcode == OP_UPTO)
  8174. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  8175. label = LABEL();
  8176. detect_partial_match(common, &no_match);
  8177. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  8178. if (opcode == OP_UPTO)
  8179. {
  8180. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8181. JUMPTO(SLJIT_NOT_ZERO, label);
  8182. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8183. }
  8184. else
  8185. JUMPTO(SLJIT_JUMP, label);
  8186. set_jumps(no_char1_match, LABEL());
  8187. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8188. set_jumps(no_match, LABEL());
  8189. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8190. if (fast_str_ptr != 0)
  8191. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8192. }
  8193. }
  8194. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  8195. break;
  /* The MIN* forms match nothing here: they only save the state and record the
     matchingpath label in the backtrack node. */
  8196. case OP_MINSTAR:
  8197. if (private_data_ptr == 0)
  8198. allocate_stack(common, 1);
  8199. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8200. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  8201. if (fast_str_ptr != 0)
  8202. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8203. break;
  8204. case OP_MINUPTO:
  8205. SLJIT_ASSERT(fast_str_ptr == 0);
  8206. if (private_data_ptr == 0)
  8207. allocate_stack(common, 2);
  8208. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8209. OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
  8210. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  8211. break;
  8212. case OP_QUERY:
  8213. case OP_MINQUERY:
  8214. SLJIT_ASSERT(fast_str_ptr == 0);
  8215. if (private_data_ptr == 0)
  8216. allocate_stack(common, 1);
  8217. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  /* Only OP_QUERY tries the item on the matching path. */
  8218. if (opcode == OP_QUERY)
  8219. compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
  8220. BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
  8221. break;
  /* Nothing beyond the fixed part emitted above. */
  8222. case OP_EXACT:
  8223. break;
  /* The POS* forms save no iterator state in base/offset0/offset1 and set no
     matchingpath label; they only use tmp_base / POSSESSIVE1 as scratch. */
  8224. case OP_POSSTAR:
  8225. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  8226. if (common->utf)
  8227. {
  8228. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  8229. label = LABEL();
  8230. compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  8231. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  8232. JUMPTO(SLJIT_JUMP, label);
  8233. set_jumps(no_match, LABEL());
  8234. OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  8235. if (fast_str_ptr != 0)
  8236. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8237. break;
  8238. }
  8239. #endif
  8240. label = LABEL();
  8241. detect_partial_match(common, &no_match);
  8242. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  8243. JUMPTO(SLJIT_JUMP, label);
  8244. set_jumps(no_char1_match, LABEL());
  8245. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8246. set_jumps(no_match, LABEL());
  8247. if (fast_str_ptr != 0)
  8248. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
  8249. break;
  8250. case OP_POSUPTO:
  8251. SLJIT_ASSERT(fast_str_ptr == 0);
  8252. #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  8253. if (common->utf)
  8254. {
  /* tmp_base holds the counter here, so the last good position is kept in the
     POSSESSIVE1 slot. */
  8255. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
  8256. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  8257. label = LABEL();
  8258. compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  8259. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
  8260. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8261. JUMPTO(SLJIT_NOT_ZERO, label);
  8262. set_jumps(no_match, LABEL());
  8263. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  8264. break;
  8265. }
  8266. #endif
  8267. OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
  8268. label = LABEL();
  8269. detect_partial_match(common, &no_match);
  8270. compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
  8271. OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
  8272. JUMPTO(SLJIT_NOT_ZERO, label);
  /* Counter exhausted: the +1 cancels the -1 shared with the failed-item exit
     just below. */
  8273. OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8274. set_jumps(no_char1_match, LABEL());
  8275. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8276. set_jumps(no_match, LABEL());
  8277. break;
  8278. case OP_POSQUERY:
  8279. SLJIT_ASSERT(fast_str_ptr == 0);
  /* Save STR_PTR, try one item, and reload STR_PTR from the saved slot, which
     is overwritten with the new position only when the item matched. */
  8280. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  8281. compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
  8282. OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
  8283. set_jumps(no_match, LABEL());
  8284. OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
  8285. break;
  8286. default:
  8287. SLJIT_UNREACHABLE();
  8288. break;
  8289. }
  8290. count_match(common);
  8291. return end;
  8292. }
  8293. static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  8294. {
  8295. DEFINE_COMPILER;
  8296. backtrack_common *backtrack;
  8297. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  8298. if (*cc == OP_FAIL)
  8299. {
  8300. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  8301. return cc + 1;
  8302. }
  8303. if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
  8304. {
  8305. /* No need to check notempty conditions. */
  8306. if (common->accept_label == NULL)
  8307. add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  8308. else
  8309. JUMPTO(SLJIT_JUMP, common->accept_label);
  8310. return cc + 1;
  8311. }
  8312. if (common->accept_label == NULL)
  8313. add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
  8314. else
  8315. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  8316. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  8317. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
  8318. add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  8319. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
  8320. if (common->accept_label == NULL)
  8321. add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  8322. else
  8323. CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
  8324. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  8325. if (common->accept_label == NULL)
  8326. add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
  8327. else
  8328. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  8329. add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  8330. return cc + 1;
  8331. }
  8332. static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
  8333. {
  8334. DEFINE_COMPILER;
  8335. int offset = GET2(cc, 1);
  8336. BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
  8337. /* Data will be discarded anyway... */
  8338. if (common->currententry != NULL)
  8339. return cc + 1 + IMM2_SIZE;
  8340. if (!optimized_cbracket)
  8341. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
  8342. offset <<= 1;
  8343. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  8344. if (!optimized_cbracket)
  8345. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  8346. return cc + 1 + IMM2_SIZE;
  8347. }
  8348. static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
  8349. {
  8350. DEFINE_COMPILER;
  8351. backtrack_common *backtrack;
  8352. pcre_uchar opcode = *cc;
  8353. pcre_uchar *ccend = cc + 1;
  8354. if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
  8355. ccend += 2 + cc[1];
  8356. PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
  8357. if (opcode == OP_SKIP)
  8358. {
  8359. allocate_stack(common, 1);
  8360. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8361. return ccend;
  8362. }
  8363. if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
  8364. {
  8365. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  8366. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  8367. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  8368. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  8369. }
  8370. return ccend;
  8371. }
  8372. static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP };
  8373. static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
  8374. {
  8375. DEFINE_COMPILER;
  8376. backtrack_common *backtrack;
  8377. BOOL needs_control_head;
  8378. int size;
  8379. PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  8380. common->then_trap = BACKTRACK_AS(then_trap_backtrack);
  8381. BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  8382. BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
  8383. BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
  8384. size = BACKTRACK_AS(then_trap_backtrack)->framesize;
  8385. size = 3 + (size < 0 ? 0 : size);
  8386. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  8387. allocate_stack(common, size);
  8388. if (size > 3)
  8389. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
  8390. else
  8391. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  8392. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
  8393. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
  8394. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
  8395. size = BACKTRACK_AS(then_trap_backtrack)->framesize;
  8396. if (size >= 0)
  8397. init_frame(common, cc, ccend, size - 1, 0, FALSE);
  8398. }
  8399. static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
  8400. {
  8401. DEFINE_COMPILER;
  8402. backtrack_common *backtrack;
  8403. BOOL has_then_trap = FALSE;
  8404. then_trap_backtrack *save_then_trap = NULL;
  8405. SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
  8406. if (common->has_then && common->then_offsets[cc - common->start] != 0)
  8407. {
  8408. SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  8409. has_then_trap = TRUE;
  8410. save_then_trap = common->then_trap;
  8411. /* Tail item on backtrack. */
  8412. compile_then_trap_matchingpath(common, cc, ccend, parent);
  8413. }
  8414. while (cc < ccend)
  8415. {
  8416. switch(*cc)
  8417. {
  8418. case OP_SOD:
  8419. case OP_SOM:
  8420. case OP_NOT_WORD_BOUNDARY:
  8421. case OP_WORD_BOUNDARY:
  8422. case OP_EODN:
  8423. case OP_EOD:
  8424. case OP_DOLL:
  8425. case OP_DOLLM:
  8426. case OP_CIRC:
  8427. case OP_CIRCM:
  8428. case OP_REVERSE:
  8429. cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  8430. break;
  8431. case OP_NOT_DIGIT:
  8432. case OP_DIGIT:
  8433. case OP_NOT_WHITESPACE:
  8434. case OP_WHITESPACE:
  8435. case OP_NOT_WORDCHAR:
  8436. case OP_WORDCHAR:
  8437. case OP_ANY:
  8438. case OP_ALLANY:
  8439. case OP_ANYBYTE:
  8440. case OP_NOTPROP:
  8441. case OP_PROP:
  8442. case OP_ANYNL:
  8443. case OP_NOT_HSPACE:
  8444. case OP_HSPACE:
  8445. case OP_NOT_VSPACE:
  8446. case OP_VSPACE:
  8447. case OP_EXTUNI:
  8448. case OP_NOT:
  8449. case OP_NOTI:
  8450. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  8451. break;
  8452. case OP_SET_SOM:
  8453. PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
  8454. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  8455. allocate_stack(common, 1);
  8456. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
  8457. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  8458. cc++;
  8459. break;
  8460. case OP_CHAR:
  8461. case OP_CHARI:
  8462. if (common->mode == JIT_COMPILE)
  8463. cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  8464. else
  8465. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  8466. break;
  8467. case OP_STAR:
  8468. case OP_MINSTAR:
  8469. case OP_PLUS:
  8470. case OP_MINPLUS:
  8471. case OP_QUERY:
  8472. case OP_MINQUERY:
  8473. case OP_UPTO:
  8474. case OP_MINUPTO:
  8475. case OP_EXACT:
  8476. case OP_POSSTAR:
  8477. case OP_POSPLUS:
  8478. case OP_POSQUERY:
  8479. case OP_POSUPTO:
  8480. case OP_STARI:
  8481. case OP_MINSTARI:
  8482. case OP_PLUSI:
  8483. case OP_MINPLUSI:
  8484. case OP_QUERYI:
  8485. case OP_MINQUERYI:
  8486. case OP_UPTOI:
  8487. case OP_MINUPTOI:
  8488. case OP_EXACTI:
  8489. case OP_POSSTARI:
  8490. case OP_POSPLUSI:
  8491. case OP_POSQUERYI:
  8492. case OP_POSUPTOI:
  8493. case OP_NOTSTAR:
  8494. case OP_NOTMINSTAR:
  8495. case OP_NOTPLUS:
  8496. case OP_NOTMINPLUS:
  8497. case OP_NOTQUERY:
  8498. case OP_NOTMINQUERY:
  8499. case OP_NOTUPTO:
  8500. case OP_NOTMINUPTO:
  8501. case OP_NOTEXACT:
  8502. case OP_NOTPOSSTAR:
  8503. case OP_NOTPOSPLUS:
  8504. case OP_NOTPOSQUERY:
  8505. case OP_NOTPOSUPTO:
  8506. case OP_NOTSTARI:
  8507. case OP_NOTMINSTARI:
  8508. case OP_NOTPLUSI:
  8509. case OP_NOTMINPLUSI:
  8510. case OP_NOTQUERYI:
  8511. case OP_NOTMINQUERYI:
  8512. case OP_NOTUPTOI:
  8513. case OP_NOTMINUPTOI:
  8514. case OP_NOTEXACTI:
  8515. case OP_NOTPOSSTARI:
  8516. case OP_NOTPOSPLUSI:
  8517. case OP_NOTPOSQUERYI:
  8518. case OP_NOTPOSUPTOI:
  8519. case OP_TYPESTAR:
  8520. case OP_TYPEMINSTAR:
  8521. case OP_TYPEPLUS:
  8522. case OP_TYPEMINPLUS:
  8523. case OP_TYPEQUERY:
  8524. case OP_TYPEMINQUERY:
  8525. case OP_TYPEUPTO:
  8526. case OP_TYPEMINUPTO:
  8527. case OP_TYPEEXACT:
  8528. case OP_TYPEPOSSTAR:
  8529. case OP_TYPEPOSPLUS:
  8530. case OP_TYPEPOSQUERY:
  8531. case OP_TYPEPOSUPTO:
  8532. cc = compile_iterator_matchingpath(common, cc, parent);
  8533. break;
  8534. case OP_CLASS:
  8535. case OP_NCLASS:
  8536. if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
  8537. cc = compile_iterator_matchingpath(common, cc, parent);
  8538. else
  8539. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  8540. break;
  8541. #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  8542. case OP_XCLASS:
  8543. if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
  8544. cc = compile_iterator_matchingpath(common, cc, parent);
  8545. else
  8546. cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
  8547. break;
  8548. #endif
  8549. case OP_REF:
  8550. case OP_REFI:
  8551. if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
  8552. cc = compile_ref_iterator_matchingpath(common, cc, parent);
  8553. else
  8554. {
  8555. compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
  8556. cc += 1 + IMM2_SIZE;
  8557. }
  8558. break;
  8559. case OP_DNREF:
  8560. case OP_DNREFI:
  8561. if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
  8562. cc = compile_ref_iterator_matchingpath(common, cc, parent);
  8563. else
  8564. {
  8565. compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
  8566. compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
  8567. cc += 1 + 2 * IMM2_SIZE;
  8568. }
  8569. break;
  8570. case OP_RECURSE:
  8571. cc = compile_recurse_matchingpath(common, cc, parent);
  8572. break;
  8573. case OP_CALLOUT:
  8574. cc = compile_callout_matchingpath(common, cc, parent);
  8575. break;
  8576. case OP_ASSERT:
  8577. case OP_ASSERT_NOT:
  8578. case OP_ASSERTBACK:
  8579. case OP_ASSERTBACK_NOT:
  8580. PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
  8581. cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
  8582. break;
  8583. case OP_BRAMINZERO:
  8584. PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
  8585. cc = bracketend(cc + 1);
  8586. if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
  8587. {
  8588. allocate_stack(common, 1);
  8589. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  8590. }
  8591. else
  8592. {
  8593. allocate_stack(common, 2);
  8594. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8595. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
  8596. }
  8597. BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
  8598. count_match(common);
  8599. break;
  8600. case OP_ONCE:
  8601. case OP_ONCE_NC:
  8602. case OP_BRA:
  8603. case OP_CBRA:
  8604. case OP_COND:
  8605. case OP_SBRA:
  8606. case OP_SCBRA:
  8607. case OP_SCOND:
  8608. cc = compile_bracket_matchingpath(common, cc, parent);
  8609. break;
  8610. case OP_BRAZERO:
  8611. if (cc[1] > OP_ASSERTBACK_NOT)
  8612. cc = compile_bracket_matchingpath(common, cc, parent);
  8613. else
  8614. {
  8615. PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
  8616. cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
  8617. }
  8618. break;
  8619. case OP_BRAPOS:
  8620. case OP_CBRAPOS:
  8621. case OP_SBRAPOS:
  8622. case OP_SCBRAPOS:
  8623. case OP_BRAPOSZERO:
  8624. cc = compile_bracketpos_matchingpath(common, cc, parent);
  8625. break;
  8626. case OP_MARK:
  8627. PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
  8628. SLJIT_ASSERT(common->mark_ptr != 0);
  8629. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  8630. allocate_stack(common, common->has_skip_arg ? 5 : 1);
  8631. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  8632. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
  8633. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  8634. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  8635. OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  8636. if (common->has_skip_arg)
  8637. {
  8638. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  8639. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  8640. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
  8641. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
  8642. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
  8643. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
  8644. }
  8645. cc += 1 + 2 + cc[1];
  8646. break;
  8647. case OP_PRUNE:
  8648. case OP_PRUNE_ARG:
  8649. case OP_SKIP:
  8650. case OP_SKIP_ARG:
  8651. case OP_THEN:
  8652. case OP_THEN_ARG:
  8653. case OP_COMMIT:
  8654. cc = compile_control_verb_matchingpath(common, cc, parent);
  8655. break;
  8656. case OP_FAIL:
  8657. case OP_ACCEPT:
  8658. case OP_ASSERT_ACCEPT:
  8659. cc = compile_fail_accept_matchingpath(common, cc, parent);
  8660. break;
  8661. case OP_CLOSE:
  8662. cc = compile_close_matchingpath(common, cc);
  8663. break;
  8664. case OP_SKIPZERO:
  8665. cc = bracketend(cc + 1);
  8666. break;
  8667. default:
  8668. SLJIT_UNREACHABLE();
  8669. return;
  8670. }
  8671. if (cc == NULL)
  8672. return;
  8673. }
  8674. if (has_then_trap)
  8675. {
  8676. /* Head item on backtrack. */
  8677. PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  8678. BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  8679. BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  8680. common->then_trap = save_then_trap;
  8681. }
  8682. SLJIT_ASSERT(cc == ccend);
  8683. }
  /* Retire the helper macros used by the matching-path compilers above. */
  #undef PUSH_BACKTRACK
  #undef PUSH_BACKTRACK_NOVALUE
  #undef BACKTRACK_AS

  /* Compiles the backtracking path of (current) through
  compile_backtrackingpath() and leaves the enclosing function with a bare
  return when the sljit compiler reports an error. The expansion refers to the
  names common and compiler, so both must be in scope where the macro is used,
  and the enclosing function must return void. */
  #define COMPILE_BACKTRACKINGPATH(current) \
    do { \
      compile_backtrackingpath(common, (current)); \
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
        return; \
    } while (0)

  /* Views the variable named current as a pointer to the given backtrack type. */
  #define CURRENT_AS(type) ((type *)current)
  8696. static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  8697. {
  8698. DEFINE_COMPILER;
  8699. pcre_uchar *cc = current->cc;
  8700. pcre_uchar opcode;
  8701. pcre_uchar type;
  8702. sljit_u32 max = 0, exact;
  8703. struct sljit_label *label = NULL;
  8704. struct sljit_jump *jump = NULL;
  8705. jump_list *jumplist = NULL;
  8706. pcre_uchar *end;
  8707. int private_data_ptr = PRIVATE_DATA(cc);
  8708. int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
  8709. int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
  8710. int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
  8711. cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
  8712. switch(opcode)
  8713. {
  8714. case OP_STAR:
  8715. case OP_UPTO:
  8716. if (type == OP_ANYNL || type == OP_EXTUNI)
  8717. {
  8718. SLJIT_ASSERT(private_data_ptr == 0);
  8719. set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  8720. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8721. free_stack(common, 1);
  8722. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8723. }
  8724. else
  8725. {
  8726. if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
  8727. {
  8728. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8729. OP1(SLJIT_MOV, TMP2, 0, base, offset1);
  8730. OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  8731. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  8732. label = LABEL();
  8733. OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  8734. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8735. if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
  8736. OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
  8737. CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8738. skip_char_back(common);
  8739. CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
  8740. }
  8741. else
  8742. {
  8743. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8744. jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
  8745. skip_char_back(common);
  8746. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8747. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8748. }
  8749. JUMPHERE(jump);
  8750. if (private_data_ptr == 0)
  8751. free_stack(common, 2);
  8752. }
  8753. break;
  8754. case OP_MINSTAR:
  8755. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8756. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  8757. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8758. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8759. set_jumps(jumplist, LABEL());
  8760. if (private_data_ptr == 0)
  8761. free_stack(common, 1);
  8762. break;
  8763. case OP_MINUPTO:
  8764. OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  8765. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8766. OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  8767. add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
  8768. OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  8769. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  8770. OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  8771. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8772. set_jumps(jumplist, LABEL());
  8773. if (private_data_ptr == 0)
  8774. free_stack(common, 2);
  8775. break;
  8776. case OP_QUERY:
  8777. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8778. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  8779. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8780. jump = JUMP(SLJIT_JUMP);
  8781. set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
  8782. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8783. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  8784. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8785. JUMPHERE(jump);
  8786. if (private_data_ptr == 0)
  8787. free_stack(common, 1);
  8788. break;
  8789. case OP_MINQUERY:
  8790. OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  8791. OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  8792. jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  8793. compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
  8794. JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
  8795. set_jumps(jumplist, LABEL());
  8796. JUMPHERE(jump);
  8797. if (private_data_ptr == 0)
  8798. free_stack(common, 1);
  8799. break;
  8800. case OP_EXACT:
  8801. case OP_POSSTAR:
  8802. case OP_POSQUERY:
  8803. case OP_POSUPTO:
  8804. break;
  8805. default:
  8806. SLJIT_UNREACHABLE();
  8807. break;
  8808. }
  8809. set_jumps(current->topbacktracks, LABEL());
  8810. }
  8811. static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  8812. {
  8813. DEFINE_COMPILER;
  8814. pcre_uchar *cc = current->cc;
  8815. BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
  8816. pcre_uchar type;
  8817. type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
  8818. if ((type & 0x1) == 0)
  8819. {
  8820. /* Maximize case. */
  8821. set_jumps(current->topbacktracks, LABEL());
  8822. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8823. free_stack(common, 1);
  8824. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  8825. return;
  8826. }
  8827. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8828. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  8829. set_jumps(current->topbacktracks, LABEL());
  8830. free_stack(common, ref ? 2 : 3);
  8831. }
  8832. static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  8833. {
  8834. DEFINE_COMPILER;
  8835. if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
  8836. compile_backtrackingpath(common, current->top);
  8837. set_jumps(current->topbacktracks, LABEL());
  8838. if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
  8839. return;
  8840. if (common->has_set_som && common->mark_ptr != 0)
  8841. {
  8842. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8843. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  8844. free_stack(common, 2);
  8845. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
  8846. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
  8847. }
  8848. else if (common->has_set_som || common->mark_ptr != 0)
  8849. {
  8850. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8851. free_stack(common, 1);
  8852. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
  8853. }
  8854. }
  8855. static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  8856. {
  8857. DEFINE_COMPILER;
  8858. pcre_uchar *cc = current->cc;
  8859. pcre_uchar bra = OP_BRA;
  8860. struct sljit_jump *brajump = NULL;
  8861. SLJIT_ASSERT(*cc != OP_BRAMINZERO);
  8862. if (*cc == OP_BRAZERO)
  8863. {
  8864. bra = *cc;
  8865. cc++;
  8866. }
  8867. if (bra == OP_BRAZERO)
  8868. {
  8869. SLJIT_ASSERT(current->topbacktracks == NULL);
  8870. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8871. }
  8872. if (CURRENT_AS(assert_backtrack)->framesize < 0)
  8873. {
  8874. set_jumps(current->topbacktracks, LABEL());
  8875. if (bra == OP_BRAZERO)
  8876. {
  8877. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8878. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  8879. free_stack(common, 1);
  8880. }
  8881. return;
  8882. }
  8883. if (bra == OP_BRAZERO)
  8884. {
  8885. if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
  8886. {
  8887. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8888. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
  8889. free_stack(common, 1);
  8890. return;
  8891. }
  8892. free_stack(common, 1);
  8893. brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  8894. }
  8895. if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  8896. {
  8897. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
  8898. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  8899. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(assert_backtrack)->framesize - 1));
  8900. set_jumps(current->topbacktracks, LABEL());
  8901. }
  8902. else
  8903. set_jumps(current->topbacktracks, LABEL());
  8904. if (bra == OP_BRAZERO)
  8905. {
  8906. /* We know there is enough place on the stack. */
  8907. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  8908. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  8909. JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
  8910. JUMPHERE(brajump);
  8911. }
  8912. }
  8913. static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  8914. {
  8915. DEFINE_COMPILER;
  8916. int opcode, stacksize, alt_count, alt_max;
  8917. int offset = 0;
  8918. int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
  8919. int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
  8920. pcre_uchar *cc = current->cc;
  8921. pcre_uchar *ccbegin;
  8922. pcre_uchar *ccprev;
  8923. pcre_uchar bra = OP_BRA;
  8924. pcre_uchar ket;
  8925. assert_backtrack *assert;
  8926. sljit_uw *next_update_addr = NULL;
  8927. BOOL has_alternatives;
  8928. BOOL needs_control_head = FALSE;
  8929. struct sljit_jump *brazero = NULL;
  8930. struct sljit_jump *alt1 = NULL;
  8931. struct sljit_jump *alt2 = NULL;
  8932. struct sljit_jump *once = NULL;
  8933. struct sljit_jump *cond = NULL;
  8934. struct sljit_label *rmin_label = NULL;
  8935. struct sljit_label *exact_label = NULL;
  8936. if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  8937. {
  8938. bra = *cc;
  8939. cc++;
  8940. }
  8941. opcode = *cc;
  8942. ccbegin = bracketend(cc) - 1 - LINK_SIZE;
  8943. ket = *ccbegin;
  8944. if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
  8945. {
  8946. repeat_ptr = PRIVATE_DATA(ccbegin);
  8947. repeat_type = PRIVATE_DATA(ccbegin + 2);
  8948. repeat_count = PRIVATE_DATA(ccbegin + 3);
  8949. SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
  8950. if (repeat_type == OP_UPTO)
  8951. ket = OP_KETRMAX;
  8952. if (repeat_type == OP_MINUPTO)
  8953. ket = OP_KETRMIN;
  8954. }
  8955. ccbegin = cc;
  8956. cc += GET(cc, 1);
  8957. has_alternatives = *cc == OP_ALT;
  8958. if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  8959. has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
  8960. if (opcode == OP_CBRA || opcode == OP_SCBRA)
  8961. offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
  8962. if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  8963. opcode = OP_SCOND;
  8964. if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  8965. opcode = OP_ONCE;
  8966. alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
  8967. /* Decoding the needs_control_head in framesize. */
  8968. if (opcode == OP_ONCE)
  8969. {
  8970. needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
  8971. CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
  8972. }
  8973. if (ket != OP_KET && repeat_type != 0)
  8974. {
  8975. /* TMP1 is used in OP_KETRMIN below. */
  8976. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8977. free_stack(common, 1);
  8978. if (repeat_type == OP_UPTO)
  8979. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
  8980. else
  8981. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  8982. }
  8983. if (ket == OP_KETRMAX)
  8984. {
  8985. if (bra == OP_BRAZERO)
  8986. {
  8987. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8988. free_stack(common, 1);
  8989. brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  8990. }
  8991. }
  8992. else if (ket == OP_KETRMIN)
  8993. {
  8994. if (bra != OP_BRAMINZERO)
  8995. {
  8996. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  8997. if (repeat_type != 0)
  8998. {
  8999. /* TMP1 was set a few lines above. */
  9000. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  9001. /* Drop STR_PTR for non-greedy plus quantifier. */
  9002. if (opcode != OP_ONCE)
  9003. free_stack(common, 1);
  9004. }
  9005. else if (opcode >= OP_SBRA || opcode == OP_ONCE)
  9006. {
  9007. /* Checking zero-length iteration. */
  9008. if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
  9009. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  9010. else
  9011. {
  9012. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9013. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  9014. }
  9015. /* Drop STR_PTR for non-greedy plus quantifier. */
  9016. if (opcode != OP_ONCE)
  9017. free_stack(common, 1);
  9018. }
  9019. else
  9020. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  9021. }
  9022. rmin_label = LABEL();
  9023. if (repeat_type != 0)
  9024. OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  9025. }
  9026. else if (bra == OP_BRAZERO)
  9027. {
  9028. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9029. free_stack(common, 1);
  9030. brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
  9031. }
  9032. else if (repeat_type == OP_EXACT)
  9033. {
  9034. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  9035. exact_label = LABEL();
  9036. }
  9037. if (offset != 0)
  9038. {
  9039. if (common->capture_last_ptr != 0)
  9040. {
  9041. SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
  9042. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9043. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9044. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  9045. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  9046. free_stack(common, 3);
  9047. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
  9048. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  9049. }
  9050. else if (common->optimized_cbracket[offset >> 1] == 0)
  9051. {
  9052. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9053. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9054. free_stack(common, 2);
  9055. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  9056. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  9057. }
  9058. }
  9059. if (SLJIT_UNLIKELY(opcode == OP_ONCE))
  9060. {
  9061. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  9062. {
  9063. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9064. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9065. }
  9066. once = JUMP(SLJIT_JUMP);
  9067. }
  9068. else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  9069. {
  9070. if (has_alternatives)
  9071. {
  9072. /* Always exactly one alternative. */
  9073. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9074. free_stack(common, 1);
  9075. alt_max = 2;
  9076. alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
  9077. }
  9078. }
  9079. else if (has_alternatives)
  9080. {
  9081. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9082. free_stack(common, 1);
  9083. if (alt_max > 4)
  9084. {
  9085. /* Table jump if alt_max is greater than 4. */
  9086. next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
  9087. if (SLJIT_UNLIKELY(next_update_addr == NULL))
  9088. return;
  9089. sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
  9090. add_label_addr(common, next_update_addr++);
  9091. }
  9092. else
  9093. {
  9094. if (alt_max == 4)
  9095. alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
  9096. alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
  9097. }
  9098. }
  9099. COMPILE_BACKTRACKINGPATH(current->top);
  9100. if (current->topbacktracks)
  9101. set_jumps(current->topbacktracks, LABEL());
  9102. if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
  9103. {
  9104. /* Conditional block always has at most one alternative. */
  9105. if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
  9106. {
  9107. SLJIT_ASSERT(has_alternatives);
  9108. assert = CURRENT_AS(bracket_backtrack)->u.assert;
  9109. if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
  9110. {
  9111. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
  9112. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9113. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
  9114. }
  9115. cond = JUMP(SLJIT_JUMP);
  9116. set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
  9117. }
  9118. else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
  9119. {
  9120. SLJIT_ASSERT(has_alternatives);
  9121. cond = JUMP(SLJIT_JUMP);
  9122. set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
  9123. }
  9124. else
  9125. SLJIT_ASSERT(!has_alternatives);
  9126. }
  9127. if (has_alternatives)
  9128. {
  9129. alt_count = sizeof(sljit_uw);
  9130. do
  9131. {
  9132. current->top = NULL;
  9133. current->topbacktracks = NULL;
  9134. current->nextbacktracks = NULL;
  9135. /* Conditional blocks always have an additional alternative, even if it is empty. */
  9136. if (*cc == OP_ALT)
  9137. {
  9138. ccprev = cc + 1 + LINK_SIZE;
  9139. cc += GET(cc, 1);
  9140. if (opcode != OP_COND && opcode != OP_SCOND)
  9141. {
  9142. if (opcode != OP_ONCE)
  9143. {
  9144. if (private_data_ptr != 0)
  9145. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  9146. else
  9147. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9148. }
  9149. else
  9150. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
  9151. }
  9152. compile_matchingpath(common, ccprev, cc, current);
  9153. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9154. return;
  9155. }
  9156. /* Instructions after the current alternative is successfully matched. */
  9157. /* There is a similar code in compile_bracket_matchingpath. */
  9158. if (opcode == OP_ONCE)
  9159. match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
  9160. stacksize = 0;
  9161. if (repeat_type == OP_MINUPTO)
  9162. {
  9163. /* We need to preserve the counter. TMP2 will be used below. */
  9164. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  9165. stacksize++;
  9166. }
  9167. if (ket != OP_KET || bra != OP_BRA)
  9168. stacksize++;
  9169. if (offset != 0)
  9170. {
  9171. if (common->capture_last_ptr != 0)
  9172. stacksize++;
  9173. if (common->optimized_cbracket[offset >> 1] == 0)
  9174. stacksize += 2;
  9175. }
  9176. if (opcode != OP_ONCE)
  9177. stacksize++;
  9178. if (stacksize > 0)
  9179. allocate_stack(common, stacksize);
  9180. stacksize = 0;
  9181. if (repeat_type == OP_MINUPTO)
  9182. {
  9183. /* TMP2 was set above. */
  9184. OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  9185. stacksize++;
  9186. }
  9187. if (ket != OP_KET || bra != OP_BRA)
  9188. {
  9189. if (ket != OP_KET)
  9190. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  9191. else
  9192. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  9193. stacksize++;
  9194. }
  9195. if (offset != 0)
  9196. stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
  9197. if (opcode != OP_ONCE)
  9198. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
  9199. if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
  9200. {
  9201. /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
  9202. SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  9203. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  9204. }
  9205. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
  9206. if (opcode != OP_ONCE)
  9207. {
  9208. if (alt_max > 4)
  9209. add_label_addr(common, next_update_addr++);
  9210. else
  9211. {
  9212. if (alt_count != 2 * sizeof(sljit_uw))
  9213. {
  9214. JUMPHERE(alt1);
  9215. if (alt_max == 3 && alt_count == sizeof(sljit_uw))
  9216. alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
  9217. }
  9218. else
  9219. {
  9220. JUMPHERE(alt2);
  9221. if (alt_max == 4)
  9222. alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
  9223. }
  9224. }
  9225. alt_count += sizeof(sljit_uw);
  9226. }
  9227. COMPILE_BACKTRACKINGPATH(current->top);
  9228. if (current->topbacktracks)
  9229. set_jumps(current->topbacktracks, LABEL());
  9230. SLJIT_ASSERT(!current->nextbacktracks);
  9231. }
  9232. while (*cc == OP_ALT);
  9233. if (cond != NULL)
  9234. {
  9235. SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
  9236. assert = CURRENT_AS(bracket_backtrack)->u.assert;
  9237. if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
  9238. {
  9239. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
  9240. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9241. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1));
  9242. }
  9243. JUMPHERE(cond);
  9244. }
  9245. /* Free the STR_PTR. */
  9246. if (private_data_ptr == 0)
  9247. free_stack(common, 1);
  9248. }
  9249. if (offset != 0)
  9250. {
  9251. /* Using both tmp register is better for instruction scheduling. */
  9252. if (common->optimized_cbracket[offset >> 1] != 0)
  9253. {
  9254. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9255. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9256. free_stack(common, 2);
  9257. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  9258. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  9259. }
  9260. else
  9261. {
  9262. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9263. free_stack(common, 1);
  9264. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  9265. }
  9266. }
  9267. else if (opcode == OP_SBRA || opcode == OP_SCOND)
  9268. {
  9269. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
  9270. free_stack(common, 1);
  9271. }
  9272. else if (opcode == OP_ONCE)
  9273. {
  9274. cc = ccbegin + GET(ccbegin, 1);
  9275. stacksize = needs_control_head ? 1 : 0;
  9276. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  9277. {
  9278. /* Reset head and drop saved frame. */
  9279. stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
  9280. }
  9281. else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
  9282. {
  9283. /* The STR_PTR must be released. */
  9284. stacksize++;
  9285. }
  9286. if (stacksize > 0)
  9287. free_stack(common, stacksize);
  9288. JUMPHERE(once);
  9289. /* Restore previous private_data_ptr */
  9290. if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
  9291. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
  9292. else if (ket == OP_KETRMIN)
  9293. {
  9294. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9295. /* See the comment below. */
  9296. free_stack(common, 2);
  9297. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
  9298. }
  9299. }
  9300. if (repeat_type == OP_EXACT)
  9301. {
  9302. OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  9303. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
  9304. CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
  9305. }
  9306. else if (ket == OP_KETRMAX)
  9307. {
  9308. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9309. if (bra != OP_BRAZERO)
  9310. free_stack(common, 1);
  9311. CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
  9312. if (bra == OP_BRAZERO)
  9313. {
  9314. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9315. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  9316. JUMPHERE(brazero);
  9317. free_stack(common, 1);
  9318. }
  9319. }
  9320. else if (ket == OP_KETRMIN)
  9321. {
  9322. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9323. /* OP_ONCE removes everything in case of a backtrack, so we don't
  9324. need to explicitly release the STR_PTR. The extra release would
  9325. affect badly the free_stack(2) above. */
  9326. if (opcode != OP_ONCE)
  9327. free_stack(common, 1);
  9328. CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
  9329. if (opcode == OP_ONCE)
  9330. free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
  9331. else if (bra == OP_BRAMINZERO)
  9332. free_stack(common, 1);
  9333. }
  9334. else if (bra == OP_BRAZERO)
  9335. {
  9336. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9337. JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  9338. JUMPHERE(brazero);
  9339. }
  9340. }
  /* Emits the backtracking path for one of the *POS bracket opcodes (the
     dispatcher in compile_backtrackingpath routes OP_BRAPOS, OP_CBRAPOS,
     OP_SBRAPOS, OP_SCBRAPOS and OP_BRAPOSZERO here).

     common  - shared JIT compiler state.
     current - backtrack record; its fields are read through
               CURRENT_AS(bracketpos_backtrack).

     Two cases are handled, selected by the record's framesize:
       framesize < 0  : for the capturing variants the capture pair (and,
                        when capture_last_ptr is in use, a third value) is
                        reloaded from the top stack slots; then `stacksize`
                        slots are released.
       framesize >= 0 : STACK_TOP is reloaded from the bracket's private
                        data slot, the shared revertframes routine is
                        called, and the private data slot is refilled from
                        the stack. */
  9341. static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  9342. {
  9343. DEFINE_COMPILER;
  9344. int offset;
  9345. struct sljit_jump *jump;
        /* NOTE(review): a negative framesize presumably means that no stack
           frame was saved for this bracket - confirm against get_framesize. */
  9346. if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
  9347. {
  9348. if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
  9349. {
        /* The bracket number is read from the opcode stream and doubled to
           index the ovector pair. */
  9350. offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
        /* STACK(0)/STACK(1) go back into OVECTOR(offset)/OVECTOR(offset + 1).
           TMP1 is reused for the optional STACK(2) -> capture_last_ptr copy,
           which is why that load sits between the two ovector stores. */
  9351. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9352. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  9353. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  9354. if (common->capture_last_ptr != 0)
  9355. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  9356. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
  9357. if (common->capture_last_ptr != 0)
  9358. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  9359. }
        /* Pending backtrack jumps land here, after the restore code above and
           before the stack release below. */
  9360. set_jumps(current->topbacktracks, LABEL());
  9361. free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  9362. return;
  9363. }
        /* framesize >= 0: reload STACK_TOP from the private data slot and call
           the shared revertframes routine. */
  9364. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
  9365. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9366. if (current->topbacktracks)
  9367. {
        /* The fall-through path skips the free_stack below; only code that
           arrives through topbacktracks executes it. */
  9368. jump = JUMP(SLJIT_JUMP);
  9369. set_jumps(current->topbacktracks, LABEL());
  9370. /* Drop the stack frame. */
  9371. free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  9372. JUMPHERE(jump);
  9373. }
        /* Refill the private data slot from the stack slot STACK(-framesize - 1). */
  9374. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
  9375. }
  /* Emits the backtracking path for OP_BRAMINZERO (the only opcode the
     dispatcher in compile_backtrackingpath routes here).

     This function emits no instructions itself. It compiles the construct
     that follows the OP_BRAMINZERO opcode by calling the matching-path
     (and, for brackets, backtracking-path) generators directly.

     common  - shared JIT compiler state.
     current - backtrack record; its top/topbacktracks/nextbacktracks lists
               are cleared first and are asserted to be empty on exit. */
  9376. static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  9377. {
        /* Scratch record on the C stack, used only for the assertion branch. */
  9378. assert_backtrack backtrack;
  9379. current->top = NULL;
  9380. current->topbacktracks = NULL;
  9381. current->nextbacktracks = NULL;
        /* current->cc[1] is the opcode right after OP_BRAMINZERO. Values above
           OP_ASSERTBACK_NOT are treated as brackets, the rest as assertions
           (the OP_BRAZERO case of compile_backtrackingpath uses the same test). */
  9382. if (current->cc[1] > OP_ASSERTBACK_NOT)
  9383. {
  9384. /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
  9385. compile_bracket_matchingpath(common, current->cc, current);
  9386. compile_bracket_backtrackingpath(common, current->top);
  9387. }
  9388. else
  9389. {
        /* Build a zeroed assert_backtrack carrying only the opcode position
           and the matchingpath label saved in the braminzero record. */
  9390. memset(&backtrack, 0, sizeof(backtrack));
  9391. backtrack.common.cc = current->cc;
  9392. backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  9393. /* Manual call of compile_assert_matchingpath. */
  9394. compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
  9395. }
  9396. SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
  9397. }
  /* Emits the backtracking path for the control verbs that the dispatcher in
     compile_backtrackingpath routes here: OP_THEN, OP_THEN_ARG, OP_PRUNE,
     OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.

     common  - shared JIT compiler state.
     current - backtrack record; *current->cc is the verb's opcode.

     The checks are made in this order, and the first one that applies ends
     the function:
       1. THEN / THEN_ARG with an active then_trap: walk the chain that
          starts at control_head_ptr until the matching trap entry is found,
          then jump to that trap's quit list.
       2. THEN / THEN_ARG inside a positive assertion: jump to
          positive_assert_quit.
       3. local_exit set: jump to the quit label / quit list.
       4. SKIP_ARG: call do_search_mark and jump to reset_match when the
          value left in STR_PTR is non-zero.
       5. Everything else: load STR_PTR (from STACK(0) for OP_SKIP, otherwise
          0) and jump to reset_match. */
  9398. static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  9399. {
  9400. DEFINE_COMPILER;
  9401. pcre_uchar opcode = *current->cc;
  9402. struct sljit_label *loop;
  9403. struct sljit_jump *jump;
  9404. if (opcode == OP_THEN || opcode == OP_THEN_ARG)
  9405. {
  9406. if (common->then_trap != NULL)
  9407. {
  9408. SLJIT_ASSERT(common->control_head_ptr != 0);
        /* Start at the control head. TMP1/TMP2 hold the two values an entry
           must carry to be accepted: the type_then_trap tag and the start
           value of the active trap. */
  9409. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9410. OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
  9411. OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
        /* The first iteration checks the head entry itself, so the "advance
           to the next entry" load at the loop label is jumped over once. */
  9412. jump = JUMP(SLJIT_JUMP);
  9413. loop = LABEL();
  9414. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9415. JUMPHERE(jump);
        /* Keep walking while slot 1 or slot 2 of the entry differs from the
           expected values. */
  9416. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
  9417. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
  9418. add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
  9419. return;
  9420. }
  9421. else if (common->positive_assert)
  9422. {
  9423. add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
  9424. return;
  9425. }
        /* A THEN with neither a trap nor a positive assertion falls through to
           the generic handling below. */
  9426. }
  9427. if (common->local_exit)
  9428. {
        /* Jump straight to quit_label if it is already set, otherwise queue
           the jump on the quit list. */
  9429. if (common->quit_label == NULL)
  9430. add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  9431. else
  9432. JUMPTO(SLJIT_JUMP, common->quit_label);
  9433. return;
  9434. }
  9435. if (opcode == OP_SKIP_ARG)
  9436. {
  9437. SLJIT_ASSERT(common->control_head_ptr != 0);
        /* TMP1 receives the control head and STACK_TOP is overwritten with the
           address current->cc + 2, so the real STACK_TOP is parked in LOCALS0
           around the call.
           NOTE(review): presumably TMP1 and STACK_TOP map to the first two
           argument registers and TMP1 to the return register of the call -
           confirm against the register #defines at the top of the file. */
  9438. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  9439. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  9440. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  9441. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
  9442. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
        /* A non-zero value in TMP1 after the call becomes the new STR_PTR and
           the match is restarted through reset_match. When it is zero the
           emitted code simply continues with whatever is generated next. */
  9443. OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  9444. add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
  9445. return;
  9446. }
        /* OP_SKIP reloads STR_PTR from the top stack slot; the remaining verbs
           reaching this point pass 0 to reset_match instead. */
  9447. if (opcode == OP_SKIP)
  9448. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9449. else
  9450. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
  9451. add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
  9452. }
  /* Emits the backtracking path for the virtual OP_THEN_TRAP opcode.

     common  - shared JIT compiler state.
     current - backtrack record; its fields are read through
               CURRENT_AS(then_trap_backtrack).

     If the record's then_trap field is set, no code is emitted: the field is
     copied into common->then_trap and the function returns.

     Otherwise two entry paths are generated, and both finish by storing TMP1
     into the control_head_ptr slot:
       - fall-through: TMP1 is read from STACK(size - 3) and `size` slots
         are released, where size is 3 plus the (non-negative) framesize;
       - through the record's quit list (reached from a THEN): frames are
         reverted when framesize >= 0, TMP1 is read from STACK(0) and 3
         slots are released. */
  9453. static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  9454. {
  9455. DEFINE_COMPILER;
  9456. struct sljit_jump *jump;
  9457. int size;
  9458. if (CURRENT_AS(then_trap_backtrack)->then_trap)
  9459. {
  9460. common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  9461. return;
  9462. }
        /* A negative framesize contributes nothing; the trap always occupies
           at least three slots. */
  9463. size = CURRENT_AS(then_trap_backtrack)->framesize;
  9464. size = 3 + (size < 0 ? 0 : size);
  9465. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
  9466. free_stack(common, size);
        /* Skip the quit-list entry code on the fall-through path. */
  9467. jump = JUMP(SLJIT_JUMP);
  9468. set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
  9469. /* STACK_TOP is set by THEN. */
  9470. if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
  9471. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9472. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9473. free_stack(common, 3);
  9474. JUMPHERE(jump);
        /* Both paths join here with the value for the control head in TMP1. */
  9475. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
  9476. }
  /* Emits the backtracking code for a chain of backtrack records.

     common  - shared JIT compiler state.
     current - first record of the chain; the chain is followed through
               current->prev until it is NULL.

     For every record, any jumps queued on its nextbacktracks list are bound
     to the current code position, then the opcode at current->cc selects the
     generator to run. common->then_trap is saved on entry and restored on
     exit, so changes made to it while walking the chain (see
     compile_then_trap_backtrackingpath) do not reach the caller. */
  9477. static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
  9478. {
  9479. DEFINE_COMPILER;
  9480. then_trap_backtrack *save_then_trap = common->then_trap;
  9481. while (current)
  9482. {
  9483. if (current->nextbacktracks != NULL)
  9484. set_jumps(current->nextbacktracks, LABEL());
  9485. switch(*current->cc)
  9486. {
  9487. case OP_SET_SOM:
        /* Pop one stack slot back into OVECTOR(0). */
  9488. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9489. free_stack(common, 1);
  9490. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
  9491. break;
        /* All repeat opcodes and the character class opcodes share one
           generator. */
  9492. case OP_STAR:
  9493. case OP_MINSTAR:
  9494. case OP_PLUS:
  9495. case OP_MINPLUS:
  9496. case OP_QUERY:
  9497. case OP_MINQUERY:
  9498. case OP_UPTO:
  9499. case OP_MINUPTO:
  9500. case OP_EXACT:
  9501. case OP_POSSTAR:
  9502. case OP_POSPLUS:
  9503. case OP_POSQUERY:
  9504. case OP_POSUPTO:
  9505. case OP_STARI:
  9506. case OP_MINSTARI:
  9507. case OP_PLUSI:
  9508. case OP_MINPLUSI:
  9509. case OP_QUERYI:
  9510. case OP_MINQUERYI:
  9511. case OP_UPTOI:
  9512. case OP_MINUPTOI:
  9513. case OP_EXACTI:
  9514. case OP_POSSTARI:
  9515. case OP_POSPLUSI:
  9516. case OP_POSQUERYI:
  9517. case OP_POSUPTOI:
  9518. case OP_NOTSTAR:
  9519. case OP_NOTMINSTAR:
  9520. case OP_NOTPLUS:
  9521. case OP_NOTMINPLUS:
  9522. case OP_NOTQUERY:
  9523. case OP_NOTMINQUERY:
  9524. case OP_NOTUPTO:
  9525. case OP_NOTMINUPTO:
  9526. case OP_NOTEXACT:
  9527. case OP_NOTPOSSTAR:
  9528. case OP_NOTPOSPLUS:
  9529. case OP_NOTPOSQUERY:
  9530. case OP_NOTPOSUPTO:
  9531. case OP_NOTSTARI:
  9532. case OP_NOTMINSTARI:
  9533. case OP_NOTPLUSI:
  9534. case OP_NOTMINPLUSI:
  9535. case OP_NOTQUERYI:
  9536. case OP_NOTMINQUERYI:
  9537. case OP_NOTUPTOI:
  9538. case OP_NOTMINUPTOI:
  9539. case OP_NOTEXACTI:
  9540. case OP_NOTPOSSTARI:
  9541. case OP_NOTPOSPLUSI:
  9542. case OP_NOTPOSQUERYI:
  9543. case OP_NOTPOSUPTOI:
  9544. case OP_TYPESTAR:
  9545. case OP_TYPEMINSTAR:
  9546. case OP_TYPEPLUS:
  9547. case OP_TYPEMINPLUS:
  9548. case OP_TYPEQUERY:
  9549. case OP_TYPEMINQUERY:
  9550. case OP_TYPEUPTO:
  9551. case OP_TYPEMINUPTO:
  9552. case OP_TYPEEXACT:
  9553. case OP_TYPEPOSSTAR:
  9554. case OP_TYPEPOSPLUS:
  9555. case OP_TYPEPOSQUERY:
  9556. case OP_TYPEPOSUPTO:
  9557. case OP_CLASS:
  9558. case OP_NCLASS:
  9559. #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  9560. case OP_XCLASS:
  9561. #endif
  9562. compile_iterator_backtrackingpath(common, current);
  9563. break;
  9564. case OP_REF:
  9565. case OP_REFI:
  9566. case OP_DNREF:
  9567. case OP_DNREFI:
  9568. compile_ref_iterator_backtrackingpath(common, current);
  9569. break;
  9570. case OP_RECURSE:
  9571. compile_recurse_backtrackingpath(common, current);
  9572. break;
  9573. case OP_ASSERT:
  9574. case OP_ASSERT_NOT:
  9575. case OP_ASSERTBACK:
  9576. case OP_ASSERTBACK_NOT:
  9577. compile_assert_backtrackingpath(common, current);
  9578. break;
  9579. case OP_ONCE:
  9580. case OP_ONCE_NC:
  9581. case OP_BRA:
  9582. case OP_CBRA:
  9583. case OP_COND:
  9584. case OP_SBRA:
  9585. case OP_SCBRA:
  9586. case OP_SCOND:
  9587. compile_bracket_backtrackingpath(common, current);
  9588. break;
  9589. case OP_BRAZERO:
        /* The opcode following OP_BRAZERO decides between the bracket and the
           assertion generator. */
  9590. if (current->cc[1] > OP_ASSERTBACK_NOT)
  9591. compile_bracket_backtrackingpath(common, current);
  9592. else
  9593. compile_assert_backtrackingpath(common, current);
  9594. break;
  9595. case OP_BRAPOS:
  9596. case OP_CBRAPOS:
  9597. case OP_SBRAPOS:
  9598. case OP_SCBRAPOS:
  9599. case OP_BRAPOSZERO:
  9600. compile_bracketpos_backtrackingpath(common, current);
  9601. break;
  9602. case OP_BRAMINZERO:
  9603. compile_braminzero_backtrackingpath(common, current);
  9604. break;
  9605. case OP_MARK:
        /* Pop the value for mark_ptr. With has_skip_arg five slots are
           released instead of one: mark_ptr is taken from slot 4 and the
           control head from slot 0. */
  9606. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
  9607. if (common->has_skip_arg)
  9608. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9609. free_stack(common, common->has_skip_arg ? 5 : 1);
  9610. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
  9611. if (common->has_skip_arg)
  9612. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  9613. break;
  9614. case OP_THEN:
  9615. case OP_THEN_ARG:
  9616. case OP_PRUNE:
  9617. case OP_PRUNE_ARG:
  9618. case OP_SKIP:
  9619. case OP_SKIP_ARG:
  9620. compile_control_verb_backtrackingpath(common, current);
  9621. break;
  9622. case OP_COMMIT:
        /* Unless local_exit is set, the return register is loaded with
           PCRE_ERROR_NOMATCH; in both cases control goes to the quit label or
           quit list. */
  9623. if (!common->local_exit)
  9624. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
  9625. if (common->quit_label == NULL)
  9626. add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  9627. else
  9628. JUMPTO(SLJIT_JUMP, common->quit_label);
  9629. break;
        /* These opcodes emit no instructions here; their pending backtrack
           jumps are only bound to the current position. */
  9630. case OP_CALLOUT:
  9631. case OP_FAIL:
  9632. case OP_ACCEPT:
  9633. case OP_ASSERT_ACCEPT:
  9634. set_jumps(current->topbacktracks, LABEL());
  9635. break;
  9636. case OP_THEN_TRAP:
  9637. /* A virtual opcode for then traps. */
  9638. compile_then_trap_backtrackingpath(common, current);
  9639. break;
  9640. default:
  9641. SLJIT_UNREACHABLE();
  9642. break;
  9643. }
  9644. current = current->prev;
  9645. }
  9646. common->then_trap = save_then_trap;
  9647. }
  /* Emits the out-of-line routine for the recursion entry described by
     common->currententry.

     common - shared JIT compiler state. currententry->entry must be NULL and
              recursive_head_ptr non-zero on entry (asserted below).

     Shape of the generated routine:
       1. Entry label; all jumps queued on currententry->calls are bound to it.
       2. Prologue: stack space is reserved for the private data copy, the
          optional frame and the optional alternative slot; the value placed
          in TMP2 by the fast-enter is stored in the last reserved slot.
       3. Each alternative of the bracket is compiled in turn; a successful
          alternative jumps to the accept list.
       4. Epilogue: TMP3 carries the result (0 = no alternative matched or
          quit, 1 = accepted). The private data is copied back, the stack is
          released, the saved head pointers are restored, the result is moved
          to TMP1 and the routine returns through the slot at STACK(-1).

     Returns early, leaving the routine unfinished, when the sljit compiler
     reports an error. */
  9648. static SLJIT_INLINE void compile_recurse(compiler_common *common)
  9649. {
  9650. DEFINE_COMPILER;
        /* cc points at the bracket opcode, ccbegin at its first inner opcode
           (brackets other than OP_BRA carry an extra IMM2_SIZE field), ccend
           just before the closing 1 + LINK_SIZE units. */
  9651. pcre_uchar *cc = common->start + common->currententry->start;
  9652. pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
  9653. pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
  9654. BOOL needs_control_head;
  9655. int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
  9656. int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
  9657. int alternativesize;
  9658. BOOL needs_frame;
  9659. backtrack_common altbacktrack;
  9660. struct sljit_jump *jump;
  9661. /* Recurse captures then. */
  9662. common->then_trap = NULL;
  9663. SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
        /* A negative framesize means no frame is needed; it is clamped to 0 so
           it can be used directly in the size sums below. */
  9664. needs_frame = framesize >= 0;
  9665. if (!needs_frame)
  9666. framesize = 0;
        /* One extra slot is reserved when the bracket has more than one
           alternative; it holds STR_PTR so later alternatives can restart
           from the same position. */
  9667. alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
  9668. SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
  9669. common->currententry->entry = LABEL();
  9670. set_jumps(common->currententry->calls, common->currententry->entry);
        /* NOTE(review): TMP2 presumably receives the return address here; it
           is stored in the last reserved slot and the routine returns through
           STACK(-1) at the end - confirm against the sljit fast-call docs. */
  9671. sljit_emit_fast_enter(compiler, TMP2, 0);
  9672. count_match(common);
  9673. allocate_stack(common, private_data_size + framesize + alternativesize);
  9674. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
  9675. copy_private_data(common, ccbegin, ccend, TRUE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
  9676. if (needs_control_head)
  9677. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
        /* Remember this invocation's STACK_TOP; the quit and accept paths
           reload it from recursive_head_ptr. */
  9678. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
  9679. if (needs_frame)
  9680. init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
  9681. if (alternativesize > 0)
  9682. OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
        /* Start with empty quit/accept lists and labels for this routine. */
  9683. memset(&altbacktrack, 0, sizeof(backtrack_common));
  9684. common->quit_label = NULL;
  9685. common->accept_label = NULL;
  9686. common->quit = NULL;
  9687. common->accept = NULL;
  9688. altbacktrack.cc = ccbegin;
  9689. cc += GET(cc, 1);
        /* One iteration per alternative: [altbacktrack.cc, cc) is the current
           alternative's opcode range. */
  9690. while (1)
  9691. {
  9692. altbacktrack.top = NULL;
  9693. altbacktrack.topbacktracks = NULL;
        /* Every alternative after the first restarts from the saved STR_PTR. */
  9694. if (altbacktrack.cc != ccbegin)
  9695. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  9696. compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  9697. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9698. return;
        /* Reaching the end of the matching path means the alternative matched. */
  9699. add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  9700. compile_backtrackingpath(common, altbacktrack.top);
  9701. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  9702. return;
  9703. set_jumps(altbacktrack.topbacktracks, LABEL());
  9704. if (*cc != OP_ALT)
  9705. break;
  9706. altbacktrack.cc = cc + 1 + LINK_SIZE;
  9707. cc += GET(cc, 1);
  9708. }
  9709. /* None of them matched. */
  9710. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  9711. jump = JUMP(SLJIT_JUMP);
  9712. if (common->quit != NULL)
  9713. {
        /* Quit path: reload STACK_TOP, revert the frame if one was created,
           and report failure. The quit list is then reused for the single
           jump that skips the accept code below. */
  9714. set_jumps(common->quit, LABEL());
  9715. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  9716. if (needs_frame)
  9717. {
  9718. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  9719. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9720. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  9721. }
  9722. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  9723. common->quit = NULL;
  9724. add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  9725. }
        /* Accept path: same stack handling as the quit path, but the result
           is 1. */
  9726. set_jumps(common->accept, LABEL());
  9727. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  9728. if (needs_frame)
  9729. {
  9730. OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  9731. add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  9732. OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  9733. }
  9734. OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
        /* Common exit: the "none matched" jump and the jump left on the quit
           list both land here with the result in TMP3. */
  9735. JUMPHERE(jump);
  9736. if (common->quit != NULL)
  9737. set_jumps(common->quit, LABEL());
  9738. copy_private_data(common, ccbegin, ccend, FALSE, framesize + alternativesize, private_data_size + framesize + alternativesize, needs_control_head);
  9739. free_stack(common, private_data_size + framesize + alternativesize);
        /* The released slots are still read through negative STACK() indexes:
           the previous recursive head (and control head, when used) are
           restored from them, the result moves from TMP3 to TMP1, and the
           routine returns through STACK(-1). */
  9740. if (needs_control_head)
  9741. {
  9742. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
  9743. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  9744. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
  9745. OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  9746. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  9747. }
  9748. else
  9749. {
  9750. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  9751. OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  9752. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
  9753. }
  9754. sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
  9755. }
  9756. #undef COMPILE_BACKTRACKINGPATH
  9757. #undef CURRENT_AS
  9758. void
  9759. PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
  9760. {
  9761. struct sljit_compiler *compiler;
  9762. backtrack_common rootbacktrack;
  9763. compiler_common common_data;
  9764. compiler_common *common = &common_data;
  9765. const sljit_u8 *tables = re->tables;
  9766. pcre_study_data *study;
  9767. int private_data_size;
  9768. pcre_uchar *ccend;
  9769. executable_functions *functions;
  9770. void *executable_func;
  9771. sljit_uw executable_size;
  9772. sljit_uw total_length;
  9773. label_addr_list *label_addr;
  9774. struct sljit_label *mainloop_label = NULL;
  9775. struct sljit_label *continue_match_label;
  9776. struct sljit_label *empty_match_found_label = NULL;
  9777. struct sljit_label *empty_match_backtrack_label = NULL;
  9778. struct sljit_label *reset_match_label;
  9779. struct sljit_label *quit_label;
  9780. struct sljit_jump *jump;
  9781. struct sljit_jump *minlength_check_failed = NULL;
  9782. struct sljit_jump *reqbyte_notfound = NULL;
  9783. struct sljit_jump *empty_match = NULL;
  9784. SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
  9785. study = extra->study_data;
  9786. if (!tables)
  9787. tables = PRIV(default_tables);
  9788. memset(&rootbacktrack, 0, sizeof(backtrack_common));
  9789. memset(common, 0, sizeof(compiler_common));
  9790. rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
  9791. common->start = rootbacktrack.cc;
  9792. common->read_only_data_head = NULL;
  9793. common->fcc = tables + fcc_offset;
  9794. common->lcc = (sljit_sw)(tables + lcc_offset);
  9795. common->mode = mode;
  9796. common->might_be_empty = study->minlength == 0;
  9797. common->nltype = NLTYPE_FIXED;
  9798. switch(re->options & PCRE_NEWLINE_BITS)
  9799. {
  9800. case 0:
  9801. /* Compile-time default */
  9802. switch(NEWLINE)
  9803. {
  9804. case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  9805. case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  9806. default: common->newline = NEWLINE; break;
  9807. }
  9808. break;
  9809. case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
  9810. case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
  9811. case PCRE_NEWLINE_CR+
  9812. PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
  9813. case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
  9814. case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
  9815. default: return;
  9816. }
  9817. common->nlmax = READ_CHAR_MAX;
  9818. common->nlmin = 0;
  9819. if ((re->options & PCRE_BSR_ANYCRLF) != 0)
  9820. common->bsr_nltype = NLTYPE_ANYCRLF;
  9821. else if ((re->options & PCRE_BSR_UNICODE) != 0)
  9822. common->bsr_nltype = NLTYPE_ANY;
  9823. else
  9824. {
  9825. #ifdef BSR_ANYCRLF
  9826. common->bsr_nltype = NLTYPE_ANYCRLF;
  9827. #else
  9828. common->bsr_nltype = NLTYPE_ANY;
  9829. #endif
  9830. }
  9831. common->bsr_nlmax = READ_CHAR_MAX;
  9832. common->bsr_nlmin = 0;
  9833. common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
  9834. common->ctypes = (sljit_sw)(tables + ctypes_offset);
  9835. common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
  9836. common->name_count = re->name_count;
  9837. common->name_entry_size = re->name_entry_size;
  9838. common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
  9839. #ifdef SUPPORT_UTF
  9840. /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
  9841. common->utf = (re->options & PCRE_UTF8) != 0;
  9842. #ifdef SUPPORT_UCP
  9843. common->use_ucp = (re->options & PCRE_UCP) != 0;
  9844. #endif
  9845. if (common->utf)
  9846. {
  9847. if (common->nltype == NLTYPE_ANY)
  9848. common->nlmax = 0x2029;
  9849. else if (common->nltype == NLTYPE_ANYCRLF)
  9850. common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  9851. else
  9852. {
  9853. /* We only care about the first newline character. */
  9854. common->nlmax = common->newline & 0xff;
  9855. }
  9856. if (common->nltype == NLTYPE_FIXED)
  9857. common->nlmin = common->newline & 0xff;
  9858. else
  9859. common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  9860. if (common->bsr_nltype == NLTYPE_ANY)
  9861. common->bsr_nlmax = 0x2029;
  9862. else
  9863. common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
  9864. common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
  9865. }
  9866. #endif /* SUPPORT_UTF */
  9867. ccend = bracketend(common->start);
  9868. /* Calculate the local space size on the stack. */
  9869. common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
  9870. common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data);
  9871. if (!common->optimized_cbracket)
  9872. return;
  9873. #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
  9874. memset(common->optimized_cbracket, 0, re->top_bracket + 1);
  9875. #else
  9876. memset(common->optimized_cbracket, 1, re->top_bracket + 1);
  9877. #endif
  9878. SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
  9879. #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
  9880. common->capture_last_ptr = common->ovector_start;
  9881. common->ovector_start += sizeof(sljit_sw);
  9882. #endif
  9883. if (!check_opcode_types(common, common->start, ccend))
  9884. {
  9885. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  9886. return;
  9887. }
  9888. /* Checking flags and updating ovector_start. */
  9889. if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
  9890. {
  9891. common->req_char_ptr = common->ovector_start;
  9892. common->ovector_start += sizeof(sljit_sw);
  9893. }
  9894. if (mode != JIT_COMPILE)
  9895. {
  9896. common->start_used_ptr = common->ovector_start;
  9897. common->ovector_start += sizeof(sljit_sw);
  9898. if (mode == JIT_PARTIAL_SOFT_COMPILE)
  9899. {
  9900. common->hit_start = common->ovector_start;
  9901. common->ovector_start += 2 * sizeof(sljit_sw);
  9902. }
  9903. }
  9904. if ((re->options & PCRE_FIRSTLINE) != 0)
  9905. {
  9906. common->match_end_ptr = common->ovector_start;
  9907. common->ovector_start += sizeof(sljit_sw);
  9908. }
  9909. #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
  9910. common->control_head_ptr = 1;
  9911. #endif
  9912. if (common->control_head_ptr != 0)
  9913. {
  9914. common->control_head_ptr = common->ovector_start;
  9915. common->ovector_start += sizeof(sljit_sw);
  9916. }
  9917. if (common->has_set_som)
  9918. {
  9919. /* Saving the real start pointer is necessary. */
  9920. common->start_ptr = common->ovector_start;
  9921. common->ovector_start += sizeof(sljit_sw);
  9922. }
  9923. /* Aligning ovector to even number of sljit words. */
  9924. if ((common->ovector_start & sizeof(sljit_sw)) != 0)
  9925. common->ovector_start += sizeof(sljit_sw);
  9926. if (common->start_ptr == 0)
  9927. common->start_ptr = OVECTOR(0);
  9928. /* Capturing brackets cannot be optimized if callouts are allowed. */
  9929. if (common->capture_last_ptr != 0)
  9930. memset(common->optimized_cbracket, 0, re->top_bracket + 1);
  9931. SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
  9932. common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
  9933. total_length = ccend - common->start;
  9934. common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data);
  9935. if (!common->private_data_ptrs)
  9936. {
  9937. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  9938. return;
  9939. }
  9940. memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
  9941. private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
  9942. set_private_data_ptrs(common, &private_data_size, ccend);
  9943. if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
  9944. {
  9945. if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
  9946. detect_fast_fail(common, common->start, &private_data_size, 4);
  9947. }
  9948. SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
  9949. if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
  9950. {
  9951. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  9952. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  9953. return;
  9954. }
  9955. if (common->has_then)
  9956. {
  9957. common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
  9958. memset(common->then_offsets, 0, total_length);
  9959. set_then_offsets(common, common->start, NULL);
  9960. }
  9961. compiler = sljit_create_compiler(NULL);
  9962. if (!compiler)
  9963. {
  9964. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  9965. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  9966. return;
  9967. }
  9968. common->compiler = compiler;
  9969. /* Main pcre_jit_exec entry. */
  9970. sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
  9971. /* Register init. */
  9972. reset_ovector(common, (re->top_bracket + 1) * 2);
  9973. if (common->req_char_ptr != 0)
  9974. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
  9975. OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
  9976. OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
  9977. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  9978. OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
  9979. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
  9980. OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
  9981. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
  9982. OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
  9983. OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  9984. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
  9985. if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
  9986. reset_fast_fail(common);
  9987. if (mode == JIT_PARTIAL_SOFT_COMPILE)
  9988. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  9989. if (common->mark_ptr != 0)
  9990. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  9991. if (common->control_head_ptr != 0)
  9992. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  9993. /* Main part of the matching */
  9994. if ((re->options & PCRE_ANCHORED) == 0)
  9995. {
  9996. mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0);
  9997. continue_match_label = LABEL();
  9998. /* Forward search if possible. */
  9999. if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
  10000. {
  10001. if (mode == JIT_COMPILE && fast_forward_first_n_chars(common))
  10002. ;
  10003. else if ((re->flags & PCRE_FIRSTSET) != 0)
  10004. fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0);
  10005. else if ((re->flags & PCRE_STARTLINE) != 0)
  10006. fast_forward_newline(common);
  10007. else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
  10008. fast_forward_start_bits(common, study->start_bits);
  10009. }
  10010. }
  10011. else
  10012. continue_match_label = LABEL();
  10013. if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0)
  10014. {
  10015. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
  10016. OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
  10017. minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
  10018. }
  10019. if (common->req_char_ptr != 0)
  10020. reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
  10021. /* Store the current STR_PTR in OVECTOR(0). */
  10022. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
  10023. /* Copy the limit of allowed recursions. */
  10024. OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
  10025. if (common->capture_last_ptr != 0)
  10026. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);
  10027. if (common->fast_forward_bc_ptr != NULL)
  10028. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
  10029. if (common->start_ptr != OVECTOR(0))
  10030. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
  10031. /* Copy the beginning of the string. */
  10032. if (mode == JIT_PARTIAL_SOFT_COMPILE)
  10033. {
  10034. jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
  10035. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  10036. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
  10037. JUMPHERE(jump);
  10038. }
  10039. else if (mode == JIT_PARTIAL_HARD_COMPILE)
  10040. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  10041. compile_matchingpath(common, common->start, ccend, &rootbacktrack);
  10042. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  10043. {
  10044. sljit_free_compiler(compiler);
  10045. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  10046. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  10047. free_read_only_data(common->read_only_data_head, compiler->allocator_data);
  10048. return;
  10049. }
  10050. if (common->might_be_empty)
  10051. {
  10052. empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  10053. empty_match_found_label = LABEL();
  10054. }
  10055. common->accept_label = LABEL();
  10056. if (common->accept != NULL)
  10057. set_jumps(common->accept, common->accept_label);
  10058. /* This means we have a match. Update the ovector. */
  10059. copy_ovector(common, re->top_bracket + 1);
  10060. common->quit_label = common->forced_quit_label = LABEL();
  10061. if (common->quit != NULL)
  10062. set_jumps(common->quit, common->quit_label);
  10063. if (common->forced_quit != NULL)
  10064. set_jumps(common->forced_quit, common->forced_quit_label);
  10065. if (minlength_check_failed != NULL)
  10066. SET_LABEL(minlength_check_failed, common->forced_quit_label);
  10067. sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
  10068. if (mode != JIT_COMPILE)
  10069. {
  10070. common->partialmatchlabel = LABEL();
  10071. set_jumps(common->partialmatch, common->partialmatchlabel);
  10072. return_with_partial_match(common, common->quit_label);
  10073. }
  10074. if (common->might_be_empty)
  10075. empty_match_backtrack_label = LABEL();
  10076. compile_backtrackingpath(common, rootbacktrack.top);
  10077. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  10078. {
  10079. sljit_free_compiler(compiler);
  10080. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  10081. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  10082. free_read_only_data(common->read_only_data_head, compiler->allocator_data);
  10083. return;
  10084. }
  10085. SLJIT_ASSERT(rootbacktrack.prev == NULL);
  10086. reset_match_label = LABEL();
  10087. if (mode == JIT_PARTIAL_SOFT_COMPILE)
  10088. {
  10089. /* Update hit_start only in the first time. */
  10090. jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  10091. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
  10092. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  10093. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
  10094. JUMPHERE(jump);
  10095. }
  10096. /* Check we have remaining characters. */
  10097. if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
  10098. {
  10099. SLJIT_ASSERT(common->match_end_ptr != 0);
  10100. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  10101. }
  10102. OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
  10103. (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
  10104. if ((re->options & PCRE_ANCHORED) == 0)
  10105. {
  10106. if (common->ff_newline_shortcut != NULL)
  10107. {
  10108. if ((re->options & PCRE_FIRSTLINE) == 0)
  10109. CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
  10110. /* There cannot be more newlines here. */
  10111. }
  10112. else
  10113. CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label);
  10114. }
  10115. /* No more remaining characters. */
  10116. if (reqbyte_notfound != NULL)
  10117. JUMPHERE(reqbyte_notfound);
  10118. if (mode == JIT_PARTIAL_SOFT_COMPILE)
  10119. CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
  10120. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
  10121. JUMPTO(SLJIT_JUMP, common->quit_label);
  10122. flush_stubs(common);
  10123. if (common->might_be_empty)
  10124. {
  10125. JUMPHERE(empty_match);
  10126. OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  10127. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
  10128. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label);
  10129. OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
  10130. CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label);
  10131. OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  10132. CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
  10133. JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
  10134. }
  10135. common->fast_forward_bc_ptr = NULL;
  10136. common->fast_fail_start_ptr = 0;
  10137. common->fast_fail_end_ptr = 0;
  10138. common->currententry = common->entries;
  10139. common->local_exit = TRUE;
  10140. quit_label = common->quit_label;
  10141. while (common->currententry != NULL)
  10142. {
  10143. /* Might add new entries. */
  10144. compile_recurse(common);
  10145. if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  10146. {
  10147. sljit_free_compiler(compiler);
  10148. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  10149. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  10150. free_read_only_data(common->read_only_data_head, compiler->allocator_data);
  10151. return;
  10152. }
  10153. flush_stubs(common);
  10154. common->currententry = common->currententry->next;
  10155. }
  10156. common->local_exit = FALSE;
  10157. common->quit_label = quit_label;
  10158. /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
  10159. /* This is a (really) rare case. */
  10160. set_jumps(common->stackalloc, LABEL());
  10161. /* RETURN_ADDR is not a saved register. */
  10162. sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  10163. SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
  10164. OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0);
  10165. OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  10166. OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
  10167. OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
  10168. OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
  10169. sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
  10170. jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
  10171. OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
  10172. OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
  10173. OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  10174. OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  10175. sljit_emit_fast_return(compiler, TMP1, 0);
  10176. /* Allocation failed. */
  10177. JUMPHERE(jump);
  10178. /* We break the return address cache here, but this is a really rare case. */
  10179. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
  10180. JUMPTO(SLJIT_JUMP, common->quit_label);
  10181. /* Call limit reached. */
  10182. set_jumps(common->calllimit, LABEL());
  10183. OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
  10184. JUMPTO(SLJIT_JUMP, common->quit_label);
  10185. if (common->revertframes != NULL)
  10186. {
  10187. set_jumps(common->revertframes, LABEL());
  10188. do_revertframes(common);
  10189. }
  10190. if (common->wordboundary != NULL)
  10191. {
  10192. set_jumps(common->wordboundary, LABEL());
  10193. check_wordboundary(common);
  10194. }
  10195. if (common->anynewline != NULL)
  10196. {
  10197. set_jumps(common->anynewline, LABEL());
  10198. check_anynewline(common);
  10199. }
  10200. if (common->hspace != NULL)
  10201. {
  10202. set_jumps(common->hspace, LABEL());
  10203. check_hspace(common);
  10204. }
  10205. if (common->vspace != NULL)
  10206. {
  10207. set_jumps(common->vspace, LABEL());
  10208. check_vspace(common);
  10209. }
  10210. if (common->casefulcmp != NULL)
  10211. {
  10212. set_jumps(common->casefulcmp, LABEL());
  10213. do_casefulcmp(common);
  10214. }
  10215. if (common->caselesscmp != NULL)
  10216. {
  10217. set_jumps(common->caselesscmp, LABEL());
  10218. do_caselesscmp(common);
  10219. }
  10220. if (common->reset_match != NULL)
  10221. {
  10222. set_jumps(common->reset_match, LABEL());
  10223. do_reset_match(common, (re->top_bracket + 1) * 2);
  10224. CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
  10225. OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  10226. JUMPTO(SLJIT_JUMP, reset_match_label);
  10227. }
  10228. #ifdef SUPPORT_UTF
  10229. #ifdef COMPILE_PCRE8
  10230. if (common->utfreadchar != NULL)
  10231. {
  10232. set_jumps(common->utfreadchar, LABEL());
  10233. do_utfreadchar(common);
  10234. }
  10235. if (common->utfreadchar16 != NULL)
  10236. {
  10237. set_jumps(common->utfreadchar16, LABEL());
  10238. do_utfreadchar16(common);
  10239. }
  10240. if (common->utfreadtype8 != NULL)
  10241. {
  10242. set_jumps(common->utfreadtype8, LABEL());
  10243. do_utfreadtype8(common);
  10244. }
  10245. #endif /* COMPILE_PCRE8 */
  10246. #endif /* SUPPORT_UTF */
  10247. #ifdef SUPPORT_UCP
  10248. if (common->getucd != NULL)
  10249. {
  10250. set_jumps(common->getucd, LABEL());
  10251. do_getucd(common);
  10252. }
  10253. #endif
  10254. SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data);
  10255. SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data);
  10256. executable_func = sljit_generate_code(compiler);
  10257. executable_size = sljit_get_generated_code_size(compiler);
  10258. label_addr = common->label_addrs;
  10259. while (label_addr != NULL)
  10260. {
  10261. *label_addr->update_addr = sljit_get_label_addr(label_addr->label);
  10262. label_addr = label_addr->next;
  10263. }
  10264. sljit_free_compiler(compiler);
  10265. if (executable_func == NULL)
  10266. {
  10267. free_read_only_data(common->read_only_data_head, compiler->allocator_data);
  10268. return;
  10269. }
  10270. /* Reuse the function descriptor if possible. */
  10271. if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
  10272. functions = (executable_functions *)extra->executable_jit;
  10273. else
  10274. {
  10275. /* Note: If your memory-checker has flagged the allocation below as a
  10276. * memory leak, it is probably because you either forgot to call
  10277. * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or
  10278. * pcre16_extra) object, or you called said function after having
  10279. * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field
  10280. * of the object. (The function will only free the JIT data if the
  10281. * bit remains set, as the bit indicates that the pointer to the data
  10282. * is valid.)
  10283. */
  10284. functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data);
  10285. if (functions == NULL)
  10286. {
  10287. /* This case is highly unlikely since we just recently
  10288. freed a lot of memory. Not impossible though. */
  10289. sljit_free_code(executable_func);
  10290. free_read_only_data(common->read_only_data_head, compiler->allocator_data);
  10291. return;
  10292. }
  10293. memset(functions, 0, sizeof(executable_functions));
  10294. functions->top_bracket = (re->top_bracket + 1) * 2;
  10295. functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0;
  10296. extra->executable_jit = functions;
  10297. extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
  10298. }
  10299. functions->executable_funcs[mode] = executable_func;
  10300. functions->read_only_data_heads[mode] = common->read_only_data_head;
  10301. functions->executable_sizes[mode] = executable_size;
  10302. }
  10303. static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
  10304. {
  10305. union {
  10306. void *executable_func;
  10307. jit_function call_executable_func;
  10308. } convert_executable_func;
  10309. sljit_u8 local_space[MACHINE_STACK_SIZE];
  10310. struct sljit_stack local_stack;
  10311. local_stack.min_start = local_space;
  10312. local_stack.start = local_space;
  10313. local_stack.end = local_space + MACHINE_STACK_SIZE;
  10314. local_stack.top = local_space + MACHINE_STACK_SIZE;
  10315. arguments->stack = &local_stack;
  10316. convert_executable_func.executable_func = executable_func;
  10317. return convert_executable_func.call_executable_func(arguments);
  10318. }
  10319. int
  10320. PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
  10321. int length, int start_offset, int options, int *offsets, int offset_count)
  10322. {
  10323. executable_functions *functions = (executable_functions *)extra_data->executable_jit;
  10324. union {
  10325. void *executable_func;
  10326. jit_function call_executable_func;
  10327. } convert_executable_func;
  10328. jit_arguments arguments;
  10329. int max_offset_count;
  10330. int retval;
  10331. int mode = JIT_COMPILE;
  10332. if ((options & PCRE_PARTIAL_HARD) != 0)
  10333. mode = JIT_PARTIAL_HARD_COMPILE;
  10334. else if ((options & PCRE_PARTIAL_SOFT) != 0)
  10335. mode = JIT_PARTIAL_SOFT_COMPILE;
  10336. if (functions->executable_funcs[mode] == NULL)
  10337. return PCRE_ERROR_JIT_BADOPTION;
  10338. /* Sanity checks should be handled by pcre_exec. */
  10339. arguments.str = subject + start_offset;
  10340. arguments.begin = subject;
  10341. arguments.end = subject + length;
  10342. arguments.mark_ptr = NULL;
  10343. /* JIT decreases this value less frequently than the interpreter. */
  10344. arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
  10345. if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
  10346. arguments.limit_match = functions->limit_match;
  10347. arguments.notbol = (options & PCRE_NOTBOL) != 0;
  10348. arguments.noteol = (options & PCRE_NOTEOL) != 0;
  10349. arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
  10350. arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
  10351. arguments.offsets = offsets;
  10352. arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
  10353. arguments.real_offset_count = offset_count;
  10354. /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
  10355. the output vector for storing captured strings, with the remainder used as
  10356. workspace. We don't need the workspace here. For compatibility, we limit the
  10357. number of captured strings in the same way as pcre_exec(), so that the user
  10358. gets the same result with and without JIT. */
  10359. if (offset_count != 2)
  10360. offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
  10361. max_offset_count = functions->top_bracket;
  10362. if (offset_count > max_offset_count)
  10363. offset_count = max_offset_count;
  10364. arguments.offset_count = offset_count;
  10365. if (functions->callback)
  10366. arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata);
  10367. else
  10368. arguments.stack = (struct sljit_stack *)functions->userdata;
  10369. if (arguments.stack == NULL)
  10370. retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]);
  10371. else
  10372. {
  10373. convert_executable_func.executable_func = functions->executable_funcs[mode];
  10374. retval = convert_executable_func.call_executable_func(&arguments);
  10375. }
  10376. if (retval * 2 > offset_count)
  10377. retval = 0;
  10378. if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
  10379. *(extra_data->mark) = arguments.mark_ptr;
  10380. return retval;
  10381. }
  10382. #if defined COMPILE_PCRE8
  10383. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  10384. pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
  10385. PCRE_SPTR subject, int length, int start_offset, int options,
  10386. int *offsets, int offset_count, pcre_jit_stack *stack)
  10387. #elif defined COMPILE_PCRE16
  10388. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  10389. pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
  10390. PCRE_SPTR16 subject, int length, int start_offset, int options,
  10391. int *offsets, int offset_count, pcre16_jit_stack *stack)
  10392. #elif defined COMPILE_PCRE32
  10393. PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
  10394. pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
  10395. PCRE_SPTR32 subject, int length, int start_offset, int options,
  10396. int *offsets, int offset_count, pcre32_jit_stack *stack)
  10397. #endif
  10398. {
  10399. pcre_uchar *subject_ptr = (pcre_uchar *)subject;
  10400. executable_functions *functions = (executable_functions *)extra_data->executable_jit;
  10401. union {
  10402. void *executable_func;
  10403. jit_function call_executable_func;
  10404. } convert_executable_func;
  10405. jit_arguments arguments;
  10406. int max_offset_count;
  10407. int retval;
  10408. int mode = JIT_COMPILE;
  10409. SLJIT_UNUSED_ARG(argument_re);
  10410. /* Plausibility checks */
  10411. if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
  10412. if ((options & PCRE_PARTIAL_HARD) != 0)
  10413. mode = JIT_PARTIAL_HARD_COMPILE;
  10414. else if ((options & PCRE_PARTIAL_SOFT) != 0)
  10415. mode = JIT_PARTIAL_SOFT_COMPILE;
  10416. if (functions == NULL || functions->executable_funcs[mode] == NULL)
  10417. return PCRE_ERROR_JIT_BADOPTION;
  10418. /* Sanity checks should be handled by pcre_exec. */
  10419. arguments.stack = (struct sljit_stack *)stack;
  10420. arguments.str = subject_ptr + start_offset;
  10421. arguments.begin = subject_ptr;
  10422. arguments.end = subject_ptr + length;
  10423. arguments.mark_ptr = NULL;
  10424. /* JIT decreases this value less frequently than the interpreter. */
  10425. arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit);
  10426. if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match)
  10427. arguments.limit_match = functions->limit_match;
  10428. arguments.notbol = (options & PCRE_NOTBOL) != 0;
  10429. arguments.noteol = (options & PCRE_NOTEOL) != 0;
  10430. arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
  10431. arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
  10432. arguments.offsets = offsets;
  10433. arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL;
  10434. arguments.real_offset_count = offset_count;
  10435. /* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of
  10436. the output vector for storing captured strings, with the remainder used as
  10437. workspace. We don't need the workspace here. For compatibility, we limit the
  10438. number of captured strings in the same way as pcre_exec(), so that the user
  10439. gets the same result with and without JIT. */
  10440. if (offset_count != 2)
  10441. offset_count = ((offset_count - (offset_count % 3)) * 2) / 3;
  10442. max_offset_count = functions->top_bracket;
  10443. if (offset_count > max_offset_count)
  10444. offset_count = max_offset_count;
  10445. arguments.offset_count = offset_count;
  10446. convert_executable_func.executable_func = functions->executable_funcs[mode];
  10447. retval = convert_executable_func.call_executable_func(&arguments);
  10448. if (retval * 2 > offset_count)
  10449. retval = 0;
  10450. if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
  10451. *(extra_data->mark) = arguments.mark_ptr;
  10452. return retval;
  10453. }
  10454. void
  10455. PRIV(jit_free)(void *executable_funcs)
  10456. {
  10457. int i;
  10458. executable_functions *functions = (executable_functions *)executable_funcs;
  10459. for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
  10460. {
  10461. if (functions->executable_funcs[i] != NULL)
  10462. sljit_free_code(functions->executable_funcs[i]);
  10463. free_read_only_data(functions->read_only_data_heads[i], NULL);
  10464. }
  10465. SLJIT_FREE(functions, compiler->allocator_data);
  10466. }
  10467. int
  10468. PRIV(jit_get_size)(void *executable_funcs)
  10469. {
  10470. int i;
  10471. sljit_uw size = 0;
  10472. sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
  10473. for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
  10474. size += executable_sizes[i];
  10475. return (int)size;
  10476. }
  10477. const char*
  10478. PRIV(jit_get_target)(void)
  10479. {
  10480. return sljit_get_platform_name();
  10481. }
  10482. #if defined COMPILE_PCRE8
  10483. PCRE_EXP_DECL pcre_jit_stack *
  10484. pcre_jit_stack_alloc(int startsize, int maxsize)
  10485. #elif defined COMPILE_PCRE16
  10486. PCRE_EXP_DECL pcre16_jit_stack *
  10487. pcre16_jit_stack_alloc(int startsize, int maxsize)
  10488. #elif defined COMPILE_PCRE32
  10489. PCRE_EXP_DECL pcre32_jit_stack *
  10490. pcre32_jit_stack_alloc(int startsize, int maxsize)
  10491. #endif
  10492. {
  10493. if (startsize < 1 || maxsize < 1)
  10494. return NULL;
  10495. if (startsize > maxsize)
  10496. startsize = maxsize;
  10497. startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
  10498. maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
  10499. return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL);
  10500. }
  10501. #if defined COMPILE_PCRE8
  10502. PCRE_EXP_DECL void
  10503. pcre_jit_stack_free(pcre_jit_stack *stack)
  10504. #elif defined COMPILE_PCRE16
  10505. PCRE_EXP_DECL void
  10506. pcre16_jit_stack_free(pcre16_jit_stack *stack)
  10507. #elif defined COMPILE_PCRE32
  10508. PCRE_EXP_DECL void
  10509. pcre32_jit_stack_free(pcre32_jit_stack *stack)
  10510. #endif
  10511. {
  10512. sljit_free_stack((struct sljit_stack *)stack, NULL);
  10513. }
  10514. #if defined COMPILE_PCRE8
  10515. PCRE_EXP_DECL void
  10516. pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
  10517. #elif defined COMPILE_PCRE16
  10518. PCRE_EXP_DECL void
  10519. pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
  10520. #elif defined COMPILE_PCRE32
  10521. PCRE_EXP_DECL void
  10522. pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
  10523. #endif
  10524. {
  10525. executable_functions *functions;
  10526. if (extra != NULL &&
  10527. (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
  10528. extra->executable_jit != NULL)
  10529. {
  10530. functions = (executable_functions *)extra->executable_jit;
  10531. functions->callback = callback;
  10532. functions->userdata = userdata;
  10533. }
  10534. }
  10535. #if defined COMPILE_PCRE8
  10536. PCRE_EXP_DECL void
  10537. pcre_jit_free_unused_memory(void)
  10538. #elif defined COMPILE_PCRE16
  10539. PCRE_EXP_DECL void
  10540. pcre16_jit_free_unused_memory(void)
  10541. #elif defined COMPILE_PCRE32
  10542. PCRE_EXP_DECL void
  10543. pcre32_jit_free_unused_memory(void)
  10544. #endif
  10545. {
  10546. sljit_free_unused_memory_exec();
  10547. }
  10548. #else /* SUPPORT_JIT */
  10549. /* These are dummy functions to avoid linking errors when JIT support is not
  10550. being compiled. */
  10551. #if defined COMPILE_PCRE8
  10552. PCRE_EXP_DECL pcre_jit_stack *
  10553. pcre_jit_stack_alloc(int startsize, int maxsize)
  10554. #elif defined COMPILE_PCRE16
  10555. PCRE_EXP_DECL pcre16_jit_stack *
  10556. pcre16_jit_stack_alloc(int startsize, int maxsize)
  10557. #elif defined COMPILE_PCRE32
  10558. PCRE_EXP_DECL pcre32_jit_stack *
  10559. pcre32_jit_stack_alloc(int startsize, int maxsize)
  10560. #endif
  10561. {
  10562. (void)startsize;
  10563. (void)maxsize;
  10564. return NULL;
  10565. }
  10566. #if defined COMPILE_PCRE8
  10567. PCRE_EXP_DECL void
  10568. pcre_jit_stack_free(pcre_jit_stack *stack)
  10569. #elif defined COMPILE_PCRE16
  10570. PCRE_EXP_DECL void
  10571. pcre16_jit_stack_free(pcre16_jit_stack *stack)
  10572. #elif defined COMPILE_PCRE32
  10573. PCRE_EXP_DECL void
  10574. pcre32_jit_stack_free(pcre32_jit_stack *stack)
  10575. #endif
  10576. {
  10577. (void)stack;
  10578. }
  10579. #if defined COMPILE_PCRE8
  10580. PCRE_EXP_DECL void
  10581. pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
  10582. #elif defined COMPILE_PCRE16
  10583. PCRE_EXP_DECL void
  10584. pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata)
  10585. #elif defined COMPILE_PCRE32
  10586. PCRE_EXP_DECL void
  10587. pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata)
  10588. #endif
  10589. {
  10590. (void)extra;
  10591. (void)callback;
  10592. (void)userdata;
  10593. }
  10594. #if defined COMPILE_PCRE8
  10595. PCRE_EXP_DECL void
  10596. pcre_jit_free_unused_memory(void)
  10597. #elif defined COMPILE_PCRE16
  10598. PCRE_EXP_DECL void
  10599. pcre16_jit_free_unused_memory(void)
  10600. #elif defined COMPILE_PCRE32
  10601. PCRE_EXP_DECL void
  10602. pcre32_jit_free_unused_memory(void)
  10603. #endif
  10604. {
  10605. }
  10606. #endif
  10607. /* End of pcre_jit_compile.c */