sre.h 3.6 KB

/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the sre.c file for information on usage and redistribution.
 */
  10. #ifndef SRE_INCLUDED
  11. #define SRE_INCLUDED
  12. #include "sre_constants.h"
  13. /* size of a code word (must be unsigned short or larger, and
  14. large enough to hold a UCS4 character) */
  15. #define SRE_CODE Py_UCS4
  16. #if SIZEOF_SIZE_T > 4
  17. # define SRE_MAXREPEAT (~(SRE_CODE)0)
  18. # define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
  19. #else
  20. # define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
  21. # define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
  22. #endif
  23. typedef struct {
  24. PyObject_VAR_HEAD
  25. Py_ssize_t groups; /* must be first! */
  26. PyObject* groupindex; /* dict */
  27. PyObject* indexgroup; /* tuple */
  28. /* compatibility */
  29. PyObject* pattern; /* pattern source (or None) */
  30. int flags; /* flags used when compiling pattern source */
  31. PyObject *weakreflist; /* List of weak references */
  32. int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
  33. #ifdef Py_DEBUG
  34. /* for simulation of user interruption */
  35. int fail_after_count;
  36. PyObject *fail_after_exc;
  37. #endif
  38. /* pattern code */
  39. Py_ssize_t codesize;
  40. SRE_CODE code[1];
  41. } PatternObject;
  42. #define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
  43. typedef struct {
  44. PyObject_VAR_HEAD
  45. PyObject* string; /* link to the target string (must be first) */
  46. PyObject* regs; /* cached list of matching spans */
  47. PatternObject* pattern; /* link to the regex (pattern) object */
  48. Py_ssize_t pos, endpos; /* current target slice */
  49. Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
  50. Py_ssize_t groups; /* number of groups (start/end marks) */
  51. Py_ssize_t mark[1];
  52. } MatchObject;
  53. typedef struct {
  54. PyObject_VAR_HEAD
  55. Py_ssize_t chunks; /* the number of group references and non-NULL literals
  56. * self->chunks <= 2*Py_SIZE(self) + 1 */
  57. PyObject *literal;
  58. struct {
  59. Py_ssize_t index;
  60. PyObject *literal; /* NULL if empty */
  61. } items[0];
  62. } TemplateObject;
  63. typedef struct SRE_REPEAT_T {
  64. Py_ssize_t count;
  65. const SRE_CODE* pattern; /* points to REPEAT operator arguments */
  66. const void* last_ptr; /* helper to check for infinite loops */
  67. struct SRE_REPEAT_T *prev; /* points to previous repeat context */
  68. /* for SRE_REPEAT pool */
  69. struct SRE_REPEAT_T *pool_prev;
  70. struct SRE_REPEAT_T *pool_next;
  71. } SRE_REPEAT;
  72. typedef struct {
  73. /* string pointers */
  74. const void* ptr; /* current position (also end of current slice) */
  75. const void* beginning; /* start of original string */
  76. const void* start; /* start of current slice */
  77. const void* end; /* end of original string */
  78. /* attributes for the match object */
  79. PyObject* string;
  80. Py_buffer buffer;
  81. Py_ssize_t pos, endpos;
  82. int isbytes;
  83. int charsize; /* character size */
  84. int match_all;
  85. int must_advance;
  86. /* marks */
  87. int lastmark;
  88. int lastindex;
  89. const void** mark;
  90. /* dynamically allocated stuff */
  91. char* data_stack;
  92. size_t data_stack_size;
  93. size_t data_stack_base;
  94. /* current repeat context */
  95. SRE_REPEAT *repeat;
  96. /* SRE_REPEAT pool */
  97. SRE_REPEAT *repeat_pool_used;
  98. SRE_REPEAT *repeat_pool_unused;
  99. unsigned int sigcount;
  100. #ifdef Py_DEBUG
  101. int fail_after_count;
  102. PyObject *fail_after_exc;
  103. #endif
  104. } SRE_STATE;
  105. typedef struct {
  106. PyObject_HEAD
  107. PatternObject* pattern;
  108. SRE_STATE state;
  109. int executing;
  110. } ScannerObject;
  111. #endif