common.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. /*
  2. * common.h --
  3. *
  4. * Copyright (c) 2007-2010, Dmitry Prokoptsev <dprokoptsev@gmail.com>,
  5. * Alexander Gololobov <agololobov@gmail.com>
  6. *
  7. * This file is part of Pire, the Perl Incompatible
  8. * Regular Expressions library.
  9. *
  10. * Pire is free software: you can redistribute it and/or modify
  11. * it under the terms of the GNU Lesser Public License as published by
  12. * the Free Software Foundation, either version 3 of the License, or
  13. * (at your option) any later version.
  14. *
  15. * Pire is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU Lesser Public License for more details.
  19. * You should have received a copy of the GNU Lesser Public License
  20. * along with Pire. If not, see <http://www.gnu.org/licenses>.
  21. */
  22. #ifndef PIRE_TEST_COMMON_H_INCLUDED
  23. #define PIRE_TEST_COMMON_H_INCLUDED
  24. #include <stdio.h>
  25. #include <pire.h>
  26. #include <stub/stl.h>
  27. #include <stub/defaults.h>
  28. #include <stub/lexical_cast.h>
  29. #include "stub/cppunit.h"
  30. using namespace Pire;
  31. /*****************************************************************************
  32. * Helpers
  33. *****************************************************************************/
  34. inline Pire::Fsm ParseRegexp(const char* str, const char* options = "", const Pire::Encoding** enc = 0)
  35. {
  36. Pire::Lexer lexer;
  37. TVector<wchar32> ucs4;
  38. bool surround = true;
  39. for (; *options; ++options) {
  40. if (*options == 'i')
  41. lexer.AddFeature(Pire::Features::CaseInsensitive());
  42. else if (*options == 'u')
  43. lexer.SetEncoding(Pire::Encodings::Utf8());
  44. else if (*options == 'n')
  45. surround = false;
  46. else if (*options == 'a')
  47. lexer.AddFeature(Pire::Features::AndNotSupport());
  48. else
  49. throw std::invalid_argument("Unknown option: " + ystring(1, *options));
  50. }
  51. if (enc)
  52. *enc = &lexer.Encoding();
  53. lexer.Encoding().FromLocal(str, str + strlen(str), std::back_inserter(ucs4));
  54. lexer.Assign(ucs4.begin(), ucs4.end());
  55. Pire::Fsm fsm = lexer.Parse();
  56. if (surround)
  57. fsm.Surround();
  58. return fsm;
  59. }
  60. inline bool HasError(const char* regexp) {
  61. try {
  62. ParseRegexp(regexp);
  63. return false;
  64. } catch (Pire::Error& ex) {
  65. return true;
  66. }
  67. }
  68. struct Scanners {
  69. Pire::Scanner fast;
  70. Pire::NonrelocScanner nonreloc;
  71. Pire::SimpleScanner simple;
  72. Pire::SlowScanner slow;
  73. Pire::ScannerNoMask fastNoMask;
  74. Pire::NonrelocScannerNoMask nonrelocNoMask;
  75. Pire::HalfFinalScanner halfFinal;
  76. Pire::HalfFinalScannerNoMask halfFinalNoMask;
  77. Pire::NonrelocHalfFinalScanner nonrelocHalfFinal;
  78. Pire::NonrelocHalfFinalScannerNoMask nonrelocHalfFinalNoMask;
  79. Scanners(const Pire::Fsm& fsm, size_t distance = 0)
  80. : fast(Pire::Fsm(fsm).Compile<Pire::Scanner>(distance))
  81. , nonreloc(Pire::Fsm(fsm).Compile<Pire::NonrelocScanner>(distance))
  82. , simple(Pire::Fsm(fsm).Compile<Pire::SimpleScanner>(distance))
  83. , slow(Pire::Fsm(fsm).Compile<Pire::SlowScanner>(distance))
  84. , fastNoMask(Pire::Fsm(fsm).Compile<Pire::ScannerNoMask>(distance))
  85. , nonrelocNoMask(Pire::Fsm(fsm).Compile<Pire::NonrelocScannerNoMask>(distance))
  86. , halfFinal(Pire::Fsm(fsm).Compile<Pire::HalfFinalScanner>(distance))
  87. , halfFinalNoMask(Pire::Fsm(fsm).Compile<Pire::HalfFinalScannerNoMask>(distance))
  88. , nonrelocHalfFinal(Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScanner>(distance))
  89. , nonrelocHalfFinalNoMask(Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScannerNoMask>(distance))
  90. {}
  91. Scanners(const char* str, const char* options = "")
  92. {
  93. Pire::Fsm fsm = ParseRegexp(str, options);
  94. fast = Pire::Fsm(fsm).Compile<Pire::Scanner>();
  95. nonreloc = Pire::Fsm(fsm).Compile<Pire::NonrelocScanner>();
  96. simple = Pire::Fsm(fsm).Compile<Pire::SimpleScanner>();
  97. slow = Pire::Fsm(fsm).Compile<Pire::SlowScanner>();
  98. fastNoMask = Pire::Fsm(fsm).Compile<Pire::ScannerNoMask>();
  99. nonrelocNoMask = Pire::Fsm(fsm).Compile<Pire::NonrelocScannerNoMask>();
  100. halfFinal = Pire::Fsm(fsm).Compile<Pire::HalfFinalScanner>();
  101. halfFinalNoMask = Pire::Fsm(fsm).Compile<Pire::HalfFinalScannerNoMask>();
  102. nonrelocHalfFinal = Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScanner>();
  103. nonrelocHalfFinalNoMask = Pire::Fsm(fsm).Compile<Pire::NonrelocHalfFinalScannerNoMask>();
  104. }
  105. };
  106. #ifdef PIRE_DEBUG
  107. template <class Scanner>
  108. inline ystring DbgState(const Scanner& scanner, typename Scanner::State state)
  109. {
  110. return ToString(scanner.StateIndex(state)) + (scanner.Final(state) ? ystring(" [final]") : ystring());
  111. }
  112. /*
  113. inline ystring DbgState(const Pire::SimpleScanner& scanner, Pire::SimpleScanner::State state)
  114. {
  115. return ToString(scanner.StateIndex(state)) + (scanner.Final(state) ? ystring(" [final]") : ystring());
  116. }
  117. */
  118. inline ystring DbgState(const Pire::SlowScanner& scanner, const Pire::SlowScanner::State& state)
  119. {
  120. return ystring("(") + Join(state.states.begin(), state.states.end(), ", ") + ystring(")") + (scanner.Final(state) ? ystring(" [final]") : ystring());
  121. }
  122. template<class Scanner>
  123. void DbgRun(const Scanner& scanner, typename Scanner::State& state, const char* begin, const char* end)
  124. {
  125. for (; begin != end; ++begin) {
  126. char tmp[8];
  127. if (*begin >= 32) {
  128. tmp[0] = *begin;
  129. tmp[1] = 0;
  130. } else
  131. snprintf(tmp, sizeof(tmp)-1, "\\%03o", (unsigned char) *begin);
  132. std::clog << DbgState(scanner, state) << " --[" << tmp << "]--> ";
  133. scanner.Next(state, (unsigned char) *begin);
  134. std::clog << DbgState(scanner, state) << "\n";
  135. }
  136. }
  137. #define Run DbgRun
  138. #endif
  139. template<class Scanner>
  140. typename Scanner::State RunRegexp(const Scanner& scanner, const ystring& str)
  141. {
  142. PIRE_IFDEBUG(std::clog << "--- checking against " << str << "\n");
  143. typename Scanner::State state;
  144. scanner.Initialize(state);
  145. Step(scanner, state, BeginMark);
  146. Run(scanner, state, str.c_str(), str.c_str() + str.length());
  147. Step(scanner, state, EndMark);
  148. return state;
  149. }
  150. template<class Scanner>
  151. typename Scanner::State RunRegexp(const Scanner& scanner, const char* str)
  152. {
  153. return RunRegexp(scanner, ystring(str));
  154. }
  155. template<class Scanner>
  156. bool Matches(const Scanner& scanner, const ystring& str)
  157. {
  158. auto state = RunRegexp(scanner, str);
  159. auto result = scanner.AcceptedRegexps(state);
  160. return result.first != result.second;
  161. }
  162. template<class Scanner>
  163. bool Matches(const Scanner& scanner, const char* str)
  164. {
  165. return Matches(scanner, ystring(str));
  166. }
  167. #define SCANNER(fsm) for (Scanners m_scanners(fsm), *m_flag = &m_scanners; m_flag; m_flag = 0)
  168. #define APPROXIMATE_SCANNER(fsm, distance) for (Scanners m_scanners(fsm, distance), *m_flag = &m_scanners; m_flag; m_flag = 0)
  169. #define REGEXP(pattern) for (Scanners m_scanners(pattern), *m_flag = &m_scanners; m_flag; m_flag = 0)
  170. #define REGEXP2(pattern,flags) for (Scanners m_scanners(pattern, flags), *m_flag = &m_scanners; m_flag; m_flag = 0)
  171. #define ACCEPTS(str) \
  172. do {\
  173. UNIT_ASSERT(Matches(m_scanners.fast, str));\
  174. UNIT_ASSERT(Matches(m_scanners.nonreloc, str));\
  175. UNIT_ASSERT(Matches(m_scanners.simple, str));\
  176. UNIT_ASSERT(Matches(m_scanners.slow, str));\
  177. UNIT_ASSERT(Matches(m_scanners.fastNoMask, str));\
  178. UNIT_ASSERT(Matches(m_scanners.nonrelocNoMask, str));\
  179. UNIT_ASSERT(Matches(m_scanners.halfFinal, str));\
  180. UNIT_ASSERT(Matches(m_scanners.halfFinalNoMask, str));\
  181. UNIT_ASSERT(Matches(m_scanners.nonrelocHalfFinal, str));\
  182. UNIT_ASSERT(Matches(m_scanners.nonrelocHalfFinalNoMask, str));\
  183. } while (false)
  184. #define DENIES(str) \
  185. do {\
  186. UNIT_ASSERT(!Matches(m_scanners.fast, str));\
  187. UNIT_ASSERT(!Matches(m_scanners.nonreloc, str));\
  188. UNIT_ASSERT(!Matches(m_scanners.simple, str));\
  189. UNIT_ASSERT(!Matches(m_scanners.slow, str));\
  190. UNIT_ASSERT(!Matches(m_scanners.fastNoMask, str));\
  191. UNIT_ASSERT(!Matches(m_scanners.nonrelocNoMask, str));\
  192. UNIT_ASSERT(!Matches(m_scanners.halfFinal, str));\
  193. UNIT_ASSERT(!Matches(m_scanners.halfFinalNoMask, str));\
  194. UNIT_ASSERT(!Matches(m_scanners.nonrelocHalfFinal, str));\
  195. UNIT_ASSERT(!Matches(m_scanners.nonrelocHalfFinalNoMask, str));\
  196. } while (false)
  197. #endif