pcre.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. // Copyright 2003-2010 Google Inc. All Rights Reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. #ifndef UTIL_PCRE_H_
  5. #define UTIL_PCRE_H_
  6. // This is a variant of PCRE's pcrecpp.h, originally written at Google.
  7. // The main changes are the addition of the HitLimit method and
  8. // compilation as PCRE in namespace re2.
  9. // C++ interface to the pcre regular-expression library. PCRE supports
  10. // Perl-style regular expressions (with extensions like \d, \w, \s,
  11. // ...).
  12. //
  13. // -----------------------------------------------------------------------
  14. // REGEXP SYNTAX:
  15. //
  16. // This module uses the pcre library and hence supports its syntax
  17. // for regular expressions:
  18. //
  19. // http://www.google.com/search?q=pcre
  20. //
  21. // The syntax is pretty similar to Perl's. For those not familiar
  22. // with Perl's regular expressions, here are some examples of the most
  23. // commonly used extensions:
  24. //
  25. // "hello (\\w+) world" -- \w matches a "word" character
  26. // "version (\\d+)" -- \d matches a digit
  27. // "hello\\s+world" -- \s matches any whitespace character
  28. // "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
  29. // "(?i)hello" -- (?i) turns on case-insensitive matching
  30. // "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
  31. //
  32. // -----------------------------------------------------------------------
  33. // MATCHING INTERFACE:
  34. //
  35. // The "FullMatch" operation checks that supplied text matches a
  36. // supplied pattern exactly.
  37. //
  38. // Example: successful match
  39. // ABSL_CHECK(PCRE::FullMatch("hello", "h.*o"));
  40. //
  41. // Example: unsuccessful match (requires full match):
  42. // ABSL_CHECK(!PCRE::FullMatch("hello", "e"));
  43. //
  44. // -----------------------------------------------------------------------
  45. // UTF-8 AND THE MATCHING INTERFACE:
  46. //
  47. // By default, pattern and text are plain text, one byte per character.
  48. // The UTF8 flag, passed to the constructor, causes both pattern
  49. // and string to be treated as UTF-8 text, still a byte stream but
  50. // potentially multiple bytes per character. In practice, the text
  51. // is likelier to be UTF-8 than the pattern, but the match returned
  52. // may depend on the UTF8 flag, so always use it when matching
  53. // UTF8 text. E.g., "." will match one byte normally but with UTF8
  54. // set may match up to four bytes of a multi-byte character.
  55. //
  56. // Example:
  57. // PCRE re(utf8_pattern, PCRE::UTF8);
  58. // ABSL_CHECK(PCRE::FullMatch(utf8_string, re));
  59. //
  60. // -----------------------------------------------------------------------
  61. // MATCHING WITH SUBSTRING EXTRACTION:
  62. //
  63. // You can supply extra pointer arguments to extract matched substrings.
  64. //
  65. // Example: extracts "ruby" into "s" and 1234 into "i"
  66. // int i;
  67. // std::string s;
  68. // ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
  69. //
  70. // Example: fails because string cannot be stored in integer
  71. // ABSL_CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
  72. //
  73. // Example: fails because there aren't enough sub-patterns:
  74. // ABSL_CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
  75. //
  76. // Example: does not try to extract any extra sub-patterns
  77. // ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
  78. //
  79. // Example: does not try to extract into NULL
  80. // ABSL_CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
  81. //
  82. // Example: integer overflow causes failure
  83. // ABSL_CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
  84. //
  85. // -----------------------------------------------------------------------
  86. // PARTIAL MATCHES
  87. //
  88. // You can use the "PartialMatch" operation when you want the pattern
  89. // to match any substring of the text.
  90. //
  91. // Example: simple search for a string:
  92. // ABSL_CHECK(PCRE::PartialMatch("hello", "ell"));
  93. //
  94. // Example: find first number in a string
  95. // int number;
  96. // ABSL_CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
  97. // ABSL_CHECK_EQ(number, 100);
  98. //
  99. // -----------------------------------------------------------------------
  100. // PRE-COMPILED REGULAR EXPRESSIONS
  101. //
  102. // PCRE makes it easy to use any string as a regular expression, without
  103. // requiring a separate compilation step.
  104. //
  105. // If speed is of the essence, you can create a pre-compiled "PCRE"
  106. // object from the pattern and use it multiple times. If you do so,
  107. // you can typically parse text faster than with sscanf.
  108. //
  109. // Example: precompile pattern for faster matching:
  110. // PCRE pattern("h.*o");
  111. // while (ReadLine(&str)) {
  112. // if (PCRE::FullMatch(str, pattern)) ...;
  113. // }
  114. //
  115. // -----------------------------------------------------------------------
  116. // SCANNING TEXT INCREMENTALLY
  117. //
  118. // The "Consume" operation may be useful if you want to repeatedly
  119. // match regular expressions at the front of a string and skip over
  120. // them as they match. This requires use of the string_view type,
  121. // which represents a sub-range of a real string.
  122. //
  123. // Example: read lines of the form "var = value" from a string.
  124. // std::string contents = ...; // Fill string somehow
  125. // absl::string_view input(contents); // Wrap a string_view around it
  126. //
  127. // std::string var;
  128. // int value;
  129. // while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
  130. // ...;
  131. // }
  132. //
  133. // Each successful call to "Consume" will set "var/value", and also
  134. // advance "input" so it points past the matched text. Note that if the
  135. // regular expression matches an empty string, input will advance
  136. // by 0 bytes. If the regular expression being used might match
  137. // an empty string, the loop body must check for this case and either
  138. // advance the string or break out of the loop.
  139. //
  140. // The "FindAndConsume" operation is similar to "Consume" but does not
  141. // anchor your match at the beginning of the string. For example, you
  142. // could extract all words from a string by repeatedly calling
  143. // PCRE::FindAndConsume(&input, "(\\w+)", &word)
  144. //
  145. // -----------------------------------------------------------------------
  146. // PARSING HEX/OCTAL/C-RADIX NUMBERS
  147. //
  148. // By default, if you pass a pointer to a numeric value, the
  149. // corresponding text is interpreted as a base-10 number. You can
  150. // instead wrap the pointer with a call to one of the operators Hex(),
  151. // Octal(), or CRadix() to interpret the text in another base. The
  152. // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
  153. // prefixes, but defaults to base-10.
  154. //
  155. // Example:
  156. // int a, b, c, d;
  157. // ABSL_CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
  158. // Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
  159. // will leave 64 in a, b, c, and d.
  160. #include "absl/strings/string_view.h"
  // When USEPCRE is defined, pull in the real libpcre declarations and set
  // re2::UsingPCRE to true.  Otherwise only forward-declare the opaque
  // "pcre" handle type (so that pointers to it can still be declared) and
  // set re2::UsingPCRE to false.
  #ifdef USEPCRE
  #include <pcre.h>
  namespace re2 {
  const bool UsingPCRE = true;
  }  // namespace re2
  #else
  struct pcre;  // opaque
  namespace re2 {
  const bool UsingPCRE = false;
  }  // namespace re2
  #endif
  // CMake can export code symbols from a DLL automatically, but data symbols
  // have to be annotated by hand.  RE2_TESTING_DLL is that annotation: it
  // expands to nothing in an ordinary build, to dllexport while building the
  // testing DLL, and to dllimport while consuming it.
  #if !defined(RE2_BUILD_TESTING_DLL) && !defined(RE2_CONSUME_TESTING_DLL)
  #define RE2_TESTING_DLL
  #elif defined(RE2_BUILD_TESTING_DLL)
  #define RE2_TESTING_DLL __declspec(dllexport)
  #else
  #define RE2_TESTING_DLL __declspec(dllimport)
  #endif
  181. namespace re2 {
  182. class PCRE_Options;
  183. // Interface for regular expression matching. Also corresponds to a
  184. // pre-compiled regular expression. An "PCRE" object is safe for
  185. // concurrent use by multiple threads.
  186. class PCRE {
  187. public:
  188. // We convert user-passed pointers into special Arg objects
  189. class Arg;
  190. // Marks end of arg list.
  191. // ONLY USE IN OPTIONAL ARG DEFAULTS.
  192. // DO NOT PASS EXPLICITLY.
  193. RE2_TESTING_DLL static Arg no_more_args;
  194. // Options are same value as those in pcre. We provide them here
  195. // to avoid users needing to include pcre.h and also to isolate
  196. // users from pcre should we change the underlying library.
  197. // Only those needed by Google programs are exposed here to
  198. // avoid collision with options employed internally by regexp.cc
  199. // Note that some options have equivalents that can be specified in
  200. // the regexp itself. For example, prefixing your regexp with
  201. // "(?s)" has the same effect as the PCRE_DOTALL option.
  202. enum Option {
  203. None = 0x0000,
  204. UTF8 = 0x0800, // == PCRE_UTF8
  205. EnabledCompileOptions = UTF8,
  206. EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
  207. };
  208. // We provide implicit conversions from strings so that users can
  209. // pass in a string or a "const char*" wherever an "PCRE" is expected.
  210. PCRE(const char* pattern);
  211. PCRE(const char* pattern, Option option);
  212. PCRE(const std::string& pattern);
  213. PCRE(const std::string& pattern, Option option);
  214. PCRE(const char *pattern, const PCRE_Options& re_option);
  215. PCRE(const std::string& pattern, const PCRE_Options& re_option);
  216. ~PCRE();
  217. // The string specification for this PCRE. E.g.
  218. // PCRE re("ab*c?d+");
  219. // re.pattern(); // "ab*c?d+"
  220. const std::string& pattern() const { return pattern_; }
  221. // If PCRE could not be created properly, returns an error string.
  222. // Else returns the empty string.
  223. const std::string& error() const { return *error_; }
  224. // Whether the PCRE has hit a match limit during execution.
  225. // Not thread safe. Intended only for testing.
  226. // If hitting match limits is a problem,
  227. // you should be using PCRE2 (re2/re2.h)
  228. // instead of checking this flag.
  229. bool HitLimit();
  230. void ClearHitLimit();
  231. /***** The useful part: the matching interface *****/
  232. // Matches "text" against "pattern". If pointer arguments are
  233. // supplied, copies matched sub-patterns into them.
  234. //
  235. // You can pass in a "const char*" or a "std::string" for "text".
  236. // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
  237. //
  238. // The provided pointer arguments can be pointers to any scalar numeric
  239. // type, or one of:
  240. // std::string (matched piece is copied to string)
  241. // absl::string_view (string_view is mutated to point to matched piece)
  242. // T ("bool T::ParseFrom(const char*, size_t)" must exist)
  243. // (void*)NULL (the corresponding matched sub-pattern is not copied)
  244. //
  245. // Returns true iff all of the following conditions are satisfied:
  246. // a. "text" matches "pattern" exactly
  247. // b. The number of matched sub-patterns is >= number of supplied pointers
  248. // c. The "i"th argument has a suitable type for holding the
  249. // string captured as the "i"th sub-pattern. If you pass in
  250. // NULL for the "i"th argument, or pass fewer arguments than
  251. // number of sub-patterns, "i"th captured sub-pattern is
  252. // ignored.
  253. //
  254. // CAVEAT: An optional sub-pattern that does not exist in the
  255. // matched string is assigned the empty string. Therefore, the
  256. // following will return false (because the empty string is not a
  257. // valid number):
  258. // int number;
  259. // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
  260. struct FullMatchFunctor {
  261. bool operator ()(absl::string_view text, const PCRE& re, // 3..16 args
  262. const Arg& ptr1 = no_more_args,
  263. const Arg& ptr2 = no_more_args,
  264. const Arg& ptr3 = no_more_args,
  265. const Arg& ptr4 = no_more_args,
  266. const Arg& ptr5 = no_more_args,
  267. const Arg& ptr6 = no_more_args,
  268. const Arg& ptr7 = no_more_args,
  269. const Arg& ptr8 = no_more_args,
  270. const Arg& ptr9 = no_more_args,
  271. const Arg& ptr10 = no_more_args,
  272. const Arg& ptr11 = no_more_args,
  273. const Arg& ptr12 = no_more_args,
  274. const Arg& ptr13 = no_more_args,
  275. const Arg& ptr14 = no_more_args,
  276. const Arg& ptr15 = no_more_args,
  277. const Arg& ptr16 = no_more_args) const;
  278. };
  279. RE2_TESTING_DLL static const FullMatchFunctor FullMatch;
  280. // Exactly like FullMatch(), except that "pattern" is allowed to match
  281. // a substring of "text".
  282. struct PartialMatchFunctor {
  283. bool operator ()(absl::string_view text, const PCRE& re, // 3..16 args
  284. const Arg& ptr1 = no_more_args,
  285. const Arg& ptr2 = no_more_args,
  286. const Arg& ptr3 = no_more_args,
  287. const Arg& ptr4 = no_more_args,
  288. const Arg& ptr5 = no_more_args,
  289. const Arg& ptr6 = no_more_args,
  290. const Arg& ptr7 = no_more_args,
  291. const Arg& ptr8 = no_more_args,
  292. const Arg& ptr9 = no_more_args,
  293. const Arg& ptr10 = no_more_args,
  294. const Arg& ptr11 = no_more_args,
  295. const Arg& ptr12 = no_more_args,
  296. const Arg& ptr13 = no_more_args,
  297. const Arg& ptr14 = no_more_args,
  298. const Arg& ptr15 = no_more_args,
  299. const Arg& ptr16 = no_more_args) const;
  300. };
  301. RE2_TESTING_DLL static const PartialMatchFunctor PartialMatch;
  302. // Like FullMatch() and PartialMatch(), except that pattern has to
  303. // match a prefix of "text", and "input" is advanced past the matched
  304. // text. Note: "input" is modified iff this routine returns true.
  305. struct ConsumeFunctor {
  306. bool operator ()(absl::string_view* input, const PCRE& pattern, // 3..16 args
  307. const Arg& ptr1 = no_more_args,
  308. const Arg& ptr2 = no_more_args,
  309. const Arg& ptr3 = no_more_args,
  310. const Arg& ptr4 = no_more_args,
  311. const Arg& ptr5 = no_more_args,
  312. const Arg& ptr6 = no_more_args,
  313. const Arg& ptr7 = no_more_args,
  314. const Arg& ptr8 = no_more_args,
  315. const Arg& ptr9 = no_more_args,
  316. const Arg& ptr10 = no_more_args,
  317. const Arg& ptr11 = no_more_args,
  318. const Arg& ptr12 = no_more_args,
  319. const Arg& ptr13 = no_more_args,
  320. const Arg& ptr14 = no_more_args,
  321. const Arg& ptr15 = no_more_args,
  322. const Arg& ptr16 = no_more_args) const;
  323. };
  324. RE2_TESTING_DLL static const ConsumeFunctor Consume;
  325. // Like Consume(..), but does not anchor the match at the beginning of the
  326. // string. That is, "pattern" need not start its match at the beginning of
  327. // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
  328. // word in "s" and stores it in "word".
  329. struct FindAndConsumeFunctor {
  330. bool operator ()(absl::string_view* input, const PCRE& pattern, // 3..16 args
  331. const Arg& ptr1 = no_more_args,
  332. const Arg& ptr2 = no_more_args,
  333. const Arg& ptr3 = no_more_args,
  334. const Arg& ptr4 = no_more_args,
  335. const Arg& ptr5 = no_more_args,
  336. const Arg& ptr6 = no_more_args,
  337. const Arg& ptr7 = no_more_args,
  338. const Arg& ptr8 = no_more_args,
  339. const Arg& ptr9 = no_more_args,
  340. const Arg& ptr10 = no_more_args,
  341. const Arg& ptr11 = no_more_args,
  342. const Arg& ptr12 = no_more_args,
  343. const Arg& ptr13 = no_more_args,
  344. const Arg& ptr14 = no_more_args,
  345. const Arg& ptr15 = no_more_args,
  346. const Arg& ptr16 = no_more_args) const;
  347. };
  348. RE2_TESTING_DLL static const FindAndConsumeFunctor FindAndConsume;
  349. // Replace the first match of "pattern" in "str" with "rewrite".
  350. // Within "rewrite", backslash-escaped digits (\1 to \9) can be
  351. // used to insert text matching corresponding parenthesized group
  352. // from the pattern. \0 in "rewrite" refers to the entire matching
  353. // text. E.g.,
  354. //
  355. // std::string s = "yabba dabba doo";
  356. // ABSL_CHECK(PCRE::Replace(&s, "b+", "d"));
  357. //
  358. // will leave "s" containing "yada dabba doo"
  359. //
  360. // Returns true if the pattern matches and a replacement occurs,
  361. // false otherwise.
  362. static bool Replace(std::string* str, const PCRE& pattern,
  363. absl::string_view rewrite);
  364. // Like Replace(), except replaces all occurrences of the pattern in
  365. // the string with the rewrite. Replacements are not subject to
  366. // re-matching. E.g.,
  367. //
  368. // std::string s = "yabba dabba doo";
  369. // ABSL_CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
  370. //
  371. // will leave "s" containing "yada dada doo"
  372. //
  373. // Returns the number of replacements made.
  374. static int GlobalReplace(std::string* str, const PCRE& pattern,
  375. absl::string_view rewrite);
  376. // Like Replace, except that if the pattern matches, "rewrite"
  377. // is copied into "out" with substitutions. The non-matching
  378. // portions of "text" are ignored.
  379. //
  380. // Returns true iff a match occurred and the extraction happened
  381. // successfully; if no match occurs, the string is left unaffected.
  382. static bool Extract(absl::string_view text, const PCRE& pattern,
  383. absl::string_view rewrite, std::string* out);
  384. // Check that the given @p rewrite string is suitable for use with
  385. // this PCRE. It checks that:
  386. // * The PCRE has enough parenthesized subexpressions to satisfy all
  387. // of the \N tokens in @p rewrite, and
  388. // * The @p rewrite string doesn't have any syntax errors
  389. // ('\' followed by anything besides [0-9] and '\').
  390. // Making this test will guarantee that "replace" and "extract"
  391. // operations won't ABSL_LOG(ERROR) or fail because of a bad rewrite
  392. // string.
  393. // @param rewrite The proposed rewrite string.
  394. // @param error An error message is recorded here, iff we return false.
  395. // Otherwise, it is unchanged.
  396. // @return true, iff @p rewrite is suitable for use with the PCRE.
  397. bool CheckRewriteString(absl::string_view rewrite, std::string* error) const;
  398. // Returns a copy of 'unquoted' with all potentially meaningful
  399. // regexp characters backslash-escaped. The returned string, used
  400. // as a regular expression, will exactly match the original string.
  401. // For example,
  402. // 1.5-2.0?
  403. // becomes:
  404. // 1\.5\-2\.0\?
  405. static std::string QuoteMeta(absl::string_view unquoted);
  406. /***** Generic matching interface (not so nice to use) *****/
  407. // Type of match (TODO: Should be restructured as an Option)
  408. enum Anchor {
  409. UNANCHORED, // No anchoring
  410. ANCHOR_START, // Anchor at start only
  411. ANCHOR_BOTH, // Anchor at start and end
  412. };
  413. // General matching routine. Stores the length of the match in
  414. // "*consumed" if successful.
  415. bool DoMatch(absl::string_view text, Anchor anchor, size_t* consumed,
  416. const Arg* const* args, int n) const;
  417. // Return the number of capturing subpatterns, or -1 if the
  418. // regexp wasn't valid on construction.
  419. int NumberOfCapturingGroups() const;
  420. private:
  421. void Init(const char* pattern, Option option, int match_limit,
  422. int stack_limit, bool report_errors);
  423. // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
  424. // pairs of integers for the beginning and end positions of matched
  425. // text. The first pair corresponds to the entire matched text;
  426. // subsequent pairs correspond, in order, to parentheses-captured
  427. // matches. Returns the number of pairs (one more than the number of
  428. // the last subpattern with a match) if matching was successful
  429. // and zero if the match failed.
  430. // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
  431. // against "foo", "bar", and "baz" respectively.
  432. // When matching PCRE("(foo)|hello") against "hello", it will return 1.
  433. // But the values for all subpattern are filled in into "vec".
  434. int TryMatch(absl::string_view text, size_t startpos, Anchor anchor,
  435. bool empty_ok, int* vec, int vecsize) const;
  436. // Append the "rewrite" string, with backslash substitutions from "text"
  437. // and "vec", to string "out".
  438. bool Rewrite(std::string* out, absl::string_view rewrite,
  439. absl::string_view text, int* vec, int veclen) const;
  440. // internal implementation for DoMatch
  441. bool DoMatchImpl(absl::string_view text, Anchor anchor, size_t* consumed,
  442. const Arg* const args[], int n, int* vec, int vecsize) const;
  443. // Compile the regexp for the specified anchoring mode
  444. pcre* Compile(Anchor anchor);
  445. std::string pattern_;
  446. Option options_;
  447. pcre* re_full_; // For full matches
  448. pcre* re_partial_; // For partial matches
  449. const std::string* error_; // Error indicator (or empty string)
  450. bool report_errors_; // Silences error logging if false
  451. int match_limit_; // Limit on execution resources
  452. int stack_limit_; // Limit on stack resources (bytes)
  453. mutable int hit_limit_; // Hit limit during execution (bool)
  454. PCRE(const PCRE&) = delete;
  455. PCRE& operator=(const PCRE&) = delete;
  456. };
  457. // PCRE_Options allow you to set the PCRE::Options, plus any pcre
  458. // "extra" options. The only extras are match_limit, which limits
  459. // the CPU time of a match, and stack_limit, which limits the
  460. // stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
  461. // that should not cause too many problems in production code.
  462. // If PCRE hits a limit during a match, it may return a false negative,
  463. // but (hopefully) it won't crash.
  464. //
  465. // NOTE: If you are handling regular expressions specified by
  466. // (external or internal) users, rather than hard-coded ones,
  467. // you should be using PCRE2, which uses an alternate implementation
  468. // that avoids these issues. See http://go/re2quick.
  469. class PCRE_Options {
  470. public:
  471. // constructor
  472. PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
  473. // accessors
  474. PCRE::Option option() const { return option_; }
  475. void set_option(PCRE::Option option) {
  476. option_ = option;
  477. }
  478. int match_limit() const { return match_limit_; }
  479. void set_match_limit(int match_limit) {
  480. match_limit_ = match_limit;
  481. }
  482. int stack_limit() const { return stack_limit_; }
  483. void set_stack_limit(int stack_limit) {
  484. stack_limit_ = stack_limit;
  485. }
  486. // If the regular expression is malformed, an error message will be printed
  487. // iff report_errors() is true. Default: true.
  488. bool report_errors() const { return report_errors_; }
  489. void set_report_errors(bool report_errors) {
  490. report_errors_ = report_errors;
  491. }
  492. private:
  493. PCRE::Option option_;
  494. int match_limit_;
  495. int stack_limit_;
  496. bool report_errors_;
  497. };
  498. /***** Implementation details *****/
  499. // Hex/Octal/Binary?
  // Adapter for destination types that supply their own
  //   bool ParseFrom(const char*, size_t)
  // method: Parse() hands the matched bytes to that method.
  template <typename T>
  class _PCRE_MatchObject {
   public:
    // Parses the n bytes starting at str into the T object that dest points
    // to and returns whatever T::ParseFrom returns.  A null dest means there
    // is nothing to fill in, which counts as success.
    static inline bool Parse(const char* str, size_t n, void* dest) {
      if (dest != nullptr) {
        T* const target = static_cast<T*>(dest);
        return target->ParseFrom(str, n);
      }
      return true;
    }
  };
  510. class PCRE::Arg {
  511. public:
  512. // Empty constructor so we can declare arrays of PCRE::Arg
  513. Arg();
  514. // Constructor specially designed for NULL arguments
  515. Arg(void*);
  516. typedef bool (*Parser)(const char* str, size_t n, void* dest);
  517. // Type-specific parsers
  518. #define MAKE_PARSER(type, name) \
  519. Arg(type* p) : arg_(p), parser_(name) {} \
  520. Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
  521. MAKE_PARSER(char, parse_char);
  522. MAKE_PARSER(signed char, parse_schar);
  523. MAKE_PARSER(unsigned char, parse_uchar);
  524. MAKE_PARSER(float, parse_float);
  525. MAKE_PARSER(double, parse_double);
  526. MAKE_PARSER(std::string, parse_string);
  527. MAKE_PARSER(absl::string_view, parse_string_view);
  528. MAKE_PARSER(short, parse_short);
  529. MAKE_PARSER(unsigned short, parse_ushort);
  530. MAKE_PARSER(int, parse_int);
  531. MAKE_PARSER(unsigned int, parse_uint);
  532. MAKE_PARSER(long, parse_long);
  533. MAKE_PARSER(unsigned long, parse_ulong);
  534. MAKE_PARSER(long long, parse_longlong);
  535. MAKE_PARSER(unsigned long long, parse_ulonglong);
  536. #undef MAKE_PARSER
  537. // Generic constructor
  538. template <typename T> Arg(T*, Parser parser);
  539. // Generic constructor template
  540. template <typename T> Arg(T* p)
  541. : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
  542. }
  543. // Parse the data
  544. bool Parse(const char* str, size_t n) const;
  545. private:
  546. void* arg_;
  547. Parser parser_;
  548. static bool parse_null (const char* str, size_t n, void* dest);
  549. static bool parse_char (const char* str, size_t n, void* dest);
  550. static bool parse_schar (const char* str, size_t n, void* dest);
  551. static bool parse_uchar (const char* str, size_t n, void* dest);
  552. static bool parse_float (const char* str, size_t n, void* dest);
  553. static bool parse_double (const char* str, size_t n, void* dest);
  554. static bool parse_string (const char* str, size_t n, void* dest);
  555. static bool parse_string_view (const char* str, size_t n, void* dest);
  556. #define DECLARE_INTEGER_PARSER(name) \
  557. private: \
  558. static bool parse_##name(const char* str, size_t n, void* dest); \
  559. static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
  560. int radix); \
  561. \
  562. public: \
  563. static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
  564. static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
  565. static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
  566. DECLARE_INTEGER_PARSER(short);
  567. DECLARE_INTEGER_PARSER(ushort);
  568. DECLARE_INTEGER_PARSER(int);
  569. DECLARE_INTEGER_PARSER(uint);
  570. DECLARE_INTEGER_PARSER(long);
  571. DECLARE_INTEGER_PARSER(ulong);
  572. DECLARE_INTEGER_PARSER(longlong);
  573. DECLARE_INTEGER_PARSER(ulonglong);
  574. #undef DECLARE_INTEGER_PARSER
  575. };
  576. inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
  577. inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
  578. inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
  579. return (*parser_)(str, n, arg_);
  580. }
  581. // This part of the parser, appropriate only for ints, deals with bases
  582. #define MAKE_INTEGER_PARSER(type, name) \
  583. inline PCRE::Arg Hex(type* ptr) { \
  584. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
  585. } \
  586. inline PCRE::Arg Octal(type* ptr) { \
  587. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
  588. } \
  589. inline PCRE::Arg CRadix(type* ptr) { \
  590. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
  591. }
  592. MAKE_INTEGER_PARSER(short, short);
  593. MAKE_INTEGER_PARSER(unsigned short, ushort);
  594. MAKE_INTEGER_PARSER(int, int);
  595. MAKE_INTEGER_PARSER(unsigned int, uint);
  596. MAKE_INTEGER_PARSER(long, long);
  597. MAKE_INTEGER_PARSER(unsigned long, ulong);
  598. MAKE_INTEGER_PARSER(long long, longlong);
  599. MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
  600. #undef MAKE_INTEGER_PARSER
  601. } // namespace re2
  602. #endif // UTIL_PCRE_H_