pcre.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. // Copyright 2003-2010 Google Inc. All Rights Reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. #ifndef UTIL_PCRE_H_
  5. #define UTIL_PCRE_H_
  6. // This is a variant of PCRE's pcrecpp.h, originally written at Google.
  7. // The main changes are the addition of the HitLimit method and
  8. // compilation as PCRE in namespace re2.
  9. // C++ interface to the pcre regular-expression library. PCRE supports
  10. // Perl-style regular expressions (with extensions like \d, \w, \s,
  11. // ...).
  12. //
  13. // -----------------------------------------------------------------------
  14. // REGEXP SYNTAX:
  15. //
  16. // This module uses the pcre library and hence supports its syntax
  17. // for regular expressions:
  18. //
  19. // http://www.google.com/search?q=pcre
  20. //
  21. // The syntax is pretty similar to Perl's. For those not familiar
  22. // with Perl's regular expressions, here are some examples of the most
  23. // commonly used extensions:
  24. //
  25. // "hello (\\w+) world" -- \w matches a "word" character
  26. // "version (\\d+)" -- \d matches a digit
  27. // "hello\\s+world" -- \s matches any whitespace character
  28. // "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
  29. // "(?i)hello" -- (?i) turns on case-insensitive matching
  30. // "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
  31. //
  32. // -----------------------------------------------------------------------
  33. // MATCHING INTERFACE:
  34. //
  35. // The "FullMatch" operation checks that supplied text matches a
  36. // supplied pattern exactly.
  37. //
  38. // Example: successful match
  39. // CHECK(PCRE::FullMatch("hello", "h.*o"));
  40. //
  41. // Example: unsuccessful match (requires full match):
  42. // CHECK(!PCRE::FullMatch("hello", "e"));
  43. //
  44. // -----------------------------------------------------------------------
  45. // UTF-8 AND THE MATCHING INTERFACE:
  46. //
  47. // By default, pattern and text are plain text, one byte per character.
  48. // The UTF8 flag, passed to the constructor, causes both pattern
  49. // and string to be treated as UTF-8 text, still a byte stream but
  50. // potentially multiple bytes per character. In practice, the text
  51. // is likelier to be UTF-8 than the pattern, but the match returned
  52. // may depend on the UTF8 flag, so always use it when matching
  53. // UTF8 text. E.g., "." will match one byte normally but with UTF8
  54. // set may match up to three bytes of a multi-byte character.
  55. //
  56. // Example:
  57. // PCRE re(utf8_pattern, PCRE::UTF8);
  58. // CHECK(PCRE::FullMatch(utf8_string, re));
  59. //
  60. // -----------------------------------------------------------------------
  61. // MATCHING WITH SUBSTRING EXTRACTION:
  62. //
  63. // You can supply extra pointer arguments to extract matched substrings.
  64. //
  65. // Example: extracts "ruby" into "s" and 1234 into "i"
  66. // int i;
  67. // std::string s;
  68. // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
  69. //
  70. // Example: fails because string cannot be stored in integer
  71. // CHECK(!PCRE::FullMatch("ruby", "(.*)", &i));
  72. //
  73. // Example: fails because there aren't enough sub-patterns:
  74. // CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s));
  75. //
  76. // Example: does not try to extract any extra sub-patterns
  77. // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s));
  78. //
  79. // Example: does not try to extract into NULL
  80. // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i));
  81. //
  82. // Example: integer overflow causes failure
  83. // CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
  84. //
  85. // -----------------------------------------------------------------------
  86. // PARTIAL MATCHES
  87. //
  88. // You can use the "PartialMatch" operation when you want the pattern
  89. // to match any substring of the text.
  90. //
  91. // Example: simple search for a string:
  92. // CHECK(PCRE::PartialMatch("hello", "ell"));
  93. //
  94. // Example: find first number in a string
  95. // int number;
  96. // CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number));
  97. // CHECK_EQ(number, 100);
  98. //
  99. // -----------------------------------------------------------------------
  100. // PRE-COMPILED REGULAR EXPRESSIONS
  101. //
  102. // PCRE makes it easy to use any string as a regular expression, without
  103. // requiring a separate compilation step.
  104. //
  105. // If speed is of the essence, you can create a pre-compiled "PCRE"
  106. // object from the pattern and use it multiple times. If you do so,
  107. // you can typically parse text faster than with sscanf.
  108. //
  109. // Example: precompile pattern for faster matching:
  110. // PCRE pattern("h.*o");
  111. // while (ReadLine(&str)) {
  112. // if (PCRE::FullMatch(str, pattern)) ...;
  113. // }
  114. //
  115. // -----------------------------------------------------------------------
  116. // SCANNING TEXT INCREMENTALLY
  117. //
  118. // The "Consume" operation may be useful if you want to repeatedly
  119. // match regular expressions at the front of a string and skip over
  120. // them as they match. This requires use of the "StringPiece" type,
  121. // which represents a sub-range of a real string.
  122. //
  123. // Example: read lines of the form "var = value" from a string.
  124. // std::string contents = ...; // Fill string somehow
  125. // StringPiece input(contents); // Wrap a StringPiece around it
  126. //
  127. // std::string var;
  128. // int value;
  129. // while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) {
  130. // ...;
  131. // }
  132. //
  133. // Each successful call to "Consume" will set "var/value", and also
  134. // advance "input" so it points past the matched text. Note that if the
  135. // regular expression matches an empty string, input will advance
  136. // by 0 bytes. If the regular expression being used might match
  137. // an empty string, the loop body must check for this case and either
  138. // advance the string or break out of the loop.
  139. //
  140. // The "FindAndConsume" operation is similar to "Consume" but does not
  141. // anchor your match at the beginning of the string. For example, you
  142. // could extract all words from a string by repeatedly calling
  143. // PCRE::FindAndConsume(&input, "(\\w+)", &word)
  144. //
  145. // -----------------------------------------------------------------------
  146. // PARSING HEX/OCTAL/C-RADIX NUMBERS
  147. //
  148. // By default, if you pass a pointer to a numeric value, the
  149. // corresponding text is interpreted as a base-10 number. You can
  150. // instead wrap the pointer with a call to one of the operators Hex(),
  151. // Octal(), or CRadix() to interpret the text in another base. The
  152. // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
  153. // prefixes, but defaults to base-10.
  154. //
  155. // Example:
  156. // int a, b, c, d;
  157. // CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
  158. // Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
  159. // will leave 64 in a, b, c, and d.
  160. #include "util/util.h"
  161. #include "re2/stringpiece.h"
  // When USEPCRE is defined, use the declarations from the real pcre header;
  // otherwise forward-declare "pcre" as an opaque type so that pointers to it
  // can still be named.
  #ifdef USEPCRE
  #include <pcre.h>
  #else
  struct pcre;  // opaque
  #endif
  namespace re2 {
  // Compile-time flag recording whether this build defined USEPCRE.
  #ifdef USEPCRE
  const bool UsingPCRE = true;
  #else
  const bool UsingPCRE = false;
  #endif
  }  // namespace re2
  173. namespace re2 {
  174. class PCRE_Options;
  175. // Interface for regular expression matching. Also corresponds to a
  176. // pre-compiled regular expression. A "PCRE" object is safe for
  177. // concurrent use by multiple threads.
  178. class PCRE {
  179. public:
  180. // We convert user-passed pointers into special Arg objects
  181. class Arg;
  182. // Marks end of arg list.
  183. // ONLY USE IN OPTIONAL ARG DEFAULTS.
  184. // DO NOT PASS EXPLICITLY.
  185. static Arg no_more_args;
  186. // Options are same value as those in pcre. We provide them here
  187. // to avoid users needing to include pcre.h and also to isolate
  188. // users from pcre should we change the underlying library.
  189. // Only those needed by Google programs are exposed here to
  190. // avoid collision with options employed internally by regexp.cc
  191. // Note that some options have equivalents that can be specified in
  192. // the regexp itself. For example, prefixing your regexp with
  193. // "(?s)" has the same effect as the PCRE_DOTALL option.
  194. enum Option {
  195. None = 0x0000,
  196. UTF8 = 0x0800, // == PCRE_UTF8
  197. EnabledCompileOptions = UTF8,
  198. EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag
  199. };
  200. // We provide implicit conversions from strings so that users can
  201. // pass in a string or a "const char*" wherever an "PCRE" is expected.
  202. PCRE(const char* pattern);
  203. PCRE(const char* pattern, Option option);
  204. PCRE(const std::string& pattern);
  205. PCRE(const std::string& pattern, Option option);
  206. PCRE(const char *pattern, const PCRE_Options& re_option);
  207. PCRE(const std::string& pattern, const PCRE_Options& re_option);
  208. ~PCRE();
  209. // The string specification for this PCRE. E.g.
  210. // PCRE re("ab*c?d+");
  211. // re.pattern(); // "ab*c?d+"
  212. const std::string& pattern() const { return pattern_; }
  213. // If PCRE could not be created properly, returns an error string.
  214. // Else returns the empty string.
  215. const std::string& error() const { return *error_; }
  216. // Whether the PCRE has hit a match limit during execution.
  217. // Not thread safe. Intended only for testing.
  218. // If hitting match limits is a problem,
  219. // you should be using PCRE2 (re2/re2.h)
  220. // instead of checking this flag.
  221. bool HitLimit();
  222. void ClearHitLimit();
  223. /***** The useful part: the matching interface *****/
  224. // Matches "text" against "pattern". If pointer arguments are
  225. // supplied, copies matched sub-patterns into them.
  226. //
  227. // You can pass in a "const char*" or a "std::string" for "text".
  228. // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern".
  229. //
  230. // The provided pointer arguments can be pointers to any scalar numeric
  231. // type, or one of:
  232. // std::string (matched piece is copied to string)
  233. // StringPiece (StringPiece is mutated to point to matched piece)
  234. // T (where "bool T::ParseFrom(const char*, size_t)" exists)
  235. // (void*)NULL (the corresponding matched sub-pattern is not copied)
  236. //
  237. // Returns true iff all of the following conditions are satisfied:
  238. // a. "text" matches "pattern" exactly
  239. // b. The number of matched sub-patterns is >= number of supplied pointers
  240. // c. The "i"th argument has a suitable type for holding the
  241. // string captured as the "i"th sub-pattern. If you pass in
  242. // NULL for the "i"th argument, or pass fewer arguments than
  243. // number of sub-patterns, "i"th captured sub-pattern is
  244. // ignored.
  245. //
  246. // CAVEAT: An optional sub-pattern that does not exist in the
  247. // matched string is assigned the empty string. Therefore, the
  248. // following will return false (because the empty string is not a
  249. // valid number):
  250. // int number;
  251. // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
  252. struct FullMatchFunctor {
  253. bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
  254. const Arg& ptr1 = no_more_args,
  255. const Arg& ptr2 = no_more_args,
  256. const Arg& ptr3 = no_more_args,
  257. const Arg& ptr4 = no_more_args,
  258. const Arg& ptr5 = no_more_args,
  259. const Arg& ptr6 = no_more_args,
  260. const Arg& ptr7 = no_more_args,
  261. const Arg& ptr8 = no_more_args,
  262. const Arg& ptr9 = no_more_args,
  263. const Arg& ptr10 = no_more_args,
  264. const Arg& ptr11 = no_more_args,
  265. const Arg& ptr12 = no_more_args,
  266. const Arg& ptr13 = no_more_args,
  267. const Arg& ptr14 = no_more_args,
  268. const Arg& ptr15 = no_more_args,
  269. const Arg& ptr16 = no_more_args) const;
  270. };
  271. static const FullMatchFunctor FullMatch;
  272. // Exactly like FullMatch(), except that "pattern" is allowed to match
  273. // a substring of "text".
  274. struct PartialMatchFunctor {
  275. bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
  276. const Arg& ptr1 = no_more_args,
  277. const Arg& ptr2 = no_more_args,
  278. const Arg& ptr3 = no_more_args,
  279. const Arg& ptr4 = no_more_args,
  280. const Arg& ptr5 = no_more_args,
  281. const Arg& ptr6 = no_more_args,
  282. const Arg& ptr7 = no_more_args,
  283. const Arg& ptr8 = no_more_args,
  284. const Arg& ptr9 = no_more_args,
  285. const Arg& ptr10 = no_more_args,
  286. const Arg& ptr11 = no_more_args,
  287. const Arg& ptr12 = no_more_args,
  288. const Arg& ptr13 = no_more_args,
  289. const Arg& ptr14 = no_more_args,
  290. const Arg& ptr15 = no_more_args,
  291. const Arg& ptr16 = no_more_args) const;
  292. };
  293. static const PartialMatchFunctor PartialMatch;
  294. // Like FullMatch() and PartialMatch(), except that pattern has to
  295. // match a prefix of "text", and "input" is advanced past the matched
  296. // text. Note: "input" is modified iff this routine returns true.
  297. struct ConsumeFunctor {
  298. bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
  299. const Arg& ptr1 = no_more_args,
  300. const Arg& ptr2 = no_more_args,
  301. const Arg& ptr3 = no_more_args,
  302. const Arg& ptr4 = no_more_args,
  303. const Arg& ptr5 = no_more_args,
  304. const Arg& ptr6 = no_more_args,
  305. const Arg& ptr7 = no_more_args,
  306. const Arg& ptr8 = no_more_args,
  307. const Arg& ptr9 = no_more_args,
  308. const Arg& ptr10 = no_more_args,
  309. const Arg& ptr11 = no_more_args,
  310. const Arg& ptr12 = no_more_args,
  311. const Arg& ptr13 = no_more_args,
  312. const Arg& ptr14 = no_more_args,
  313. const Arg& ptr15 = no_more_args,
  314. const Arg& ptr16 = no_more_args) const;
  315. };
  316. static const ConsumeFunctor Consume;
  317. // Like Consume(..), but does not anchor the match at the beginning of the
  318. // string. That is, "pattern" need not start its match at the beginning of
  319. // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
  320. // word in "s" and stores it in "word".
  321. struct FindAndConsumeFunctor {
  322. bool operator ()(StringPiece* input, const PCRE& pattern,
  323. const Arg& ptr1 = no_more_args,
  324. const Arg& ptr2 = no_more_args,
  325. const Arg& ptr3 = no_more_args,
  326. const Arg& ptr4 = no_more_args,
  327. const Arg& ptr5 = no_more_args,
  328. const Arg& ptr6 = no_more_args,
  329. const Arg& ptr7 = no_more_args,
  330. const Arg& ptr8 = no_more_args,
  331. const Arg& ptr9 = no_more_args,
  332. const Arg& ptr10 = no_more_args,
  333. const Arg& ptr11 = no_more_args,
  334. const Arg& ptr12 = no_more_args,
  335. const Arg& ptr13 = no_more_args,
  336. const Arg& ptr14 = no_more_args,
  337. const Arg& ptr15 = no_more_args,
  338. const Arg& ptr16 = no_more_args) const;
  339. };
  340. static const FindAndConsumeFunctor FindAndConsume;
  341. // Replace the first match of "pattern" in "str" with "rewrite".
  342. // Within "rewrite", backslash-escaped digits (\1 to \9) can be
  343. // used to insert text matching corresponding parenthesized group
  344. // from the pattern. \0 in "rewrite" refers to the entire matching
  345. // text. E.g.,
  346. //
  347. // std::string s = "yabba dabba doo";
  348. // CHECK(PCRE::Replace(&s, "b+", "d"));
  349. //
  350. // will leave "s" containing "yada dabba doo"
  351. //
  352. // Returns true if the pattern matches and a replacement occurs,
  353. // false otherwise.
  354. static bool Replace(std::string *str,
  355. const PCRE& pattern,
  356. const StringPiece& rewrite);
  357. // Like Replace(), except replaces all occurrences of the pattern in
  358. // the string with the rewrite. Replacements are not subject to
  359. // re-matching. E.g.,
  360. //
  361. // std::string s = "yabba dabba doo";
  362. // CHECK(PCRE::GlobalReplace(&s, "b+", "d"));
  363. //
  364. // will leave "s" containing "yada dada doo"
  365. //
  366. // Returns the number of replacements made.
  367. static int GlobalReplace(std::string *str,
  368. const PCRE& pattern,
  369. const StringPiece& rewrite);
  370. // Like Replace, except that if the pattern matches, "rewrite"
  371. // is copied into "out" with substitutions. The non-matching
  372. // portions of "text" are ignored.
  373. //
  374. // Returns true iff a match occurred and the extraction happened
  375. // successfully; if no match occurs, the string is left unaffected.
  376. static bool Extract(const StringPiece &text,
  377. const PCRE& pattern,
  378. const StringPiece &rewrite,
  379. std::string *out);
  380. // Check that the given @p rewrite string is suitable for use with
  381. // this PCRE. It checks that:
  382. // * The PCRE has enough parenthesized subexpressions to satisfy all
  383. // of the \N tokens in @p rewrite, and
  384. // * The @p rewrite string doesn't have any syntax errors
  385. // ('\' followed by anything besides [0-9] and '\').
  386. // Making this test will guarantee that "replace" and "extract"
  387. // operations won't LOG(ERROR) or fail because of a bad rewrite
  388. // string.
  389. // @param rewrite The proposed rewrite string.
  390. // @param error An error message is recorded here, iff we return false.
  391. // Otherwise, it is unchanged.
  392. // @return true, iff @p rewrite is suitable for use with the PCRE.
  393. bool CheckRewriteString(const StringPiece& rewrite,
  394. std::string* error) const;
  395. // Returns a copy of 'unquoted' with all potentially meaningful
  396. // regexp characters backslash-escaped. The returned string, used
  397. // as a regular expression, will exactly match the original string.
  398. // For example,
  399. // 1.5-2.0?
  400. // becomes:
  401. // 1\.5\-2\.0\?
  402. static std::string QuoteMeta(const StringPiece& unquoted);
  403. /***** Generic matching interface (not so nice to use) *****/
  404. // Type of match (TODO: Should be restructured as an Option)
  405. enum Anchor {
  406. UNANCHORED, // No anchoring
  407. ANCHOR_START, // Anchor at start only
  408. ANCHOR_BOTH, // Anchor at start and end
  409. };
  410. // General matching routine. Stores the length of the match in
  411. // "*consumed" if successful.
  412. bool DoMatch(const StringPiece& text,
  413. Anchor anchor,
  414. size_t* consumed,
  415. const Arg* const* args, int n) const;
  416. // Return the number of capturing subpatterns, or -1 if the
  417. // regexp wasn't valid on construction.
  418. int NumberOfCapturingGroups() const;
  419. private:
  420. void Init(const char* pattern, Option option, int match_limit,
  421. int stack_limit, bool report_errors);
  422. // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
  423. // pairs of integers for the beginning and end positions of matched
  424. // text. The first pair corresponds to the entire matched text;
  425. // subsequent pairs correspond, in order, to parentheses-captured
  426. // matches. Returns the number of pairs (one more than the number of
  427. // the last subpattern with a match) if matching was successful
  428. // and zero if the match failed.
  429. // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
  430. // against "foo", "bar", and "baz" respectively.
  431. // When matching PCRE("(foo)|hello") against "hello", it will return 1.
  432. // But the values for all subpattern are filled in into "vec".
  433. int TryMatch(const StringPiece& text,
  434. size_t startpos,
  435. Anchor anchor,
  436. bool empty_ok,
  437. int *vec,
  438. int vecsize) const;
  439. // Append the "rewrite" string, with backslash subsitutions from "text"
  440. // and "vec", to string "out".
  441. bool Rewrite(std::string *out,
  442. const StringPiece &rewrite,
  443. const StringPiece &text,
  444. int *vec,
  445. int veclen) const;
  446. // internal implementation for DoMatch
  447. bool DoMatchImpl(const StringPiece& text,
  448. Anchor anchor,
  449. size_t* consumed,
  450. const Arg* const args[],
  451. int n,
  452. int* vec,
  453. int vecsize) const;
  454. // Compile the regexp for the specified anchoring mode
  455. pcre* Compile(Anchor anchor);
  456. std::string pattern_;
  457. Option options_;
  458. pcre* re_full_; // For full matches
  459. pcre* re_partial_; // For partial matches
  460. const std::string* error_; // Error indicator (or empty string)
  461. bool report_errors_; // Silences error logging if false
  462. int match_limit_; // Limit on execution resources
  463. int stack_limit_; // Limit on stack resources (bytes)
  464. mutable int32_t hit_limit_; // Hit limit during execution (bool)
  465. PCRE(const PCRE&) = delete;
  466. PCRE& operator=(const PCRE&) = delete;
  467. };
  468. // PCRE_Options allow you to set the PCRE::Options, plus any pcre
  469. // "extra" options. The only extras are match_limit, which limits
  470. // the CPU time of a match, and stack_limit, which limits the
  471. // stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default
  472. // that should not cause too many problems in production code.
  473. // If PCRE hits a limit during a match, it may return a false negative,
  474. // but (hopefully) it won't crash.
  475. //
  476. // NOTE: If you are handling regular expressions specified by
  477. // (external or internal) users, rather than hard-coded ones,
  478. // you should be using RE2, which uses an alternate implementation
  479. // that avoids these issues. See http://go/re2quick.
  480. class PCRE_Options {
  481. public:
  482. // constructor
  483. PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {}
  484. // accessors
  485. PCRE::Option option() const { return option_; }
  486. void set_option(PCRE::Option option) {
  487. option_ = option;
  488. }
  489. int match_limit() const { return match_limit_; }
  490. void set_match_limit(int match_limit) {
  491. match_limit_ = match_limit;
  492. }
  493. int stack_limit() const { return stack_limit_; }
  494. void set_stack_limit(int stack_limit) {
  495. stack_limit_ = stack_limit;
  496. }
  497. // If the regular expression is malformed, an error message will be printed
  498. // iff report_errors() is true. Default: true.
  499. bool report_errors() const { return report_errors_; }
  500. void set_report_errors(bool report_errors) {
  501. report_errors_ = report_errors;
  502. }
  503. private:
  504. PCRE::Option option_;
  505. int match_limit_;
  506. int stack_limit_;
  507. bool report_errors_;
  508. };
  509. /***** Implementation details *****/
  510. // Hex/Octal/Binary?
  // Adapter for destination objects whose type provides a
  // ParseFrom(const char*, size_t) method: Parse() has the plain-function
  // shape (str, n, dest) and forwards the bytes to that method.
  template <typename T>
  class _PCRE_MatchObject {
   public:
    // A null "dest" is accepted and yields true without parsing anything;
    // otherwise the result is whatever dest's ParseFrom(str, n) returns.
    static inline bool Parse(const char* str, size_t n, void* dest) {
      if (dest != nullptr) {
        return static_cast<T*>(dest)->ParseFrom(str, n);
      }
      return true;
    }
  };
  521. class PCRE::Arg {
  522. public:
  523. // Empty constructor so we can declare arrays of PCRE::Arg
  524. Arg();
  525. // Constructor specially designed for NULL arguments
  526. Arg(void*);
  527. typedef bool (*Parser)(const char* str, size_t n, void* dest);
  528. // Type-specific parsers
  529. #define MAKE_PARSER(type, name) \
  530. Arg(type* p) : arg_(p), parser_(name) {} \
  531. Arg(type* p, Parser parser) : arg_(p), parser_(parser) {}
  532. MAKE_PARSER(char, parse_char);
  533. MAKE_PARSER(signed char, parse_schar);
  534. MAKE_PARSER(unsigned char, parse_uchar);
  535. MAKE_PARSER(float, parse_float);
  536. MAKE_PARSER(double, parse_double);
  537. MAKE_PARSER(std::string, parse_string);
  538. MAKE_PARSER(StringPiece, parse_stringpiece);
  539. MAKE_PARSER(short, parse_short);
  540. MAKE_PARSER(unsigned short, parse_ushort);
  541. MAKE_PARSER(int, parse_int);
  542. MAKE_PARSER(unsigned int, parse_uint);
  543. MAKE_PARSER(long, parse_long);
  544. MAKE_PARSER(unsigned long, parse_ulong);
  545. MAKE_PARSER(long long, parse_longlong);
  546. MAKE_PARSER(unsigned long long, parse_ulonglong);
  547. #undef MAKE_PARSER
  548. // Generic constructor
  549. template <typename T> Arg(T*, Parser parser);
  550. // Generic constructor template
  551. template <typename T> Arg(T* p)
  552. : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) {
  553. }
  554. // Parse the data
  555. bool Parse(const char* str, size_t n) const;
  556. private:
  557. void* arg_;
  558. Parser parser_;
  559. static bool parse_null (const char* str, size_t n, void* dest);
  560. static bool parse_char (const char* str, size_t n, void* dest);
  561. static bool parse_schar (const char* str, size_t n, void* dest);
  562. static bool parse_uchar (const char* str, size_t n, void* dest);
  563. static bool parse_float (const char* str, size_t n, void* dest);
  564. static bool parse_double (const char* str, size_t n, void* dest);
  565. static bool parse_string (const char* str, size_t n, void* dest);
  566. static bool parse_stringpiece (const char* str, size_t n, void* dest);
  567. #define DECLARE_INTEGER_PARSER(name) \
  568. private: \
  569. static bool parse_##name(const char* str, size_t n, void* dest); \
  570. static bool parse_##name##_radix(const char* str, size_t n, void* dest, \
  571. int radix); \
  572. \
  573. public: \
  574. static bool parse_##name##_hex(const char* str, size_t n, void* dest); \
  575. static bool parse_##name##_octal(const char* str, size_t n, void* dest); \
  576. static bool parse_##name##_cradix(const char* str, size_t n, void* dest)
  577. DECLARE_INTEGER_PARSER(short);
  578. DECLARE_INTEGER_PARSER(ushort);
  579. DECLARE_INTEGER_PARSER(int);
  580. DECLARE_INTEGER_PARSER(uint);
  581. DECLARE_INTEGER_PARSER(long);
  582. DECLARE_INTEGER_PARSER(ulong);
  583. DECLARE_INTEGER_PARSER(longlong);
  584. DECLARE_INTEGER_PARSER(ulonglong);
  585. #undef DECLARE_INTEGER_PARSER
  586. };
  587. inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { }
  588. inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
  589. inline bool PCRE::Arg::Parse(const char* str, size_t n) const {
  590. return (*parser_)(str, n, arg_);
  591. }
  592. // This part of the parser, appropriate only for ints, deals with bases
  593. #define MAKE_INTEGER_PARSER(type, name) \
  594. inline PCRE::Arg Hex(type* ptr) { \
  595. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \
  596. } \
  597. inline PCRE::Arg Octal(type* ptr) { \
  598. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \
  599. } \
  600. inline PCRE::Arg CRadix(type* ptr) { \
  601. return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \
  602. }
  603. MAKE_INTEGER_PARSER(short, short);
  604. MAKE_INTEGER_PARSER(unsigned short, ushort);
  605. MAKE_INTEGER_PARSER(int, int);
  606. MAKE_INTEGER_PARSER(unsigned int, uint);
  607. MAKE_INTEGER_PARSER(long, long);
  608. MAKE_INTEGER_PARSER(unsigned long, ulong);
  609. MAKE_INTEGER_PARSER(long long, longlong);
  610. MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
  611. #undef MAKE_INTEGER_PARSER
  612. } // namespace re2
  613. #endif // UTIL_PCRE_H_