GlobPattern.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements a glob pattern matcher.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Support/GlobPattern.h"
  13. #include "llvm/ADT/ArrayRef.h"
  14. #include "llvm/ADT/Optional.h"
  15. #include "llvm/ADT/StringRef.h"
  16. #include "llvm/Support/Errc.h"
  17. using namespace llvm;
  18. static bool hasWildcard(StringRef S) {
  19. return S.find_first_of("?*[\\") != StringRef::npos;
  20. }
  21. // Expands character ranges and returns a bitmap.
  22. // For example, "a-cf-hz" is expanded to "abcfghz".
  23. static Expected<BitVector> expand(StringRef S, StringRef Original) {
  24. BitVector BV(256, false);
  25. // Expand X-Y.
  26. for (;;) {
  27. if (S.size() < 3)
  28. break;
  29. uint8_t Start = S[0];
  30. uint8_t End = S[2];
  31. // If it doesn't start with something like X-Y,
  32. // consume the first character and proceed.
  33. if (S[1] != '-') {
  34. BV[Start] = true;
  35. S = S.substr(1);
  36. continue;
  37. }
  38. // It must be in the form of X-Y.
  39. // Validate it and then interpret the range.
  40. if (Start > End)
  41. return make_error<StringError>("invalid glob pattern: " + Original,
  42. errc::invalid_argument);
  43. for (int C = Start; C <= End; ++C)
  44. BV[(uint8_t)C] = true;
  45. S = S.substr(3);
  46. }
  47. for (char C : S)
  48. BV[(uint8_t)C] = true;
  49. return BV;
  50. }
  51. // This is a scanner for the glob pattern.
  52. // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
  53. // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
  54. // equivalent to "[^<chars>]"), or a non-meta character.
  55. // This function returns the first token in S.
  56. static Expected<BitVector> scan(StringRef &S, StringRef Original) {
  57. switch (S[0]) {
  58. case '*':
  59. S = S.substr(1);
  60. // '*' is represented by an empty bitvector.
  61. // All other bitvectors are 256-bit long.
  62. return BitVector();
  63. case '?':
  64. S = S.substr(1);
  65. return BitVector(256, true);
  66. case '[': {
  67. // ']' is allowed as the first character of a character class. '[]' is
  68. // invalid. So, just skip the first character.
  69. size_t End = S.find(']', 2);
  70. if (End == StringRef::npos)
  71. return make_error<StringError>("invalid glob pattern: " + Original,
  72. errc::invalid_argument);
  73. StringRef Chars = S.substr(1, End - 1);
  74. S = S.substr(End + 1);
  75. if (Chars.startswith("^") || Chars.startswith("!")) {
  76. Expected<BitVector> BV = expand(Chars.substr(1), Original);
  77. if (!BV)
  78. return BV.takeError();
  79. return BV->flip();
  80. }
  81. return expand(Chars, Original);
  82. }
  83. case '\\':
  84. // Eat this character and fall through below to treat it like a non-meta
  85. // character.
  86. S = S.substr(1);
  87. LLVM_FALLTHROUGH;
  88. default:
  89. BitVector BV(256, false);
  90. BV[(uint8_t)S[0]] = true;
  91. S = S.substr(1);
  92. return BV;
  93. }
  94. }
  95. Expected<GlobPattern> GlobPattern::create(StringRef S) {
  96. GlobPattern Pat;
  97. // S doesn't contain any metacharacter,
  98. // so the regular string comparison should work.
  99. if (!hasWildcard(S)) {
  100. Pat.Exact = S;
  101. return Pat;
  102. }
  103. // S is something like "foo*", and the "* is not escaped. We can use
  104. // startswith().
  105. if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
  106. Pat.Prefix = S.drop_back();
  107. return Pat;
  108. }
  109. // S is something like "*foo". We can use endswith().
  110. if (S.startswith("*") && !hasWildcard(S.drop_front())) {
  111. Pat.Suffix = S.drop_front();
  112. return Pat;
  113. }
  114. // Otherwise, we need to do real glob pattern matching.
  115. // Parse the pattern now.
  116. StringRef Original = S;
  117. while (!S.empty()) {
  118. Expected<BitVector> BV = scan(S, Original);
  119. if (!BV)
  120. return BV.takeError();
  121. Pat.Tokens.push_back(*BV);
  122. }
  123. return Pat;
  124. }
  125. bool GlobPattern::match(StringRef S) const {
  126. if (Exact)
  127. return S == *Exact;
  128. if (Prefix)
  129. return S.startswith(*Prefix);
  130. if (Suffix)
  131. return S.endswith(*Suffix);
  132. return matchOne(Tokens, S);
  133. }
  134. // Runs glob pattern Pats against string S.
  135. bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
  136. for (;;) {
  137. if (Pats.empty())
  138. return S.empty();
  139. // If Pats[0] is '*', try to match Pats[1..] against all possible
  140. // tail strings of S to see at least one pattern succeeds.
  141. if (Pats[0].size() == 0) {
  142. Pats = Pats.slice(1);
  143. if (Pats.empty())
  144. // Fast path. If a pattern is '*', it matches anything.
  145. return true;
  146. for (size_t I = 0, E = S.size(); I < E; ++I)
  147. if (matchOne(Pats, S.substr(I)))
  148. return true;
  149. return false;
  150. }
  151. // If Pats[0] is not '*', it must consume one character.
  152. if (S.empty() || !Pats[0][(uint8_t)S[0]])
  153. return false;
  154. Pats = Pats.slice(1);
  155. S = S.substr(1);
  156. }
  157. }