GlobPattern.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements a glob pattern matcher.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Support/GlobPattern.h"
  13. #include "llvm/ADT/ArrayRef.h"
  14. #include "llvm/ADT/StringRef.h"
  15. #include "llvm/Support/Errc.h"
  16. using namespace llvm;
  17. static bool hasWildcard(StringRef S) {
  18. return S.find_first_of("?*[\\") != StringRef::npos;
  19. }
  20. // Expands character ranges and returns a bitmap.
  21. // For example, "a-cf-hz" is expanded to "abcfghz".
  22. static Expected<BitVector> expand(StringRef S, StringRef Original) {
  23. BitVector BV(256, false);
  24. // Expand X-Y.
  25. for (;;) {
  26. if (S.size() < 3)
  27. break;
  28. uint8_t Start = S[0];
  29. uint8_t End = S[2];
  30. // If it doesn't start with something like X-Y,
  31. // consume the first character and proceed.
  32. if (S[1] != '-') {
  33. BV[Start] = true;
  34. S = S.substr(1);
  35. continue;
  36. }
  37. // It must be in the form of X-Y.
  38. // Validate it and then interpret the range.
  39. if (Start > End)
  40. return make_error<StringError>("invalid glob pattern: " + Original,
  41. errc::invalid_argument);
  42. for (int C = Start; C <= End; ++C)
  43. BV[(uint8_t)C] = true;
  44. S = S.substr(3);
  45. }
  46. for (char C : S)
  47. BV[(uint8_t)C] = true;
  48. return BV;
  49. }
  50. // This is a scanner for the glob pattern.
  51. // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
  52. // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
  53. // equivalent to "[^<chars>]"), or a non-meta character.
  54. // This function returns the first token in S.
  55. static Expected<BitVector> scan(StringRef &S, StringRef Original) {
  56. switch (S[0]) {
  57. case '*':
  58. S = S.substr(1);
  59. // '*' is represented by an empty bitvector.
  60. // All other bitvectors are 256-bit long.
  61. return BitVector();
  62. case '?':
  63. S = S.substr(1);
  64. return BitVector(256, true);
  65. case '[': {
  66. // ']' is allowed as the first character of a character class. '[]' is
  67. // invalid. So, just skip the first character.
  68. size_t End = S.find(']', 2);
  69. if (End == StringRef::npos)
  70. return make_error<StringError>("invalid glob pattern: " + Original,
  71. errc::invalid_argument);
  72. StringRef Chars = S.substr(1, End - 1);
  73. S = S.substr(End + 1);
  74. if (Chars.startswith("^") || Chars.startswith("!")) {
  75. Expected<BitVector> BV = expand(Chars.substr(1), Original);
  76. if (!BV)
  77. return BV.takeError();
  78. return BV->flip();
  79. }
  80. return expand(Chars, Original);
  81. }
  82. case '\\':
  83. // Eat this character and fall through below to treat it like a non-meta
  84. // character.
  85. S = S.substr(1);
  86. [[fallthrough]];
  87. default:
  88. BitVector BV(256, false);
  89. BV[(uint8_t)S[0]] = true;
  90. S = S.substr(1);
  91. return BV;
  92. }
  93. }
  94. Expected<GlobPattern> GlobPattern::create(StringRef S) {
  95. GlobPattern Pat;
  96. // S doesn't contain any metacharacter,
  97. // so the regular string comparison should work.
  98. if (!hasWildcard(S)) {
  99. Pat.Exact = S;
  100. return Pat;
  101. }
  102. // S is something like "foo*", and the "* is not escaped. We can use
  103. // startswith().
  104. if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
  105. Pat.Prefix = S.drop_back();
  106. return Pat;
  107. }
  108. // S is something like "*foo". We can use endswith().
  109. if (S.startswith("*") && !hasWildcard(S.drop_front())) {
  110. Pat.Suffix = S.drop_front();
  111. return Pat;
  112. }
  113. // Otherwise, we need to do real glob pattern matching.
  114. // Parse the pattern now.
  115. StringRef Original = S;
  116. while (!S.empty()) {
  117. Expected<BitVector> BV = scan(S, Original);
  118. if (!BV)
  119. return BV.takeError();
  120. Pat.Tokens.push_back(*BV);
  121. }
  122. return Pat;
  123. }
  124. bool GlobPattern::match(StringRef S) const {
  125. if (Exact)
  126. return S == *Exact;
  127. if (Prefix)
  128. return S.startswith(*Prefix);
  129. if (Suffix)
  130. return S.endswith(*Suffix);
  131. return matchOne(Tokens, S);
  132. }
  133. // Runs glob pattern Pats against string S.
  134. bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
  135. for (;;) {
  136. if (Pats.empty())
  137. return S.empty();
  138. // If Pats[0] is '*', try to match Pats[1..] against all possible
  139. // tail strings of S to see at least one pattern succeeds.
  140. if (Pats[0].size() == 0) {
  141. Pats = Pats.slice(1);
  142. if (Pats.empty())
  143. // Fast path. If a pattern is '*', it matches anything.
  144. return true;
  145. for (size_t I = 0, E = S.size(); I < E; ++I)
  146. if (matchOne(Pats, S.substr(I)))
  147. return true;
  148. return false;
  149. }
  150. // If Pats[0] is not '*', it must consume one character.
  151. if (S.empty() || !Pats[0][(uint8_t)S[0]])
  152. return false;
  153. Pats = Pats.slice(1);
  154. S = S.substr(1);
  155. }
  156. }