123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file implements a glob pattern matcher.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Support/GlobPattern.h"
- #include "llvm/ADT/ArrayRef.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/Support/Errc.h"
- using namespace llvm;
- static bool hasWildcard(StringRef S) {
- return S.find_first_of("?*[\\") != StringRef::npos;
- }
- // Expands character ranges and returns a bitmap.
- // For example, "a-cf-hz" is expanded to "abcfghz".
- static Expected<BitVector> expand(StringRef S, StringRef Original) {
- BitVector BV(256, false);
- // Expand X-Y.
- for (;;) {
- if (S.size() < 3)
- break;
- uint8_t Start = S[0];
- uint8_t End = S[2];
- // If it doesn't start with something like X-Y,
- // consume the first character and proceed.
- if (S[1] != '-') {
- BV[Start] = true;
- S = S.substr(1);
- continue;
- }
- // It must be in the form of X-Y.
- // Validate it and then interpret the range.
- if (Start > End)
- return make_error<StringError>("invalid glob pattern: " + Original,
- errc::invalid_argument);
- for (int C = Start; C <= End; ++C)
- BV[(uint8_t)C] = true;
- S = S.substr(3);
- }
- for (char C : S)
- BV[(uint8_t)C] = true;
- return BV;
- }
- // This is a scanner for the glob pattern.
- // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
- // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
- // equivalent to "[^<chars>]"), or a non-meta character.
- // This function returns the first token in S.
- static Expected<BitVector> scan(StringRef &S, StringRef Original) {
- switch (S[0]) {
- case '*':
- S = S.substr(1);
- // '*' is represented by an empty bitvector.
- // All other bitvectors are 256-bit long.
- return BitVector();
- case '?':
- S = S.substr(1);
- return BitVector(256, true);
- case '[': {
- // ']' is allowed as the first character of a character class. '[]' is
- // invalid. So, just skip the first character.
- size_t End = S.find(']', 2);
- if (End == StringRef::npos)
- return make_error<StringError>("invalid glob pattern: " + Original,
- errc::invalid_argument);
- StringRef Chars = S.substr(1, End - 1);
- S = S.substr(End + 1);
- if (Chars.startswith("^") || Chars.startswith("!")) {
- Expected<BitVector> BV = expand(Chars.substr(1), Original);
- if (!BV)
- return BV.takeError();
- return BV->flip();
- }
- return expand(Chars, Original);
- }
- case '\\':
- // Eat this character and fall through below to treat it like a non-meta
- // character.
- S = S.substr(1);
- [[fallthrough]];
- default:
- BitVector BV(256, false);
- BV[(uint8_t)S[0]] = true;
- S = S.substr(1);
- return BV;
- }
- }
- Expected<GlobPattern> GlobPattern::create(StringRef S) {
- GlobPattern Pat;
- // S doesn't contain any metacharacter,
- // so the regular string comparison should work.
- if (!hasWildcard(S)) {
- Pat.Exact = S;
- return Pat;
- }
- // S is something like "foo*", and the "* is not escaped. We can use
- // startswith().
- if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
- Pat.Prefix = S.drop_back();
- return Pat;
- }
- // S is something like "*foo". We can use endswith().
- if (S.startswith("*") && !hasWildcard(S.drop_front())) {
- Pat.Suffix = S.drop_front();
- return Pat;
- }
- // Otherwise, we need to do real glob pattern matching.
- // Parse the pattern now.
- StringRef Original = S;
- while (!S.empty()) {
- Expected<BitVector> BV = scan(S, Original);
- if (!BV)
- return BV.takeError();
- Pat.Tokens.push_back(*BV);
- }
- return Pat;
- }
- bool GlobPattern::match(StringRef S) const {
- if (Exact)
- return S == *Exact;
- if (Prefix)
- return S.startswith(*Prefix);
- if (Suffix)
- return S.endswith(*Suffix);
- return matchOne(Tokens, S);
- }
- // Runs glob pattern Pats against string S.
- bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
- for (;;) {
- if (Pats.empty())
- return S.empty();
- // If Pats[0] is '*', try to match Pats[1..] against all possible
- // tail strings of S to see at least one pattern succeeds.
- if (Pats[0].size() == 0) {
- Pats = Pats.slice(1);
- if (Pats.empty())
- // Fast path. If a pattern is '*', it matches anything.
- return true;
- for (size_t I = 0, E = S.size(); I < E; ++I)
- if (matchOne(Pats, S.substr(I)))
- return true;
- return false;
- }
- // If Pats[0] is not '*', it must consume one character.
- if (S.empty() || !Pats[0][(uint8_t)S[0]])
- return false;
- Pats = Pats.slice(1);
- S = S.substr(1);
- }
- }
|