FileMatchTrie.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. //===- FileMatchTrie.cpp --------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the implementation of a FileMatchTrie.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "clang/Tooling/FileMatchTrie.h"
  13. #include "llvm/ADT/StringMap.h"
  14. #include "llvm/ADT/StringRef.h"
  15. #include "llvm/Support/FileSystem.h"
  16. #include "llvm/Support/Path.h"
  17. #include "llvm/Support/raw_ostream.h"
  18. #include <string>
  19. #include <vector>
  20. using namespace clang;
  21. using namespace tooling;
  22. namespace {
  23. /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
  24. struct DefaultPathComparator : public PathComparator {
  25. bool equivalent(StringRef FileA, StringRef FileB) const override {
  26. return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
  27. }
  28. };
  29. } // namespace
  30. namespace clang {
  31. namespace tooling {
  32. /// A node of the \c FileMatchTrie.
  33. ///
  34. /// Each node has storage for up to one path and a map mapping a path segment to
  35. /// child nodes. The trie starts with an empty root node.
  36. class FileMatchTrieNode {
  37. public:
  38. /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
  39. /// the number of \c NewPath's trailing characters already consumed during
  40. /// recursion.
  41. ///
  42. /// An insert of a path
  43. /// 'p'starts at the root node and does the following:
  44. /// - If the node is empty, insert 'p' into its storage and abort.
  45. /// - If the node has a path 'p2' but no children, take the last path segment
  46. /// 's' of 'p2', put a new child into the map at 's' an insert the rest of
  47. /// 'p2' there.
  48. /// - Insert a new child for the last segment of 'p' and insert the rest of
  49. /// 'p' there.
  50. ///
  51. /// An insert operation is linear in the number of a path's segments.
  52. void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
  53. // We cannot put relative paths into the FileMatchTrie as then a path can be
  54. // a postfix of another path, violating a core assumption of the trie.
  55. if (llvm::sys::path::is_relative(NewPath))
  56. return;
  57. if (Path.empty()) {
  58. // This is an empty leaf. Store NewPath and return.
  59. Path = std::string(NewPath);
  60. return;
  61. }
  62. if (Children.empty()) {
  63. // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
  64. if (NewPath == Path)
  65. return;
  66. // Make this a node and create a child-leaf with 'Path'.
  67. StringRef Element(llvm::sys::path::filename(
  68. StringRef(Path).drop_back(ConsumedLength)));
  69. Children[Element].Path = Path;
  70. }
  71. StringRef Element(llvm::sys::path::filename(
  72. StringRef(NewPath).drop_back(ConsumedLength)));
  73. Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
  74. }
  75. /// Tries to find the node under this \c FileMatchTrieNode that best
  76. /// matches 'FileName'.
  77. ///
  78. /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
  79. /// \c true and an empty string is returned. If no path fits 'FileName', an
  80. /// empty string is returned. \c ConsumedLength denotes the number of
  81. /// \c Filename's trailing characters already consumed during recursion.
  82. ///
  83. /// To find the best matching node for a given path 'p', the
  84. /// \c findEquivalent() function is called recursively for each path segment
  85. /// (back to front) of 'p' until a node 'n' is reached that does not ..
  86. /// - .. have children. In this case it is checked
  87. /// whether the stored path is equivalent to 'p'. If yes, the best match is
  88. /// found. Otherwise continue with the parent node as if this node did not
  89. /// exist.
  90. /// - .. a child matching the next path segment. In this case, all children of
  91. /// 'n' are an equally good match for 'p'. All children are of 'n' are found
  92. /// recursively and their equivalence to 'p' is determined. If none are
  93. /// equivalent, continue with the parent node as if 'n' didn't exist. If one
  94. /// is equivalent, the best match is found. Otherwise, report and ambigiuity
  95. /// error.
  96. StringRef findEquivalent(const PathComparator& Comparator,
  97. StringRef FileName,
  98. bool &IsAmbiguous,
  99. unsigned ConsumedLength = 0) const {
  100. // Note: we support only directory symlinks for performance reasons.
  101. if (Children.empty()) {
  102. // As far as we do not support file symlinks, compare
  103. // basenames here to avoid request to file system.
  104. if (llvm::sys::path::filename(Path) ==
  105. llvm::sys::path::filename(FileName) &&
  106. Comparator.equivalent(StringRef(Path), FileName))
  107. return StringRef(Path);
  108. return {};
  109. }
  110. StringRef Element(llvm::sys::path::filename(FileName.drop_back(
  111. ConsumedLength)));
  112. llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
  113. Children.find(Element);
  114. if (MatchingChild != Children.end()) {
  115. StringRef Result = MatchingChild->getValue().findEquivalent(
  116. Comparator, FileName, IsAmbiguous,
  117. ConsumedLength + Element.size() + 1);
  118. if (!Result.empty() || IsAmbiguous)
  119. return Result;
  120. }
  121. // If `ConsumedLength` is zero, this is the root and we have no filename
  122. // match. Give up in this case, we don't try to find symlinks with
  123. // different names.
  124. if (ConsumedLength == 0)
  125. return {};
  126. std::vector<StringRef> AllChildren;
  127. getAll(AllChildren, MatchingChild);
  128. StringRef Result;
  129. for (const auto &Child : AllChildren) {
  130. if (Comparator.equivalent(Child, FileName)) {
  131. if (Result.empty()) {
  132. Result = Child;
  133. } else {
  134. IsAmbiguous = true;
  135. return {};
  136. }
  137. }
  138. }
  139. return Result;
  140. }
  141. private:
  142. /// Gets all paths under this FileMatchTrieNode.
  143. void getAll(std::vector<StringRef> &Results,
  144. llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
  145. if (Path.empty())
  146. return;
  147. if (Children.empty()) {
  148. Results.push_back(StringRef(Path));
  149. return;
  150. }
  151. for (llvm::StringMap<FileMatchTrieNode>::const_iterator
  152. It = Children.begin(), E = Children.end();
  153. It != E; ++It) {
  154. if (It == Except)
  155. continue;
  156. It->getValue().getAll(Results, Children.end());
  157. }
  158. }
  159. // The stored absolute path in this node. Only valid for leaf nodes, i.e.
  160. // nodes where Children.empty().
  161. std::string Path;
  162. // The children of this node stored in a map based on the next path segment.
  163. llvm::StringMap<FileMatchTrieNode> Children;
  164. };
  165. } // namespace tooling
  166. } // namespace clang
  167. FileMatchTrie::FileMatchTrie()
  168. : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
  169. FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
  170. : Root(new FileMatchTrieNode), Comparator(Comparator) {}
  171. FileMatchTrie::~FileMatchTrie() {
  172. delete Root;
  173. }
  174. void FileMatchTrie::insert(StringRef NewPath) {
  175. Root->insert(NewPath);
  176. }
  177. StringRef FileMatchTrie::findEquivalent(StringRef FileName,
  178. raw_ostream &Error) const {
  179. if (llvm::sys::path::is_relative(FileName)) {
  180. Error << "Cannot resolve relative paths";
  181. return {};
  182. }
  183. bool IsAmbiguous = false;
  184. StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
  185. if (IsAmbiguous)
  186. Error << "Path is ambiguous";
  187. return Result;
  188. }