FileCollector.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. //===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/Support/FileCollector.h"
  9. #include "llvm/ADT/SmallString.h"
  10. #include "llvm/ADT/Twine.h"
  11. #include "llvm/Support/FileSystem.h"
  12. #include "llvm/Support/Path.h"
  13. #include "llvm/Support/Process.h"
  14. using namespace llvm;
// Constructor and destructor of the collector base class, both defaulted
// out of line in this translation unit.
FileCollectorBase::FileCollectorBase() = default;
FileCollectorBase::~FileCollectorBase() = default;
  17. void FileCollectorBase::addFile(const Twine &File) {
  18. std::lock_guard<std::mutex> lock(Mutex);
  19. std::string FileStr = File.str();
  20. if (markAsSeen(FileStr))
  21. addFileImpl(FileStr);
  22. }
  23. void FileCollectorBase::addDirectory(const Twine &Dir) {
  24. assert(sys::fs::is_directory(Dir));
  25. std::error_code EC;
  26. addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
  27. }
  28. static bool isCaseSensitivePath(StringRef Path) {
  29. SmallString<256> TmpDest = Path, UpperDest, RealDest;
  30. // Remove component traversals, links, etc.
  31. if (sys::fs::real_path(Path, TmpDest))
  32. return true; // Current default value in vfs.yaml
  33. Path = TmpDest;
  34. // Change path to all upper case and ask for its real path, if the latter
  35. // exists and is equal to path, it's not case sensitive. Default to case
  36. // sensitive in the absence of real_path, since this is the YAMLVFSWriter
  37. // default.
  38. UpperDest = Path.upper();
  39. if (!sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
  40. return false;
  41. return true;
  42. }
  43. FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
  44. : Root(Root), OverlayRoot(OverlayRoot) {
  45. assert(sys::path::is_absolute(Root) && "Root not absolute");
  46. assert(sys::path::is_absolute(OverlayRoot) && "OverlayRoot not absolute");
  47. }
  48. void FileCollector::PathCanonicalizer::updateWithRealPath(
  49. SmallVectorImpl<char> &Path) {
  50. StringRef SrcPath(Path.begin(), Path.size());
  51. StringRef Filename = sys::path::filename(SrcPath);
  52. StringRef Directory = sys::path::parent_path(SrcPath);
  53. // Use real_path to fix any symbolic link component present in the directory
  54. // part of the path, caching the search because computing the real path is
  55. // expensive.
  56. SmallString<256> RealPath;
  57. auto DirWithSymlink = CachedDirs.find(Directory);
  58. if (DirWithSymlink == CachedDirs.end()) {
  59. // FIXME: Should this be a call to FileSystem::getRealpath(), in some
  60. // cases? What if there is nothing on disk?
  61. if (sys::fs::real_path(Directory, RealPath))
  62. return;
  63. CachedDirs[Directory] = std::string(RealPath.str());
  64. } else {
  65. RealPath = DirWithSymlink->second;
  66. }
  67. // Finish recreating the path by appending the original filename, since we
  68. // don't need to resolve symlinks in the filename.
  69. //
  70. // FIXME: If we can cope with this, maybe we can cope without calling
  71. // getRealPath() at all when there's no ".." component.
  72. sys::path::append(RealPath, Filename);
  73. // Swap to create the output.
  74. Path.swap(RealPath);
  75. }
  76. /// Make Path absolute.
  77. static void makeAbsolute(SmallVectorImpl<char> &Path) {
  78. // We need an absolute src path to append to the root.
  79. sys::fs::make_absolute(Path);
  80. // Canonicalize src to a native path to avoid mixed separator styles.
  81. sys::path::native(Path);
  82. // Remove redundant leading "./" pieces and consecutive separators.
  83. Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
  84. StringRef(Path.begin(), Path.size()))
  85. .begin());
  86. }
  87. FileCollector::PathCanonicalizer::PathStorage
  88. FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
  89. PathStorage Paths;
  90. Paths.VirtualPath = SrcPath;
  91. makeAbsolute(Paths.VirtualPath);
  92. // If a ".." component is present after a symlink component, remove_dots may
  93. // lead to the wrong real destination path. Let the source be canonicalized
  94. // like that but make sure we always use the real path for the destination.
  95. Paths.CopyFrom = Paths.VirtualPath;
  96. updateWithRealPath(Paths.CopyFrom);
  97. // Canonicalize the virtual path by removing "..", "." components.
  98. sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
  99. return Paths;
  100. }
  101. void FileCollector::addFileImpl(StringRef SrcPath) {
  102. PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
  103. SmallString<256> DstPath = StringRef(Root);
  104. sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
  105. // Always map a canonical src path to its real path into the YAML, by doing
  106. // this we map different virtual src paths to the same entry in the VFS
  107. // overlay, which is a way to emulate symlink inside the VFS; this is also
  108. // needed for correctness, not doing that can lead to module redefinition
  109. // errors.
  110. addFileToMapping(Paths.VirtualPath, DstPath);
  111. }
  112. llvm::vfs::directory_iterator
  113. FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
  114. IntrusiveRefCntPtr<vfs::FileSystem> FS,
  115. std::error_code &EC) {
  116. auto It = FS->dir_begin(Dir, EC);
  117. if (EC)
  118. return It;
  119. addFile(Dir);
  120. for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
  121. if (It->type() == sys::fs::file_type::regular_file ||
  122. It->type() == sys::fs::file_type::directory_file ||
  123. It->type() == sys::fs::file_type::symlink_file) {
  124. addFile(It->path());
  125. }
  126. }
  127. if (EC)
  128. return It;
  129. // Return a new iterator.
  130. return FS->dir_begin(Dir, EC);
  131. }
  132. /// Set the access and modification time for the given file from the given
  133. /// status object.
  134. static std::error_code
  135. copyAccessAndModificationTime(StringRef Filename,
  136. const sys::fs::file_status &Stat) {
  137. int FD;
  138. if (auto EC =
  139. sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
  140. return EC;
  141. if (auto EC = sys::fs::setLastAccessAndModificationTime(
  142. FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
  143. return EC;
  144. if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
  145. return EC;
  146. return {};
  147. }
  148. std::error_code FileCollector::copyFiles(bool StopOnError) {
  149. auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
  150. if (Err) {
  151. return Err;
  152. }
  153. std::lock_guard<std::mutex> lock(Mutex);
  154. for (auto &entry : VFSWriter.getMappings()) {
  155. // Get the status of the original file/directory.
  156. sys::fs::file_status Stat;
  157. if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
  158. if (StopOnError)
  159. return EC;
  160. continue;
  161. }
  162. // Continue if the file doesn't exist.
  163. if (Stat.type() == sys::fs::file_type::file_not_found)
  164. continue;
  165. // Create directory tree.
  166. if (std::error_code EC =
  167. sys::fs::create_directories(sys::path::parent_path(entry.RPath),
  168. /*IgnoreExisting=*/true)) {
  169. if (StopOnError)
  170. return EC;
  171. }
  172. if (Stat.type() == sys::fs::file_type::directory_file) {
  173. // Construct a directory when it's just a directory entry.
  174. if (std::error_code EC =
  175. sys::fs::create_directories(entry.RPath,
  176. /*IgnoreExisting=*/true)) {
  177. if (StopOnError)
  178. return EC;
  179. }
  180. continue;
  181. }
  182. // Copy file over.
  183. if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
  184. if (StopOnError)
  185. return EC;
  186. }
  187. // Copy over permissions.
  188. if (auto perms = sys::fs::getPermissions(entry.VPath)) {
  189. if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
  190. if (StopOnError)
  191. return EC;
  192. }
  193. }
  194. // Copy over modification time.
  195. copyAccessAndModificationTime(entry.RPath, Stat);
  196. }
  197. return {};
  198. }
  199. std::error_code FileCollector::writeMapping(StringRef MappingFile) {
  200. std::lock_guard<std::mutex> lock(Mutex);
  201. VFSWriter.setOverlayDir(OverlayRoot);
  202. VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
  203. VFSWriter.setUseExternalNames(false);
  204. std::error_code EC;
  205. raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
  206. if (EC)
  207. return EC;
  208. VFSWriter.write(os);
  209. return {};
  210. }
  211. namespace llvm {
  212. class FileCollectorFileSystem : public vfs::FileSystem {
  213. public:
  214. explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
  215. std::shared_ptr<FileCollector> Collector)
  216. : FS(std::move(FS)), Collector(std::move(Collector)) {}
  217. llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
  218. auto Result = FS->status(Path);
  219. if (Result && Result->exists())
  220. Collector->addFile(Path);
  221. return Result;
  222. }
  223. llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
  224. openFileForRead(const Twine &Path) override {
  225. auto Result = FS->openFileForRead(Path);
  226. if (Result && *Result)
  227. Collector->addFile(Path);
  228. return Result;
  229. }
  230. llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
  231. std::error_code &EC) override {
  232. return Collector->addDirectoryImpl(Dir, FS, EC);
  233. }
  234. std::error_code getRealPath(const Twine &Path,
  235. SmallVectorImpl<char> &Output) const override {
  236. auto EC = FS->getRealPath(Path, Output);
  237. if (!EC) {
  238. Collector->addFile(Path);
  239. if (Output.size() > 0)
  240. Collector->addFile(Output);
  241. }
  242. return EC;
  243. }
  244. std::error_code isLocal(const Twine &Path, bool &Result) override {
  245. return FS->isLocal(Path, Result);
  246. }
  247. llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
  248. return FS->getCurrentWorkingDirectory();
  249. }
  250. std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
  251. return FS->setCurrentWorkingDirectory(Path);
  252. }
  253. private:
  254. IntrusiveRefCntPtr<vfs::FileSystem> FS;
  255. std::shared_ptr<FileCollector> Collector;
  256. };
  257. } // namespace llvm
  258. IntrusiveRefCntPtr<vfs::FileSystem>
  259. FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
  260. std::shared_ptr<FileCollector> Collector) {
  261. return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
  262. }