xref: /src/contrib/llvm-project/llvm/lib/Support/FileCollector.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
11d5ae102SDimitry Andric //===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
21d5ae102SDimitry Andric //
31d5ae102SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41d5ae102SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
51d5ae102SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61d5ae102SDimitry Andric //
71d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
81d5ae102SDimitry Andric 
91d5ae102SDimitry Andric #include "llvm/Support/FileCollector.h"
101d5ae102SDimitry Andric #include "llvm/ADT/SmallString.h"
11cfca06d7SDimitry Andric #include "llvm/ADT/Twine.h"
121d5ae102SDimitry Andric #include "llvm/Support/FileSystem.h"
131d5ae102SDimitry Andric #include "llvm/Support/Path.h"
141d5ae102SDimitry Andric #include "llvm/Support/Process.h"
151d5ae102SDimitry Andric 
161d5ae102SDimitry Andric using namespace llvm;
171d5ae102SDimitry Andric 
18b60736ecSDimitry Andric FileCollectorBase::FileCollectorBase() = default;
19b60736ecSDimitry Andric FileCollectorBase::~FileCollectorBase() = default;
20b60736ecSDimitry Andric 
addFile(const Twine & File)21b60736ecSDimitry Andric void FileCollectorBase::addFile(const Twine &File) {
22b60736ecSDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
23b60736ecSDimitry Andric   std::string FileStr = File.str();
24b60736ecSDimitry Andric   if (markAsSeen(FileStr))
25b60736ecSDimitry Andric     addFileImpl(FileStr);
26b60736ecSDimitry Andric }
27b60736ecSDimitry Andric 
addDirectory(const Twine & Dir)28b60736ecSDimitry Andric void FileCollectorBase::addDirectory(const Twine &Dir) {
29b60736ecSDimitry Andric   assert(sys::fs::is_directory(Dir));
30b60736ecSDimitry Andric   std::error_code EC;
31b60736ecSDimitry Andric   addDirectoryImpl(Dir, vfs::getRealFileSystem(), EC);
32b60736ecSDimitry Andric }
33b60736ecSDimitry Andric 
isCaseSensitivePath(StringRef Path)341d5ae102SDimitry Andric static bool isCaseSensitivePath(StringRef Path) {
351d5ae102SDimitry Andric   SmallString<256> TmpDest = Path, UpperDest, RealDest;
361d5ae102SDimitry Andric 
371d5ae102SDimitry Andric   // Remove component traversals, links, etc.
38344a3780SDimitry Andric   if (sys::fs::real_path(Path, TmpDest))
391d5ae102SDimitry Andric     return true; // Current default value in vfs.yaml
401d5ae102SDimitry Andric   Path = TmpDest;
411d5ae102SDimitry Andric 
421d5ae102SDimitry Andric   // Change path to all upper case and ask for its real path, if the latter
431d5ae102SDimitry Andric   // exists and is equal to path, it's not case sensitive. Default to case
441d5ae102SDimitry Andric   // sensitive in the absence of real_path, since this is the YAMLVFSWriter
451d5ae102SDimitry Andric   // default.
461d5ae102SDimitry Andric   UpperDest = Path.upper();
47ac9a064cSDimitry Andric   if (!sys::fs::real_path(UpperDest, RealDest) && Path == RealDest)
481d5ae102SDimitry Andric     return false;
491d5ae102SDimitry Andric   return true;
501d5ae102SDimitry Andric }
511d5ae102SDimitry Andric 
FileCollector(std::string Root,std::string OverlayRoot)521d5ae102SDimitry Andric FileCollector::FileCollector(std::string Root, std::string OverlayRoot)
53e3b55780SDimitry Andric     : Root(Root), OverlayRoot(OverlayRoot) {
54e3b55780SDimitry Andric   assert(sys::path::is_absolute(Root) && "Root not absolute");
55e3b55780SDimitry Andric   assert(sys::path::is_absolute(OverlayRoot) && "OverlayRoot not absolute");
561d5ae102SDimitry Andric }
571d5ae102SDimitry Andric 
updateWithRealPath(SmallVectorImpl<char> & Path)58b60736ecSDimitry Andric void FileCollector::PathCanonicalizer::updateWithRealPath(
59b60736ecSDimitry Andric     SmallVectorImpl<char> &Path) {
60b60736ecSDimitry Andric   StringRef SrcPath(Path.begin(), Path.size());
61b60736ecSDimitry Andric   StringRef Filename = sys::path::filename(SrcPath);
62b60736ecSDimitry Andric   StringRef Directory = sys::path::parent_path(SrcPath);
631d5ae102SDimitry Andric 
64b60736ecSDimitry Andric   // Use real_path to fix any symbolic link component present in the directory
65b60736ecSDimitry Andric   // part of the path, caching the search because computing the real path is
66b60736ecSDimitry Andric   // expensive.
67b60736ecSDimitry Andric   SmallString<256> RealPath;
68b60736ecSDimitry Andric   auto DirWithSymlink = CachedDirs.find(Directory);
69b60736ecSDimitry Andric   if (DirWithSymlink == CachedDirs.end()) {
70b60736ecSDimitry Andric     // FIXME: Should this be a call to FileSystem::getRealpath(), in some
71b60736ecSDimitry Andric     // cases? What if there is nothing on disk?
72b60736ecSDimitry Andric     if (sys::fs::real_path(Directory, RealPath))
73b60736ecSDimitry Andric       return;
744df029ccSDimitry Andric     CachedDirs[Directory] = std::string(RealPath);
751d5ae102SDimitry Andric   } else {
761d5ae102SDimitry Andric     RealPath = DirWithSymlink->second;
771d5ae102SDimitry Andric   }
781d5ae102SDimitry Andric 
79b60736ecSDimitry Andric   // Finish recreating the path by appending the original filename, since we
80b60736ecSDimitry Andric   // don't need to resolve symlinks in the filename.
81b60736ecSDimitry Andric   //
82b60736ecSDimitry Andric   // FIXME: If we can cope with this, maybe we can cope without calling
83b60736ecSDimitry Andric   // getRealPath() at all when there's no ".." component.
84b60736ecSDimitry Andric   sys::path::append(RealPath, Filename);
85b60736ecSDimitry Andric 
86b60736ecSDimitry Andric   // Swap to create the output.
87b60736ecSDimitry Andric   Path.swap(RealPath);
881d5ae102SDimitry Andric }
891d5ae102SDimitry Andric 
90b60736ecSDimitry Andric /// Make Path absolute.
makeAbsolute(SmallVectorImpl<char> & Path)91b60736ecSDimitry Andric static void makeAbsolute(SmallVectorImpl<char> &Path) {
921d5ae102SDimitry Andric   // We need an absolute src path to append to the root.
93b60736ecSDimitry Andric   sys::fs::make_absolute(Path);
941d5ae102SDimitry Andric 
951d5ae102SDimitry Andric   // Canonicalize src to a native path to avoid mixed separator styles.
96b60736ecSDimitry Andric   sys::path::native(Path);
971d5ae102SDimitry Andric 
981d5ae102SDimitry Andric   // Remove redundant leading "./" pieces and consecutive separators.
99b60736ecSDimitry Andric   Path.erase(Path.begin(), sys::path::remove_leading_dotslash(
100b60736ecSDimitry Andric                                StringRef(Path.begin(), Path.size()))
101b60736ecSDimitry Andric                                .begin());
102b60736ecSDimitry Andric }
1031d5ae102SDimitry Andric 
104b60736ecSDimitry Andric FileCollector::PathCanonicalizer::PathStorage
canonicalize(StringRef SrcPath)105b60736ecSDimitry Andric FileCollector::PathCanonicalizer::canonicalize(StringRef SrcPath) {
106b60736ecSDimitry Andric   PathStorage Paths;
107b60736ecSDimitry Andric   Paths.VirtualPath = SrcPath;
108b60736ecSDimitry Andric   makeAbsolute(Paths.VirtualPath);
1091d5ae102SDimitry Andric 
1101d5ae102SDimitry Andric   // If a ".." component is present after a symlink component, remove_dots may
1111d5ae102SDimitry Andric   // lead to the wrong real destination path. Let the source be canonicalized
1121d5ae102SDimitry Andric   // like that but make sure we always use the real path for the destination.
113b60736ecSDimitry Andric   Paths.CopyFrom = Paths.VirtualPath;
114b60736ecSDimitry Andric   updateWithRealPath(Paths.CopyFrom);
115b60736ecSDimitry Andric 
116b60736ecSDimitry Andric   // Canonicalize the virtual path by removing "..", "." components.
117b60736ecSDimitry Andric   sys::path::remove_dots(Paths.VirtualPath, /*remove_dot_dot=*/true);
118b60736ecSDimitry Andric 
119b60736ecSDimitry Andric   return Paths;
120b60736ecSDimitry Andric }
121b60736ecSDimitry Andric 
addFileImpl(StringRef SrcPath)122b60736ecSDimitry Andric void FileCollector::addFileImpl(StringRef SrcPath) {
123b60736ecSDimitry Andric   PathCanonicalizer::PathStorage Paths = Canonicalizer.canonicalize(SrcPath);
1241d5ae102SDimitry Andric 
1251d5ae102SDimitry Andric   SmallString<256> DstPath = StringRef(Root);
126b60736ecSDimitry Andric   sys::path::append(DstPath, sys::path::relative_path(Paths.CopyFrom));
1271d5ae102SDimitry Andric 
1281d5ae102SDimitry Andric   // Always map a canonical src path to its real path into the YAML, by doing
1291d5ae102SDimitry Andric   // this we map different virtual src paths to the same entry in the VFS
1301d5ae102SDimitry Andric   // overlay, which is a way to emulate symlink inside the VFS; this is also
1311d5ae102SDimitry Andric   // needed for correctness, not doing that can lead to module redefinition
1321d5ae102SDimitry Andric   // errors.
133b60736ecSDimitry Andric   addFileToMapping(Paths.VirtualPath, DstPath);
1341d5ae102SDimitry Andric }
1351d5ae102SDimitry Andric 
136cfca06d7SDimitry Andric llvm::vfs::directory_iterator
addDirectoryImpl(const llvm::Twine & Dir,IntrusiveRefCntPtr<vfs::FileSystem> FS,std::error_code & EC)137cfca06d7SDimitry Andric FileCollector::addDirectoryImpl(const llvm::Twine &Dir,
138cfca06d7SDimitry Andric                                 IntrusiveRefCntPtr<vfs::FileSystem> FS,
139cfca06d7SDimitry Andric                                 std::error_code &EC) {
140cfca06d7SDimitry Andric   auto It = FS->dir_begin(Dir, EC);
141cfca06d7SDimitry Andric   if (EC)
142cfca06d7SDimitry Andric     return It;
143cfca06d7SDimitry Andric   addFile(Dir);
144cfca06d7SDimitry Andric   for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) {
145cfca06d7SDimitry Andric     if (It->type() == sys::fs::file_type::regular_file ||
146cfca06d7SDimitry Andric         It->type() == sys::fs::file_type::directory_file ||
147cfca06d7SDimitry Andric         It->type() == sys::fs::file_type::symlink_file) {
148cfca06d7SDimitry Andric       addFile(It->path());
149cfca06d7SDimitry Andric     }
150cfca06d7SDimitry Andric   }
151cfca06d7SDimitry Andric   if (EC)
152cfca06d7SDimitry Andric     return It;
153cfca06d7SDimitry Andric   // Return a new iterator.
154cfca06d7SDimitry Andric   return FS->dir_begin(Dir, EC);
155cfca06d7SDimitry Andric }
156cfca06d7SDimitry Andric 
1571d5ae102SDimitry Andric /// Set the access and modification time for the given file from the given
1581d5ae102SDimitry Andric /// status object.
1591d5ae102SDimitry Andric static std::error_code
copyAccessAndModificationTime(StringRef Filename,const sys::fs::file_status & Stat)1601d5ae102SDimitry Andric copyAccessAndModificationTime(StringRef Filename,
1611d5ae102SDimitry Andric                               const sys::fs::file_status &Stat) {
1621d5ae102SDimitry Andric   int FD;
1631d5ae102SDimitry Andric 
1641d5ae102SDimitry Andric   if (auto EC =
1651d5ae102SDimitry Andric           sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
1661d5ae102SDimitry Andric     return EC;
1671d5ae102SDimitry Andric 
1681d5ae102SDimitry Andric   if (auto EC = sys::fs::setLastAccessAndModificationTime(
1691d5ae102SDimitry Andric           FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
1701d5ae102SDimitry Andric     return EC;
1711d5ae102SDimitry Andric 
1721d5ae102SDimitry Andric   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
1731d5ae102SDimitry Andric     return EC;
1741d5ae102SDimitry Andric 
1751d5ae102SDimitry Andric   return {};
1761d5ae102SDimitry Andric }
1771d5ae102SDimitry Andric 
copyFiles(bool StopOnError)1781d5ae102SDimitry Andric std::error_code FileCollector::copyFiles(bool StopOnError) {
179cfca06d7SDimitry Andric   auto Err = sys::fs::create_directories(Root, /*IgnoreExisting=*/true);
180cfca06d7SDimitry Andric   if (Err) {
181cfca06d7SDimitry Andric     return Err;
182cfca06d7SDimitry Andric   }
183cfca06d7SDimitry Andric 
184cfca06d7SDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
185cfca06d7SDimitry Andric 
1861d5ae102SDimitry Andric   for (auto &entry : VFSWriter.getMappings()) {
1871d5ae102SDimitry Andric     // Get the status of the original file/directory.
1881d5ae102SDimitry Andric     sys::fs::file_status Stat;
1891d5ae102SDimitry Andric     if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) {
1901d5ae102SDimitry Andric       if (StopOnError)
1911d5ae102SDimitry Andric         return EC;
1921d5ae102SDimitry Andric       continue;
1931d5ae102SDimitry Andric     }
1941d5ae102SDimitry Andric 
195b60736ecSDimitry Andric     // Continue if the file doesn't exist.
196b60736ecSDimitry Andric     if (Stat.type() == sys::fs::file_type::file_not_found)
197b60736ecSDimitry Andric       continue;
198b60736ecSDimitry Andric 
199b60736ecSDimitry Andric     // Create directory tree.
200b60736ecSDimitry Andric     if (std::error_code EC =
201b60736ecSDimitry Andric             sys::fs::create_directories(sys::path::parent_path(entry.RPath),
202b60736ecSDimitry Andric                                         /*IgnoreExisting=*/true)) {
203b60736ecSDimitry Andric       if (StopOnError)
204b60736ecSDimitry Andric         return EC;
205b60736ecSDimitry Andric     }
206b60736ecSDimitry Andric 
2071d5ae102SDimitry Andric     if (Stat.type() == sys::fs::file_type::directory_file) {
2081d5ae102SDimitry Andric       // Construct a directory when it's just a directory entry.
2091d5ae102SDimitry Andric       if (std::error_code EC =
2101d5ae102SDimitry Andric               sys::fs::create_directories(entry.RPath,
2111d5ae102SDimitry Andric                                           /*IgnoreExisting=*/true)) {
2121d5ae102SDimitry Andric         if (StopOnError)
2131d5ae102SDimitry Andric           return EC;
2141d5ae102SDimitry Andric       }
2151d5ae102SDimitry Andric       continue;
2161d5ae102SDimitry Andric     }
2171d5ae102SDimitry Andric 
2181d5ae102SDimitry Andric     // Copy file over.
2191d5ae102SDimitry Andric     if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) {
2201d5ae102SDimitry Andric       if (StopOnError)
2211d5ae102SDimitry Andric         return EC;
2221d5ae102SDimitry Andric     }
2231d5ae102SDimitry Andric 
2241d5ae102SDimitry Andric     // Copy over permissions.
2251d5ae102SDimitry Andric     if (auto perms = sys::fs::getPermissions(entry.VPath)) {
2261d5ae102SDimitry Andric       if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) {
2271d5ae102SDimitry Andric         if (StopOnError)
2281d5ae102SDimitry Andric           return EC;
2291d5ae102SDimitry Andric       }
2301d5ae102SDimitry Andric     }
2311d5ae102SDimitry Andric 
2321d5ae102SDimitry Andric     // Copy over modification time.
2331d5ae102SDimitry Andric     copyAccessAndModificationTime(entry.RPath, Stat);
2341d5ae102SDimitry Andric   }
2351d5ae102SDimitry Andric   return {};
2361d5ae102SDimitry Andric }
2371d5ae102SDimitry Andric 
writeMapping(StringRef MappingFile)238cfca06d7SDimitry Andric std::error_code FileCollector::writeMapping(StringRef MappingFile) {
2391d5ae102SDimitry Andric   std::lock_guard<std::mutex> lock(Mutex);
2401d5ae102SDimitry Andric 
2411d5ae102SDimitry Andric   VFSWriter.setOverlayDir(OverlayRoot);
2421d5ae102SDimitry Andric   VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot));
2431d5ae102SDimitry Andric   VFSWriter.setUseExternalNames(false);
2441d5ae102SDimitry Andric 
2451d5ae102SDimitry Andric   std::error_code EC;
246344a3780SDimitry Andric   raw_fd_ostream os(MappingFile, EC, sys::fs::OF_TextWithCRLF);
2471d5ae102SDimitry Andric   if (EC)
2481d5ae102SDimitry Andric     return EC;
2491d5ae102SDimitry Andric 
2501d5ae102SDimitry Andric   VFSWriter.write(os);
2511d5ae102SDimitry Andric 
2521d5ae102SDimitry Andric   return {};
2531d5ae102SDimitry Andric }
2541d5ae102SDimitry Andric 
255cfca06d7SDimitry Andric namespace llvm {
2561d5ae102SDimitry Andric 
2571d5ae102SDimitry Andric class FileCollectorFileSystem : public vfs::FileSystem {
2581d5ae102SDimitry Andric public:
FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,std::shared_ptr<FileCollector> Collector)2591d5ae102SDimitry Andric   explicit FileCollectorFileSystem(IntrusiveRefCntPtr<vfs::FileSystem> FS,
2601d5ae102SDimitry Andric                                    std::shared_ptr<FileCollector> Collector)
2611d5ae102SDimitry Andric       : FS(std::move(FS)), Collector(std::move(Collector)) {}
2621d5ae102SDimitry Andric 
status(const Twine & Path)2631d5ae102SDimitry Andric   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
2641d5ae102SDimitry Andric     auto Result = FS->status(Path);
2651d5ae102SDimitry Andric     if (Result && Result->exists())
2661d5ae102SDimitry Andric       Collector->addFile(Path);
2671d5ae102SDimitry Andric     return Result;
2681d5ae102SDimitry Andric   }
2691d5ae102SDimitry Andric 
2701d5ae102SDimitry Andric   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine & Path)2711d5ae102SDimitry Andric   openFileForRead(const Twine &Path) override {
2721d5ae102SDimitry Andric     auto Result = FS->openFileForRead(Path);
2731d5ae102SDimitry Andric     if (Result && *Result)
2741d5ae102SDimitry Andric       Collector->addFile(Path);
2751d5ae102SDimitry Andric     return Result;
2761d5ae102SDimitry Andric   }
2771d5ae102SDimitry Andric 
dir_begin(const llvm::Twine & Dir,std::error_code & EC)2781d5ae102SDimitry Andric   llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir,
2791d5ae102SDimitry Andric                                           std::error_code &EC) override {
280cfca06d7SDimitry Andric     return Collector->addDirectoryImpl(Dir, FS, EC);
2811d5ae102SDimitry Andric   }
2821d5ae102SDimitry Andric 
getRealPath(const Twine & Path,SmallVectorImpl<char> & Output)2831d5ae102SDimitry Andric   std::error_code getRealPath(const Twine &Path,
284ac9a064cSDimitry Andric                               SmallVectorImpl<char> &Output) override {
2851d5ae102SDimitry Andric     auto EC = FS->getRealPath(Path, Output);
2861d5ae102SDimitry Andric     if (!EC) {
2871d5ae102SDimitry Andric       Collector->addFile(Path);
2881d5ae102SDimitry Andric       if (Output.size() > 0)
2891d5ae102SDimitry Andric         Collector->addFile(Output);
2901d5ae102SDimitry Andric     }
2911d5ae102SDimitry Andric     return EC;
2921d5ae102SDimitry Andric   }
2931d5ae102SDimitry Andric 
isLocal(const Twine & Path,bool & Result)2941d5ae102SDimitry Andric   std::error_code isLocal(const Twine &Path, bool &Result) override {
2951d5ae102SDimitry Andric     return FS->isLocal(Path, Result);
2961d5ae102SDimitry Andric   }
2971d5ae102SDimitry Andric 
getCurrentWorkingDirectory() const2981d5ae102SDimitry Andric   llvm::ErrorOr<std::string> getCurrentWorkingDirectory() const override {
2991d5ae102SDimitry Andric     return FS->getCurrentWorkingDirectory();
3001d5ae102SDimitry Andric   }
3011d5ae102SDimitry Andric 
setCurrentWorkingDirectory(const llvm::Twine & Path)3021d5ae102SDimitry Andric   std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override {
3031d5ae102SDimitry Andric     return FS->setCurrentWorkingDirectory(Path);
3041d5ae102SDimitry Andric   }
3051d5ae102SDimitry Andric 
3061d5ae102SDimitry Andric private:
3071d5ae102SDimitry Andric   IntrusiveRefCntPtr<vfs::FileSystem> FS;
3081d5ae102SDimitry Andric   std::shared_ptr<FileCollector> Collector;
3091d5ae102SDimitry Andric };
3101d5ae102SDimitry Andric 
311cfca06d7SDimitry Andric } // namespace llvm
3121d5ae102SDimitry Andric 
3131d5ae102SDimitry Andric IntrusiveRefCntPtr<vfs::FileSystem>
createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,std::shared_ptr<FileCollector> Collector)3141d5ae102SDimitry Andric FileCollector::createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
3151d5ae102SDimitry Andric                                   std::shared_ptr<FileCollector> Collector) {
3161d5ae102SDimitry Andric   return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector));
3171d5ae102SDimitry Andric }
318