xref: /src/contrib/llvm-project/llvm/lib/Support/CachePruning.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
101095a5dSDimitry Andric //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
201095a5dSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
601095a5dSDimitry Andric //
701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
801095a5dSDimitry Andric //
901095a5dSDimitry Andric // This file implements the pruning of a directory based on least recently used.
1001095a5dSDimitry Andric //
1101095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1201095a5dSDimitry Andric 
1301095a5dSDimitry Andric #include "llvm/Support/CachePruning.h"
14cfca06d7SDimitry Andric #include "llvm/ADT/StringRef.h"
1501095a5dSDimitry Andric #include "llvm/Support/Debug.h"
1601095a5dSDimitry Andric #include "llvm/Support/Errc.h"
1771d5a254SDimitry Andric #include "llvm/Support/Error.h"
1801095a5dSDimitry Andric #include "llvm/Support/FileSystem.h"
1901095a5dSDimitry Andric #include "llvm/Support/Path.h"
20e3b55780SDimitry Andric #include "llvm/Support/WithColor.h"
2101095a5dSDimitry Andric #include "llvm/Support/raw_ostream.h"
2201095a5dSDimitry Andric 
2301095a5dSDimitry Andric #define DEBUG_TYPE "cache-pruning"
2401095a5dSDimitry Andric 
2501095a5dSDimitry Andric #include <set>
26b915e9e0SDimitry Andric #include <system_error>
2701095a5dSDimitry Andric 
2801095a5dSDimitry Andric using namespace llvm;
2901095a5dSDimitry Andric 
30d8e91e46SDimitry Andric namespace {
31d8e91e46SDimitry Andric struct FileInfo {
32d8e91e46SDimitry Andric   sys::TimePoint<> Time;
33d8e91e46SDimitry Andric   uint64_t Size;
34d8e91e46SDimitry Andric   std::string Path;
35d8e91e46SDimitry Andric 
36d8e91e46SDimitry Andric   /// Used to determine which files to prune first. Also used to determine
37d8e91e46SDimitry Andric   /// set membership, so must take into account all fields.
operator <__anon176891080111::FileInfo38d8e91e46SDimitry Andric   bool operator<(const FileInfo &Other) const {
39e6d15924SDimitry Andric     return std::tie(Time, Other.Size, Path) <
40e6d15924SDimitry Andric            std::tie(Other.Time, Size, Other.Path);
41d8e91e46SDimitry Andric   }
42d8e91e46SDimitry Andric };
43d8e91e46SDimitry Andric } // anonymous namespace
44d8e91e46SDimitry Andric 
4501095a5dSDimitry Andric /// Write a new timestamp file with the given path. This is used for the pruning
4601095a5dSDimitry Andric /// interval option.
writeTimestampFile(StringRef TimestampFile)4701095a5dSDimitry Andric static void writeTimestampFile(StringRef TimestampFile) {
4801095a5dSDimitry Andric   std::error_code EC;
491d5ae102SDimitry Andric   raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None);
5001095a5dSDimitry Andric }
5101095a5dSDimitry Andric 
parseDuration(StringRef Duration)5271d5a254SDimitry Andric static Expected<std::chrono::seconds> parseDuration(StringRef Duration) {
5371d5a254SDimitry Andric   if (Duration.empty())
5471d5a254SDimitry Andric     return make_error<StringError>("Duration must not be empty",
5571d5a254SDimitry Andric                                    inconvertibleErrorCode());
5671d5a254SDimitry Andric 
5771d5a254SDimitry Andric   StringRef NumStr = Duration.slice(0, Duration.size()-1);
5871d5a254SDimitry Andric   uint64_t Num;
5971d5a254SDimitry Andric   if (NumStr.getAsInteger(0, Num))
6071d5a254SDimitry Andric     return make_error<StringError>("'" + NumStr + "' not an integer",
6171d5a254SDimitry Andric                                    inconvertibleErrorCode());
6271d5a254SDimitry Andric 
6371d5a254SDimitry Andric   switch (Duration.back()) {
6471d5a254SDimitry Andric   case 's':
6571d5a254SDimitry Andric     return std::chrono::seconds(Num);
6671d5a254SDimitry Andric   case 'm':
6771d5a254SDimitry Andric     return std::chrono::minutes(Num);
6871d5a254SDimitry Andric   case 'h':
6971d5a254SDimitry Andric     return std::chrono::hours(Num);
7071d5a254SDimitry Andric   default:
7171d5a254SDimitry Andric     return make_error<StringError>("'" + Duration +
7271d5a254SDimitry Andric                                        "' must end with one of 's', 'm' or 'h'",
7371d5a254SDimitry Andric                                    inconvertibleErrorCode());
7471d5a254SDimitry Andric   }
7571d5a254SDimitry Andric }
7671d5a254SDimitry Andric 
7771d5a254SDimitry Andric Expected<CachePruningPolicy>
parseCachePruningPolicy(StringRef PolicyStr)7871d5a254SDimitry Andric llvm::parseCachePruningPolicy(StringRef PolicyStr) {
7971d5a254SDimitry Andric   CachePruningPolicy Policy;
8071d5a254SDimitry Andric   std::pair<StringRef, StringRef> P = {"", PolicyStr};
8171d5a254SDimitry Andric   while (!P.second.empty()) {
8271d5a254SDimitry Andric     P = P.second.split(':');
8371d5a254SDimitry Andric 
8471d5a254SDimitry Andric     StringRef Key, Value;
8571d5a254SDimitry Andric     std::tie(Key, Value) = P.first.split('=');
8671d5a254SDimitry Andric     if (Key == "prune_interval") {
8771d5a254SDimitry Andric       auto DurationOrErr = parseDuration(Value);
8871d5a254SDimitry Andric       if (!DurationOrErr)
8971d5a254SDimitry Andric         return DurationOrErr.takeError();
9071d5a254SDimitry Andric       Policy.Interval = *DurationOrErr;
9171d5a254SDimitry Andric     } else if (Key == "prune_after") {
9271d5a254SDimitry Andric       auto DurationOrErr = parseDuration(Value);
9371d5a254SDimitry Andric       if (!DurationOrErr)
9471d5a254SDimitry Andric         return DurationOrErr.takeError();
9571d5a254SDimitry Andric       Policy.Expiration = *DurationOrErr;
9671d5a254SDimitry Andric     } else if (Key == "cache_size") {
9771d5a254SDimitry Andric       if (Value.back() != '%')
9871d5a254SDimitry Andric         return make_error<StringError>("'" + Value + "' must be a percentage",
9971d5a254SDimitry Andric                                        inconvertibleErrorCode());
10008bbd35aSDimitry Andric       StringRef SizeStr = Value.drop_back();
10171d5a254SDimitry Andric       uint64_t Size;
10271d5a254SDimitry Andric       if (SizeStr.getAsInteger(0, Size))
10371d5a254SDimitry Andric         return make_error<StringError>("'" + SizeStr + "' not an integer",
10471d5a254SDimitry Andric                                        inconvertibleErrorCode());
10571d5a254SDimitry Andric       if (Size > 100)
10671d5a254SDimitry Andric         return make_error<StringError>("'" + SizeStr +
10771d5a254SDimitry Andric                                            "' must be between 0 and 100",
10871d5a254SDimitry Andric                                        inconvertibleErrorCode());
10908bbd35aSDimitry Andric       Policy.MaxSizePercentageOfAvailableSpace = Size;
11008bbd35aSDimitry Andric     } else if (Key == "cache_size_bytes") {
11108bbd35aSDimitry Andric       uint64_t Mult = 1;
11208bbd35aSDimitry Andric       switch (tolower(Value.back())) {
11308bbd35aSDimitry Andric       case 'k':
11408bbd35aSDimitry Andric         Mult = 1024;
11508bbd35aSDimitry Andric         Value = Value.drop_back();
11608bbd35aSDimitry Andric         break;
11708bbd35aSDimitry Andric       case 'm':
11808bbd35aSDimitry Andric         Mult = 1024 * 1024;
11908bbd35aSDimitry Andric         Value = Value.drop_back();
12008bbd35aSDimitry Andric         break;
12108bbd35aSDimitry Andric       case 'g':
12208bbd35aSDimitry Andric         Mult = 1024 * 1024 * 1024;
12308bbd35aSDimitry Andric         Value = Value.drop_back();
12408bbd35aSDimitry Andric         break;
12508bbd35aSDimitry Andric       }
12608bbd35aSDimitry Andric       uint64_t Size;
12708bbd35aSDimitry Andric       if (Value.getAsInteger(0, Size))
12808bbd35aSDimitry Andric         return make_error<StringError>("'" + Value + "' not an integer",
12908bbd35aSDimitry Andric                                        inconvertibleErrorCode());
13008bbd35aSDimitry Andric       Policy.MaxSizeBytes = Size * Mult;
131044eb2f6SDimitry Andric     } else if (Key == "cache_size_files") {
132044eb2f6SDimitry Andric       if (Value.getAsInteger(0, Policy.MaxSizeFiles))
133044eb2f6SDimitry Andric         return make_error<StringError>("'" + Value + "' not an integer",
134044eb2f6SDimitry Andric                                        inconvertibleErrorCode());
13571d5a254SDimitry Andric     } else {
13671d5a254SDimitry Andric       return make_error<StringError>("Unknown key: '" + Key + "'",
13771d5a254SDimitry Andric                                      inconvertibleErrorCode());
13871d5a254SDimitry Andric     }
13971d5a254SDimitry Andric   }
14071d5a254SDimitry Andric 
14171d5a254SDimitry Andric   return Policy;
14271d5a254SDimitry Andric }
14371d5a254SDimitry Andric 
14401095a5dSDimitry Andric /// Prune the cache of files that haven't been accessed in a long time.
pruneCache(StringRef Path,CachePruningPolicy Policy,const std::vector<std::unique_ptr<MemoryBuffer>> & Files)145e3b55780SDimitry Andric bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy,
146e3b55780SDimitry Andric                       const std::vector<std::unique_ptr<MemoryBuffer>> &Files) {
147b915e9e0SDimitry Andric   using namespace std::chrono;
148b915e9e0SDimitry Andric 
14901095a5dSDimitry Andric   if (Path.empty())
15001095a5dSDimitry Andric     return false;
15101095a5dSDimitry Andric 
15201095a5dSDimitry Andric   bool isPathDir;
15301095a5dSDimitry Andric   if (sys::fs::is_directory(Path, isPathDir))
15401095a5dSDimitry Andric     return false;
15501095a5dSDimitry Andric 
15601095a5dSDimitry Andric   if (!isPathDir)
15701095a5dSDimitry Andric     return false;
15801095a5dSDimitry Andric 
15908bbd35aSDimitry Andric   Policy.MaxSizePercentageOfAvailableSpace =
16008bbd35aSDimitry Andric       std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
16171d5a254SDimitry Andric 
16271d5a254SDimitry Andric   if (Policy.Expiration == seconds(0) &&
16308bbd35aSDimitry Andric       Policy.MaxSizePercentageOfAvailableSpace == 0 &&
164044eb2f6SDimitry Andric       Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) {
165eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
16601095a5dSDimitry Andric     // Nothing will be pruned, early exit
16701095a5dSDimitry Andric     return false;
16801095a5dSDimitry Andric   }
16901095a5dSDimitry Andric 
17001095a5dSDimitry Andric   // Try to stat() the timestamp file.
17101095a5dSDimitry Andric   SmallString<128> TimestampFile(Path);
17201095a5dSDimitry Andric   sys::path::append(TimestampFile, "llvmcache.timestamp");
17301095a5dSDimitry Andric   sys::fs::file_status FileStatus;
174b915e9e0SDimitry Andric   const auto CurrentTime = system_clock::now();
17501095a5dSDimitry Andric   if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
17601095a5dSDimitry Andric     if (EC == errc::no_such_file_or_directory) {
17701095a5dSDimitry Andric       // If the timestamp file wasn't there, create one now.
17801095a5dSDimitry Andric       writeTimestampFile(TimestampFile);
17901095a5dSDimitry Andric     } else {
18001095a5dSDimitry Andric       // Unknown error?
18101095a5dSDimitry Andric       return false;
18201095a5dSDimitry Andric     }
18301095a5dSDimitry Andric   } else {
184c7dac04cSDimitry Andric     if (!Policy.Interval)
185c7dac04cSDimitry Andric       return false;
186044eb2f6SDimitry Andric     if (Policy.Interval != seconds(0)) {
18701095a5dSDimitry Andric       // Check whether the time stamp is older than our pruning interval.
18801095a5dSDimitry Andric       // If not, do nothing.
189b915e9e0SDimitry Andric       const auto TimeStampModTime = FileStatus.getLastModificationTime();
19001095a5dSDimitry Andric       auto TimeStampAge = CurrentTime - TimeStampModTime;
191c7dac04cSDimitry Andric       if (TimeStampAge <= *Policy.Interval) {
192eb11fae6SDimitry Andric         LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
193b915e9e0SDimitry Andric                           << duration_cast<seconds>(TimeStampAge).count()
19401095a5dSDimitry Andric                           << "s old), do not prune.\n");
19501095a5dSDimitry Andric         return false;
19601095a5dSDimitry Andric       }
19701095a5dSDimitry Andric     }
19801095a5dSDimitry Andric     // Write a new timestamp file so that nobody else attempts to prune.
19901095a5dSDimitry Andric     // There is a benign race condition here, if two processes happen to
20001095a5dSDimitry Andric     // notice at the same time that the timestamp is out-of-date.
20101095a5dSDimitry Andric     writeTimestampFile(TimestampFile);
20201095a5dSDimitry Andric   }
20301095a5dSDimitry Andric 
204d8e91e46SDimitry Andric   // Keep track of files to delete to get below the size limit.
205d8e91e46SDimitry Andric   // Order by time of last use so that recently used files are preserved.
206d8e91e46SDimitry Andric   std::set<FileInfo> FileInfos;
20701095a5dSDimitry Andric   uint64_t TotalSize = 0;
20801095a5dSDimitry Andric 
20901095a5dSDimitry Andric   // Walk the entire directory cache, looking for unused files.
21001095a5dSDimitry Andric   std::error_code EC;
21101095a5dSDimitry Andric   SmallString<128> CachePathNative;
21201095a5dSDimitry Andric   sys::path::native(Path, CachePathNative);
21301095a5dSDimitry Andric   // Walk all of the files within this directory.
21401095a5dSDimitry Andric   for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
21501095a5dSDimitry Andric        File != FileEnd && !EC; File.increment(EC)) {
216b60736ecSDimitry Andric     // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
21771d5a254SDimitry Andric     // includes the timestamp file as well as any files created by the user.
21871d5a254SDimitry Andric     // This acts as a safeguard against data loss if the user specifies the
21971d5a254SDimitry Andric     // wrong directory as their cache directory.
220b60736ecSDimitry Andric     StringRef filename = sys::path::filename(File->path());
221b1c73532SDimitry Andric     if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-"))
22201095a5dSDimitry Andric       continue;
22301095a5dSDimitry Andric 
22401095a5dSDimitry Andric     // Look at this file. If we can't stat it, there's nothing interesting
22501095a5dSDimitry Andric     // there.
226044eb2f6SDimitry Andric     ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status();
227044eb2f6SDimitry Andric     if (!StatusOrErr) {
228eb11fae6SDimitry Andric       LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
22901095a5dSDimitry Andric       continue;
23001095a5dSDimitry Andric     }
23101095a5dSDimitry Andric 
23201095a5dSDimitry Andric     // If the file hasn't been used recently enough, delete it
233044eb2f6SDimitry Andric     const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
23401095a5dSDimitry Andric     auto FileAge = CurrentTime - FileAccessTime;
235044eb2f6SDimitry Andric     if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) {
236eb11fae6SDimitry Andric       LLVM_DEBUG(dbgs() << "Remove " << File->path() << " ("
237eb11fae6SDimitry Andric                         << duration_cast<seconds>(FileAge).count()
238eb11fae6SDimitry Andric                         << "s old)\n");
23901095a5dSDimitry Andric       sys::fs::remove(File->path());
24001095a5dSDimitry Andric       continue;
24101095a5dSDimitry Andric     }
24201095a5dSDimitry Andric 
24301095a5dSDimitry Andric     // Leave it here for now, but add it to the list of size-based pruning.
244044eb2f6SDimitry Andric     TotalSize += StatusOrErr->getSize();
245d8e91e46SDimitry Andric     FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
24601095a5dSDimitry Andric   }
24701095a5dSDimitry Andric 
248d8e91e46SDimitry Andric   auto FileInfo = FileInfos.begin();
249d8e91e46SDimitry Andric   size_t NumFiles = FileInfos.size();
250044eb2f6SDimitry Andric 
251044eb2f6SDimitry Andric   auto RemoveCacheFile = [&]() {
252044eb2f6SDimitry Andric     // Remove the file.
253d8e91e46SDimitry Andric     sys::fs::remove(FileInfo->Path);
254044eb2f6SDimitry Andric     // Update size
255d8e91e46SDimitry Andric     TotalSize -= FileInfo->Size;
256044eb2f6SDimitry Andric     NumFiles--;
257d8e91e46SDimitry Andric     LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
258d8e91e46SDimitry Andric                       << FileInfo->Size << "), new occupancy is " << TotalSize
259d8e91e46SDimitry Andric                       << "%\n");
260d8e91e46SDimitry Andric     ++FileInfo;
261044eb2f6SDimitry Andric   };
262044eb2f6SDimitry Andric 
263e3b55780SDimitry Andric   // files.size() is greater the number of inputs  by one. However, a timestamp
264e3b55780SDimitry Andric   // file is created and stored in the cache directory if --thinlto-cache-policy
265e3b55780SDimitry Andric   // option is used. Therefore, files.size() is used as ActualNums.
266e3b55780SDimitry Andric   const size_t ActualNums = Files.size();
267e3b55780SDimitry Andric   if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles)
268e3b55780SDimitry Andric     WithColor::warning()
269e3b55780SDimitry Andric         << "ThinLTO cache pruning happens since the number of created files ("
270e3b55780SDimitry Andric         << ActualNums << ") exceeds the maximum number of files ("
271e3b55780SDimitry Andric         << Policy.MaxSizeFiles
272e3b55780SDimitry Andric         << "); consider adjusting --thinlto-cache-policy\n";
273e3b55780SDimitry Andric 
274044eb2f6SDimitry Andric   // Prune for number of files.
275044eb2f6SDimitry Andric   if (Policy.MaxSizeFiles)
276044eb2f6SDimitry Andric     while (NumFiles > Policy.MaxSizeFiles)
277044eb2f6SDimitry Andric       RemoveCacheFile();
278044eb2f6SDimitry Andric 
27901095a5dSDimitry Andric   // Prune for size now if needed
280044eb2f6SDimitry Andric   if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) {
28101095a5dSDimitry Andric     auto ErrOrSpaceInfo = sys::fs::disk_space(Path);
28201095a5dSDimitry Andric     if (!ErrOrSpaceInfo) {
28301095a5dSDimitry Andric       report_fatal_error("Can't get available size");
28401095a5dSDimitry Andric     }
28501095a5dSDimitry Andric     sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get();
28601095a5dSDimitry Andric     auto AvailableSpace = TotalSize + SpaceInfo.free;
28708bbd35aSDimitry Andric 
28808bbd35aSDimitry Andric     if (Policy.MaxSizePercentageOfAvailableSpace == 0)
28908bbd35aSDimitry Andric       Policy.MaxSizePercentageOfAvailableSpace = 100;
29008bbd35aSDimitry Andric     if (Policy.MaxSizeBytes == 0)
29108bbd35aSDimitry Andric       Policy.MaxSizeBytes = AvailableSpace;
29208bbd35aSDimitry Andric     auto TotalSizeTarget = std::min<uint64_t>(
29308bbd35aSDimitry Andric         AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
29408bbd35aSDimitry Andric         Policy.MaxSizeBytes);
29508bbd35aSDimitry Andric 
296eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
297eb11fae6SDimitry Andric                       << "% target is: "
298eb11fae6SDimitry Andric                       << Policy.MaxSizePercentageOfAvailableSpace << "%, "
299eb11fae6SDimitry Andric                       << Policy.MaxSizeBytes << " bytes\n");
30008bbd35aSDimitry Andric 
301e3b55780SDimitry Andric     size_t ActualSizes = 0;
302e3b55780SDimitry Andric     for (const auto &File : Files)
303e3b55780SDimitry Andric       if (File)
304e3b55780SDimitry Andric         ActualSizes += File->getBufferSize();
305e3b55780SDimitry Andric 
306e3b55780SDimitry Andric     if (ActualSizes > TotalSizeTarget)
307e3b55780SDimitry Andric       WithColor::warning()
308e3b55780SDimitry Andric           << "ThinLTO cache pruning happens since the total size of the cache "
309e3b55780SDimitry Andric              "files consumed by the current link job ("
310e3b55780SDimitry Andric           << ActualSizes << "  bytes) exceeds maximum cache size ("
311e3b55780SDimitry Andric           << TotalSizeTarget
312e3b55780SDimitry Andric           << " bytes); consider adjusting --thinlto-cache-policy\n";
313e3b55780SDimitry Andric 
314044eb2f6SDimitry Andric     // Remove the oldest accessed files first, till we get below the threshold.
315d8e91e46SDimitry Andric     while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
316044eb2f6SDimitry Andric       RemoveCacheFile();
31701095a5dSDimitry Andric   }
31801095a5dSDimitry Andric   return true;
31901095a5dSDimitry Andric }
320