101095a5dSDimitry Andric //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
201095a5dSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
601095a5dSDimitry Andric //
701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
801095a5dSDimitry Andric //
901095a5dSDimitry Andric // This file implements the pruning of a directory based on least recently used.
1001095a5dSDimitry Andric //
1101095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1201095a5dSDimitry Andric
1301095a5dSDimitry Andric #include "llvm/Support/CachePruning.h"
14cfca06d7SDimitry Andric #include "llvm/ADT/StringRef.h"
1501095a5dSDimitry Andric #include "llvm/Support/Debug.h"
1601095a5dSDimitry Andric #include "llvm/Support/Errc.h"
1771d5a254SDimitry Andric #include "llvm/Support/Error.h"
1801095a5dSDimitry Andric #include "llvm/Support/FileSystem.h"
1901095a5dSDimitry Andric #include "llvm/Support/Path.h"
20e3b55780SDimitry Andric #include "llvm/Support/WithColor.h"
2101095a5dSDimitry Andric #include "llvm/Support/raw_ostream.h"
2201095a5dSDimitry Andric
2301095a5dSDimitry Andric #define DEBUG_TYPE "cache-pruning"
2401095a5dSDimitry Andric
2501095a5dSDimitry Andric #include <set>
26b915e9e0SDimitry Andric #include <system_error>
2701095a5dSDimitry Andric
2801095a5dSDimitry Andric using namespace llvm;
2901095a5dSDimitry Andric
30d8e91e46SDimitry Andric namespace {
31d8e91e46SDimitry Andric struct FileInfo {
32d8e91e46SDimitry Andric sys::TimePoint<> Time;
33d8e91e46SDimitry Andric uint64_t Size;
34d8e91e46SDimitry Andric std::string Path;
35d8e91e46SDimitry Andric
36d8e91e46SDimitry Andric /// Used to determine which files to prune first. Also used to determine
37d8e91e46SDimitry Andric /// set membership, so must take into account all fields.
operator <__anon176891080111::FileInfo38d8e91e46SDimitry Andric bool operator<(const FileInfo &Other) const {
39e6d15924SDimitry Andric return std::tie(Time, Other.Size, Path) <
40e6d15924SDimitry Andric std::tie(Other.Time, Size, Other.Path);
41d8e91e46SDimitry Andric }
42d8e91e46SDimitry Andric };
43d8e91e46SDimitry Andric } // anonymous namespace
44d8e91e46SDimitry Andric
4501095a5dSDimitry Andric /// Write a new timestamp file with the given path. This is used for the pruning
4601095a5dSDimitry Andric /// interval option.
writeTimestampFile(StringRef TimestampFile)4701095a5dSDimitry Andric static void writeTimestampFile(StringRef TimestampFile) {
4801095a5dSDimitry Andric std::error_code EC;
491d5ae102SDimitry Andric raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None);
5001095a5dSDimitry Andric }
5101095a5dSDimitry Andric
parseDuration(StringRef Duration)5271d5a254SDimitry Andric static Expected<std::chrono::seconds> parseDuration(StringRef Duration) {
5371d5a254SDimitry Andric if (Duration.empty())
5471d5a254SDimitry Andric return make_error<StringError>("Duration must not be empty",
5571d5a254SDimitry Andric inconvertibleErrorCode());
5671d5a254SDimitry Andric
5771d5a254SDimitry Andric StringRef NumStr = Duration.slice(0, Duration.size()-1);
5871d5a254SDimitry Andric uint64_t Num;
5971d5a254SDimitry Andric if (NumStr.getAsInteger(0, Num))
6071d5a254SDimitry Andric return make_error<StringError>("'" + NumStr + "' not an integer",
6171d5a254SDimitry Andric inconvertibleErrorCode());
6271d5a254SDimitry Andric
6371d5a254SDimitry Andric switch (Duration.back()) {
6471d5a254SDimitry Andric case 's':
6571d5a254SDimitry Andric return std::chrono::seconds(Num);
6671d5a254SDimitry Andric case 'm':
6771d5a254SDimitry Andric return std::chrono::minutes(Num);
6871d5a254SDimitry Andric case 'h':
6971d5a254SDimitry Andric return std::chrono::hours(Num);
7071d5a254SDimitry Andric default:
7171d5a254SDimitry Andric return make_error<StringError>("'" + Duration +
7271d5a254SDimitry Andric "' must end with one of 's', 'm' or 'h'",
7371d5a254SDimitry Andric inconvertibleErrorCode());
7471d5a254SDimitry Andric }
7571d5a254SDimitry Andric }
7671d5a254SDimitry Andric
7771d5a254SDimitry Andric Expected<CachePruningPolicy>
parseCachePruningPolicy(StringRef PolicyStr)7871d5a254SDimitry Andric llvm::parseCachePruningPolicy(StringRef PolicyStr) {
7971d5a254SDimitry Andric CachePruningPolicy Policy;
8071d5a254SDimitry Andric std::pair<StringRef, StringRef> P = {"", PolicyStr};
8171d5a254SDimitry Andric while (!P.second.empty()) {
8271d5a254SDimitry Andric P = P.second.split(':');
8371d5a254SDimitry Andric
8471d5a254SDimitry Andric StringRef Key, Value;
8571d5a254SDimitry Andric std::tie(Key, Value) = P.first.split('=');
8671d5a254SDimitry Andric if (Key == "prune_interval") {
8771d5a254SDimitry Andric auto DurationOrErr = parseDuration(Value);
8871d5a254SDimitry Andric if (!DurationOrErr)
8971d5a254SDimitry Andric return DurationOrErr.takeError();
9071d5a254SDimitry Andric Policy.Interval = *DurationOrErr;
9171d5a254SDimitry Andric } else if (Key == "prune_after") {
9271d5a254SDimitry Andric auto DurationOrErr = parseDuration(Value);
9371d5a254SDimitry Andric if (!DurationOrErr)
9471d5a254SDimitry Andric return DurationOrErr.takeError();
9571d5a254SDimitry Andric Policy.Expiration = *DurationOrErr;
9671d5a254SDimitry Andric } else if (Key == "cache_size") {
9771d5a254SDimitry Andric if (Value.back() != '%')
9871d5a254SDimitry Andric return make_error<StringError>("'" + Value + "' must be a percentage",
9971d5a254SDimitry Andric inconvertibleErrorCode());
10008bbd35aSDimitry Andric StringRef SizeStr = Value.drop_back();
10171d5a254SDimitry Andric uint64_t Size;
10271d5a254SDimitry Andric if (SizeStr.getAsInteger(0, Size))
10371d5a254SDimitry Andric return make_error<StringError>("'" + SizeStr + "' not an integer",
10471d5a254SDimitry Andric inconvertibleErrorCode());
10571d5a254SDimitry Andric if (Size > 100)
10671d5a254SDimitry Andric return make_error<StringError>("'" + SizeStr +
10771d5a254SDimitry Andric "' must be between 0 and 100",
10871d5a254SDimitry Andric inconvertibleErrorCode());
10908bbd35aSDimitry Andric Policy.MaxSizePercentageOfAvailableSpace = Size;
11008bbd35aSDimitry Andric } else if (Key == "cache_size_bytes") {
11108bbd35aSDimitry Andric uint64_t Mult = 1;
11208bbd35aSDimitry Andric switch (tolower(Value.back())) {
11308bbd35aSDimitry Andric case 'k':
11408bbd35aSDimitry Andric Mult = 1024;
11508bbd35aSDimitry Andric Value = Value.drop_back();
11608bbd35aSDimitry Andric break;
11708bbd35aSDimitry Andric case 'm':
11808bbd35aSDimitry Andric Mult = 1024 * 1024;
11908bbd35aSDimitry Andric Value = Value.drop_back();
12008bbd35aSDimitry Andric break;
12108bbd35aSDimitry Andric case 'g':
12208bbd35aSDimitry Andric Mult = 1024 * 1024 * 1024;
12308bbd35aSDimitry Andric Value = Value.drop_back();
12408bbd35aSDimitry Andric break;
12508bbd35aSDimitry Andric }
12608bbd35aSDimitry Andric uint64_t Size;
12708bbd35aSDimitry Andric if (Value.getAsInteger(0, Size))
12808bbd35aSDimitry Andric return make_error<StringError>("'" + Value + "' not an integer",
12908bbd35aSDimitry Andric inconvertibleErrorCode());
13008bbd35aSDimitry Andric Policy.MaxSizeBytes = Size * Mult;
131044eb2f6SDimitry Andric } else if (Key == "cache_size_files") {
132044eb2f6SDimitry Andric if (Value.getAsInteger(0, Policy.MaxSizeFiles))
133044eb2f6SDimitry Andric return make_error<StringError>("'" + Value + "' not an integer",
134044eb2f6SDimitry Andric inconvertibleErrorCode());
13571d5a254SDimitry Andric } else {
13671d5a254SDimitry Andric return make_error<StringError>("Unknown key: '" + Key + "'",
13771d5a254SDimitry Andric inconvertibleErrorCode());
13871d5a254SDimitry Andric }
13971d5a254SDimitry Andric }
14071d5a254SDimitry Andric
14171d5a254SDimitry Andric return Policy;
14271d5a254SDimitry Andric }
14371d5a254SDimitry Andric
14401095a5dSDimitry Andric /// Prune the cache of files that haven't been accessed in a long time.
pruneCache(StringRef Path,CachePruningPolicy Policy,const std::vector<std::unique_ptr<MemoryBuffer>> & Files)145e3b55780SDimitry Andric bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy,
146e3b55780SDimitry Andric const std::vector<std::unique_ptr<MemoryBuffer>> &Files) {
147b915e9e0SDimitry Andric using namespace std::chrono;
148b915e9e0SDimitry Andric
14901095a5dSDimitry Andric if (Path.empty())
15001095a5dSDimitry Andric return false;
15101095a5dSDimitry Andric
15201095a5dSDimitry Andric bool isPathDir;
15301095a5dSDimitry Andric if (sys::fs::is_directory(Path, isPathDir))
15401095a5dSDimitry Andric return false;
15501095a5dSDimitry Andric
15601095a5dSDimitry Andric if (!isPathDir)
15701095a5dSDimitry Andric return false;
15801095a5dSDimitry Andric
15908bbd35aSDimitry Andric Policy.MaxSizePercentageOfAvailableSpace =
16008bbd35aSDimitry Andric std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
16171d5a254SDimitry Andric
16271d5a254SDimitry Andric if (Policy.Expiration == seconds(0) &&
16308bbd35aSDimitry Andric Policy.MaxSizePercentageOfAvailableSpace == 0 &&
164044eb2f6SDimitry Andric Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) {
165eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
16601095a5dSDimitry Andric // Nothing will be pruned, early exit
16701095a5dSDimitry Andric return false;
16801095a5dSDimitry Andric }
16901095a5dSDimitry Andric
17001095a5dSDimitry Andric // Try to stat() the timestamp file.
17101095a5dSDimitry Andric SmallString<128> TimestampFile(Path);
17201095a5dSDimitry Andric sys::path::append(TimestampFile, "llvmcache.timestamp");
17301095a5dSDimitry Andric sys::fs::file_status FileStatus;
174b915e9e0SDimitry Andric const auto CurrentTime = system_clock::now();
17501095a5dSDimitry Andric if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
17601095a5dSDimitry Andric if (EC == errc::no_such_file_or_directory) {
17701095a5dSDimitry Andric // If the timestamp file wasn't there, create one now.
17801095a5dSDimitry Andric writeTimestampFile(TimestampFile);
17901095a5dSDimitry Andric } else {
18001095a5dSDimitry Andric // Unknown error?
18101095a5dSDimitry Andric return false;
18201095a5dSDimitry Andric }
18301095a5dSDimitry Andric } else {
184c7dac04cSDimitry Andric if (!Policy.Interval)
185c7dac04cSDimitry Andric return false;
186044eb2f6SDimitry Andric if (Policy.Interval != seconds(0)) {
18701095a5dSDimitry Andric // Check whether the time stamp is older than our pruning interval.
18801095a5dSDimitry Andric // If not, do nothing.
189b915e9e0SDimitry Andric const auto TimeStampModTime = FileStatus.getLastModificationTime();
19001095a5dSDimitry Andric auto TimeStampAge = CurrentTime - TimeStampModTime;
191c7dac04cSDimitry Andric if (TimeStampAge <= *Policy.Interval) {
192eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
193b915e9e0SDimitry Andric << duration_cast<seconds>(TimeStampAge).count()
19401095a5dSDimitry Andric << "s old), do not prune.\n");
19501095a5dSDimitry Andric return false;
19601095a5dSDimitry Andric }
19701095a5dSDimitry Andric }
19801095a5dSDimitry Andric // Write a new timestamp file so that nobody else attempts to prune.
19901095a5dSDimitry Andric // There is a benign race condition here, if two processes happen to
20001095a5dSDimitry Andric // notice at the same time that the timestamp is out-of-date.
20101095a5dSDimitry Andric writeTimestampFile(TimestampFile);
20201095a5dSDimitry Andric }
20301095a5dSDimitry Andric
204d8e91e46SDimitry Andric // Keep track of files to delete to get below the size limit.
205d8e91e46SDimitry Andric // Order by time of last use so that recently used files are preserved.
206d8e91e46SDimitry Andric std::set<FileInfo> FileInfos;
20701095a5dSDimitry Andric uint64_t TotalSize = 0;
20801095a5dSDimitry Andric
20901095a5dSDimitry Andric // Walk the entire directory cache, looking for unused files.
21001095a5dSDimitry Andric std::error_code EC;
21101095a5dSDimitry Andric SmallString<128> CachePathNative;
21201095a5dSDimitry Andric sys::path::native(Path, CachePathNative);
21301095a5dSDimitry Andric // Walk all of the files within this directory.
21401095a5dSDimitry Andric for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
21501095a5dSDimitry Andric File != FileEnd && !EC; File.increment(EC)) {
216b60736ecSDimitry Andric // Ignore filenames not beginning with "llvmcache-" or "Thin-". This
21771d5a254SDimitry Andric // includes the timestamp file as well as any files created by the user.
21871d5a254SDimitry Andric // This acts as a safeguard against data loss if the user specifies the
21971d5a254SDimitry Andric // wrong directory as their cache directory.
220b60736ecSDimitry Andric StringRef filename = sys::path::filename(File->path());
221b1c73532SDimitry Andric if (!filename.starts_with("llvmcache-") && !filename.starts_with("Thin-"))
22201095a5dSDimitry Andric continue;
22301095a5dSDimitry Andric
22401095a5dSDimitry Andric // Look at this file. If we can't stat it, there's nothing interesting
22501095a5dSDimitry Andric // there.
226044eb2f6SDimitry Andric ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status();
227044eb2f6SDimitry Andric if (!StatusOrErr) {
228eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
22901095a5dSDimitry Andric continue;
23001095a5dSDimitry Andric }
23101095a5dSDimitry Andric
23201095a5dSDimitry Andric // If the file hasn't been used recently enough, delete it
233044eb2f6SDimitry Andric const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
23401095a5dSDimitry Andric auto FileAge = CurrentTime - FileAccessTime;
235044eb2f6SDimitry Andric if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) {
236eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "Remove " << File->path() << " ("
237eb11fae6SDimitry Andric << duration_cast<seconds>(FileAge).count()
238eb11fae6SDimitry Andric << "s old)\n");
23901095a5dSDimitry Andric sys::fs::remove(File->path());
24001095a5dSDimitry Andric continue;
24101095a5dSDimitry Andric }
24201095a5dSDimitry Andric
24301095a5dSDimitry Andric // Leave it here for now, but add it to the list of size-based pruning.
244044eb2f6SDimitry Andric TotalSize += StatusOrErr->getSize();
245d8e91e46SDimitry Andric FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
24601095a5dSDimitry Andric }
24701095a5dSDimitry Andric
248d8e91e46SDimitry Andric auto FileInfo = FileInfos.begin();
249d8e91e46SDimitry Andric size_t NumFiles = FileInfos.size();
250044eb2f6SDimitry Andric
251044eb2f6SDimitry Andric auto RemoveCacheFile = [&]() {
252044eb2f6SDimitry Andric // Remove the file.
253d8e91e46SDimitry Andric sys::fs::remove(FileInfo->Path);
254044eb2f6SDimitry Andric // Update size
255d8e91e46SDimitry Andric TotalSize -= FileInfo->Size;
256044eb2f6SDimitry Andric NumFiles--;
257d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
258d8e91e46SDimitry Andric << FileInfo->Size << "), new occupancy is " << TotalSize
259d8e91e46SDimitry Andric << "%\n");
260d8e91e46SDimitry Andric ++FileInfo;
261044eb2f6SDimitry Andric };
262044eb2f6SDimitry Andric
263e3b55780SDimitry Andric // files.size() is greater the number of inputs by one. However, a timestamp
264e3b55780SDimitry Andric // file is created and stored in the cache directory if --thinlto-cache-policy
265e3b55780SDimitry Andric // option is used. Therefore, files.size() is used as ActualNums.
266e3b55780SDimitry Andric const size_t ActualNums = Files.size();
267e3b55780SDimitry Andric if (Policy.MaxSizeFiles && ActualNums > Policy.MaxSizeFiles)
268e3b55780SDimitry Andric WithColor::warning()
269e3b55780SDimitry Andric << "ThinLTO cache pruning happens since the number of created files ("
270e3b55780SDimitry Andric << ActualNums << ") exceeds the maximum number of files ("
271e3b55780SDimitry Andric << Policy.MaxSizeFiles
272e3b55780SDimitry Andric << "); consider adjusting --thinlto-cache-policy\n";
273e3b55780SDimitry Andric
274044eb2f6SDimitry Andric // Prune for number of files.
275044eb2f6SDimitry Andric if (Policy.MaxSizeFiles)
276044eb2f6SDimitry Andric while (NumFiles > Policy.MaxSizeFiles)
277044eb2f6SDimitry Andric RemoveCacheFile();
278044eb2f6SDimitry Andric
27901095a5dSDimitry Andric // Prune for size now if needed
280044eb2f6SDimitry Andric if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) {
28101095a5dSDimitry Andric auto ErrOrSpaceInfo = sys::fs::disk_space(Path);
28201095a5dSDimitry Andric if (!ErrOrSpaceInfo) {
28301095a5dSDimitry Andric report_fatal_error("Can't get available size");
28401095a5dSDimitry Andric }
28501095a5dSDimitry Andric sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get();
28601095a5dSDimitry Andric auto AvailableSpace = TotalSize + SpaceInfo.free;
28708bbd35aSDimitry Andric
28808bbd35aSDimitry Andric if (Policy.MaxSizePercentageOfAvailableSpace == 0)
28908bbd35aSDimitry Andric Policy.MaxSizePercentageOfAvailableSpace = 100;
29008bbd35aSDimitry Andric if (Policy.MaxSizeBytes == 0)
29108bbd35aSDimitry Andric Policy.MaxSizeBytes = AvailableSpace;
29208bbd35aSDimitry Andric auto TotalSizeTarget = std::min<uint64_t>(
29308bbd35aSDimitry Andric AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
29408bbd35aSDimitry Andric Policy.MaxSizeBytes);
29508bbd35aSDimitry Andric
296eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
297eb11fae6SDimitry Andric << "% target is: "
298eb11fae6SDimitry Andric << Policy.MaxSizePercentageOfAvailableSpace << "%, "
299eb11fae6SDimitry Andric << Policy.MaxSizeBytes << " bytes\n");
30008bbd35aSDimitry Andric
301e3b55780SDimitry Andric size_t ActualSizes = 0;
302e3b55780SDimitry Andric for (const auto &File : Files)
303e3b55780SDimitry Andric if (File)
304e3b55780SDimitry Andric ActualSizes += File->getBufferSize();
305e3b55780SDimitry Andric
306e3b55780SDimitry Andric if (ActualSizes > TotalSizeTarget)
307e3b55780SDimitry Andric WithColor::warning()
308e3b55780SDimitry Andric << "ThinLTO cache pruning happens since the total size of the cache "
309e3b55780SDimitry Andric "files consumed by the current link job ("
310e3b55780SDimitry Andric << ActualSizes << " bytes) exceeds maximum cache size ("
311e3b55780SDimitry Andric << TotalSizeTarget
312e3b55780SDimitry Andric << " bytes); consider adjusting --thinlto-cache-policy\n";
313e3b55780SDimitry Andric
314044eb2f6SDimitry Andric // Remove the oldest accessed files first, till we get below the threshold.
315d8e91e46SDimitry Andric while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
316044eb2f6SDimitry Andric RemoveCacheFile();
31701095a5dSDimitry Andric }
31801095a5dSDimitry Andric return true;
31901095a5dSDimitry Andric }
320