xref: /src/contrib/llvm-project/llvm/lib/Support/Compression.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===--- Compression.cpp - Compression implementation ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements compression functions.
//
//===----------------------------------------------------------------------===//
1259d6cff9SDimitry Andric 
1359d6cff9SDimitry Andric #include "llvm/Support/Compression.h"
145a5ac124SDimitry Andric #include "llvm/ADT/SmallVector.h"
1559d6cff9SDimitry Andric #include "llvm/ADT/StringRef.h"
1659d6cff9SDimitry Andric #include "llvm/Config/config.h"
1759d6cff9SDimitry Andric #include "llvm/Support/Compiler.h"
1871d5a254SDimitry Andric #include "llvm/Support/Error.h"
1959d6cff9SDimitry Andric #include "llvm/Support/ErrorHandling.h"
20b60736ecSDimitry Andric #if LLVM_ENABLE_ZLIB
2159d6cff9SDimitry Andric #include <zlib.h>
2259d6cff9SDimitry Andric #endif
234b4fe385SDimitry Andric #if LLVM_ENABLE_ZSTD
244b4fe385SDimitry Andric #include <zstd.h>
254b4fe385SDimitry Andric #endif
2659d6cff9SDimitry Andric 
2759d6cff9SDimitry Andric using namespace llvm;
281f917f69SDimitry Andric using namespace llvm::compression;
2959d6cff9SDimitry Andric 
getReasonIfUnsupported(compression::Format F)30e3b55780SDimitry Andric const char *compression::getReasonIfUnsupported(compression::Format F) {
31e3b55780SDimitry Andric   switch (F) {
32e3b55780SDimitry Andric   case compression::Format::Zlib:
33e3b55780SDimitry Andric     if (zlib::isAvailable())
34e3b55780SDimitry Andric       return nullptr;
35e3b55780SDimitry Andric     return "LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at "
36e3b55780SDimitry Andric            "build time";
37e3b55780SDimitry Andric   case compression::Format::Zstd:
38e3b55780SDimitry Andric     if (zstd::isAvailable())
39e3b55780SDimitry Andric       return nullptr;
40e3b55780SDimitry Andric     return "LLVM was not built with LLVM_ENABLE_ZSTD or did not find zstd at "
41e3b55780SDimitry Andric            "build time";
42e3b55780SDimitry Andric   }
43e3b55780SDimitry Andric   llvm_unreachable("");
44e3b55780SDimitry Andric }
45e3b55780SDimitry Andric 
compress(Params P,ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output)46e3b55780SDimitry Andric void compression::compress(Params P, ArrayRef<uint8_t> Input,
47e3b55780SDimitry Andric                            SmallVectorImpl<uint8_t> &Output) {
48e3b55780SDimitry Andric   switch (P.format) {
49e3b55780SDimitry Andric   case compression::Format::Zlib:
50e3b55780SDimitry Andric     zlib::compress(Input, Output, P.level);
51e3b55780SDimitry Andric     break;
52e3b55780SDimitry Andric   case compression::Format::Zstd:
53ac9a064cSDimitry Andric     zstd::compress(Input, Output, P.level, P.zstdEnableLdm);
54e3b55780SDimitry Andric     break;
55e3b55780SDimitry Andric   }
56e3b55780SDimitry Andric }
57e3b55780SDimitry Andric 
decompress(DebugCompressionType T,ArrayRef<uint8_t> Input,uint8_t * Output,size_t UncompressedSize)58e3b55780SDimitry Andric Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
59e3b55780SDimitry Andric                               uint8_t *Output, size_t UncompressedSize) {
60e3b55780SDimitry Andric   switch (formatFor(T)) {
61e3b55780SDimitry Andric   case compression::Format::Zlib:
62e3b55780SDimitry Andric     return zlib::decompress(Input, Output, UncompressedSize);
63e3b55780SDimitry Andric   case compression::Format::Zstd:
64e3b55780SDimitry Andric     return zstd::decompress(Input, Output, UncompressedSize);
65e3b55780SDimitry Andric   }
66e3b55780SDimitry Andric   llvm_unreachable("");
67e3b55780SDimitry Andric }
68e3b55780SDimitry Andric 
decompress(compression::Format F,ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output,size_t UncompressedSize)69e3b55780SDimitry Andric Error compression::decompress(compression::Format F, ArrayRef<uint8_t> Input,
70e3b55780SDimitry Andric                               SmallVectorImpl<uint8_t> &Output,
71e3b55780SDimitry Andric                               size_t UncompressedSize) {
72e3b55780SDimitry Andric   switch (F) {
73e3b55780SDimitry Andric   case compression::Format::Zlib:
74e3b55780SDimitry Andric     return zlib::decompress(Input, Output, UncompressedSize);
75e3b55780SDimitry Andric   case compression::Format::Zstd:
76e3b55780SDimitry Andric     return zstd::decompress(Input, Output, UncompressedSize);
77e3b55780SDimitry Andric   }
78e3b55780SDimitry Andric   llvm_unreachable("");
79e3b55780SDimitry Andric }
80e3b55780SDimitry Andric 
decompress(DebugCompressionType T,ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output,size_t UncompressedSize)81e3b55780SDimitry Andric Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
82e3b55780SDimitry Andric                               SmallVectorImpl<uint8_t> &Output,
83e3b55780SDimitry Andric                               size_t UncompressedSize) {
84e3b55780SDimitry Andric   return decompress(formatFor(T), Input, Output, UncompressedSize);
85e3b55780SDimitry Andric }
86e3b55780SDimitry Andric 
87b60736ecSDimitry Andric #if LLVM_ENABLE_ZLIB
8871d5a254SDimitry Andric 
convertZlibCodeToString(int Code)8971d5a254SDimitry Andric static StringRef convertZlibCodeToString(int Code) {
9071d5a254SDimitry Andric   switch (Code) {
9171d5a254SDimitry Andric   case Z_MEM_ERROR:
9271d5a254SDimitry Andric     return "zlib error: Z_MEM_ERROR";
9371d5a254SDimitry Andric   case Z_BUF_ERROR:
9471d5a254SDimitry Andric     return "zlib error: Z_BUF_ERROR";
9571d5a254SDimitry Andric   case Z_STREAM_ERROR:
9671d5a254SDimitry Andric     return "zlib error: Z_STREAM_ERROR";
9771d5a254SDimitry Andric   case Z_DATA_ERROR:
9871d5a254SDimitry Andric     return "zlib error: Z_DATA_ERROR";
9971d5a254SDimitry Andric   case Z_OK:
10071d5a254SDimitry Andric   default:
10171d5a254SDimitry Andric     llvm_unreachable("unknown or unexpected zlib status code");
10259d6cff9SDimitry Andric   }
10359d6cff9SDimitry Andric }
10459d6cff9SDimitry Andric 
isAvailable()10559d6cff9SDimitry Andric bool zlib::isAvailable() { return true; }
10671d5a254SDimitry Andric 
compress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & CompressedBuffer,int Level)1071f917f69SDimitry Andric void zlib::compress(ArrayRef<uint8_t> Input,
1081f917f69SDimitry Andric                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
1091f917f69SDimitry Andric   unsigned long CompressedSize = ::compressBound(Input.size());
11077fc4c14SDimitry Andric   CompressedBuffer.resize_for_overwrite(CompressedSize);
1111f917f69SDimitry Andric   int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
1121f917f69SDimitry Andric                         (const Bytef *)Input.data(), Input.size(), Level);
113145449b1SDimitry Andric   if (Res == Z_MEM_ERROR)
114145449b1SDimitry Andric     report_bad_alloc_error("Allocation failed");
115145449b1SDimitry Andric   assert(Res == Z_OK);
11667c32a98SDimitry Andric   // Tell MemorySanitizer that zlib output buffer is fully initialized.
11767c32a98SDimitry Andric   // This avoids a false report when running LLVM with uninstrumented ZLib.
11867c32a98SDimitry Andric   __msan_unpoison(CompressedBuffer.data(), CompressedSize);
1191f917f69SDimitry Andric   if (CompressedSize < CompressedBuffer.size())
12077fc4c14SDimitry Andric     CompressedBuffer.truncate(CompressedSize);
12159d6cff9SDimitry Andric }
12259d6cff9SDimitry Andric 
decompress(ArrayRef<uint8_t> Input,uint8_t * Output,size_t & UncompressedSize)123e3b55780SDimitry Andric Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
124b915e9e0SDimitry Andric                        size_t &UncompressedSize) {
125e3b55780SDimitry Andric   int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
1261f917f69SDimitry Andric                          (const Bytef *)Input.data(), Input.size());
127b915e9e0SDimitry Andric   // Tell MemorySanitizer that zlib output buffer is fully initialized.
128b915e9e0SDimitry Andric   // This avoids a false report when running LLVM with uninstrumented ZLib.
129e3b55780SDimitry Andric   __msan_unpoison(Output, UncompressedSize);
1301f917f69SDimitry Andric   return Res ? make_error<StringError>(convertZlibCodeToString(Res),
1311f917f69SDimitry Andric                                        inconvertibleErrorCode())
1321f917f69SDimitry Andric              : Error::success();
133b915e9e0SDimitry Andric }
134b915e9e0SDimitry Andric 
decompress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output,size_t UncompressedSize)135e3b55780SDimitry Andric Error zlib::decompress(ArrayRef<uint8_t> Input,
136e3b55780SDimitry Andric                        SmallVectorImpl<uint8_t> &Output,
13759d6cff9SDimitry Andric                        size_t UncompressedSize) {
138e3b55780SDimitry Andric   Output.resize_for_overwrite(UncompressedSize);
139e3b55780SDimitry Andric   Error E = zlib::decompress(Input, Output.data(), UncompressedSize);
140e3b55780SDimitry Andric   if (UncompressedSize < Output.size())
141e3b55780SDimitry Andric     Output.truncate(UncompressedSize);
14271d5a254SDimitry Andric   return E;
14359d6cff9SDimitry Andric }
14459d6cff9SDimitry Andric 
14559d6cff9SDimitry Andric #else
isAvailable()14659d6cff9SDimitry Andric bool zlib::isAvailable() { return false; }
compress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & CompressedBuffer,int Level)1471f917f69SDimitry Andric void zlib::compress(ArrayRef<uint8_t> Input,
1481f917f69SDimitry Andric                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
14971d5a254SDimitry Andric   llvm_unreachable("zlib::compress is unavailable");
15059d6cff9SDimitry Andric }
decompress(ArrayRef<uint8_t> Input,uint8_t * UncompressedBuffer,size_t & UncompressedSize)151e3b55780SDimitry Andric Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
152b915e9e0SDimitry Andric                        size_t &UncompressedSize) {
153e3b55780SDimitry Andric   llvm_unreachable("zlib::decompress is unavailable");
154b915e9e0SDimitry Andric }
decompress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & UncompressedBuffer,size_t UncompressedSize)155e3b55780SDimitry Andric Error zlib::decompress(ArrayRef<uint8_t> Input,
1561f917f69SDimitry Andric                        SmallVectorImpl<uint8_t> &UncompressedBuffer,
15759d6cff9SDimitry Andric                        size_t UncompressedSize) {
158e3b55780SDimitry Andric   llvm_unreachable("zlib::decompress is unavailable");
15959d6cff9SDimitry Andric }
16059d6cff9SDimitry Andric #endif
1614b4fe385SDimitry Andric 
1624b4fe385SDimitry Andric #if LLVM_ENABLE_ZSTD
1634b4fe385SDimitry Andric 
isAvailable()1644b4fe385SDimitry Andric bool zstd::isAvailable() { return true; }
1654b4fe385SDimitry Andric 
166ac9a064cSDimitry Andric #include <zstd.h> // Ensure ZSTD library is included
167ac9a064cSDimitry Andric 
compress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & CompressedBuffer,int Level,bool EnableLdm)1684b4fe385SDimitry Andric void zstd::compress(ArrayRef<uint8_t> Input,
169ac9a064cSDimitry Andric                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
170ac9a064cSDimitry Andric                     bool EnableLdm) {
171ac9a064cSDimitry Andric   ZSTD_CCtx *Cctx = ZSTD_createCCtx();
172ac9a064cSDimitry Andric   if (!Cctx)
173ac9a064cSDimitry Andric     report_bad_alloc_error("Failed to create ZSTD_CCtx");
174ac9a064cSDimitry Andric 
175ac9a064cSDimitry Andric   if (ZSTD_isError(ZSTD_CCtx_setParameter(
176ac9a064cSDimitry Andric           Cctx, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
177ac9a064cSDimitry Andric     ZSTD_freeCCtx(Cctx);
178ac9a064cSDimitry Andric     report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
179ac9a064cSDimitry Andric   }
180ac9a064cSDimitry Andric 
181ac9a064cSDimitry Andric   if (ZSTD_isError(
182ac9a064cSDimitry Andric           ZSTD_CCtx_setParameter(Cctx, ZSTD_c_compressionLevel, Level))) {
183ac9a064cSDimitry Andric     ZSTD_freeCCtx(Cctx);
184ac9a064cSDimitry Andric     report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
185ac9a064cSDimitry Andric   }
186ac9a064cSDimitry Andric 
187ac9a064cSDimitry Andric   unsigned long CompressedBufferSize = ZSTD_compressBound(Input.size());
1884b4fe385SDimitry Andric   CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
189ac9a064cSDimitry Andric 
190ac9a064cSDimitry Andric   size_t const CompressedSize =
191ac9a064cSDimitry Andric       ZSTD_compress2(Cctx, CompressedBuffer.data(), CompressedBufferSize,
192ac9a064cSDimitry Andric                      Input.data(), Input.size());
193ac9a064cSDimitry Andric 
194ac9a064cSDimitry Andric   ZSTD_freeCCtx(Cctx);
195ac9a064cSDimitry Andric 
1964b4fe385SDimitry Andric   if (ZSTD_isError(CompressedSize))
197ac9a064cSDimitry Andric     report_bad_alloc_error("Compression failed");
198ac9a064cSDimitry Andric 
1994b4fe385SDimitry Andric   __msan_unpoison(CompressedBuffer.data(), CompressedSize);
2004b4fe385SDimitry Andric   if (CompressedSize < CompressedBuffer.size())
2014b4fe385SDimitry Andric     CompressedBuffer.truncate(CompressedSize);
2024b4fe385SDimitry Andric }
2034b4fe385SDimitry Andric 
decompress(ArrayRef<uint8_t> Input,uint8_t * Output,size_t & UncompressedSize)204e3b55780SDimitry Andric Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
2054b4fe385SDimitry Andric                        size_t &UncompressedSize) {
206e3b55780SDimitry Andric   const size_t Res = ::ZSTD_decompress(
207e3b55780SDimitry Andric       Output, UncompressedSize, (const uint8_t *)Input.data(), Input.size());
2084b4fe385SDimitry Andric   UncompressedSize = Res;
2094b4fe385SDimitry Andric   // Tell MemorySanitizer that zstd output buffer is fully initialized.
2104b4fe385SDimitry Andric   // This avoids a false report when running LLVM with uninstrumented ZLib.
211e3b55780SDimitry Andric   __msan_unpoison(Output, UncompressedSize);
2124b4fe385SDimitry Andric   return ZSTD_isError(Res) ? make_error<StringError>(ZSTD_getErrorName(Res),
2134b4fe385SDimitry Andric                                                      inconvertibleErrorCode())
2144b4fe385SDimitry Andric                            : Error::success();
2154b4fe385SDimitry Andric }
2164b4fe385SDimitry Andric 
decompress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output,size_t UncompressedSize)217e3b55780SDimitry Andric Error zstd::decompress(ArrayRef<uint8_t> Input,
218e3b55780SDimitry Andric                        SmallVectorImpl<uint8_t> &Output,
2194b4fe385SDimitry Andric                        size_t UncompressedSize) {
220e3b55780SDimitry Andric   Output.resize_for_overwrite(UncompressedSize);
221e3b55780SDimitry Andric   Error E = zstd::decompress(Input, Output.data(), UncompressedSize);
222e3b55780SDimitry Andric   if (UncompressedSize < Output.size())
223e3b55780SDimitry Andric     Output.truncate(UncompressedSize);
2244b4fe385SDimitry Andric   return E;
2254b4fe385SDimitry Andric }
2264b4fe385SDimitry Andric 
2274b4fe385SDimitry Andric #else
isAvailable()2284b4fe385SDimitry Andric bool zstd::isAvailable() { return false; }
compress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & CompressedBuffer,int Level,bool EnableLdm)2294b4fe385SDimitry Andric void zstd::compress(ArrayRef<uint8_t> Input,
230ac9a064cSDimitry Andric                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
231ac9a064cSDimitry Andric                     bool EnableLdm) {
2324b4fe385SDimitry Andric   llvm_unreachable("zstd::compress is unavailable");
2334b4fe385SDimitry Andric }
decompress(ArrayRef<uint8_t> Input,uint8_t * Output,size_t & UncompressedSize)234e3b55780SDimitry Andric Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
2354b4fe385SDimitry Andric                        size_t &UncompressedSize) {
236e3b55780SDimitry Andric   llvm_unreachable("zstd::decompress is unavailable");
2374b4fe385SDimitry Andric }
decompress(ArrayRef<uint8_t> Input,SmallVectorImpl<uint8_t> & Output,size_t UncompressedSize)238e3b55780SDimitry Andric Error zstd::decompress(ArrayRef<uint8_t> Input,
239e3b55780SDimitry Andric                        SmallVectorImpl<uint8_t> &Output,
2404b4fe385SDimitry Andric                        size_t UncompressedSize) {
241e3b55780SDimitry Andric   llvm_unreachable("zstd::decompress is unavailable");
2424b4fe385SDimitry Andric }
2434b4fe385SDimitry Andric #endif
244