// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

 /*-*************************************
 *  Dependencies
 ***************************************/
#include "zstd_compress_literals.h"


/* **************************************************************
*  Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2

static size_t showHexa(const void* src, size_t srcSize)
{
    const BYTE* const ip = (const BYTE*)src;
    size_t u;
    for (u=0; u<srcSize; u++) {
        RAWLOG(5, " %02X", ip[u]); (void)ip;
    }
    RAWLOG(5, " \n");
    return srcSize;
}

#endif


/* **************************************************************
*  Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE*)dst;
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
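    /* flSize is the size of the Literals_Section_Header for a Raw literals block:
     * 1 byte when srcSize <= 31, 2 bytes up to 4095, 3 bytes beyond.
     * The "2 - 1 - 5" style comments below give the bit widths of the header fields:
     * Literals_Block_Type, Size_Format, Regenerated_Size. */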

    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);

    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");

    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
            break;
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
            break;
        default:   /* not necessary : flSize is {1,2,3} */
            assert(0);
    }

    ZSTD_memcpy(ostart + flSize, src, srcSize);
    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
    return srcSize + flSize;
}

static int allBytesIdentical(const void* src, size_t srcSize)
{
    assert(srcSize >= 1);
    assert(src != NULL);
    {   const BYTE b = ((const BYTE*)src)[0];
        size_t p;
        for (p=1; p<srcSize; p++) {
            if (((const BYTE*)src)[p] != b) return 0;
        }
        return 1;
    }
}

size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE*)dst;
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
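    /* Same 1/2/3-byte header layout as raw literals, but with the RLE block type;
     * only the single repeated byte is stored after the header. */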

    assert(dstCapacity >= 4); (void)dstCapacity;
    assert(allBytesIdentical(src, srcSize));

    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
            break;
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
            break;
        default:   /* not necessary : flSize is {1,2,3} */
            assert(0);
    }

    ostart[flSize] = *(const BYTE*)src;
    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
    return flSize+1;
}

/* ZSTD_minLiteralsToCompress() :
 * returns minimal amount of literals
 * for literal compression to even be attempted.
 * Minimum is made tighter as compression strategy increases.
 */
static size_t
ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
{
    assert((int)strategy >= 0);
    assert((int)strategy <= 9);
    /* btultra2 : min 8 bytes;
     * then 2x larger for each successive compression strategy
     * max threshold 64 bytes */
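    /* Resulting thresholds : 8 bytes for btultra2 (strategy 9), 16 for btultra,
     * 32 for btopt, and 64 for all faster strategies;
     * only 6 bytes when a previous Huffman table can be reused as-is. */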
    {   int const shift = MIN(9-(int)strategy, 3);
        size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
        DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
        return mintc;
    }
}

size_t ZSTD_compressLiterals (
                  void* dst, size_t dstCapacity,
            const void* src, size_t srcSize,
                  void* entropyWorkspace, size_t entropyWorkspaceSize,
            const ZSTD_hufCTables_t* prevHuf,
                  ZSTD_hufCTables_t* nextHuf,
                  ZSTD_strategy strategy,
                  int disableLiteralCompression,
                  int suspectUncompressible,
                  int bmi2)
{
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE*  const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;
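    /* lhSize is the Literals_Section_Header size for compressed literals:
     * 3 bytes below 1 KB, 4 bytes below 16 KB, 5 bytes otherwise.
     * Small inputs default to a single Huffman stream, since the 4-stream
     * variant spends 6 bytes on a jump table. */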
    SymbolEncodingType_e hType = set_compressed;
    size_t cLitSize;

    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
                disableLiteralCompression, (U32)srcSize, dstCapacity);

    DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));

    /* Prepare nextEntropy assuming reusing the existing table */
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralCompression)
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);

    /* if too small, don't even attempt compression (speed opt) */
    if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);

    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
    {   HUF_repeat repeat = prevHuf->repeatMode;
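        /* Translate compression parameters into HUF flags (roughly) :
         * BMI2 dispatch when available,
         * a preference for reusing the previous table on small inputs with fast strategies,
         * an optimal table-depth search for the strongest strategies,
         * and a sampling pass when the caller suspects incompressible data. */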
        int const flags = 0
            | (bmi2 ? HUF_flags_bmi2 : 0)
            | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
            | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
            | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);

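        /* Both HUF entry points share the same signature, so the stream count
         * can be selected through a single function pointer.
         * When the previous table is valid for reuse and the block fits a
         * 3-byte header, single-stream is forced, presumably to skip the
         * 4-stream jump table overhead on such a small block. */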
        typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
        huf_compress_f huf_compress;
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
        huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
        cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
                                src, srcSize,
                                HUF_SYMBOLVALUE_MAX, LitHufLog,
                                entropyWorkspace, entropyWorkspaceSize,
                                (HUF_CElt*)nextHuf->CTable,
                                &repeat, flags);
        DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
        if (repeat != HUF_repeat_none) {
            /* reused the existing table */
            DEBUGLOG(5, "reusing statistics from previous huffman block");
            hType = set_repeat;
        }
    }

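    /* Revert to raw literals when compression failed or did not gain
     * at least minGain bytes over the uncompressed representation,
     * restoring the previous Huffman statistics for the next block. */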
    {   size_t const minGain = ZSTD_minGain(srcSize, strategy);
        if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
            return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }   }
    if (cLitSize==1) {
        /* A return value of 1 signals that the alphabet consists of a single symbol.
         * However, in some rare circumstances, it could be the compressed size (a single byte).
         * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
         * (it's also necessary to not generate statistics).
         * Therefore, in such a case, actively check that all bytes are identical. */
        if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
            ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
            return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }   }

    if (hType == set_compressed) {
        /* using a newly constructed table */
        nextHuf->repeatMode = HUF_repeat_check;
    }

    /* Build header */
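    /* Header field widths below (e.g. "2 - 2 - 10 - 10") :
     * Literals_Block_Type, Size_Format, Regenerated_Size, Compressed_Size, in bits. */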
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);
            break;
        }
    default:  /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
    return lhSize+cLitSize;
}