xref: /src/sys/contrib/openzfs/module/zstd/lib/compress/zstd_compress_internal.h (revision 8a62a2a5659d1839d8799b4274c04469d7f17c78) !
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /*
3  * Copyright (c) Meta Platforms, Inc. and affiliates.
4  * All rights reserved.
5  *
6  * This source code is licensed under both the BSD-style license (found in the
7  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8  * in the COPYING file in the root directory of this source tree).
9  * You may select, at your option, one of the above-listed licenses.
10  */
11 
12 /* This header contains definitions
13  * that shall **only** be used by modules within lib/compress.
14  */
15 
16 #ifndef ZSTD_COMPRESS_H
17 #define ZSTD_COMPRESS_H
18 
19 /*-*************************************
20 *  Dependencies
21 ***************************************/
22 #include "../common/zstd_internal.h"
23 #include "zstd_cwksp.h"
24 #ifdef ZSTD_MULTITHREAD
25 #  include "zstdmt_compress.h"
26 #endif
27 #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
28 #include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
29 
30 /*-*************************************
31 *  Constants
32 ***************************************/
#define kSearchStrength      8   /* search-depth tuning constant — NOTE(review): consumed by the match finders; confirm exact semantics there */
#define HASH_READ_SIZE       8   /* nb of bytes read when hashing a position — NOTE(review): confirm against hash helpers */
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
42 
43 
44 /*-*************************************
45 *  Context memory management
46 ***************************************/
/* Lifecycle stage of a compression context: created -> init -> ongoing -> ending. */
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
/* Streaming sub-stage: loading input vs. flushing produced output. */
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

/* Non-owning reference to a raw prefix used as dictionary content. */
typedef struct ZSTD_prefixDict_s {
    const void* dict;       /* borrowed pointer — not freed here */
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

/* Dictionary material held by a cctx, plus its digested CDict form. */
typedef struct {
    void* dictBuffer;       /* NOTE(review): appears to be an owned copy when non-NULL — confirm ownership in zstd_compress.c */
    void const* dict;       /* content actually referenced (may alias dictBuffer or user memory) */
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;      /* digested form built from the content above */
} ZSTD_localDict;

/* Huffman table for literals, plus its reuse policy for the next block. */
typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

/* FSE tables for the three sequence fields, each with its own reuse policy. */
typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

/* Complete entropy state (literals + sequences). */
typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
82 
83 /***********************************************
84 *  Sequences *
85 ***********************************************/
/* One parsed sequence: a run of literals followed by a match, in storage format. */
typedef struct SeqDef_s {
    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
    U16 litLength;
    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
} SeqDef;

/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
typedef enum {
    ZSTD_llt_none = 0,             /* no longLengthType */
    ZSTD_llt_literalLength = 1,    /* represents a long literal */
    ZSTD_llt_matchLength = 2       /* represents a long match */
} ZSTD_longLengthType_e;

/* Accumulates the sequences and literals produced while parsing one block. */
typedef struct {
    SeqDef* sequencesStart;
    SeqDef* sequences;      /* ptr to end of sequences */
    BYTE*  litStart;
    BYTE*  lit;             /* ptr to end of literals */
    BYTE*  llCode;          /* per-sequence symbol codes — presumably filled by ZSTD_seqToCodes(); see declaration below */
    BYTE*  mlCode;
    BYTE*  ofCode;
    size_t maxNbSeq;        /* capacity of the sequences array */
    size_t maxNbLit;        /* capacity of the literals buffer */

    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
     * the existing value of the litLength or matchLength by 0x10000.
     */
    ZSTD_longLengthType_e longLengthType;
    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
} SeqStore_t;

/* Fully-decoded lengths of one sequence (see ZSTD_getSequenceLength()). */
typedef struct {
    U32 litLength;
    U32 matchLength;
} ZSTD_SequenceLength;
122 
123 /**
124  * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences
125  * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
126  */
ZSTD_getSequenceLength(SeqStore_t const * seqStore,SeqDef const * seq)127 MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
128 {
129     ZSTD_SequenceLength seqLen;
130     seqLen.litLength = seq->litLength;
131     seqLen.matchLength = seq->mlBase + MINMATCH;
132     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
133         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
134             seqLen.litLength += 0x10000;
135         }
136         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
137             seqLen.matchLength += 0x10000;
138         }
139     }
140     return seqLen;
141 }
142 
143 const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
144 int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
145 
146 
147 /***********************************************
148 *  Entropy buffer statistics structs and funcs *
149 ***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    SymbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];    /* serialized tree description, ready to emit */
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    SymbolEncodingType_e llType;
    SymbolEncodingType_e ofType;
    SymbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];    /* serialized FSE table headers, ready to emit */
    size_t fseTablesSize;
    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

/* Combined entropy metadata for one (super-)block. */
typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
179 
180 /** ZSTD_buildBlockEntropyStats() :
181  *  Builds entropy for the block.
182  *  @return : 0 on success or error code */
183 size_t ZSTD_buildBlockEntropyStats(
184                     const SeqStore_t* seqStorePtr,
185                     const ZSTD_entropyCTables_t* prevEntropy,
186                           ZSTD_entropyCTables_t* nextEntropy,
187                     const ZSTD_CCtx_params* cctxParams,
188                           ZSTD_entropyCTablesMetadata_t* entropyMetadata,
189                           void* workspace, size_t wkspSize);
190 
191 /*********************************
192 *  Compression internals structs *
193 *********************************/
194 
/* A candidate match found by a match finder. */
typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

/* A sequence in raw (non-biased) form, as produced by LDM / external producers. */
typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

/* A consumable store of rawSeq, with a read cursor (pos/posInSequence). */
typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} RawSeqStore_t;

/* Sentinel: an empty raw sequence store. */
UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

/* One node of the optimal parser's dynamic program. */
typedef struct {
    int price;  /* price from beginning of segment to this position */
    U32 off;    /* offset of previous match */
    U32 mlen;   /* length of previous match */
    U32 litlen; /* nb of literals since previous match */
    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
} ZSTD_optimal_t;
224 
/* Pricing model for the optimal parser: measured statistics vs. predefined costs. */
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
/* State of the optimal parser (btopt/btultra strategies). */
typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_ParamSwitch_e literalCompressionMode;
} optState_t;
249 
/* Entropy tables plus repcode history — NOTE(review): presumably carried block-to-block
 * via the prev/next pointers in ZSTD_blockState_t; confirm in zstd_compress.c. */
typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

/* Sliding-window bookkeeping for the round buffer. */
typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;
266 
#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

/* Match-finding state shared by all strategies: window, tables, and optimal-parser state. */
struct ZSTD_MatchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */

    U32* hashTable;         /* main hash table */
    U32* hashTable3;        /* table dedicated to len==3 matches (see hashLog3) */
    U32* chainTable;        /* NOTE(review): chain or binary-tree table depending on strategy — confirm per-strategy layout */

    int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_MatchState_t* dictMatchState;    /* match state of the attached dictionary, if any */
    ZSTD_compressionParameters cParams;
    const RawSeqStore_t* ldmSeqStore;           /* long-distance-matching sequences to merge, if any */

    /* Controls prefetching in some dictMatchState matchfinders.
     * This behavior is controlled from the cctx ms.
     * This parameter has no effect in the cdict ms. */
    int prefetchCDictTables;

    /* When == 0, lazy match finders insert every position.
     * When != 0, lazy match finders only insert positions they search.
     * This allows them to skip much faster over incompressible data,
     * at a small cost to compression ratio.
     */
    int lazySkipping;
};
317 
/* Pair of block states (previous/next) plus the shared match state. */
typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_MatchState_t matchState;
} ZSTD_blockState_t;

/* One entry of the long-distance-matching hash table. */
typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

/* A position being considered as an LDM split point. */
typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

/* Long-distance matcher state. */
typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

/* User-visible LDM tuning parameters. */
typedef struct {
    ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;       /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

/* Destination for sequences collected via the sequence-collection API. */
typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;
362 
/* Full parameter set of a compression context.
 * Typedef'd to ZSTD_CCtx_params within "zstd.h" (see closing comment). */
struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_ParamSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_SequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting
     * @postBlockSplitter executes split analysis after sequences are produced,
     * it's more accurate but consumes more resources.
     * @preBlockSplitter_level splits before knowing sequences,
     * it's more approximative but also cheaper.
     * Valid @preBlockSplitter_level values range from 0 to 6 (included).
     * 0 means auto, 1 means do not split,
     * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
     * Highest @preBlockSplitter_level combines well with @postBlockSplitter.
     */
    ZSTD_ParamSwitch_e postBlockSplitter;
    int preBlockSplitter_level;

    /* Adjust the max block size*/
    size_t maxBlockSize;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_ParamSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;

    /* Controls prefetching in some dictMatchState matchfinders */
    ZSTD_ParamSwitch_e prefetchCDictTables;

    /* Controls whether zstd will fall back to an internal matchfinder
     * if the external matchfinder returns an error code. */
    int enableMatchFinderFallback;

    /* Parameters for the external sequence producer API.
     * Users set these parameters through ZSTD_registerSequenceProducer().
     * It is not possible to set these parameters individually through the public API. */
    void* extSeqProdState;
    ZSTD_sequenceProducer_F extSeqProdFunc;

    /* Controls repcode search in external sequence parsing */
    ZSTD_ParamSwitch_e searchForExternalRepcodes;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
442 
/* Scratch-space sizing: the entropy workspace must hold the Huffman build area
 * plus a per-symbol counting buffer; the tmp workspace must additionally be
 * large enough for the pre-split analysis (ZSTD_SLIPBLOCK_WORKSPACESIZE). */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
446 
/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    SeqStore_t fullSeqStoreChunk;   /* seqStore views used while evaluating candidate splits */
    SeqStore_t firstHalfSeqStore;
    SeqStore_t secondHalfSeqStore;
    SeqStore_t currSeqStore;
    SeqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];   /* chosen split points */
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
472 
/* The compression context: all state needed to compress one frame,
 * including workspace, match state, streaming buffers and dictionaries. */
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;                        /* ID of the active dictionary, 0 when none */
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSizeMax;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;     /* NOTE(review): presumably the frame checksum state when fParams.checksumFlag is set — confirm */
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;          /* non-zero when the cctx lives in caller-provided static memory */
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    SeqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    void* tmpWorkspace;  /* used as substitute of stack space - must be aligned for S64 type */
    size_t tmpWkspSize;

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;

    /* Buffer for output from external sequence producer */
    ZSTD_Sequence* extSeqBuf;
    size_t extSeqBufCapacity;
};
548 
/* How thoroughly the dictionary is indexed into the match-finder tables —
 * NOTE(review): "fast" presumably indexes a subset of positions; confirm in dict-load code. */
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
/* Whether tables are being filled on behalf of a CCtx or a CDict. */
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;

/* How the current block relates to dictionary / history content. */
typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_CParamMode_e;

/* Signature shared by all per-strategy block compressors. */
typedef size_t (*ZSTD_BlockCompressor_f) (
        ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
583 
584 
ZSTD_LLcode(U32 litLength)585 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
586 {
587     static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
588                                        8,  9, 10, 11, 12, 13, 14, 15,
589                                       16, 16, 17, 17, 18, 18, 19, 19,
590                                       20, 20, 20, 20, 21, 21, 21, 21,
591                                       22, 22, 22, 22, 22, 22, 22, 22,
592                                       23, 23, 23, 23, 23, 23, 23, 23,
593                                       24, 24, 24, 24, 24, 24, 24, 24,
594                                       24, 24, 24, 24, 24, 24, 24, 24 };
595     static const U32 LL_deltaCode = 19;
596     return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
597 }
598 
599 /* ZSTD_MLcode() :
600  * note : mlBase = matchLength - MINMATCH;
601  *        because it's the format it's stored in seqStore->sequences */
ZSTD_MLcode(U32 mlBase)602 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
603 {
604     static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
605                                       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
606                                       32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
607                                       38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
608                                       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
609                                       41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
610                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
611                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
612     static const U32 ML_deltaCode = 36;
613     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
614 }
615 
616 /* ZSTD_cParam_withinBounds:
617  * @return 1 if value is within cParam bounds,
618  * 0 otherwise */
ZSTD_cParam_withinBounds(ZSTD_cParameter cParam,int value)619 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
620 {
621     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
622     if (ZSTD_isError(bounds.error)) return 0;
623     if (value < bounds.lowerBound) return 0;
624     if (value > bounds.upperBound) return 0;
625     return 1;
626 }
627 
/* ZSTD_selectAddr:
 * @return index >= lowLimit ? candidate : backup,
 * tries to force branchless codegen. */
MEM_STATIC const BYTE*
ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
{
#if defined(__GNUC__) && defined(__x86_64__)
    /* AT&T syntax: `cmp index, lowLimit` sets flags from (lowLimit - index);
     * `cmova` then replaces candidate with backup when lowLimit > index
     * (unsigned), i.e. exactly when index < lowLimit — matching the
     * portable expression below, but as a conditional move. */
    __asm__ (
        "cmp %1, %2\n"
        "cmova %3, %0\n"
        : "+r"(candidate)
        : "r"(index), "r"(lowLimit), "r"(backup)
        );
    return candidate;
#else
    /* Portable fallback: compiler may or may not emit a branch. */
    return index >= lowLimit ? candidate : backup;
#endif
}
646 
647 /* ZSTD_noCompressBlock() :
648  * Writes uncompressed block to dst buffer from given src.
649  * Returns the size of the block */
650 MEM_STATIC size_t
ZSTD_noCompressBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)651 ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
652 {
653     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
654     DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
655     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
656                     dstSize_tooSmall, "dst buf too small for uncompressed block");
657     MEM_writeLE24(dst, cBlockHeader24);
658     ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
659     return ZSTD_blockHeaderSize + srcSize;
660 }
661 
662 MEM_STATIC size_t
ZSTD_rleCompressBlock(void * dst,size_t dstCapacity,BYTE src,size_t srcSize,U32 lastBlock)663 ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
664 {
665     BYTE* const op = (BYTE*)dst;
666     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
667     RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
668     MEM_writeLE24(op, cBlockHeader);
669     op[3] = src;
670     return 4;
671 }
672 
673 
674 /* ZSTD_minGain() :
675  * minimum compression required
676  * to generate a compress block or a compressed literals section.
677  * note : use same formula for both situations */
ZSTD_minGain(size_t srcSize,ZSTD_strategy strat)678 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
679 {
680     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
681     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
682     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
683     return (srcSize >> minlog) + 2;
684 }
685 
ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params * cctxParams)686 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
687 {
688     switch (cctxParams->literalCompressionMode) {
689     case ZSTD_ps_enable:
690         return 0;
691     case ZSTD_ps_disable:
692         return 1;
693     default:
694         assert(0 /* impossible: pre-validated */);
695         ZSTD_FALLTHROUGH;
696     case ZSTD_ps_auto:
697         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
698     }
699 }
700 
701 /*! ZSTD_safecopyLiterals() :
702  *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
703  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
704  *  large copies.
705  */
706 static void
ZSTD_safecopyLiterals(BYTE * op,BYTE const * ip,BYTE const * const iend,BYTE const * ilimit_w)707 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
708 {
709     assert(iend > ilimit_w);
710     if (ip <= ilimit_w) {
711         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
712         op += ilimit_w - ip;
713         ip = ilimit_w;
714     }
715     while (ip < iend) *op++ = *ip++;
716 }
717 
718 
/* "offBase" is a sum-type over a single U32 :
 * values 1..ZSTD_REP_NUM are repcode IDs, larger values encode
 * (offset + ZSTD_REP_NUM). The macros below build, classify and
 * decode offBase values; the assert()s document the valid ranges. */
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
728 
729 /*! ZSTD_storeSeqOnly() :
730  *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
731  *  Literals themselves are not copied, but @litPtr is updated.
732  *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
733  *  @matchLength : must be >= MINMATCH
734 */
735 HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeqOnly(SeqStore_t * seqStorePtr,size_t litLength,U32 offBase,size_t matchLength)736 ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
737               size_t litLength,
738               U32 offBase,
739               size_t matchLength)
740 {
741     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
742 
743     /* literal Length */
744     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
745     if (UNLIKELY(litLength>0xFFFF)) {
746         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
747         seqStorePtr->longLengthType = ZSTD_llt_literalLength;
748         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
749     }
750     seqStorePtr->sequences[0].litLength = (U16)litLength;
751 
752     /* match offset */
753     seqStorePtr->sequences[0].offBase = offBase;
754 
755     /* match Length */
756     assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
757     assert(matchLength >= MINMATCH);
758     {   size_t const mlBase = matchLength - MINMATCH;
759         if (UNLIKELY(mlBase>0xFFFF)) {
760             assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
761             seqStorePtr->longLengthType = ZSTD_llt_matchLength;
762             seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
763         }
764         seqStorePtr->sequences[0].mlBase = (U16)mlBase;
765     }
766 
767     seqStorePtr->sequences++;
768 }
769 
/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t,
 *  copying the literals into the seqStore's literal buffer.
 *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to over-read literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(SeqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase,
              size_t matchLength)
{
    /* last input position from which a wildcopy (which may over-read up to
     * WILDCOPY_OVERLENGTH bytes) is still safe */
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
               pos, (U32)litLength, (U32)matchLength, (U32)offBase);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        /* sequence ends too close to litLimit : use the careful copy,
         * which never reads beyond litLimit_w + WILDCOPY_OVERLENGTH */
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* record the sequence metadata (litLength / offBase / matchLength) */
    ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength);
}
813 
814 /* ZSTD_updateRep() :
815  * updates in-place @rep (array of repeat offsets)
816  * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
817  */
818 MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM],U32 const offBase,U32 const ll0)819 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
820 {
821     if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
822         rep[2] = rep[1];
823         rep[1] = rep[0];
824         rep[0] = OFFBASE_TO_OFFSET(offBase);
825     } else {   /* repcode */
826         U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
827         if (repCode > 0) {  /* note : if repCode==0, no change */
828             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
829             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
830             rep[1] = rep[0];
831             rep[0] = currentOffset;
832         } else {   /* repCode == 0 */
833             /* nothing to do */
834         }
835     }
836 }
837 
838 typedef struct repcodes_s {
839     U32 rep[3];
840 } Repcodes_t;
841 
842 MEM_STATIC Repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM],U32 const offBase,U32 const ll0)843 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
844 {
845     Repcodes_t newReps;
846     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
847     ZSTD_updateRep(newReps.rep, offBase, ll0);
848     return newReps;
849 }
850 
851 
/*-*************************************
*  Match length counter
***************************************/
/* ZSTD_count() :
 * @return : length of the common prefix of @pIn and @pMatch,
 *           reading @pIn no further than @pInLimit.
 * Compares sizeof(size_t) bytes at a time, then finishes the tail
 * with 4/2/1-byte comparisons. */
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        /* first word differs : count the matching bytes inside it via XOR */
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    /* tail : fewer than sizeof(size_t) bytes remain before pInLimit */
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
875 
/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continue starting from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    /* limit the first count so it cannot run past either segment's end */
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    /* match stopped before the end of the first segment : done */
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
    DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    /* match reached mEnd : continue counting from iStart (second segment) */
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}
894 
895 
/*-*************************************
 *  Hashes
 ***************************************/
/* Each ZSTD_hashN() multiplies the N lowest-order bytes of the input by a
 * large prime, optionally XORs in a salt @s, and keeps the top @h bits.
 * The *Ptr variants read the input little-endian from memory; the *PtrS
 * variants additionally take a salt. */
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
928 
929 
930 MEM_STATIC FORCE_INLINE_ATTR
ZSTD_hashPtr(const void * p,U32 hBits,U32 mls)931 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
932 {
933     /* Although some of these hashes do support hBits up to 64, some do not.
934      * To be on the safe side, always avoid hBits > 32. */
935     assert(hBits <= 32);
936 
937     switch(mls)
938     {
939     default:
940     case 4: return ZSTD_hash4Ptr(p, hBits);
941     case 5: return ZSTD_hash5Ptr(p, hBits);
942     case 6: return ZSTD_hash6Ptr(p, hBits);
943     case 7: return ZSTD_hash7Ptr(p, hBits);
944     case 8: return ZSTD_hash8Ptr(p, hBits);
945     }
946 }
947 
948 MEM_STATIC FORCE_INLINE_ATTR
ZSTD_hashPtrSalted(const void * p,U32 hBits,U32 mls,const U64 hashSalt)949 size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
950     /* Although some of these hashes do support hBits up to 64, some do not.
951      * To be on the safe side, always avoid hBits > 32. */
952     assert(hBits <= 32);
953 
954     switch(mls)
955     {
956         default:
957         case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
958         case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
959         case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
960         case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
961         case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
962     }
963 }
964 
965 
966 /** ZSTD_ipow() :
967  * Return base^exponent.
968  */
ZSTD_ipow(U64 base,U64 exponent)969 static U64 ZSTD_ipow(U64 base, U64 exponent)
970 {
971     U64 power = 1;
972     while (exponent) {
973       if (exponent & 1) power *= base;
974       exponent >>= 1;
975       base *= base;
976     }
977     return power;
978 }
979 
/* Constant added to each byte before it enters the rolling hash
 * (see ZSTD_rollingHash_append() / ZSTD_rollingHash_rotate());
 * presumably so that runs of zero bytes still perturb the hash state. */
#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
981 
982 /** ZSTD_rollingHash_append() :
983  * Add the buffer to the hash value.
984  */
ZSTD_rollingHash_append(U64 hash,void const * buf,size_t size)985 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
986 {
987     BYTE const* istart = (BYTE const*)buf;
988     size_t pos;
989     for (pos = 0; pos < size; ++pos) {
990         hash *= prime8bytes;
991         hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
992     }
993     return hash;
994 }
995 
/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer,
 * i.e. ZSTD_rollingHash_append() starting from an empty (0) hash.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}
1003 
/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 * This is prime8bytes^(length-1), the weight of the oldest byte in the window.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}
1012 
/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte :
 * remove @toRemove (the oldest byte, weighted by @primePower),
 * then shift and mix in @toAdd (the newest byte).
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
1023 
/*-*************************************
*  Round buffer management
***************************************/
/* Max @current value allowed:
 * In 32-bit mode: we want to avoid crossing the 2 GB limit,
 *                 reducing risks of side effects in case of signed operations on indexes.
 * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
 *                 doesn't overflow U32 index capacity (4 GB) */
#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
/* Maximum chunk size before overflow correction needs to be called again,
 * i.e. the U32 index headroom left above ZSTD_CURRENT_MAX */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
1037 
1038 /**
1039  * ZSTD_window_clear():
1040  * Clears the window containing the history by simply setting it to empty.
1041  */
ZSTD_window_clear(ZSTD_window_t * window)1042 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
1043 {
1044     size_t const endT = (size_t)(window->nextSrc - window->base);
1045     U32 const end = (U32)endT;
1046 
1047     window->lowLimit = end;
1048     window->dictLimit = end;
1049 }
1050 
ZSTD_window_isEmpty(ZSTD_window_t const window)1051 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
1052 {
1053     return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
1054            window.lowLimit == ZSTD_WINDOW_START_INDEX &&
1055            (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
1056 }
1057 
/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict,
 * i.e. some history lives between lowLimit and dictLimit.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}
1066 
1067 /**
1068  * ZSTD_matchState_dictMode():
1069  * Inspects the provided matchState and figures out what dictMode should be
1070  * passed to the compressor.
1071  */
ZSTD_matchState_dictMode(const ZSTD_MatchState_t * ms)1072 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
1073 {
1074     return ZSTD_window_hasExtDict(ms->window) ?
1075         ZSTD_extDict :
1076         ms->dictMatchState != NULL ?
1077             (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
1078             ZSTD_noDict;
1079 }
1080 
/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 * Defaults to 1 under libFuzzer-style builds, 0 otherwise; may also be
 * pre-defined by the build system. */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif
1092 
/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    /* current index of @src within the window's referential */
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}
1127 
1128 /**
1129  * ZSTD_window_needOverflowCorrection():
1130  * Returns non-zero if the indices are getting too large and need overflow
1131  * protection.
1132  */
ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src,void const * srcEnd)1133 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1134                                                   U32 cycleLog,
1135                                                   U32 maxDist,
1136                                                   U32 loadedDictEnd,
1137                                                   void const* src,
1138                                                   void const* srcEnd)
1139 {
1140     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1141     if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1142         if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1143             return 1;
1144         }
1145     }
1146     return curr > ZSTD_CURRENT_MAX;
1147 }
1148 
/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    /* new (reduced) value of the current index; keeps the low cycleLog bits */
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    /* shift the window's base pointers so all stored indices stay valid
     * once @correction is subtracted from them */
    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
1232 
/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_MatchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        /* limits only ever move forward */
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}
1294 
/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_MatchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             *
             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
             * dictMatchState, so setting it to NULL is not a problem.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            /* dictionary still fully reachable : leave it attached */
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}
1334 
/* ZSTD_window_init() :
 * Resets @window to its empty initial state. */
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";      /* placeholder non-NULL pointer; only used for index arithmetic */
    window->dictBase = (BYTE const*)" ";  /* same placeholder for the dictionary segment */
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
1345 
/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_update(ZSTD_window_t* window,
                 const void* src, size_t srcSize,
                       int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;  /* empty update: window state unchanged */
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous : the previous prefix [base+dictLimit, nextSrc)
         * becomes the new extDict, and a fresh prefix starts at ip */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        /* re-anchor base so that ip maps to index distanceFromBase,
         * keeping all existing indexes valid */
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    /* note : bitwise & is intentional (branchless evaluation of both range bounds) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
        /* raise lowLimit past the clobbered region, capped at dictLimit */
        U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        assert(highInputIdx < UINT_MAX);
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
1392 
1393 /**
1394  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1395  */
ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t * ms,U32 curr,unsigned windowLog)1396 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1397 {
1398     U32 const maxDistance = 1U << windowLog;
1399     U32 const lowestValid = ms->window.lowLimit;
1400     U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1401     U32 const isDictionary = (ms->loadedDictEnd != 0);
1402     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1403      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1404      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1405      */
1406     U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1407     return matchLowest;
1408 }
1409 
1410 /**
1411  * Returns the lowest allowed match index in the prefix.
1412  */
ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t * ms,U32 curr,unsigned windowLog)1413 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1414 {
1415     U32    const maxDistance = 1U << windowLog;
1416     U32    const lowestValid = ms->window.dictLimit;
1417     U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1418     U32    const isDictionary = (ms->loadedDictEnd != 0);
1419     /* When computing the lowest prefix index we need to take the dictionary into account to handle
1420      * the edge case where the dictionary and the source are contiguous in memory.
1421      */
1422     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
1423     return matchLowest;
1424 }
1425 
1426 /* index_safety_check:
1427  * intentional underflow : ensure repIndex isn't overlapping dict + prefix
1428  * @return 1 if values are not overlapping,
1429  * 0 otherwise */
ZSTD_index_overlap_check(const U32 prefixLowestIndex,const U32 repIndex)1430 MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
1431     return ((U32)((prefixLowestIndex-1)  - repIndex) >= 3);
1432 }
1433 
1434 
1435 /* debug functions */
1436 #if (DEBUGLEVEL>=2)
1437 
ZSTD_fWeight(U32 rawStat)1438 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1439 {
1440     U32 const fp_accuracy = 8;
1441     U32 const fp_multiplier = (1 << fp_accuracy);
1442     U32 const newStat = rawStat + 1;
1443     U32 const hb = ZSTD_highbit32(newStat);
1444     U32 const BWeight = hb * fp_multiplier;
1445     U32 const FWeight = (newStat << fp_accuracy) >> hb;
1446     U32 const weight = BWeight + FWeight;
1447     assert(hb + fp_accuracy < 31);
1448     return (double)weight / fp_multiplier;
1449 }
1450 
1451 /* display a table content,
1452  * listing each element, its frequency, and its predicted bit cost */
ZSTD_debugTable(const U32 * table,U32 max)1453 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1454 {
1455     unsigned u, sum;
1456     for (u=0, sum=0; u<=max; u++) sum += table[u];
1457     DEBUGLOG(2, "total nb elts: %u", sum);
1458     for (u=0; u<=max; u++) {
1459         DEBUGLOG(2, "%2u: %5u  (%.2f)",
1460                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1461     }
1462 }
1463 
1464 #endif
1465 
1466 /* Short Cache */
1467 
1468 /* Normally, zstd matchfinders follow this flow:
1469  *     1. Compute hash at ip
1470  *     2. Load index from hashTable[hash]
1471  *     3. Check if *ip == *(base + index)
1472  * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
1473  *
1474  * Short cache is an optimization which allows us to avoid step 3 most of the time
1475  * when the data doesn't actually match. With short cache, the flow becomes:
1476  *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
1477  *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
1478  *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
1479  *
1480  * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
1481  * dictMatchState matchfinders.
1482  */
1483 #define ZSTD_SHORT_CACHE_TAG_BITS 8
1484 #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
1485 
1486 /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
1487  * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
ZSTD_writeTaggedIndex(U32 * const hashTable,size_t hashAndTag,U32 index)1488 MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
1489     size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
1490     U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
1491     assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
1492     hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
1493 }
1494 
1495 /* Helper function for short cache matchfinders.
1496  * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
ZSTD_comparePackedTags(size_t packedTag1,size_t packedTag2)1497 MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
1498     U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
1499     U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
1500     return tag1 == tag2;
1501 }
1502 
1503 /* ===============================================================
1504  * Shared internal declarations
1505  * These prototypes may be called from sources not in lib/compress
1506  * =============================================================== */
1507 
1508 /* ZSTD_loadCEntropy() :
1509  * dict : must point at beginning of a valid zstd dictionary.
1510  * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1511  * assumptions : magic number supposed already checked
1512  *               and dictSize >= 8 */
1513 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1514                          const void* const dict, size_t dictSize);
1515 
1516 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1517 
/* Cursor tracking progress through an array of ZSTD_Sequence
 * while its content is being consumed. */
typedef struct {
    U32 idx;            /* Index in array of ZSTD_Sequence */
    U32 posInSequence;  /* Position within sequence at idx */
    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;
1523 
1524 /* for benchmark */
1525 size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
1526                         const ZSTD_Sequence* const inSeqs, size_t nbSequences,
1527                         int const repcodeResolution);
1528 
/* Aggregate statistics for one block of sequences,
 * as produced by ZSTD_get1BlockSummary() below. */
typedef struct {
    size_t nbSequences;  /* number of sequences in the block */
    size_t blockSize;    /* total block size in bytes — presumably literals + matches; confirm against ZSTD_get1BlockSummary() */
    size_t litSize;      /* total literals size in bytes */
} BlockSummary;
1534 
1535 BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);
1536 
1537 /* ==============================================================
1538  * Private declarations
1539  * These prototypes shall only be called from within lib/compress
1540  * ============================================================== */
1541 
1542 /* ZSTD_getCParamsFromCCtxParams() :
1543  * cParams are built depending on compressionLevel, src size hints,
1544  * LDM and manually set compression parameters.
1545  * Note: srcSizeHint == 0 means 0!
1546  */
1547 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1548         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);
1549 
1550 /*! ZSTD_initCStream_internal() :
1551  *  Private use only. Init streaming operation.
1552  *  expects params to be valid.
1553  *  must receive dict, or cdict, or none, but not both.
1554  *  @return : 0, or an error code */
1555 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1556                      const void* dict, size_t dictSize,
1557                      const ZSTD_CDict* cdict,
1558                      const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1559 
1560 void ZSTD_resetSeqStore(SeqStore_t* ssPtr);
1561 
1562 /*! ZSTD_getCParamsFromCDict() :
1563  *  as the name implies */
1564 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1565 
1566 /* ZSTD_compressBegin_advanced_internal() :
1567  * Private use only. To be called from zstdmt_compress.c. */
1568 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1569                                     const void* dict, size_t dictSize,
1570                                     ZSTD_dictContentType_e dictContentType,
1571                                     ZSTD_dictTableLoadMethod_e dtlm,
1572                                     const ZSTD_CDict* cdict,
1573                                     const ZSTD_CCtx_params* params,
1574                                     unsigned long long pledgedSrcSize);
1575 
1576 /* ZSTD_compress_advanced_internal() :
1577  * Private use only. To be called from zstdmt_compress.c. */
1578 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1579                                        void* dst, size_t dstCapacity,
1580                                  const void* src, size_t srcSize,
1581                                  const void* dict,size_t dictSize,
1582                                  const ZSTD_CCtx_params* params);
1583 
1584 
1585 /* ZSTD_writeLastEmptyBlock() :
1586  * output an empty Block with end-of-frame mark to complete a frame
1587  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1588  *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1589  */
1590 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1591 
1592 
1593 /* ZSTD_referenceExternalSequences() :
1594  * Must be called before starting a compression operation.
1595  * seqs must parse a prefix of the source.
1596  * This cannot be used when long range matching is enabled.
1597  * Zstd will use these sequences, and pass the literals to a secondary block
1598  * compressor.
1599  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1600  * access and data corruption.
1601  */
1602 void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1603 
1604 /** ZSTD_cycleLog() :
1605  *  condition for correct operation : hashLog > 1 */
1606 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1607 
1608 /** ZSTD_CCtx_trace() :
1609  *  Trace the end of a compression call.
1610  */
1611 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1612 
1613 /* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
ZSTD_hasExtSeqProd(const ZSTD_CCtx_params * params)1614 MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
1615     return params->extSeqProdFunc != NULL;
1616 }
1617 
1618 /* ===============================================================
1619  * Deprecated definitions that are still used internally to avoid
1620  * deprecation warnings. These functions are exactly equivalent to
1621  * their public variants, but avoid the deprecation warnings.
1622  * =============================================================== */
1623 
1624 size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
1625 
1626 size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
1627                                     void* dst, size_t dstCapacity,
1628                               const void* src, size_t srcSize);
1629 
1630 size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
1631                                void* dst, size_t dstCapacity,
1632                          const void* src, size_t srcSize);
1633 
1634 size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
1635 
1636 
1637 #endif /* ZSTD_COMPRESS_H */
1638