/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
#include "zstd_preSplit.h"  /* ZSTD_SLIPBLOCK_WORKSPACESIZE */

/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/* *********************************************
*  Sequences                                   *
***********************************************/
typedef struct SeqDef_s {
    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
    U16 litLength;
    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
} SeqDef;

/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
typedef enum {
    ZSTD_llt_none = 0,             /* no longLengthType */
    ZSTD_llt_literalLength = 1,    /* represents a long literal */
    ZSTD_llt_matchLength = 2       /* represents a long match */
} ZSTD_longLengthType_e;

typedef struct {
    SeqDef* sequencesStart;
    SeqDef* sequences;      /* ptr to end of sequences */
    BYTE*  litStart;
    BYTE*  lit;             /* ptr to end of literals */
    BYTE*  llCode;
    BYTE*  mlCode;
    BYTE*  ofCode;
    size_t maxNbSeq;
    size_t maxNbLit;

    /* longLengthPos and longLengthType allow us to represent either a single litLength or matchLength
     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
     * the existing value of the litLength or matchLength by 0x10000.
     */
    ZSTD_longLengthType_e longLengthType;
    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
} SeqStore_t;

typedef struct {
    U32 litLength;
    U32 matchLength;
} ZSTD_SequenceLength;

/*
 * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences
 * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
 */
MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
{
    ZSTD_SequenceLength seqLen;
    seqLen.litLength = seq->litLength;
    seqLen.matchLength = seq->mlBase + MINMATCH;
    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
            seqLen.litLength += 0x10000;
        }
        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
            seqLen.matchLength += 0x10000;
        }
    }
    return seqLen;
}

const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr);         /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
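/* Illustrative sketch (not part of the API) : how a "long" length round-trips
 * through the seqStore. A litLength of 70000 does not fit in the U16 field, so
 * the producer stores (70000 - 0x10000) and tags the sequence via longLengthPos /
 * longLengthType; ZSTD_getSequenceLength() adds the 0x10000 back on the read side.
 *
 *   SeqStore_t ss;                                  // assumed already initialized and filled
 *   SeqDef* const seq = ss.sequencesStart;          // first sequence, index 0
 *   seq->litLength = (U16)(70000 - 0x10000);        // truncated low 16 bits
 *   ss.longLengthType = ZSTD_llt_literalLength;
 *   ss.longLengthPos  = 0;                          // index of the tagged sequence
 *   { ZSTD_SequenceLength const sl = ZSTD_getSequenceLength(&ss, seq);
 *     assert(sl.litLength == 70000);                // 0x10000 restored
 *     assert(sl.matchLength == (U32)seq->mlBase + MINMATCH); }
 */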
/* *********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/* ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    SymbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/* ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    SymbolEncodingType_e llType;
    SymbolEncodingType_e ofType;
    SymbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize;   /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/* ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(
                    const SeqStore_t* seqStorePtr,
                    const ZSTD_entropyCTables_t* prevEntropy,
                          ZSTD_entropyCTables_t* nextEntropy,
                    const ZSTD_CCtx_params* cctxParams,
                          ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                          void* workspace, size_t wkspSize);

/* *******************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} RawSeqStore_t;

UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;  /* price from beginning of segment to this position */
    U32 off;    /* offset of previous match */
    U32 mlen;   /* length of previous match */
    U32 litlen; /* nb of literals since previous match */
    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_ParamSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;
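/* Illustrative sketch (not part of the API) : how ZSTD_window_t maps indexes to bytes.
 * Indexes in [lowLimit, dictLimit) belong to the extDict segment and resolve through
 * dictBase, while indexes from dictLimit upward belong to the current prefix and
 * resolve through base. ZSTD_index_to_ptr_example() is a hypothetical helper written
 * only to show the convention the matchfinders follow :
 *
 *   static const BYTE* ZSTD_index_to_ptr_example(const ZSTD_window_t* w, U32 idx)
 *   {
 *       assert(idx >= w->lowLimit);                       // below lowLimit : no valid data
 *       return (idx < w->dictLimit) ? w->dictBase + idx   // extDict segment
 *                                   : w->base + idx;      // current prefix
 *   }
 */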
#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_MatchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_MatchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const RawSeqStore_t* ldmSeqStore;

    /* Controls prefetching in some dictMatchState matchfinders.
     * This behavior is controlled from the cctx ms.
     * This parameter has no effect in the cdict ms. */
    int prefetchCDictTables;

    /* When == 0, lazy match finders insert every position.
     * When != 0, lazy match finders only insert positions they search.
     * This allows them to skip much faster over incompressible data,
     * at a small cost to compression ratio.
311 */ 312 int lazySkipping; 313 }; 314 315 typedef struct { 316 ZSTD_compressedBlockState_t* prevCBlock; 317 ZSTD_compressedBlockState_t* nextCBlock; 318 ZSTD_MatchState_t matchState; 319 } ZSTD_blockState_t; 320 321 typedef struct { 322 U32 offset; 323 U32 checksum; 324 } ldmEntry_t; 325 326 typedef struct { 327 BYTE const* split; 328 U32 hash; 329 U32 checksum; 330 ldmEntry_t* bucket; 331 } ldmMatchCandidate_t; 332 333 #define LDM_BATCH_SIZE 64 334 335 typedef struct { 336 ZSTD_window_t window; /* State for the window round buffer management */ 337 ldmEntry_t* hashTable; 338 U32 loadedDictEnd; 339 BYTE* bucketOffsets; /* Next position in bucket to insert entry */ 340 size_t splitIndices[LDM_BATCH_SIZE]; 341 ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; 342 } ldmState_t; 343 344 typedef struct { 345 ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ 346 U32 hashLog; /* Log size of hashTable */ 347 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ 348 U32 minMatchLength; /* Minimum match length */ 349 U32 hashRateLog; /* Log number of entries to skip */ 350 U32 windowLog; /* Window log for the LDM */ 351 } ldmParams_t; 352 353 typedef struct { 354 int collectSequences; 355 ZSTD_Sequence* seqStart; 356 size_t seqIndex; 357 size_t maxSequences; 358 } SeqCollector; 359 360 struct ZSTD_CCtx_params_s { 361 ZSTD_format_e format; 362 ZSTD_compressionParameters cParams; 363 ZSTD_frameParameters fParams; 364 365 int compressionLevel; 366 int forceWindow; /* force back-references to respect limit of 367 * 1<<wLog, even for dictionary */ 368 size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. 369 * No target when targetCBlockSize == 0. 370 * There is no guarantee on compressed block size */ 371 int srcSizeHint; /* User's best guess of source size. 372 * Hint is not valid when srcSizeHint == 0. 373 * There is no guarantee that hint is close to actual source size */ 374 375 ZSTD_dictAttachPref_e attachDictPref; 376 ZSTD_ParamSwitch_e literalCompressionMode; 377 378 /* Multithreading: used to pass parameters to mtctx */ 379 int nbWorkers; 380 size_t jobSize; 381 int overlapLog; 382 int rsyncable; 383 384 /* Long distance matching parameters */ 385 ldmParams_t ldmParams; 386 387 /* Dedicated dict search algorithm trigger */ 388 int enableDedicatedDictSearch; 389 390 /* Input/output buffer modes */ 391 ZSTD_bufferMode_e inBufferMode; 392 ZSTD_bufferMode_e outBufferMode; 393 394 /* Sequence compression API */ 395 ZSTD_SequenceFormat_e blockDelimiters; 396 int validateSequences; 397 398 /* Block splitting 399 * @postBlockSplitter executes split analysis after sequences are produced, 400 * it's more accurate but consumes more resources. 401 * @preBlockSplitter_level splits before knowing sequences, 402 * it's more approximative but also cheaper. 403 * Valid @preBlockSplitter_level values range from 0 to 6 (included). 404 * 0 means auto, 1 means do not split, 405 * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest). 406 * Highest @preBlockSplitter_level combines well with @postBlockSplitter. 407 */ 408 ZSTD_ParamSwitch_e postBlockSplitter; 409 int preBlockSplitter_level; 410 411 /* Adjust the max block size*/ 412 size_t maxBlockSize; 413 414 /* Param for deciding whether to use row-based matchfinder */ 415 ZSTD_ParamSwitch_e useRowMatchFinder; 416 417 /* Always load a dictionary in ext-dict mode (not prefix mode)? 
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;

    /* Controls prefetching in some dictMatchState matchfinders */
    ZSTD_ParamSwitch_e prefetchCDictTables;

    /* Controls whether zstd will fall back to an internal matchfinder
     * if the external matchfinder returns an error code. */
    int enableMatchFinderFallback;

    /* Parameters for the external sequence producer API.
     * Users set these parameters through ZSTD_registerSequenceProducer().
     * It is not possible to set these parameters individually through the public API. */
    void* extSeqProdState;
    ZSTD_sequenceProducer_F extSeqProdFunc;

    /* Controls repcode search in external sequence parsing */
    ZSTD_ParamSwitch_e searchForExternalRepcodes;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))

/*
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/*
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    SeqStore_t fullSeqStoreChunk;
    SeqStore_t firstHalfSeqStore;
    SeqStore_t secondHalfSeqStore;
    SeqStore_t currSeqStore;
    SeqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSizeMax;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    struct xxh64_state xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    SeqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    void* tmpWorkspace;  /* used as substitute of stack space - must be aligned for S64 type */
    size_t tmpWkspSize;

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */

    /* Tracing */

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;

    /* Buffer for output from external sequence producer */
    ZSTD_Sequence* extSeqBuf;
    size_t extSeqBufCapacity;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_CParamMode_e;

typedef size_t (*ZSTD_BlockCompressor_f) (
        ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because it's the format it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_selectAddr:
 * @return index >= lowLimit ? candidate : backup,
 * tries to force branchless codegen. */
MEM_STATIC const BYTE*
ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
{
#if defined(__x86_64__)
    __asm__ (
        "cmp %1, %2\n"
        "cmova %3, %0\n"
        : "+r"(candidate)
        : "r"(index), "r"(lowLimit), "r"(backup)
        );
    return candidate;
#else
    return index >= lowLimit ? candidate : backup;
#endif
}
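/* Worked example (informative only) of the length-code mapping above :
 * - litLength = 63    -> LL_Code[63] = 24                       (last table entry)
 * - litLength = 100   -> ZSTD_highbit32(100) = 6, 6 + 19 = 25   (code 25 covers 64-127)
 * - matchLength = 200 -> mlBase = 200 - MINMATCH = 197,
 *                        ZSTD_highbit32(197) = 7, 7 + 36 = 43   (code 43 covers mlBase 128-255)
 * These codes are what ZSTD_seqToCodes() writes into seqStore->llCode / mlCode.
 */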
/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}

MEM_STATIC size_t
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
    return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}


#define REPCODE1_TO_OFFBASE  REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE  REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE  REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
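/* Worked example (informative only) of the offBase sum-type, with ZSTD_REP_NUM == 3 :
 * - a real match offset of 100 is stored as OFFSET_TO_OFFBASE(100) == 103,
 *   and decoded back with OFFBASE_TO_OFFSET(103) == 100 since OFFBASE_IS_OFFSET(103) holds;
 * - repcode 1 is stored as REPCODE1_TO_OFFBASE == 1, for which OFFBASE_IS_REPCODE(1) holds
 *   and OFFBASE_TO_REPCODE(1) == 1.
 * This single-U32 representation is what ZSTD_storeSeq() and ZSTD_updateRep() expect.
 */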
/*! ZSTD_storeSeqOnly() :
 *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
 *  Literals themselves are not copied, but @litPtr is updated.
 *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
 *  @matchLength : must be >= MINMATCH
 */
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
                  size_t litLength,
                  U32 offBase,
                  size_t matchLength)
{
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);

    /* literal Length */
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (UNLIKELY(litLength>0xFFFF)) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = offBase;

    /* match Length */
    assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (UNLIKELY(mlBase>0xFFFF)) {
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}
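/* Illustrative usage sketch (informative only) : how a matchfinder would record
 * "5 literals, then a 12-byte match at offset 1024" when literals are handled separately :
 *
 *   // seqStore assumed valid, with room for one more sequence
 *   ZSTD_storeSeqOnly(&cctx->seqStore,
 *                     5,                          // litLength
 *                     OFFSET_TO_OFFBASE(1024),    // offBase sum-type
 *                     12);                        // matchLength, >= MINMATCH
 *
 * ZSTD_storeSeq() below does the same, plus copying the literals into seqStore->lit.
 */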
791 */ 792 ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); 793 ZSTD_copy16(seqStorePtr->lit, literals); 794 if (litLength > 16) { 795 ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); 796 } 797 } else { 798 ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); 799 } 800 seqStorePtr->lit += litLength; 801 802 ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength); 803 } 804 805 /* ZSTD_updateRep() : 806 * updates in-place @rep (array of repeat offsets) 807 * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() 808 */ 809 MEM_STATIC void 810 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 811 { 812 if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ 813 rep[2] = rep[1]; 814 rep[1] = rep[0]; 815 rep[0] = OFFBASE_TO_OFFSET(offBase); 816 } else { /* repcode */ 817 U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; 818 if (repCode > 0) { /* note : if repCode==0, no change */ 819 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 820 rep[2] = (repCode >= 2) ? rep[1] : rep[2]; 821 rep[1] = rep[0]; 822 rep[0] = currentOffset; 823 } else { /* repCode == 0 */ 824 /* nothing to do */ 825 } 826 } 827 } 828 829 typedef struct repcodes_s { 830 U32 rep[3]; 831 } Repcodes_t; 832 833 MEM_STATIC Repcodes_t 834 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 835 { 836 Repcodes_t newReps; 837 ZSTD_memcpy(&newReps, rep, sizeof(newReps)); 838 ZSTD_updateRep(newReps.rep, offBase, ll0); 839 return newReps; 840 } 841 842 843 /*-************************************* 844 * Match length counter 845 ***************************************/ 846 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) 847 { 848 const BYTE* const pStart = pIn; 849 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); 850 851 if (pIn < pInLoopLimit) { 852 { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 853 if (diff) return ZSTD_NbCommonBytes(diff); } 854 pIn+=sizeof(size_t); pMatch+=sizeof(size_t); 855 while (pIn < pInLoopLimit) { 856 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 857 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } 858 pIn += ZSTD_NbCommonBytes(diff); 859 return (size_t)(pIn - pStart); 860 } } 861 if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } 862 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } 863 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; 864 return (size_t)(pIn - pStart); 865 } 866 867 /* ZSTD_count_2segments() : 868 * can count match length with `ip` & `match` in 2 different segments. 
/*-*************************************
*  Match length counter
***************************************/
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}

/* ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
    DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
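/* Illustrative sketch (not part of the API) : how these hashes are typically consumed.
 * The "Ptr" variants read mls bytes at ip and fold them into hBits bits, which
 * directly index a hash table of (1 << hBits) entries, e.g. in a fast-style matchfinder :
 *
 *   // hashLog, hashTable and base assumed to come from the match state
 *   size_t const h = ZSTD_hash6Ptr(ip, hashLog);      // 6-byte minimum match
 *   U32 const matchIndex = hashTable[h];              // candidate position
 *   hashTable[h] = (U32)(ip - base);                  // insert current position
 *
 * The multiply-then-shift folding spreads nearby inputs across the table, and the
 * salted "PtrS" variants let the row-based matchfinder reuse its tag table safely.
 */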
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    /* Although some of these hashes do support hBits up to 64, some do not.
     * To be on the safe side, always avoid hBits > 32. */
    assert(hBits <= 32);

    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
    /* Although some of these hashes do support hBits up to 64, some do not.
     * To be on the safe side, always avoid hBits > 32. */
    assert(hBits <= 32);

    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
    case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
    case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
    case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
    case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
    }
}


/* ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/* ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/* ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/* ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/* ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
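/* Illustrative sketch (informative only) : the invariant these rolling-hash helpers maintain.
 * Sliding a window of `len` bytes one position to the right is equivalent to recomputing
 * the hash from scratch over the new window :
 *
 *   // buf assumed to hold at least len + 1 bytes
 *   U64 const pp = ZSTD_rollingHash_primePower(len);
 *   U64 h = ZSTD_rollingHash_compute(buf, len);               // hash of buf[0..len)
 *   h = ZSTD_rollingHash_rotate(h, buf[0], buf[len], pp);     // drop buf[0], add buf[len]
 *   assert(h == ZSTD_rollingHash_compute(buf + 1, len));      // same as hashing buf[1..len+1)
 *
 * This is what lets a consumer slide a fixed-size window over the input
 * with O(1) work per byte instead of rehashing the whole window.
 */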
/*-*************************************
*  Round buffer management
***************************************/
/* Max @current value allowed:
 * In 32-bit mode: we want to avoid crossing the 2 GB limit,
 * reducing risks of side effects in case of signed operations on indexes.
 * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
 * doesn't overflow U32 index capacity (4 GB) */
#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/*
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/*
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/*
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/*
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}
1123 */ 1124 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, 1125 U32 cycleLog, 1126 U32 maxDist, 1127 U32 loadedDictEnd, 1128 void const* src, 1129 void const* srcEnd) 1130 { 1131 U32 const curr = (U32)((BYTE const*)srcEnd - window.base); 1132 if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 1133 if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { 1134 return 1; 1135 } 1136 } 1137 return curr > ZSTD_CURRENT_MAX; 1138 } 1139 1140 /* 1141 * ZSTD_window_correctOverflow(): 1142 * Reduces the indices to protect from index overflow. 1143 * Returns the correction made to the indices, which must be applied to every 1144 * stored index. 1145 * 1146 * The least significant cycleLog bits of the indices must remain the same, 1147 * which may be 0. Every index up to maxDist in the past must be valid. 1148 */ 1149 MEM_STATIC 1150 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1151 U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, 1152 U32 maxDist, void const* src) 1153 { 1154 /* preemptive overflow correction: 1155 * 1. correction is large enough: 1156 * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog 1157 * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog 1158 * 1159 * current - newCurrent 1160 * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog) 1161 * > (3<<29) - (1<<chainLog) 1162 * > (3<<29) - (1<<30) (NOTE: chainLog <= 30) 1163 * > 1<<29 1164 * 1165 * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: 1166 * After correction, current is less than (1<<chainLog + 1<<windowLog). 1167 * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t. 1168 * In 32-bit mode we are safe, because (chainLog <= 29), so 1169 * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32. 1170 * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: 1171 * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. 1172 */ 1173 U32 const cycleSize = 1u << cycleLog; 1174 U32 const cycleMask = cycleSize - 1; 1175 U32 const curr = (U32)((BYTE const*)src - window->base); 1176 U32 const currentCycle = curr & cycleMask; 1177 /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ 1178 U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX 1179 ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) 1180 : 0; 1181 U32 const newCurrent = currentCycle 1182 + currentCycleCorrection 1183 + MAX(maxDist, cycleSize); 1184 U32 const correction = curr - newCurrent; 1185 /* maxDist must be a power of two so that: 1186 * (newCurrent & cycleMask) == (curr & cycleMask) 1187 * This is required to not corrupt the chains / binary tree. 1188 */ 1189 assert((maxDist & (maxDist - 1)) == 0); 1190 assert((curr & cycleMask) == (newCurrent & cycleMask)); 1191 assert(curr > newCurrent); 1192 if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 1193 /* Loose bound, should be around 1<<29 (see above) */ 1194 assert(correction > 1<<28); 1195 } 1196 1197 window->base += correction; 1198 window->dictBase += correction; 1199 if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { 1200 window->lowLimit = ZSTD_WINDOW_START_INDEX; 1201 } else { 1202 window->lowLimit -= correction; 1203 } 1204 if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { 1205 window->dictLimit = ZSTD_WINDOW_START_INDEX; 1206 } else { 1207 window->dictLimit -= correction; 1208 } 1209 1210 /* Ensure we can still reference the full window. 
     */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
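/* Worked example (informative only) of the correction arithmetic above.
 * Assume cycleLog = 20 (cycleSize = 1 MB), maxDist = 8 MB (windowLog 23), and
 * curr = 0xE0000123 :
 *   currentCycle           = curr & 0xFFFFF           = 0x00123
 *   currentCycleCorrection = 0       (currentCycle >= ZSTD_WINDOW_START_INDEX)
 *   newCurrent             = 0x00123 + 8 MB            = 0x800123
 *   correction             = curr - newCurrent         = 0xDF800000
 * Every stored index (hash tables, chains, lowLimit, dictLimit) then has `correction`
 * subtracted, which preserves the low cycleLog bits and keeps the last maxDist bytes
 * addressable.
 */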
/*
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_MatchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_MatchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             *
             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
             * dictMatchState, so setting it to NULL is not a problem.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
/*
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_update(ZSTD_window_t* window,
                 const void* src, size_t srcSize,
                       int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
        U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        assert(highInputIdx < UINT_MAX);
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/*
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
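/* Worked example (informative only) for ZSTD_getLowestMatchIndex().
 * With windowLog = 20 (maxDistance = 1 MB), lowLimit = 100, curr = 5,000,000 and no
 * dictionary loaded : curr - lowLimit exceeds 1 MB, so the lowest allowed match index
 * is curr - maxDistance = 3,951,424 rather than lowLimit. With a still-valid dictionary
 * (loadedDictEnd != 0), the function returns lowLimit = 100 instead, because the whole
 * dictionary remains referenceable until ZSTD_window_enforceMaxDist() invalidates it.
 */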
/* ZSTD_index_overlap_check() :
 * intentional underflow : ensure repIndex isn't overlapping dict + prefix
 * @return 1 if values are not overlapping,
 *         0 otherwise */
MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
    return ((U32)((prefixLowestIndex-1) - repIndex) >= 3);
}


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table's content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif

/* Short Cache */

/* Normally, zstd matchfinders follow this flow:
 *     1. Compute hash at ip
 *     2. Load index from hashTable[hash]
 *     3. Check if *ip == *(base + index)
 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
 *
 * Short cache is an optimization which allows us to avoid step 3 most of the time
 * when the data doesn't actually match. With short cache, the flow becomes:
 *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
 *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
 *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
 *
 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
 * dictMatchState matchfinders.
 */
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)

/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
 * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
}

/* Helper function for short cache matchfinders.
 * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
    return tag1 == tag2;
}
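
/* Illustrative usage sketch (not part of the library) :
 * steps 2 and 3 of the short-cache flow described above, as seen from a
 * dictMatchState matchfinder. `hashAndTag` is assumed to carry the hash in its
 * upper bits and the 8-bit tag in its low ZSTD_SHORT_CACHE_TAG_BITS bits,
 * matching the layout unpacked by ZSTD_writeTaggedIndex(); the function name is
 * hypothetical. */
MEM_STATIC U32 ZSTD_example_shortCacheLookup(const U32* dictHashTable, size_t hashAndTag)
{
    U32 const packedEntry = dictHashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS];
    if (!ZSTD_comparePackedTags(hashAndTag, packedEntry))
        return 0;   /* tags differ : skip the expensive load of the dictionary bytes
                     * (0 simply means "no candidate" in this sketch) */
    /* tags agree : return the candidate index, which still must be verified
     * against the actual dictionary content */
    return packedEntry >> ZSTD_SHORT_CACHE_TAG_BITS;
}
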
/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number already checked by caller,
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

typedef struct {
    U32 idx;            /* Index in array of ZSTD_Sequence */
    U32 posInSequence;  /* Position within sequence at idx */
    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;

/* for benchmark */
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
                    const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                    int const repcodeResolution);

typedef struct {
    size_t nbSequences;
    size_t blockSize;
    size_t litSize;
} BlockSummary;

BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize,
                    const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(SeqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const void* dict, size_t dictSize,
                                       const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
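
/* Illustrative usage sketch (not part of the library) :
 * finishing a frame with an empty last block, following the contract documented
 * above. The function name and the output-buffer parameters are hypothetical. */
MEM_STATIC size_t ZSTD_example_closeFrame(void* op, size_t remainingCapacity)
{
    size_t const written = ZSTD_writeLastEmptyBlock(op, remainingCapacity);
    if (ZSTD_isError(written)) return written;   /* remainingCapacity < ZSTD_blockHeaderSize */
    assert(written == ZSTD_blockHeaderSize);     /* documented return value */
    return written;
}
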
/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/* ZSTD_cycleLog() :
 * condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/* ZSTD_CCtx_trace() :
 * Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
    return params->extSeqProdFunc != NULL;
}

/* ===============================================================
 * Deprecated definitions that are still used internally to avoid
 * deprecation warnings. These functions are exactly equivalent to
 * their public variants, but avoid the deprecation warnings.
 * =============================================================== */

size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);

size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize);

size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize);

size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);


#endif /* ZSTD_COMPRESS_H */