// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "zstd_double_fast.h"

#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty. */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0) {
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
            }
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
            }
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}
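
/* Layout sketch of a tagged table entry, for illustration only: assuming the
 * current 8-bit tag width (ZSTD_SHORT_CACHE_TAG_BITS == 8),
 * ZSTD_writeTaggedIndex() packs (index << 8) | (hashAndTag & 0xFF) into a
 * single U32, so a later lookup can reject most false candidates by comparing
 * the 8-bit tag before touching the dictionary content:
 *
 *     U32 const entry    = hashLarge[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS];
 *     U32 const index    = entry >> ZSTD_SHORT_CACHE_TAG_BITS;
 *     int const tagMatch = ZSTD_comparePackedTags(entry, hashAndTag);
 *
 * The exact packing lives in zstd_compress_internal.h; this sketch only
 * reflects the current constants. */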

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms,
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashLarge = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32  const mls = cParams->minMatch;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* ip = base + ms->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const U32 fastHashFillStep = 3;

    /* Always insert every fastHashFillStep position into the hash tables.
     * Insert the other positions into the large hash table if their entry
     * is empty. */
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
        U32 const curr = (U32)(ip - base);
        U32 i;
        for (i = 0; i < fastHashFillStep; ++i) {
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
            if (i == 0)
                hashSmall[smHash] = curr + i;
            if (i == 0 || hashLarge[lgHash] == 0)
                hashLarge[lgHash] = curr + i;
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
    }   }
}

void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp == ZSTD_tfp_forCDict) {
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
    } else {
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
    }
}
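
/* Fill schedule shared by both loaders above, spelled out for illustration:
 * with fastHashFillStep == 3 and ZSTD_dtlm_full, each group of positions is
 * handled as
 *
 *     position p+0 : always written to both tables
 *     position p+1 : written to the large table only if its slot is empty
 *     position p+2 : written to the large table only if its slot is empty
 *
 * With ZSTD_dtlm_fast, the inner loop breaks after p+0, so only every third
 * position is indexed at all. */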

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize, U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];
    U32 offsetSaved1 = 0, offsetSaved2 = 0;

    size_t mLength;
    U32 offset;
    U32 curr;

    /* how many positions to search before increasing step size */
    const size_t kStepIncr = 1 << kSearchStrength;
    /* the position at which to increment the step size if no match is found */
    const BYTE* nextStep;
    size_t step; /* the current step size */

    size_t hl0; /* the long hash at ip */
    size_t hl1; /* the long hash at ip1 */

    U32 idxl0; /* the long match index for ip */
    U32 idxl1; /* the long match index for ip1 */

    const BYTE* matchl0; /* the long match for ip */
    const BYTE* matchs0; /* the short match for ip */
    const BYTE* matchl1; /* the long match for ip1 */
    const BYTE* matchs0_safe; /* matchs0 or safe address */

    const BYTE* ip = istart; /* the current position */
    const BYTE* ip1; /* the next position */
    /* Array of ~random data; it should have a low probability of matching
     * actual data. We load from here instead of from the tables when
     * matchl0/matchl1 are invalid indices, to avoid unpredictable branches. */
    const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");

    /* init */
    ip += ((ip - prefixLowest) == 0);
    {   U32 const current = (U32)(ip - base);
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
        U32 const maxRep = current - windowLow;
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
    }

    /* Outer Loop: one iteration per match found and stored */
    while (1) {
        step = 1;
        nextStep = ip + kStepIncr;
        ip1 = ip + step;

        if (ip1 > ilimit) {
            goto _cleanup;
        }

        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
        idxl0 = hashLong[hl0];
        matchl0 = base + idxl0;

        /* Inner Loop: one iteration per search / position */
        do {
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
            const U32 idxs0 = hashSmall[hs0];
            curr = (U32)(ip-base);
            matchs0 = base + idxs0;

            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */

            /* check noDict repcode */
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                ip++;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
                goto _match_stored;
            }

            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);

            /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
             * However, the expression below compiles into a conditional move.
             * Since the match is unlikely, and we only *branch* on
             * idxl0 > prefixLowestIndex when there is a match, all branches
             * become predictable. */
            {   const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);

                /* check prefix long match */
                if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
                    offset = (U32)(ip-matchl0);
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
                    goto _match_found;
            }   }
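
            /* ZSTD_selectAddr(), sketched for illustration (the real helper
             * is in zstd_compress_internal.h and may use platform-specific
             * inline assembly): conceptually it evaluates
             *
             *     (idx > lowLimit) ? candidate : fallback
             *
             * in a form the compiler turns into a conditional move, with no
             * data-dependent branch. Reading 8 bytes from `dummy` is always
             * safe, so the match test runs unconditionally, and the validity
             * test `matchl0_safe == matchl0` only branches on a likely-false
             * condition. */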

            idxl1 = hashLong[hl1];
            matchl1 = base + idxl1;

            /* Same optimization as matchl0 above */
            matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]);

            /* check prefix short match */
            if (MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
                goto _search_next_long;
            }

            if (ip1 >= nextStep) {
                PREFETCH_L1(ip1 + 64);
                PREFETCH_L1(ip1 + 128);
                step++;
                nextStep += kStepIncr;
            }
            ip = ip1;
            ip1 += step;

            hl0 = hl1;
            idxl0 = idxl1;
            matchl0 = matchl1;
#if defined(__aarch64__)
            PREFETCH_L1(ip+256);
#endif
        } while (ip1 <= ilimit);

_cleanup:
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

        /* save reps for next block */
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;

        /* Return the last literals size */
        return (size_t)(iend - anchor);

_search_next_long:

        /* short match found: let's check for a longer one */
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
        offset = (U32)(ip - matchs0);

        /* check long match at +1 position */
        if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) {
            size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8;
            if (l1len > mLength) {
                /* use the long match instead */
                ip = ip1;
                mLength = l1len;
                offset = (U32)(ip-matchl1);
                matchs0 = matchl1;
            }
        }

        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */

        /* fall-through */

_match_found: /* requires ip, offset, mLength */
        offset_2 = offset_1;
        offset_1 = offset;

        if (step < 4) {
            /* It is unsafe to write this value back to the hashtable when ip1
             * is greater than or equal to the new ip we will have after we're
             * done processing this match. Rather than perform that test
             * directly (ip1 >= ip + mLength), which costs speed in practice,
             * we do a simpler, more predictable test. The minimum match
             * length is 4 bytes even when we take a short match, so as long
             * as step, the initial distance between ip and ip1, is less
             * than 4, we know ip1 < new ip. */
            hashLong[hl1] = (U32)(ip1 - base);
        }

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
        }
    }
}
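
/* Worked example for the offsetSaved rotation in the _cleanup path above,
 * with made-up numbers: suppose the block enters with rep = {8, 16} but only
 * 5 bytes of history exist, so both offsets are parked (offsetSaved1 = 8,
 * offsetSaved2 = 16) and zeroed. If the block then finds a real match with
 * offset 20, offset_1 becomes 20 while offset_2 receives the old (zeroed)
 * offset_1. Without the rotation, the reps would be restored as {20, 16};
 * rotating offsetSaved1 into offsetSaved2 restores {20, 8}, mirroring the
 * one-slot shift that a normal match applies to the rep history. */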

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    const U32 hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    const U32 hBitsS = cParams->chainLog;
    const BYTE* const base = ms->window.base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    /* presumes that, if there is a dictionary, it must be using Attach mode */
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=rep[0], offset_2=rep[1];

    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
    const U32* const dictHashLong  = dms->hashTable;
    const U32* const dictHashSmall = dms->chainTable;
    const U32 dictStartIndex       = dms->window.dictLimit;
    const BYTE* const dictBase     = dms->window.base;
    const BYTE* const dictStart    = dictBase + dictStartIndex;
    const BYTE* const dictEnd      = dms->window.nextSrc;
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");

    /* if a dictionary is attached, it must be within window range */
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

    if (ms->prefetchCDictTables) {
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
        PREFETCH_AREA(dictHashLong, hashTableBytes);
        PREFETCH_AREA(dictHashSmall, chainTableBytes);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);

    /* dictMatchState repCode checks don't currently handle repCode == 0
     * disabling. */
    assert(offset_1 <= dictAndPrefixLength);
    assert(offset_2 <= dictAndPrefixLength);
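
    /* Index-space layout used below, for illustration: dictionary content and
     * the current window share one U32 index space. An index i refers to the
     * dictionary segment when i < prefixLowestIndex, in which case the byte
     * lives at dictBase + (i - dictIndexDelta); otherwise it lives at
     * base + i. For example, with prefixLowestIndex == 1000 and a 1000-byte
     * dictionary (so dictIndexDelta == 0), index 250 maps to dictBase + 250
     * while index 1250 maps to base + 1250. The numbers are made up; only the
     * mapping rule matters. */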

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 offset;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
        U32 const curr = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        const U32 repIndex = curr + 1 - offset_1;
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

        /* check repcode */
        if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
            goto _match_stored;
        }

        if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
            /* check prefix long match */
            mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
            offset = (U32)(ip-matchLong);
            while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
            goto _match_found;
        } else if (dictTagsMatchL) {
            /* check dictMatchState long match */
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
            assert(dictMatchL < dictEnd);

            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* short match candidate */
            if (MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
            }
        } else if (dictTagsMatchS) {
            /* check dictMatchState short match */
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
            match = dictBase + dictMatchIndexS;
            matchIndexS = dictMatchIndexS + dictIndexDelta;

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
#if defined(__aarch64__)
        PREFETCH_L1(ip+256);
#endif
        continue;
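
        /* Skip-rate sketch for the `ip += ((ip-anchor) >> kSearchStrength) + 1`
         * advance above, assuming kSearchStrength == 8 (its value in
         * zstd_compress_internal.h at the time of writing): the search moves
         * byte-by-byte for the first 256 bytes after the last match, then
         * 2 bytes at a time up to 512, and so on, so incompressible regions
         * cost roughly O(n) instead of a full search per position. */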

_search_next_long:
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
            const BYTE* matchL3 = base + matchIndexL3;
            hashLong[hl3] = curr + 1;

            /* check prefix long +1 match */
            if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
                mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
                ip++;
                offset = (U32)(ip-matchL3);
                while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                goto _match_found;
            } else if (dictTagsMatchL3) {
                /* check dict long +1 match */
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
                assert(dictMatchL3 < dictEnd);
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
                    ip++;
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (matchIndexS < prefixLowestIndex) {
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
            offset = (U32)(curr - matchIndexS);
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        } else {
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
            offset = (U32)(ip - match);
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
        }

_match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

_match_stored:
        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }
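
            /* The insertion pattern above, spelled out for illustration:
             * after a match at index curr of length mLength, the tables would
             * otherwise hold no entries between curr and the new ip. The four
             * writes index two positions near the start of the match (curr+2)
             * and two just before its end (ip-2 for the long table, ip-1 for
             * the short one), a cheap compromise between indexing every
             * skipped position and indexing none. */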

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
                        dictBase + repIndex2 - dictIndexDelta :
                        base + repIndex2;
                if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
            }
        }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
            void const* src, size_t srcSize)                                                             \
    {                                                                                                    \
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
    }

ZSTD_GEN_DFAST_FN(noDict, 4)
ZSTD_GEN_DFAST_FN(noDict, 5)
ZSTD_GEN_DFAST_FN(noDict, 6)
ZSTD_GEN_DFAST_FN(noDict, 7)

ZSTD_GEN_DFAST_FN(dictMatchState, 4)
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
ZSTD_GEN_DFAST_FN(dictMatchState, 7)


size_t ZSTD_compressBlock_doubleFast(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
    }
}


size_t ZSTD_compressBlock_doubleFast_dictMatchState(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    const U32 mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
    }
}
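
/* Why the mls template, for illustration: ZSTD_GEN_DFAST_FN stamps out one
 * specialization per minMatch value, so inside each FORCE_INLINE_TEMPLATE
 * body the compiler sees a compile-time constant, e.g.
 *
 *     ZSTD_hashPtr(ip, hBitsS, 4)   // hash width constant-folded per variant
 *
 * instead of a runtime-variable match length; the switch in the public entry
 * points above is the only place that pays for the dispatch. */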

static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
        U32 const mls /* template */)
{
    ZSTD_compressionParameters const* cParams = &ms->cParams;
    U32* const hashLong = ms->hashTable;
    U32  const hBitsL = cParams->hashLog;
    U32* const hashSmall = ms->chainTable;
    U32  const hBitsS = cParams->chainLog;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
    const U32   dictStartIndex = lowLimit;
    const U32   dictLimit = ms->window.dictLimit;
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    U32 offset_1=rep[0], offset_2=rep[1];

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
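
    /* Two-segment layout used below, for illustration:
     *
     *     dictStart ...... dictEnd           (old content, at dictBase + index)
     *                      prefixStart ...... iend   (current content, at base + index)
     *
     * Indices below prefixStartIndex resolve into the first segment, the rest
     * into the second; ZSTD_count_2segments() extends a match across the
     * boundary by continuing at prefixStart when it runs off dictEnd. */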

    /* Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 curr = (U32)(ip-base);
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr+1 */
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if (((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
                offset = curr - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = curr + 1;
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                    ip++;
                    offset = curr+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                    offset = curr - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
                    & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    rep[0] = offset_1;
    rep[1] = offset_2;

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

ZSTD_GEN_DFAST_FN(extDict, 4)
ZSTD_GEN_DFAST_FN(extDict, 5)
ZSTD_GEN_DFAST_FN(extDict, 6)
ZSTD_GEN_DFAST_FN(extDict, 7)

size_t ZSTD_compressBlock_doubleFast_extDict(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
    case 5 :
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
    case 6 :
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
    case 7 :
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
    }
}

#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */