xref: /src/sys/contrib/zstd/lib/compress/zstd_preSplit.c (revision c0d9a07101a1e72769ee0619a583f63a078fb391)
17e509d50SXin LI /*
27e509d50SXin LI  * Copyright (c) Meta Platforms, Inc. and affiliates.
37e509d50SXin LI  * All rights reserved.
47e509d50SXin LI  *
57e509d50SXin LI  * This source code is licensed under both the BSD-style license (found in the
67e509d50SXin LI  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
77e509d50SXin LI  * in the COPYING file in the root directory of this source tree).
87e509d50SXin LI  * You may select, at your option, one of the above-listed licenses.
97e509d50SXin LI  */
107e509d50SXin LI 
117e509d50SXin LI #include "../common/compiler.h" /* ZSTD_ALIGNOF */
127e509d50SXin LI #include "../common/mem.h" /* S64 */
137e509d50SXin LI #include "../common/zstd_deps.h" /* ZSTD_memset */
147e509d50SXin LI #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
157e509d50SXin LI #include "hist.h" /* HIST_add */
167e509d50SXin LI #include "zstd_preSplit.h"
177e509d50SXin LI 
187e509d50SXin LI 
197e509d50SXin LI #define BLOCKSIZE_MIN 3500
207e509d50SXin LI #define THRESHOLD_PENALTY_RATE 16
217e509d50SXin LI #define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
227e509d50SXin LI #define THRESHOLD_PENALTY 3
237e509d50SXin LI 
247e509d50SXin LI #define HASHLENGTH 2
257e509d50SXin LI #define HASHLOG_MAX 10
267e509d50SXin LI #define HASHTABLESIZE (1 << HASHLOG_MAX)
277e509d50SXin LI #define HASHMASK (HASHTABLESIZE - 1)
287e509d50SXin LI #define KNUTH 0x9e3779b9
297e509d50SXin LI 
307e509d50SXin LI /* for hashLog > 8, hash 2 bytes.
317e509d50SXin LI  * for hashLog == 8, just take the byte, no hashing.
327e509d50SXin LI  * The speed of this method relies on compile-time constant propagation */
hash2(const void * p,unsigned hashLog)337e509d50SXin LI FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
347e509d50SXin LI {
357e509d50SXin LI     assert(hashLog >= 8);
367e509d50SXin LI     if (hashLog == 8) return (U32)((const BYTE*)p)[0];
377e509d50SXin LI     assert(hashLog <= HASHLOG_MAX);
387e509d50SXin LI     return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
397e509d50SXin LI }
407e509d50SXin LI 
417e509d50SXin LI 
427e509d50SXin LI typedef struct {
437e509d50SXin LI   unsigned events[HASHTABLESIZE];
447e509d50SXin LI   size_t nbEvents;
457e509d50SXin LI } Fingerprint;
467e509d50SXin LI typedef struct {
477e509d50SXin LI     Fingerprint pastEvents;
487e509d50SXin LI     Fingerprint newEvents;
497e509d50SXin LI } FPStats;
507e509d50SXin LI 
initStats(FPStats * fpstats)517e509d50SXin LI static void initStats(FPStats* fpstats)
527e509d50SXin LI {
537e509d50SXin LI     ZSTD_memset(fpstats, 0, sizeof(FPStats));
547e509d50SXin LI }
557e509d50SXin LI 
567e509d50SXin LI FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint * fp,const void * src,size_t srcSize,size_t samplingRate,unsigned hashLog)577e509d50SXin LI addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
587e509d50SXin LI {
597e509d50SXin LI     const char* p = (const char*)src;
607e509d50SXin LI     size_t limit = srcSize - HASHLENGTH + 1;
617e509d50SXin LI     size_t n;
627e509d50SXin LI     assert(srcSize >= HASHLENGTH);
637e509d50SXin LI     for (n = 0; n < limit; n+=samplingRate) {
647e509d50SXin LI         fp->events[hash2(p+n, hashLog)]++;
657e509d50SXin LI     }
667e509d50SXin LI     fp->nbEvents += limit/samplingRate;
677e509d50SXin LI }
687e509d50SXin LI 
697e509d50SXin LI FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint * fp,const void * src,size_t srcSize,size_t samplingRate,unsigned hashLog)707e509d50SXin LI recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
717e509d50SXin LI {
727e509d50SXin LI     ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
737e509d50SXin LI     fp->nbEvents = 0;
747e509d50SXin LI     addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
757e509d50SXin LI }
767e509d50SXin LI 
777e509d50SXin LI typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
787e509d50SXin LI 
797e509d50SXin LI #define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
807e509d50SXin LI 
817e509d50SXin LI #define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
827e509d50SXin LI     static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
837e509d50SXin LI     {                                                                              \
847e509d50SXin LI         recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
857e509d50SXin LI     }
867e509d50SXin LI 
877e509d50SXin LI ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
887e509d50SXin LI ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
897e509d50SXin LI ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
907e509d50SXin LI ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
917e509d50SXin LI 
927e509d50SXin LI 
abs64(S64 s64)937e509d50SXin LI static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
947e509d50SXin LI 
fpDistance(const Fingerprint * fp1,const Fingerprint * fp2,unsigned hashLog)957e509d50SXin LI static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
967e509d50SXin LI {
977e509d50SXin LI     U64 distance = 0;
987e509d50SXin LI     size_t n;
997e509d50SXin LI     assert(hashLog <= HASHLOG_MAX);
1007e509d50SXin LI     for (n = 0; n < ((size_t)1 << hashLog); n++) {
1017e509d50SXin LI         distance +=
1027e509d50SXin LI             abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
1037e509d50SXin LI     }
1047e509d50SXin LI     return distance;
1057e509d50SXin LI }
1067e509d50SXin LI 
1077e509d50SXin LI /* Compare newEvents with pastEvents
1087e509d50SXin LI  * return 1 when considered "too different"
1097e509d50SXin LI  */
compareFingerprints(const Fingerprint * ref,const Fingerprint * newfp,int penalty,unsigned hashLog)1107e509d50SXin LI static int compareFingerprints(const Fingerprint* ref,
1117e509d50SXin LI                             const Fingerprint* newfp,
1127e509d50SXin LI                             int penalty,
1137e509d50SXin LI                             unsigned hashLog)
1147e509d50SXin LI {
1157e509d50SXin LI     assert(ref->nbEvents > 0);
1167e509d50SXin LI     assert(newfp->nbEvents > 0);
1177e509d50SXin LI     {   U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
1187e509d50SXin LI         U64 deviation = fpDistance(ref, newfp, hashLog);
1197e509d50SXin LI         U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
1207e509d50SXin LI         return deviation >= threshold;
1217e509d50SXin LI     }
1227e509d50SXin LI }
1237e509d50SXin LI 
mergeEvents(Fingerprint * acc,const Fingerprint * newfp)1247e509d50SXin LI static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)
1257e509d50SXin LI {
1267e509d50SXin LI     size_t n;
1277e509d50SXin LI     for (n = 0; n < HASHTABLESIZE; n++) {
1287e509d50SXin LI         acc->events[n] += newfp->events[n];
1297e509d50SXin LI     }
1307e509d50SXin LI     acc->nbEvents += newfp->nbEvents;
1317e509d50SXin LI }
1327e509d50SXin LI 
flushEvents(FPStats * fpstats)1337e509d50SXin LI static void flushEvents(FPStats* fpstats)
1347e509d50SXin LI {
1357e509d50SXin LI     size_t n;
1367e509d50SXin LI     for (n = 0; n < HASHTABLESIZE; n++) {
1377e509d50SXin LI         fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];
1387e509d50SXin LI     }
1397e509d50SXin LI     fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;
1407e509d50SXin LI     ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
1417e509d50SXin LI }
1427e509d50SXin LI 
removeEvents(Fingerprint * acc,const Fingerprint * slice)1437e509d50SXin LI static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
1447e509d50SXin LI {
1457e509d50SXin LI     size_t n;
1467e509d50SXin LI     for (n = 0; n < HASHTABLESIZE; n++) {
1477e509d50SXin LI         assert(acc->events[n] >= slice->events[n]);
1487e509d50SXin LI         acc->events[n] -= slice->events[n];
1497e509d50SXin LI     }
1507e509d50SXin LI     acc->nbEvents -= slice->nbEvents;
1517e509d50SXin LI }
1527e509d50SXin LI 
1537e509d50SXin LI #define CHUNKSIZE (8 << 10)
ZSTD_splitBlock_byChunks(const void * blockStart,size_t blockSize,int level,void * workspace,size_t wkspSize)1547e509d50SXin LI static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
1557e509d50SXin LI                         int level,
1567e509d50SXin LI                         void* workspace, size_t wkspSize)
1577e509d50SXin LI {
1587e509d50SXin LI     static const RecordEvents_f records_fs[] = {
1597e509d50SXin LI         FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
1607e509d50SXin LI     };
1617e509d50SXin LI     static const unsigned hashParams[] = { 8, 9, 10, 10 };
1627e509d50SXin LI     const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
1637e509d50SXin LI     FPStats* const fpstats = (FPStats*)workspace;
1647e509d50SXin LI     const char* p = (const char*)blockStart;
1657e509d50SXin LI     int penalty = THRESHOLD_PENALTY;
1667e509d50SXin LI     size_t pos = 0;
1677e509d50SXin LI     assert(blockSize == (128 << 10));
1687e509d50SXin LI     assert(workspace != NULL);
1697e509d50SXin LI     assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
1707e509d50SXin LI     ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
1717e509d50SXin LI     assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
1727e509d50SXin LI 
1737e509d50SXin LI     initStats(fpstats);
1747e509d50SXin LI     record_f(&fpstats->pastEvents, p, CHUNKSIZE);
1757e509d50SXin LI     for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
1767e509d50SXin LI         record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
1777e509d50SXin LI         if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
1787e509d50SXin LI             return pos;
1797e509d50SXin LI         } else {
1807e509d50SXin LI             mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
1817e509d50SXin LI             if (penalty > 0) penalty--;
1827e509d50SXin LI         }
1837e509d50SXin LI     }
1847e509d50SXin LI     assert(pos == blockSize);
1857e509d50SXin LI     return blockSize;
1867e509d50SXin LI     (void)flushEvents; (void)removeEvents;
1877e509d50SXin LI }
1887e509d50SXin LI 
1897e509d50SXin LI /* ZSTD_splitBlock_fromBorders(): very fast strategy :
1907e509d50SXin LI  * compare fingerprint from beginning and end of the block,
1917e509d50SXin LI  * derive from their difference if it's preferable to split in the middle,
1927e509d50SXin LI  * repeat the process a second time, for finer grained decision.
1937e509d50SXin LI  * 3 times did not brought improvements, so I stopped at 2.
1947e509d50SXin LI  * Benefits are good enough for a cheap heuristic.
1957e509d50SXin LI  * More accurate splitting saves more, but speed impact is also more perceptible.
1967e509d50SXin LI  * For better accuracy, use more elaborate variant *_byChunks.
1977e509d50SXin LI  */
ZSTD_splitBlock_fromBorders(const void * blockStart,size_t blockSize,void * workspace,size_t wkspSize)1987e509d50SXin LI static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
1997e509d50SXin LI                         void* workspace, size_t wkspSize)
2007e509d50SXin LI {
2017e509d50SXin LI #define SEGMENT_SIZE 512
2027e509d50SXin LI     FPStats* const fpstats = (FPStats*)workspace;
2037e509d50SXin LI     Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
2047e509d50SXin LI     assert(blockSize == (128 << 10));
2057e509d50SXin LI     assert(workspace != NULL);
2067e509d50SXin LI     assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
2077e509d50SXin LI     ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
2087e509d50SXin LI     assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
2097e509d50SXin LI 
2107e509d50SXin LI     initStats(fpstats);
2117e509d50SXin LI     HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
2127e509d50SXin LI     HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
2137e509d50SXin LI     fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
2147e509d50SXin LI     if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
2157e509d50SXin LI         return blockSize;
2167e509d50SXin LI 
2177e509d50SXin LI     HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
2187e509d50SXin LI     middleEvents->nbEvents = SEGMENT_SIZE;
2197e509d50SXin LI     {   U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
2207e509d50SXin LI         U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
2217e509d50SXin LI         U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
2227e509d50SXin LI         if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
2237e509d50SXin LI             return 64 KB;
2247e509d50SXin LI         return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
2257e509d50SXin LI     }
2267e509d50SXin LI }
2277e509d50SXin LI 
ZSTD_splitBlock(const void * blockStart,size_t blockSize,int level,void * workspace,size_t wkspSize)2287e509d50SXin LI size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
2297e509d50SXin LI                     int level,
2307e509d50SXin LI                     void* workspace, size_t wkspSize)
2317e509d50SXin LI {
2327e509d50SXin LI     DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
2337e509d50SXin LI     assert(0<=level && level<=4);
2347e509d50SXin LI     if (level == 0)
2357e509d50SXin LI         return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
2367e509d50SXin LI     /* level >= 1*/
2377e509d50SXin LI     return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
2387e509d50SXin LI }
239