1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
2 /* ******************************************************************
3  * hist : Histogram functions
4  * part of Finite State Entropy project
5  * Copyright (c) Meta Platforms, Inc. and affiliates.
6  *
7  *  You can contact the author at :
8  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
9  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
10  *
11  * This source code is licensed under both the BSD-style license (found in the
12  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
13  * in the COPYING file in the root directory of this source tree).
14  * You may select, at your option, one of the above-listed licenses.
15 ****************************************************************** */
16 
17 /* --- dependencies --- */
18 #include "../common/mem.h"             /* U32, BYTE, etc. */
19 #include "../common/debug.h"           /* assert, DEBUGLOG */
20 #include "../common/error_private.h"   /* ERROR */
21 #include "hist.h"
22 
23 
24 /* --- Error management --- */
25 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
26 
27 /*-**************************************************************
28  *  Histogram functions
29  ****************************************************************/
30 void HIST_add(unsigned* count, const void* src, size_t srcSize)
31 {
32     const BYTE* ip = (const BYTE*)src;
33     const BYTE* const end = ip + srcSize;
34 
35     while (ip<end) {
36         count[*ip++]++;
37     }
38 }
39 
40 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
41                            const void* src, size_t srcSize)
42 {
43     const BYTE* ip = (const BYTE*)src;
44     const BYTE* const end = ip + srcSize;
45     unsigned maxSymbolValue = *maxSymbolValuePtr;
46     unsigned largestCount=0;
47 
48     ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
49     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
50 
51     while (ip<end) {
52         assert(*ip <= maxSymbolValue);
53         count[*ip++]++;
54     }
55 
56     while (!count[maxSymbolValue]) maxSymbolValue--;
57     *maxSymbolValuePtr = maxSymbolValue;
58 
59     {   U32 s;
60         for (s=0; s<=maxSymbolValue; s++)
61             if (count[s] > largestCount) largestCount = count[s];
62     }
63 
64     return largestCount;
65 }
66 
67 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
68 
69 /* HIST_count_parallel_wksp() :
70  * store histogram into 4 intermediate tables, recombined at the end.
71  * this design makes better use of OoO cpus,
72  * and is noticeably faster when some values are heavily repeated.
73  * But it needs some additional workspace for intermediate tables.
74  * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
75  * @return : largest histogram frequency,
76  *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
77 static size_t HIST_count_parallel_wksp(
78                                 unsigned* count, unsigned* maxSymbolValuePtr,
79                                 const void* source, size_t sourceSize,
80                                 HIST_checkInput_e check,
81                                 U32* const workSpace)
82 {
83     const BYTE* ip = (const BYTE*)source;
84     const BYTE* const iend = ip+sourceSize;
85     size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
86     unsigned max=0;
87     U32* const Counting1 = workSpace;
88     U32* const Counting2 = Counting1 + 256;
89     U32* const Counting3 = Counting2 + 256;
90     U32* const Counting4 = Counting3 + 256;
91 
92     /* safety checks */
93     assert(*maxSymbolValuePtr <= 255);
94     if (!sourceSize) {
95         ZSTD_memset(count, 0, countSize);
96         *maxSymbolValuePtr = 0;
97         return 0;
98     }
99     ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
100 
101     /* by stripes of 16 bytes */
102     {   U32 cached = MEM_read32(ip); ip += 4;
103         while (ip < iend-15) {
104             U32 c = cached; cached = MEM_read32(ip); ip += 4;
105             Counting1[(BYTE) c     ]++;
106             Counting2[(BYTE)(c>>8) ]++;
107             Counting3[(BYTE)(c>>16)]++;
108             Counting4[       c>>24 ]++;
109             c = cached; cached = MEM_read32(ip); ip += 4;
110             Counting1[(BYTE) c     ]++;
111             Counting2[(BYTE)(c>>8) ]++;
112             Counting3[(BYTE)(c>>16)]++;
113             Counting4[       c>>24 ]++;
114             c = cached; cached = MEM_read32(ip); ip += 4;
115             Counting1[(BYTE) c     ]++;
116             Counting2[(BYTE)(c>>8) ]++;
117             Counting3[(BYTE)(c>>16)]++;
118             Counting4[       c>>24 ]++;
119             c = cached; cached = MEM_read32(ip); ip += 4;
120             Counting1[(BYTE) c     ]++;
121             Counting2[(BYTE)(c>>8) ]++;
122             Counting3[(BYTE)(c>>16)]++;
123             Counting4[       c>>24 ]++;
124         }
125         ip-=4;
126     }
127 
128     /* finish last symbols */
129     while (ip<iend) Counting1[*ip++]++;
130 
131     {   U32 s;
132         for (s=0; s<256; s++) {
133             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
134             if (Counting1[s] > max) max = Counting1[s];
135     }   }
136 
137     {   unsigned maxSymbolValue = 255;
138         while (!Counting1[maxSymbolValue]) maxSymbolValue--;
139         if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
140         *maxSymbolValuePtr = maxSymbolValue;
141         ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
142     }
143     return (size_t)max;
144 }
145 
146 /* HIST_countFast_wksp() :
147  * Same as HIST_countFast(), but using an externally provided scratch buffer.
148  * `workSpace` is a writable buffer which must be 4-bytes aligned,
149  * `workSpaceSize` must be >= HIST_WKSP_SIZE
150  */
151 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
152                           const void* source, size_t sourceSize,
153                           void* workSpace, size_t workSpaceSize)
154 {
155     if (sourceSize < 1500) /* heuristic threshold */
156         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
157     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
158     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
159     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
160 }
161 
162 /* HIST_count_wksp() :
163  * Same as HIST_count(), but using an externally provided scratch buffer.
164  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
165 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
166                        const void* source, size_t sourceSize,
167                        void* workSpace, size_t workSpaceSize)
168 {
169     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
170     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
171     if (*maxSymbolValuePtr < 255)
172         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
173     *maxSymbolValuePtr = 255;
174     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
175 }
176 
177