1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 /* ****************************************************************** 3 * hist : Histogram functions 4 * part of Finite State Entropy project 5 * Copyright (c) Meta Platforms, Inc. and affiliates. 6 * 7 * You can contact the author at : 8 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 9 * - Public forum : https://groups.google.com/forum/#!forum/lz4c 10 * 11 * This source code is licensed under both the BSD-style license (found in the 12 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 13 * in the COPYING file in the root directory of this source tree). 14 * You may select, at your option, one of the above-listed licenses. 15 ****************************************************************** */ 16 17 /* --- dependencies --- */ 18 #include "../common/mem.h" /* U32, BYTE, etc. */ 19 #include "../common/debug.h" /* assert, DEBUGLOG */ 20 #include "../common/error_private.h" /* ERROR */ 21 #include "hist.h" 22 23 24 /* --- Error management --- */ 25 unsigned HIST_isError(size_t code) { return ERR_isError(code); } 26 27 /*-************************************************************** 28 * Histogram functions 29 ****************************************************************/ 30 void HIST_add(unsigned* count, const void* src, size_t srcSize) 31 { 32 const BYTE* ip = (const BYTE*)src; 33 const BYTE* const end = ip + srcSize; 34 35 while (ip<end) { 36 count[*ip++]++; 37 } 38 } 39 40 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 41 const void* src, size_t srcSize) 42 { 43 const BYTE* ip = (const BYTE*)src; 44 const BYTE* const end = ip + srcSize; 45 unsigned maxSymbolValue = *maxSymbolValuePtr; 46 unsigned largestCount=0; 47 48 ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 49 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 50 51 while (ip<end) { 52 assert(*ip <= maxSymbolValue); 53 count[*ip++]++; 54 } 55 56 while (!count[maxSymbolValue]) maxSymbolValue--; 57 *maxSymbolValuePtr = maxSymbolValue; 58 59 { U32 s; 60 for (s=0; s<=maxSymbolValue; s++) 61 if (count[s] > largestCount) largestCount = count[s]; 62 } 63 64 return largestCount; 65 } 66 67 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; 68 69 /* HIST_count_parallel_wksp() : 70 * store histogram into 4 intermediate tables, recombined at the end. 71 * this design makes better use of OoO cpus, 72 * and is noticeably faster when some values are heavily repeated. 73 * But it needs some additional workspace for intermediate tables. 74 * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. 75 * @return : largest histogram frequency, 76 * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ 77 static size_t HIST_count_parallel_wksp( 78 unsigned* count, unsigned* maxSymbolValuePtr, 79 const void* source, size_t sourceSize, 80 HIST_checkInput_e check, 81 U32* const workSpace) 82 { 83 const BYTE* ip = (const BYTE*)source; 84 const BYTE* const iend = ip+sourceSize; 85 size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); 86 unsigned max=0; 87 U32* const Counting1 = workSpace; 88 U32* const Counting2 = Counting1 + 256; 89 U32* const Counting3 = Counting2 + 256; 90 U32* const Counting4 = Counting3 + 256; 91 92 /* safety checks */ 93 assert(*maxSymbolValuePtr <= 255); 94 if (!sourceSize) { 95 ZSTD_memset(count, 0, countSize); 96 *maxSymbolValuePtr = 0; 97 return 0; 98 } 99 ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); 100 101 /* by stripes of 16 bytes */ 102 { U32 cached = MEM_read32(ip); ip += 4; 103 while (ip < iend-15) { 104 U32 c = cached; cached = MEM_read32(ip); ip += 4; 105 Counting1[(BYTE) c ]++; 106 Counting2[(BYTE)(c>>8) ]++; 107 Counting3[(BYTE)(c>>16)]++; 108 Counting4[ c>>24 ]++; 109 c = cached; cached = MEM_read32(ip); ip += 4; 110 Counting1[(BYTE) c ]++; 111 Counting2[(BYTE)(c>>8) ]++; 112 Counting3[(BYTE)(c>>16)]++; 113 Counting4[ c>>24 ]++; 114 c = cached; cached = MEM_read32(ip); ip += 4; 115 Counting1[(BYTE) c ]++; 116 Counting2[(BYTE)(c>>8) ]++; 117 Counting3[(BYTE)(c>>16)]++; 118 Counting4[ c>>24 ]++; 119 c = cached; cached = MEM_read32(ip); ip += 4; 120 Counting1[(BYTE) c ]++; 121 Counting2[(BYTE)(c>>8) ]++; 122 Counting3[(BYTE)(c>>16)]++; 123 Counting4[ c>>24 ]++; 124 } 125 ip-=4; 126 } 127 128 /* finish last symbols */ 129 while (ip<iend) Counting1[*ip++]++; 130 131 { U32 s; 132 for (s=0; s<256; s++) { 133 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 134 if (Counting1[s] > max) max = Counting1[s]; 135 } } 136 137 { unsigned maxSymbolValue = 255; 138 while (!Counting1[maxSymbolValue]) maxSymbolValue--; 139 if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); 140 *maxSymbolValuePtr = maxSymbolValue; 141 ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ 142 } 143 return (size_t)max; 144 } 145 146 /* HIST_countFast_wksp() : 147 * Same as HIST_countFast(), but using an externally provided scratch buffer. 148 * `workSpace` is a writable buffer which must be 4-bytes aligned, 149 * `workSpaceSize` must be >= HIST_WKSP_SIZE 150 */ 151 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 152 const void* source, size_t sourceSize, 153 void* workSpace, size_t workSpaceSize) 154 { 155 if (sourceSize < 1500) /* heuristic threshold */ 156 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 157 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 158 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 159 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); 160 } 161 162 /* HIST_count_wksp() : 163 * Same as HIST_count(), but using an externally provided scratch buffer. 164 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 165 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 166 const void* source, size_t sourceSize, 167 void* workSpace, size_t workSpaceSize) 168 { 169 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ 170 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); 171 if (*maxSymbolValuePtr < 255) 172 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); 173 *maxSymbolValuePtr = 255; 174 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); 175 } 176 177