1 /* ****************************************************************** 2 hist : Histogram functions 3 part of Finite State Entropy project 4 Copyright (C) 2013-present, Yann Collet. 5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions are 10 met: 11 12 * Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 * Redistributions in binary form must reproduce the above 15 copyright notice, this list of conditions and the following disclaimer 16 in the documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 You can contact the author at : 32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 33 - Public forum : https://groups.google.com/forum/#!forum/lz4c 34 ****************************************************************** */ 35 36 /* --- dependencies --- */ 37 #include "mem.h" /* U32, BYTE, etc. */ 38 #include "debug.h" /* assert, DEBUGLOG */ 39 #include "error_private.h" /* ERROR */ 40 #include "hist.h" 41 42 43 /* --- Error management --- */ 44 unsigned HIST_isError(size_t code) { return ERR_isError(code); } 45 46 /*-************************************************************** 47 * Histogram functions 48 ****************************************************************/ 49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 50 const void* src, size_t srcSize) 51 { 52 const BYTE* ip = (const BYTE*)src; 53 const BYTE* const end = ip + srcSize; 54 unsigned maxSymbolValue = *maxSymbolValuePtr; 55 unsigned largestCount=0; 56 57 memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 58 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 59 60 while (ip<end) { 61 assert(*ip <= maxSymbolValue); 62 count[*ip++]++; 63 } 64 65 while (!count[maxSymbolValue]) maxSymbolValue--; 66 *maxSymbolValuePtr = maxSymbolValue; 67 68 { U32 s; 69 for (s=0; s<=maxSymbolValue; s++) 70 if (count[s] > largestCount) largestCount = count[s]; 71 } 72 73 return largestCount; 74 } 75 76 77 /* HIST_count_parallel_wksp() : 78 * store histogram into 4 intermediate tables, recombined at the end. 79 * this design makes better use of OoO cpus, 80 * and is noticeably faster when some values are heavily repeated. 81 * But it needs some additional workspace for intermediate tables. 82 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. 83 * @return : largest histogram frequency, 84 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ 85 static size_t HIST_count_parallel_wksp( 86 unsigned* count, unsigned* maxSymbolValuePtr, 87 const void* source, size_t sourceSize, 88 unsigned checkMax, 89 unsigned* const workSpace) 90 { 91 const BYTE* ip = (const BYTE*)source; 92 const BYTE* const iend = ip+sourceSize; 93 unsigned maxSymbolValue = *maxSymbolValuePtr; 94 unsigned max=0; 95 U32* const Counting1 = workSpace; 96 U32* const Counting2 = Counting1 + 256; 97 U32* const Counting3 = Counting2 + 256; 98 U32* const Counting4 = Counting3 + 256; 99 100 memset(workSpace, 0, 4*256*sizeof(unsigned)); 101 102 /* safety checks */ 103 if (!sourceSize) { 104 memset(count, 0, maxSymbolValue + 1); 105 *maxSymbolValuePtr = 0; 106 return 0; 107 } 108 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ 109 110 /* by stripes of 16 bytes */ 111 { U32 cached = MEM_read32(ip); ip += 4; 112 while (ip < iend-15) { 113 U32 c = cached; cached = MEM_read32(ip); ip += 4; 114 Counting1[(BYTE) c ]++; 115 Counting2[(BYTE)(c>>8) ]++; 116 Counting3[(BYTE)(c>>16)]++; 117 Counting4[ c>>24 ]++; 118 c = cached; cached = MEM_read32(ip); ip += 4; 119 Counting1[(BYTE) c ]++; 120 Counting2[(BYTE)(c>>8) ]++; 121 Counting3[(BYTE)(c>>16)]++; 122 Counting4[ c>>24 ]++; 123 c = cached; cached = MEM_read32(ip); ip += 4; 124 Counting1[(BYTE) c ]++; 125 Counting2[(BYTE)(c>>8) ]++; 126 Counting3[(BYTE)(c>>16)]++; 127 Counting4[ c>>24 ]++; 128 c = cached; cached = MEM_read32(ip); ip += 4; 129 Counting1[(BYTE) c ]++; 130 Counting2[(BYTE)(c>>8) ]++; 131 Counting3[(BYTE)(c>>16)]++; 132 Counting4[ c>>24 ]++; 133 } 134 ip-=4; 135 } 136 137 /* finish last symbols */ 138 while (ip<iend) Counting1[*ip++]++; 139 140 if (checkMax) { /* verify stats will fit into destination table */ 141 U32 s; for (s=255; s>maxSymbolValue; s--) { 142 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 143 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); 144 } } 145 146 { U32 s; 147 if (maxSymbolValue > 255) maxSymbolValue = 255; 148 for (s=0; s<=maxSymbolValue; s++) { 149 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; 150 if (count[s] > max) max = count[s]; 151 } } 152 153 while (!count[maxSymbolValue]) maxSymbolValue--; 154 *maxSymbolValuePtr = maxSymbolValue; 155 return (size_t)max; 156 } 157 158 /* HIST_countFast_wksp() : 159 * Same as HIST_countFast(), but using an externally provided scratch buffer. 160 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 161 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 162 const void* source, size_t sourceSize, 163 unsigned* workSpace) 164 { 165 if (sourceSize < 1500) /* heuristic threshold */ 166 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 167 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); 168 } 169 170 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 171 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 172 const void* source, size_t sourceSize) 173 { 174 unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 175 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); 176 } 177 178 /* HIST_count_wksp() : 179 * Same as HIST_count(), but using an externally provided scratch buffer. 180 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 181 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 182 const void* source, size_t sourceSize, unsigned* workSpace) 183 { 184 if (*maxSymbolValuePtr < 255) 185 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); 186 *maxSymbolValuePtr = 255; 187 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); 188 } 189 190 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 191 const void* src, size_t srcSize) 192 { 193 unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 194 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); 195 } 196