1 /* ****************************************************************** 2 hist : Histogram functions 3 part of Finite State Entropy project 4 Copyright (C) 2013-present, Yann Collet. 5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions are 10 met: 11 12 * Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 * Redistributions in binary form must reproduce the above 15 copyright notice, this list of conditions and the following disclaimer 16 in the documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 You can contact the author at : 32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 33 - Public forum : https://groups.google.com/forum/#!forum/lz4c 34 ****************************************************************** */ 35 36 /* --- dependencies --- */ 37 #include "mem.h" /* U32, BYTE, etc. */ 38 #include "debug.h" /* assert, DEBUGLOG */ 39 #include "error_private.h" /* ERROR */ 40 #include "hist.h" 41 #include <ntifs.h> 42 #include <ntddk.h> 43 44 #define HIST_ALLOC_TAG 0x54534948 // "HIST" 45 46 /* --- Error management --- */ 47 unsigned HIST_isError(size_t code) { return ERR_isError(code); } 48 49 /*-************************************************************** 50 * Histogram functions 51 ****************************************************************/ 52 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 53 const void* src, size_t srcSize) 54 { 55 const BYTE* ip = (const BYTE*)src; 56 const BYTE* const end = ip + srcSize; 57 unsigned maxSymbolValue = *maxSymbolValuePtr; 58 unsigned largestCount=0; 59 60 memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); 61 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } 62 63 while (ip<end) { 64 assert(*ip <= maxSymbolValue); 65 count[*ip++]++; 66 } 67 68 while (!count[maxSymbolValue]) maxSymbolValue--; 69 *maxSymbolValuePtr = maxSymbolValue; 70 71 { U32 s; 72 for (s=0; s<=maxSymbolValue; s++) 73 if (count[s] > largestCount) largestCount = count[s]; 74 } 75 76 return largestCount; 77 } 78 79 80 /* HIST_count_parallel_wksp() : 81 * store histogram into 4 intermediate tables, recombined at the end. 82 * this design makes better use of OoO cpus, 83 * and is noticeably faster when some values are heavily repeated. 84 * But it needs some additional workspace for intermediate tables. 85 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. 86 * @return : largest histogram frequency, 87 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ 88 static size_t HIST_count_parallel_wksp( 89 unsigned* count, unsigned* maxSymbolValuePtr, 90 const void* source, size_t sourceSize, 91 unsigned checkMax, 92 unsigned* const workSpace) 93 { 94 const BYTE* ip = (const BYTE*)source; 95 const BYTE* const iend = ip+sourceSize; 96 unsigned maxSymbolValue = *maxSymbolValuePtr; 97 unsigned max=0; 98 U32* const Counting1 = workSpace; 99 U32* const Counting2 = Counting1 + 256; 100 U32* const Counting3 = Counting2 + 256; 101 U32* const Counting4 = Counting3 + 256; 102 103 memset(workSpace, 0, 4*256*sizeof(unsigned)); 104 105 /* safety checks */ 106 if (!sourceSize) { 107 memset(count, 0, maxSymbolValue + 1); 108 *maxSymbolValuePtr = 0; 109 return 0; 110 } 111 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ 112 113 /* by stripes of 16 bytes */ 114 { U32 cached = MEM_read32(ip); ip += 4; 115 while (ip < iend-15) { 116 U32 c = cached; cached = MEM_read32(ip); ip += 4; 117 Counting1[(BYTE) c ]++; 118 Counting2[(BYTE)(c>>8) ]++; 119 Counting3[(BYTE)(c>>16)]++; 120 Counting4[ c>>24 ]++; 121 c = cached; cached = MEM_read32(ip); ip += 4; 122 Counting1[(BYTE) c ]++; 123 Counting2[(BYTE)(c>>8) ]++; 124 Counting3[(BYTE)(c>>16)]++; 125 Counting4[ c>>24 ]++; 126 c = cached; cached = MEM_read32(ip); ip += 4; 127 Counting1[(BYTE) c ]++; 128 Counting2[(BYTE)(c>>8) ]++; 129 Counting3[(BYTE)(c>>16)]++; 130 Counting4[ c>>24 ]++; 131 c = cached; cached = MEM_read32(ip); ip += 4; 132 Counting1[(BYTE) c ]++; 133 Counting2[(BYTE)(c>>8) ]++; 134 Counting3[(BYTE)(c>>16)]++; 135 Counting4[ c>>24 ]++; 136 } 137 ip-=4; 138 } 139 140 /* finish last symbols */ 141 while (ip<iend) Counting1[*ip++]++; 142 143 if (checkMax) { /* verify stats will fit into destination table */ 144 U32 s; for (s=255; s>maxSymbolValue; s--) { 145 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 146 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); 147 } } 148 149 { U32 s; 150 if (maxSymbolValue > 255) maxSymbolValue = 255; 151 for (s=0; s<=maxSymbolValue; s++) { 152 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; 153 if (count[s] > max) max = count[s]; 154 } } 155 156 while (!count[maxSymbolValue]) maxSymbolValue--; 157 *maxSymbolValuePtr = maxSymbolValue; 158 return (size_t)max; 159 } 160 161 /* HIST_countFast_wksp() : 162 * Same as HIST_countFast(), but using an externally provided scratch buffer. 163 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 164 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 165 const void* source, size_t sourceSize, 166 unsigned* workSpace) 167 { 168 if (sourceSize < 1500) /* heuristic threshold */ 169 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); 170 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); 171 } 172 173 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ 174 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 175 const void* source, size_t sourceSize) 176 { 177 unsigned* tmpCounters = ExAllocatePoolWithTag(NonPagedPool, sizeof(unsigned) * HIST_WKSP_SIZE_U32, HIST_ALLOC_TAG); 178 size_t ret; 179 180 if (!tmpCounters) 181 return 0; 182 183 ret = HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); 184 185 ExFreePool(tmpCounters); 186 187 return ret; 188 } 189 190 /* HIST_count_wksp() : 191 * Same as HIST_count(), but using an externally provided scratch buffer. 192 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ 193 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 194 const void* source, size_t sourceSize, unsigned* workSpace) 195 { 196 if (*maxSymbolValuePtr < 255) 197 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); 198 *maxSymbolValuePtr = 255; 199 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); 200 } 201 202 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 203 const void* src, size_t srcSize) 204 { 205 unsigned tmpCounters[HIST_WKSP_SIZE_U32]; 206 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); 207 } 208