xref: /freebsd/sys/contrib/zstd/lib/compress/hist.c (revision c697fb7f)
1 /* ******************************************************************
2    hist : Histogram functions
3    part of Finite State Entropy project
4    Copyright (C) 2013-present, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31     You can contact the author at :
32     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33     - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35 
36 /* --- dependencies --- */
37 #include "mem.h"             /* U32, BYTE, etc. */
38 #include "debug.h"           /* assert, DEBUGLOG */
39 #include "error_private.h"   /* ERROR */
40 #include "hist.h"
41 
42 
43 /* --- Error management --- */
44 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
45 
46 /*-**************************************************************
47  *  Histogram functions
48  ****************************************************************/
49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50                            const void* src, size_t srcSize)
51 {
52     const BYTE* ip = (const BYTE*)src;
53     const BYTE* const end = ip + srcSize;
54     unsigned maxSymbolValue = *maxSymbolValuePtr;
55     unsigned largestCount=0;
56 
57     memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59 
60     while (ip<end) {
61         assert(*ip <= maxSymbolValue);
62         count[*ip++]++;
63     }
64 
65     while (!count[maxSymbolValue]) maxSymbolValue--;
66     *maxSymbolValuePtr = maxSymbolValue;
67 
68     {   U32 s;
69         for (s=0; s<=maxSymbolValue; s++)
70             if (count[s] > largestCount) largestCount = count[s];
71     }
72 
73     return largestCount;
74 }
75 
76 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
77 
78 /* HIST_count_parallel_wksp() :
79  * store histogram into 4 intermediate tables, recombined at the end.
80  * this design makes better use of OoO cpus,
81  * and is noticeably faster when some values are heavily repeated.
82  * But it needs some additional workspace for intermediate tables.
83  * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
84  * @return : largest histogram frequency,
85  *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
86 static size_t HIST_count_parallel_wksp(
87                                 unsigned* count, unsigned* maxSymbolValuePtr,
88                                 const void* source, size_t sourceSize,
89                                 HIST_checkInput_e check,
90                                 U32* const workSpace)
91 {
92     const BYTE* ip = (const BYTE*)source;
93     const BYTE* const iend = ip+sourceSize;
94     unsigned maxSymbolValue = *maxSymbolValuePtr;
95     unsigned max=0;
96     U32* const Counting1 = workSpace;
97     U32* const Counting2 = Counting1 + 256;
98     U32* const Counting3 = Counting2 + 256;
99     U32* const Counting4 = Counting3 + 256;
100 
101     memset(workSpace, 0, 4*256*sizeof(unsigned));
102 
103     /* safety checks */
104     if (!sourceSize) {
105         memset(count, 0, maxSymbolValue + 1);
106         *maxSymbolValuePtr = 0;
107         return 0;
108     }
109     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
110 
111     /* by stripes of 16 bytes */
112     {   U32 cached = MEM_read32(ip); ip += 4;
113         while (ip < iend-15) {
114             U32 c = cached; cached = MEM_read32(ip); ip += 4;
115             Counting1[(BYTE) c     ]++;
116             Counting2[(BYTE)(c>>8) ]++;
117             Counting3[(BYTE)(c>>16)]++;
118             Counting4[       c>>24 ]++;
119             c = cached; cached = MEM_read32(ip); ip += 4;
120             Counting1[(BYTE) c     ]++;
121             Counting2[(BYTE)(c>>8) ]++;
122             Counting3[(BYTE)(c>>16)]++;
123             Counting4[       c>>24 ]++;
124             c = cached; cached = MEM_read32(ip); ip += 4;
125             Counting1[(BYTE) c     ]++;
126             Counting2[(BYTE)(c>>8) ]++;
127             Counting3[(BYTE)(c>>16)]++;
128             Counting4[       c>>24 ]++;
129             c = cached; cached = MEM_read32(ip); ip += 4;
130             Counting1[(BYTE) c     ]++;
131             Counting2[(BYTE)(c>>8) ]++;
132             Counting3[(BYTE)(c>>16)]++;
133             Counting4[       c>>24 ]++;
134         }
135         ip-=4;
136     }
137 
138     /* finish last symbols */
139     while (ip<iend) Counting1[*ip++]++;
140 
141     if (check) {   /* verify stats will fit into destination table */
142         U32 s; for (s=255; s>maxSymbolValue; s--) {
143             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
144             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
145     }   }
146 
147     {   U32 s;
148         if (maxSymbolValue > 255) maxSymbolValue = 255;
149         for (s=0; s<=maxSymbolValue; s++) {
150             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
151             if (count[s] > max) max = count[s];
152     }   }
153 
154     while (!count[maxSymbolValue]) maxSymbolValue--;
155     *maxSymbolValuePtr = maxSymbolValue;
156     return (size_t)max;
157 }
158 
159 /* HIST_countFast_wksp() :
160  * Same as HIST_countFast(), but using an externally provided scratch buffer.
161  * `workSpace` is a writable buffer which must be 4-bytes aligned,
162  * `workSpaceSize` must be >= HIST_WKSP_SIZE
163  */
164 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
165                           const void* source, size_t sourceSize,
166                           void* workSpace, size_t workSpaceSize)
167 {
168     if (sourceSize < 1500) /* heuristic threshold */
169         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
170     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
171     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
172     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
173 }
174 
175 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
176 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
177                      const void* source, size_t sourceSize)
178 {
179     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
180     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
181 }
182 
183 /* HIST_count_wksp() :
184  * Same as HIST_count(), but using an externally provided scratch buffer.
185  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
186 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
187                        const void* source, size_t sourceSize,
188                        void* workSpace, size_t workSpaceSize)
189 {
190     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
191     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
192     if (*maxSymbolValuePtr < 255)
193         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
194     *maxSymbolValuePtr = 255;
195     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
196 }
197 
198 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
199                  const void* src, size_t srcSize)
200 {
201     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
202     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
203 }
204