1*c03c5b1cSMartin Matuska /* ******************************************************************
2*c03c5b1cSMartin Matuska  * hist : Histogram functions
3*c03c5b1cSMartin Matuska  * part of Finite State Entropy project
4*c03c5b1cSMartin Matuska  * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5*c03c5b1cSMartin Matuska  *
6*c03c5b1cSMartin Matuska  *  You can contact the author at :
7*c03c5b1cSMartin Matuska  *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*c03c5b1cSMartin Matuska  *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
9*c03c5b1cSMartin Matuska  *
10*c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
11*c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12*c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
13*c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
14*c03c5b1cSMartin Matuska ****************************************************************** */
15*c03c5b1cSMartin Matuska 
16*c03c5b1cSMartin Matuska /* --- dependencies --- */
17*c03c5b1cSMartin Matuska #include "../common/mem.h"             /* U32, BYTE, etc. */
18*c03c5b1cSMartin Matuska #include "../common/debug.h"           /* assert, DEBUGLOG */
19*c03c5b1cSMartin Matuska #include "../common/error_private.h"   /* ERROR */
20*c03c5b1cSMartin Matuska #include "hist.h"
21*c03c5b1cSMartin Matuska 
22*c03c5b1cSMartin Matuska 
23*c03c5b1cSMartin Matuska /* --- Error management --- */
HIST_isError(size_t code)24*c03c5b1cSMartin Matuska unsigned HIST_isError(size_t code) { return ERR_isError(code); }
25*c03c5b1cSMartin Matuska 
26*c03c5b1cSMartin Matuska /*-**************************************************************
27*c03c5b1cSMartin Matuska  *  Histogram functions
28*c03c5b1cSMartin Matuska  ****************************************************************/
HIST_count_simple(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)29*c03c5b1cSMartin Matuska unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
30*c03c5b1cSMartin Matuska                            const void* src, size_t srcSize)
31*c03c5b1cSMartin Matuska {
32*c03c5b1cSMartin Matuska     const BYTE* ip = (const BYTE*)src;
33*c03c5b1cSMartin Matuska     const BYTE* const end = ip + srcSize;
34*c03c5b1cSMartin Matuska     unsigned maxSymbolValue = *maxSymbolValuePtr;
35*c03c5b1cSMartin Matuska     unsigned largestCount=0;
36*c03c5b1cSMartin Matuska 
37*c03c5b1cSMartin Matuska     memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
38*c03c5b1cSMartin Matuska     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
39*c03c5b1cSMartin Matuska 
40*c03c5b1cSMartin Matuska     while (ip<end) {
41*c03c5b1cSMartin Matuska         assert(*ip <= maxSymbolValue);
42*c03c5b1cSMartin Matuska         count[*ip++]++;
43*c03c5b1cSMartin Matuska     }
44*c03c5b1cSMartin Matuska 
45*c03c5b1cSMartin Matuska     while (!count[maxSymbolValue]) maxSymbolValue--;
46*c03c5b1cSMartin Matuska     *maxSymbolValuePtr = maxSymbolValue;
47*c03c5b1cSMartin Matuska 
48*c03c5b1cSMartin Matuska     {   U32 s;
49*c03c5b1cSMartin Matuska         for (s=0; s<=maxSymbolValue; s++)
50*c03c5b1cSMartin Matuska             if (count[s] > largestCount) largestCount = count[s];
51*c03c5b1cSMartin Matuska     }
52*c03c5b1cSMartin Matuska 
53*c03c5b1cSMartin Matuska     return largestCount;
54*c03c5b1cSMartin Matuska }
55*c03c5b1cSMartin Matuska 
56*c03c5b1cSMartin Matuska typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
57*c03c5b1cSMartin Matuska 
58*c03c5b1cSMartin Matuska /* HIST_count_parallel_wksp() :
59*c03c5b1cSMartin Matuska  * store histogram into 4 intermediate tables, recombined at the end.
60*c03c5b1cSMartin Matuska  * this design makes better use of OoO cpus,
61*c03c5b1cSMartin Matuska  * and is noticeably faster when some values are heavily repeated.
62*c03c5b1cSMartin Matuska  * But it needs some additional workspace for intermediate tables.
63*c03c5b1cSMartin Matuska  * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
64*c03c5b1cSMartin Matuska  * @return : largest histogram frequency,
65*c03c5b1cSMartin Matuska  *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
HIST_count_parallel_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,HIST_checkInput_e check,U32 * const workSpace)66*c03c5b1cSMartin Matuska static size_t HIST_count_parallel_wksp(
67*c03c5b1cSMartin Matuska                                 unsigned* count, unsigned* maxSymbolValuePtr,
68*c03c5b1cSMartin Matuska                                 const void* source, size_t sourceSize,
69*c03c5b1cSMartin Matuska                                 HIST_checkInput_e check,
70*c03c5b1cSMartin Matuska                                 U32* const workSpace)
71*c03c5b1cSMartin Matuska {
72*c03c5b1cSMartin Matuska     const BYTE* ip = (const BYTE*)source;
73*c03c5b1cSMartin Matuska     const BYTE* const iend = ip+sourceSize;
74*c03c5b1cSMartin Matuska     unsigned maxSymbolValue = *maxSymbolValuePtr;
75*c03c5b1cSMartin Matuska     unsigned max=0;
76*c03c5b1cSMartin Matuska     U32* const Counting1 = workSpace;
77*c03c5b1cSMartin Matuska     U32* const Counting2 = Counting1 + 256;
78*c03c5b1cSMartin Matuska     U32* const Counting3 = Counting2 + 256;
79*c03c5b1cSMartin Matuska     U32* const Counting4 = Counting3 + 256;
80*c03c5b1cSMartin Matuska 
81*c03c5b1cSMartin Matuska     memset(workSpace, 0, 4*256*sizeof(unsigned));
82*c03c5b1cSMartin Matuska 
83*c03c5b1cSMartin Matuska     /* safety checks */
84*c03c5b1cSMartin Matuska     if (!sourceSize) {
85*c03c5b1cSMartin Matuska         memset(count, 0, maxSymbolValue + 1);
86*c03c5b1cSMartin Matuska         *maxSymbolValuePtr = 0;
87*c03c5b1cSMartin Matuska         return 0;
88*c03c5b1cSMartin Matuska     }
89*c03c5b1cSMartin Matuska     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
90*c03c5b1cSMartin Matuska 
91*c03c5b1cSMartin Matuska     /* by stripes of 16 bytes */
92*c03c5b1cSMartin Matuska     {   U32 cached = MEM_read32(ip); ip += 4;
93*c03c5b1cSMartin Matuska         while (ip < iend-15) {
94*c03c5b1cSMartin Matuska             U32 c = cached; cached = MEM_read32(ip); ip += 4;
95*c03c5b1cSMartin Matuska             Counting1[(BYTE) c     ]++;
96*c03c5b1cSMartin Matuska             Counting2[(BYTE)(c>>8) ]++;
97*c03c5b1cSMartin Matuska             Counting3[(BYTE)(c>>16)]++;
98*c03c5b1cSMartin Matuska             Counting4[       c>>24 ]++;
99*c03c5b1cSMartin Matuska             c = cached; cached = MEM_read32(ip); ip += 4;
100*c03c5b1cSMartin Matuska             Counting1[(BYTE) c     ]++;
101*c03c5b1cSMartin Matuska             Counting2[(BYTE)(c>>8) ]++;
102*c03c5b1cSMartin Matuska             Counting3[(BYTE)(c>>16)]++;
103*c03c5b1cSMartin Matuska             Counting4[       c>>24 ]++;
104*c03c5b1cSMartin Matuska             c = cached; cached = MEM_read32(ip); ip += 4;
105*c03c5b1cSMartin Matuska             Counting1[(BYTE) c     ]++;
106*c03c5b1cSMartin Matuska             Counting2[(BYTE)(c>>8) ]++;
107*c03c5b1cSMartin Matuska             Counting3[(BYTE)(c>>16)]++;
108*c03c5b1cSMartin Matuska             Counting4[       c>>24 ]++;
109*c03c5b1cSMartin Matuska             c = cached; cached = MEM_read32(ip); ip += 4;
110*c03c5b1cSMartin Matuska             Counting1[(BYTE) c     ]++;
111*c03c5b1cSMartin Matuska             Counting2[(BYTE)(c>>8) ]++;
112*c03c5b1cSMartin Matuska             Counting3[(BYTE)(c>>16)]++;
113*c03c5b1cSMartin Matuska             Counting4[       c>>24 ]++;
114*c03c5b1cSMartin Matuska         }
115*c03c5b1cSMartin Matuska         ip-=4;
116*c03c5b1cSMartin Matuska     }
117*c03c5b1cSMartin Matuska 
118*c03c5b1cSMartin Matuska     /* finish last symbols */
119*c03c5b1cSMartin Matuska     while (ip<iend) Counting1[*ip++]++;
120*c03c5b1cSMartin Matuska 
121*c03c5b1cSMartin Matuska     if (check) {   /* verify stats will fit into destination table */
122*c03c5b1cSMartin Matuska         U32 s; for (s=255; s>maxSymbolValue; s--) {
123*c03c5b1cSMartin Matuska             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
124*c03c5b1cSMartin Matuska             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
125*c03c5b1cSMartin Matuska     }   }
126*c03c5b1cSMartin Matuska 
127*c03c5b1cSMartin Matuska     {   U32 s;
128*c03c5b1cSMartin Matuska         if (maxSymbolValue > 255) maxSymbolValue = 255;
129*c03c5b1cSMartin Matuska         for (s=0; s<=maxSymbolValue; s++) {
130*c03c5b1cSMartin Matuska             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
131*c03c5b1cSMartin Matuska             if (count[s] > max) max = count[s];
132*c03c5b1cSMartin Matuska     }   }
133*c03c5b1cSMartin Matuska 
134*c03c5b1cSMartin Matuska     while (!count[maxSymbolValue]) maxSymbolValue--;
135*c03c5b1cSMartin Matuska     *maxSymbolValuePtr = maxSymbolValue;
136*c03c5b1cSMartin Matuska     return (size_t)max;
137*c03c5b1cSMartin Matuska }
138*c03c5b1cSMartin Matuska 
139*c03c5b1cSMartin Matuska /* HIST_countFast_wksp() :
140*c03c5b1cSMartin Matuska  * Same as HIST_countFast(), but using an externally provided scratch buffer.
141*c03c5b1cSMartin Matuska  * `workSpace` is a writable buffer which must be 4-bytes aligned,
142*c03c5b1cSMartin Matuska  * `workSpaceSize` must be >= HIST_WKSP_SIZE
143*c03c5b1cSMartin Matuska  */
HIST_countFast_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)144*c03c5b1cSMartin Matuska size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
145*c03c5b1cSMartin Matuska                           const void* source, size_t sourceSize,
146*c03c5b1cSMartin Matuska                           void* workSpace, size_t workSpaceSize)
147*c03c5b1cSMartin Matuska {
148*c03c5b1cSMartin Matuska     if (sourceSize < 1500) /* heuristic threshold */
149*c03c5b1cSMartin Matuska         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
150*c03c5b1cSMartin Matuska     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
151*c03c5b1cSMartin Matuska     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
152*c03c5b1cSMartin Matuska     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
153*c03c5b1cSMartin Matuska }
154*c03c5b1cSMartin Matuska 
155*c03c5b1cSMartin Matuska /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
HIST_countFast(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize)156*c03c5b1cSMartin Matuska size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
157*c03c5b1cSMartin Matuska                      const void* source, size_t sourceSize)
158*c03c5b1cSMartin Matuska {
159*c03c5b1cSMartin Matuska     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
160*c03c5b1cSMartin Matuska     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
161*c03c5b1cSMartin Matuska }
162*c03c5b1cSMartin Matuska 
163*c03c5b1cSMartin Matuska /* HIST_count_wksp() :
164*c03c5b1cSMartin Matuska  * Same as HIST_count(), but using an externally provided scratch buffer.
165*c03c5b1cSMartin Matuska  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
HIST_count_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,void * workSpace,size_t workSpaceSize)166*c03c5b1cSMartin Matuska size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
167*c03c5b1cSMartin Matuska                        const void* source, size_t sourceSize,
168*c03c5b1cSMartin Matuska                        void* workSpace, size_t workSpaceSize)
169*c03c5b1cSMartin Matuska {
170*c03c5b1cSMartin Matuska     if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
171*c03c5b1cSMartin Matuska     if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
172*c03c5b1cSMartin Matuska     if (*maxSymbolValuePtr < 255)
173*c03c5b1cSMartin Matuska         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
174*c03c5b1cSMartin Matuska     *maxSymbolValuePtr = 255;
175*c03c5b1cSMartin Matuska     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
176*c03c5b1cSMartin Matuska }
177*c03c5b1cSMartin Matuska 
HIST_count(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)178*c03c5b1cSMartin Matuska size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
179*c03c5b1cSMartin Matuska                  const void* src, size_t srcSize)
180*c03c5b1cSMartin Matuska {
181*c03c5b1cSMartin Matuska     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
182*c03c5b1cSMartin Matuska     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
183*c03c5b1cSMartin Matuska }
184