xref: /freebsd/sys/contrib/zstd/lib/compress/hist.c (revision 0f743729)
10f743729SConrad Meyer /* ******************************************************************
20f743729SConrad Meyer    hist : Histogram functions
30f743729SConrad Meyer    part of Finite State Entropy project
40f743729SConrad Meyer    Copyright (C) 2013-present, Yann Collet.
50f743729SConrad Meyer 
60f743729SConrad Meyer    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
70f743729SConrad Meyer 
80f743729SConrad Meyer    Redistribution and use in source and binary forms, with or without
90f743729SConrad Meyer    modification, are permitted provided that the following conditions are
100f743729SConrad Meyer    met:
110f743729SConrad Meyer 
120f743729SConrad Meyer        * Redistributions of source code must retain the above copyright
130f743729SConrad Meyer    notice, this list of conditions and the following disclaimer.
140f743729SConrad Meyer        * Redistributions in binary form must reproduce the above
150f743729SConrad Meyer    copyright notice, this list of conditions and the following disclaimer
160f743729SConrad Meyer    in the documentation and/or other materials provided with the
170f743729SConrad Meyer    distribution.
180f743729SConrad Meyer 
190f743729SConrad Meyer    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
200f743729SConrad Meyer    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
210f743729SConrad Meyer    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
220f743729SConrad Meyer    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
230f743729SConrad Meyer    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
240f743729SConrad Meyer    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
250f743729SConrad Meyer    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
260f743729SConrad Meyer    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
270f743729SConrad Meyer    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
280f743729SConrad Meyer    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
290f743729SConrad Meyer    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
300f743729SConrad Meyer 
310f743729SConrad Meyer     You can contact the author at :
320f743729SConrad Meyer     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
330f743729SConrad Meyer     - Public forum : https://groups.google.com/forum/#!forum/lz4c
340f743729SConrad Meyer ****************************************************************** */
350f743729SConrad Meyer 
360f743729SConrad Meyer /* --- dependencies --- */
370f743729SConrad Meyer #include "mem.h"             /* U32, BYTE, etc. */
380f743729SConrad Meyer #include "debug.h"           /* assert, DEBUGLOG */
390f743729SConrad Meyer #include "error_private.h"   /* ERROR */
400f743729SConrad Meyer #include "hist.h"
410f743729SConrad Meyer 
420f743729SConrad Meyer 
430f743729SConrad Meyer /* --- Error management --- */
440f743729SConrad Meyer unsigned HIST_isError(size_t code) { return ERR_isError(code); }
450f743729SConrad Meyer 
460f743729SConrad Meyer /*-**************************************************************
470f743729SConrad Meyer  *  Histogram functions
480f743729SConrad Meyer  ****************************************************************/
490f743729SConrad Meyer unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
500f743729SConrad Meyer                            const void* src, size_t srcSize)
510f743729SConrad Meyer {
520f743729SConrad Meyer     const BYTE* ip = (const BYTE*)src;
530f743729SConrad Meyer     const BYTE* const end = ip + srcSize;
540f743729SConrad Meyer     unsigned maxSymbolValue = *maxSymbolValuePtr;
550f743729SConrad Meyer     unsigned largestCount=0;
560f743729SConrad Meyer 
570f743729SConrad Meyer     memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
580f743729SConrad Meyer     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
590f743729SConrad Meyer 
600f743729SConrad Meyer     while (ip<end) {
610f743729SConrad Meyer         assert(*ip <= maxSymbolValue);
620f743729SConrad Meyer         count[*ip++]++;
630f743729SConrad Meyer     }
640f743729SConrad Meyer 
650f743729SConrad Meyer     while (!count[maxSymbolValue]) maxSymbolValue--;
660f743729SConrad Meyer     *maxSymbolValuePtr = maxSymbolValue;
670f743729SConrad Meyer 
680f743729SConrad Meyer     {   U32 s;
690f743729SConrad Meyer         for (s=0; s<=maxSymbolValue; s++)
700f743729SConrad Meyer             if (count[s] > largestCount) largestCount = count[s];
710f743729SConrad Meyer     }
720f743729SConrad Meyer 
730f743729SConrad Meyer     return largestCount;
740f743729SConrad Meyer }
750f743729SConrad Meyer 
760f743729SConrad Meyer 
770f743729SConrad Meyer /* HIST_count_parallel_wksp() :
780f743729SConrad Meyer  * store histogram into 4 intermediate tables, recombined at the end.
790f743729SConrad Meyer  * this design makes better use of OoO cpus,
800f743729SConrad Meyer  * and is noticeably faster when some values are heavily repeated.
810f743729SConrad Meyer  * But it needs some additional workspace for intermediate tables.
820f743729SConrad Meyer  * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
830f743729SConrad Meyer  * @return : largest histogram frequency,
840f743729SConrad Meyer  *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
850f743729SConrad Meyer static size_t HIST_count_parallel_wksp(
860f743729SConrad Meyer                                 unsigned* count, unsigned* maxSymbolValuePtr,
870f743729SConrad Meyer                                 const void* source, size_t sourceSize,
880f743729SConrad Meyer                                 unsigned checkMax,
890f743729SConrad Meyer                                 unsigned* const workSpace)
900f743729SConrad Meyer {
910f743729SConrad Meyer     const BYTE* ip = (const BYTE*)source;
920f743729SConrad Meyer     const BYTE* const iend = ip+sourceSize;
930f743729SConrad Meyer     unsigned maxSymbolValue = *maxSymbolValuePtr;
940f743729SConrad Meyer     unsigned max=0;
950f743729SConrad Meyer     U32* const Counting1 = workSpace;
960f743729SConrad Meyer     U32* const Counting2 = Counting1 + 256;
970f743729SConrad Meyer     U32* const Counting3 = Counting2 + 256;
980f743729SConrad Meyer     U32* const Counting4 = Counting3 + 256;
990f743729SConrad Meyer 
1000f743729SConrad Meyer     memset(workSpace, 0, 4*256*sizeof(unsigned));
1010f743729SConrad Meyer 
1020f743729SConrad Meyer     /* safety checks */
1030f743729SConrad Meyer     if (!sourceSize) {
1040f743729SConrad Meyer         memset(count, 0, maxSymbolValue + 1);
1050f743729SConrad Meyer         *maxSymbolValuePtr = 0;
1060f743729SConrad Meyer         return 0;
1070f743729SConrad Meyer     }
1080f743729SConrad Meyer     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
1090f743729SConrad Meyer 
1100f743729SConrad Meyer     /* by stripes of 16 bytes */
1110f743729SConrad Meyer     {   U32 cached = MEM_read32(ip); ip += 4;
1120f743729SConrad Meyer         while (ip < iend-15) {
1130f743729SConrad Meyer             U32 c = cached; cached = MEM_read32(ip); ip += 4;
1140f743729SConrad Meyer             Counting1[(BYTE) c     ]++;
1150f743729SConrad Meyer             Counting2[(BYTE)(c>>8) ]++;
1160f743729SConrad Meyer             Counting3[(BYTE)(c>>16)]++;
1170f743729SConrad Meyer             Counting4[       c>>24 ]++;
1180f743729SConrad Meyer             c = cached; cached = MEM_read32(ip); ip += 4;
1190f743729SConrad Meyer             Counting1[(BYTE) c     ]++;
1200f743729SConrad Meyer             Counting2[(BYTE)(c>>8) ]++;
1210f743729SConrad Meyer             Counting3[(BYTE)(c>>16)]++;
1220f743729SConrad Meyer             Counting4[       c>>24 ]++;
1230f743729SConrad Meyer             c = cached; cached = MEM_read32(ip); ip += 4;
1240f743729SConrad Meyer             Counting1[(BYTE) c     ]++;
1250f743729SConrad Meyer             Counting2[(BYTE)(c>>8) ]++;
1260f743729SConrad Meyer             Counting3[(BYTE)(c>>16)]++;
1270f743729SConrad Meyer             Counting4[       c>>24 ]++;
1280f743729SConrad Meyer             c = cached; cached = MEM_read32(ip); ip += 4;
1290f743729SConrad Meyer             Counting1[(BYTE) c     ]++;
1300f743729SConrad Meyer             Counting2[(BYTE)(c>>8) ]++;
1310f743729SConrad Meyer             Counting3[(BYTE)(c>>16)]++;
1320f743729SConrad Meyer             Counting4[       c>>24 ]++;
1330f743729SConrad Meyer         }
1340f743729SConrad Meyer         ip-=4;
1350f743729SConrad Meyer     }
1360f743729SConrad Meyer 
1370f743729SConrad Meyer     /* finish last symbols */
1380f743729SConrad Meyer     while (ip<iend) Counting1[*ip++]++;
1390f743729SConrad Meyer 
1400f743729SConrad Meyer     if (checkMax) {   /* verify stats will fit into destination table */
1410f743729SConrad Meyer         U32 s; for (s=255; s>maxSymbolValue; s--) {
1420f743729SConrad Meyer             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
1430f743729SConrad Meyer             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
1440f743729SConrad Meyer     }   }
1450f743729SConrad Meyer 
1460f743729SConrad Meyer     {   U32 s;
1470f743729SConrad Meyer         if (maxSymbolValue > 255) maxSymbolValue = 255;
1480f743729SConrad Meyer         for (s=0; s<=maxSymbolValue; s++) {
1490f743729SConrad Meyer             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
1500f743729SConrad Meyer             if (count[s] > max) max = count[s];
1510f743729SConrad Meyer     }   }
1520f743729SConrad Meyer 
1530f743729SConrad Meyer     while (!count[maxSymbolValue]) maxSymbolValue--;
1540f743729SConrad Meyer     *maxSymbolValuePtr = maxSymbolValue;
1550f743729SConrad Meyer     return (size_t)max;
1560f743729SConrad Meyer }
1570f743729SConrad Meyer 
1580f743729SConrad Meyer /* HIST_countFast_wksp() :
1590f743729SConrad Meyer  * Same as HIST_countFast(), but using an externally provided scratch buffer.
1600f743729SConrad Meyer  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
1610f743729SConrad Meyer size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
1620f743729SConrad Meyer                           const void* source, size_t sourceSize,
1630f743729SConrad Meyer                           unsigned* workSpace)
1640f743729SConrad Meyer {
1650f743729SConrad Meyer     if (sourceSize < 1500) /* heuristic threshold */
1660f743729SConrad Meyer         return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
1670f743729SConrad Meyer     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
1680f743729SConrad Meyer }
1690f743729SConrad Meyer 
1700f743729SConrad Meyer /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
1710f743729SConrad Meyer size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
1720f743729SConrad Meyer                      const void* source, size_t sourceSize)
1730f743729SConrad Meyer {
1740f743729SConrad Meyer     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
1750f743729SConrad Meyer     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
1760f743729SConrad Meyer }
1770f743729SConrad Meyer 
1780f743729SConrad Meyer /* HIST_count_wksp() :
1790f743729SConrad Meyer  * Same as HIST_count(), but using an externally provided scratch buffer.
1800f743729SConrad Meyer  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
1810f743729SConrad Meyer size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
1820f743729SConrad Meyer                  const void* source, size_t sourceSize, unsigned* workSpace)
1830f743729SConrad Meyer {
1840f743729SConrad Meyer     if (*maxSymbolValuePtr < 255)
1850f743729SConrad Meyer         return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
1860f743729SConrad Meyer     *maxSymbolValuePtr = 255;
1870f743729SConrad Meyer     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
1880f743729SConrad Meyer }
1890f743729SConrad Meyer 
1900f743729SConrad Meyer size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
1910f743729SConrad Meyer                  const void* src, size_t srcSize)
1920f743729SConrad Meyer {
1930f743729SConrad Meyer     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
1940f743729SConrad Meyer     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
1950f743729SConrad Meyer }
196