1 /*
2     datagen.c - compressible data generator test tool
3     Copyright (C) Yann Collet 2012-2015
4 
5     GPL v2 License
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License along
18     with this program; if not, write to the Free Software Foundation, Inc.,
19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     You can contact the author at :
22    - ZSTD source repository : https://github.com/Cyan4973/zstd
23    - Public forum : https://groups.google.com/forum/#!forum/lz4c
24 */
25 
26 /**************************************
27 *  Includes
28 **************************************/
29 #include <stdlib.h>    /* malloc */
30 #include <stdio.h>     /* FILE, fwrite */
31 #include <string.h>    /* memcpy */
32 
33 
34 /**************************************
35 *  Basic Types
36 **************************************/
/* Short aliases for the integer types used throughout this file.
 * C99 and later : exact-width types from <stdint.h>. */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
# include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  /* Pre-C99 fallback : built-in types. The language does not guarantee their
   * widths; this assumes 8-bit char, 16-bit short, 32-bit int, 64-bit long long. */
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif
51 
52 
53 /**************************************
54 *  OS-specific Includes
55 **************************************/
/* SET_BINARY_MODE(file) :
 * On DOS / OS2 / Windows / Cygwin builds, switches the descriptor behind `file`
 * to _O_BINARY mode through _setmode(). On every other target it expands to nothing. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>   /* _O_BINARY */
#  include <io.h>      /* _setmode, _isatty */
#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif
63 
64 
65 /**************************************
66 *  Constants
67 **************************************/
/* KB is a postfix multiplier : `32 KB` expands to `32 *(1 <<10)`.
 * Uses must be wrapped in parentheses, as in `(32 KB)`, to stay a single operand. */
#define KB *(1 <<10)

/* Constants of the RDG_rand() step : PRIME1 is the multiplier, PRIME2 the XOR mask. */
#define PRIME1   2654435761U
#define PRIME2   2246822519U
72 
73 
74 /**************************************
75 *  Local types
76 **************************************/
/* Literal distribution table : LTSIZE (= 1<<LTLOG = 8192) byte values.
 * RDG_genChar() reads one entry at a pseudo-random index reduced with LTMASK,
 * so the more entries a byte value occupies, the more often it is drawn. */
#define LTLOG 13
#define LTSIZE (1<<LTLOG)
#define LTMASK (LTSIZE-1)
typedef BYTE litDistribTable[LTSIZE];
81 
82 
83 
84 
85 /*********************************************************
86 *  Local Functions
87 *********************************************************/
88 #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
RDG_rand(U32 * src)89 static unsigned int RDG_rand(U32* src)
90 {
91     U32 rand32 = *src;
92     rand32 *= PRIME1;
93     rand32 ^= PRIME2;
94     rand32  = RDG_rotl32(rand32, 13);
95     *src = rand32;
96     return rand32;
97 }
98 
99 
/* RDG_fillLiteralDistrib() :
 * Fills the LTSIZE entries of `lt` with consecutive runs of identical byte values.
 * Each run covers (remaining entries * ld) + 1 slots, clamped to what is left,
 * so with ld > 0 the first values receive the longest runs.
 * Values start at '0' and cycle within ['(', '}'] ;
 * when ld == 0.0 every run has length 1 and values cycle through 0..255.
 */
static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
{
    const int fullRange = (ld == 0.0);
    const BYTE lowest   = fullRange ? 0   : '(';
    const BYTE highest  = fullRange ? 255 : '}';
    BYTE symbol         = fullRange ? 0   : '0';
    U32 filled = 0;

    while (filled < LTSIZE)
    {
        const U32 remaining = LTSIZE - filled;
        U32 run = (U32)((double)remaining * ld) + 1;
        U32 stop;

        if (run + filled > LTSIZE) run = remaining;   /* never run past the table */
        stop = filled + run;
        for ( ; filled < stop; filled++) lt[filled] = symbol;

        /* next value, wrapping back to the start of the allowed range */
        symbol++;
        if (symbol > highest) symbol = lowest;
    }
}
124 
125 
RDG_genChar(U32 * seed,const litDistribTable lt)126 static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
127 {
128     U32 id = RDG_rand(seed) & LTMASK;
129     return (lt[id]);
130 }
131 
132 
133 #define RDG_DICTSIZE    (32 KB)
134 #define RDG_RAND15BITS  ((RDG_rand(seed) >> 3) & 32767)
135 #define RDG_RANDLENGTH  ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
RDG_genBlock(void * buffer,size_t buffSize,size_t prefixSize,double matchProba,litDistribTable lt,unsigned * seedPtr)136 void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr)
137 {
138     BYTE* buffPtr = (BYTE*)buffer;
139     const U32 matchProba32 = (U32)(32768 * matchProba);
140     size_t pos = prefixSize;
141     U32* seed = seedPtr;
142 
143     /* special case */
144     while (matchProba >= 1.0)
145     {
146         size_t size0 = RDG_rand(seed) & 3;
147         size0  = (size_t)1 << (16 + size0 * 2);
148         size0 += RDG_rand(seed) & (size0-1);   /* because size0 is power of 2*/
149         if (buffSize < pos + size0)
150         {
151             memset(buffPtr+pos, 0, buffSize-pos);
152             return;
153         }
154         memset(buffPtr+pos, 0, size0);
155         pos += size0;
156         buffPtr[pos-1] = RDG_genChar(seed, lt);
157     }
158 
159     /* init */
160     if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
161 
162     /* Generate compressible data */
163     while (pos < buffSize)
164     {
165         /* Select : Literal (char) or Match (within 32K) */
166         if (RDG_RAND15BITS < matchProba32)
167         {
168             /* Copy (within 32K) */
169             size_t match;
170             size_t d;
171             int length = RDG_RANDLENGTH + 4;
172             U32 offset = RDG_RAND15BITS + 1;
173             if (offset > pos) offset = (U32)pos;
174             match = pos - offset;
175             d = pos + length;
176             if (d > buffSize) d = buffSize;
177             while (pos < d) buffPtr[pos++] = buffPtr[match++];
178         }
179         else
180         {
181             /* Literal (noise) */
182             size_t d;
183             size_t length = RDG_RANDLENGTH;
184             d = pos + length;
185             if (d > buffSize) d = buffSize;
186             while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt);
187         }
188     }
189 }
190 
191 
RDG_genBuffer(void * buffer,size_t size,double matchProba,double litProba,unsigned seed)192 void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
193 {
194     litDistribTable lt;
195     if (litProba==0.0) litProba = matchProba / 4.5;
196     RDG_fillLiteralDistrib(lt, litProba);
197     RDG_genBlock(buffer, size, 0, matchProba, lt, &seed);
198 }
199 
200 
201 #define RDG_BLOCKSIZE (128 KB)
RDG_genOut(unsigned long long size,double matchProba,double litProba,unsigned seed)202 void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
203 {
204     BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
205     U64 total = 0;
206     size_t genBlockSize = RDG_BLOCKSIZE;
207     litDistribTable lt;
208 
209     /* init */
210     if (litProba==0.0) litProba = matchProba / 4.5;
211     RDG_fillLiteralDistrib(lt, litProba);
212     SET_BINARY_MODE(stdout);
213 
214     /* Generate dict */
215     RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
216 
217     /* Generate compressible data */
218     while (total < size)
219     {
220         RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
221         if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
222         total += genBlockSize;
223         fwrite(buff, 1, genBlockSize, stdout);
224         /* update dict */
225         memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
226     }
227 }
228