1 /*
2 datagen.c - compressible data generator test tool
3 Copyright (C) Yann Collet 2012-2015
4
5 GPL v2 License
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21 You can contact the author at :
22 - ZSTD source repository : https://github.com/Cyan4973/zstd
23 - Public forum : https://groups.google.com/forum/#!forum/lz4c
24 */
25
26 /**************************************
27 * Includes
28 **************************************/
29 #include <stdlib.h> /* malloc */
30 #include <stdio.h> /* FILE, fwrite */
31 #include <string.h> /* memcpy */
32
33
34 /**************************************
35 * Basic Types
36 **************************************/
/* Short aliases for the integer types used throughout this file.
   With a C99 (or later) compiler they map to the exact-width types of <stdint.h>. */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
/* Pre-C99 fallback : assumes char/short/int/long long are 8/16/32/64 bits wide
   on the target platform - not checked here. */
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
51
52
53 /**************************************
54 * OS-specific Includes
55 **************************************/
/* SET_BINARY_MODE(file) :
   On DOS / OS2 / Windows / Cygwin builds, switches the given stdio stream to binary mode
   through _setmode() (presumably so that the generated bytes are not altered by text-mode
   translation - confirm against the platform documentation).
   On every other platform the macro expands to nothing. */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif
63
64
65 /**************************************
66 * Constants
67 **************************************/
/* KB is a postfix multiplier : `32 KB` expands to `32 *(1 <<10)`.
   The expansion is not parenthesized, so wrap each use in parentheses, as in `(32 KB)`. */
#define KB *(1 <<10)

/* Constants of the RDG_rand() state update : PRIME1 is the multiplier, PRIME2 the xor mask */
#define PRIME1 2654435761U
#define PRIME2 2246822519U
72
73
74 /**************************************
75 * Local types
76 **************************************/
/* Literal distribution table :
   LTLOG  : log2 of the number of entries
   LTSIZE : number of entries (1<<13 = 8192)
   LTMASK : mask reducing a random value to a valid table index
   Each entry holds one byte value; RDG_genChar() picks an entry at a random index. */
#define LTLOG 13
#define LTSIZE (1<<LTLOG)
#define LTMASK (LTSIZE-1)
typedef BYTE litDistribTable[LTSIZE];
81
82
83
84
85 /*********************************************************
86 * Local Functions
87 *********************************************************/
88 #define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
RDG_rand(U32 * src)89 static unsigned int RDG_rand(U32* src)
90 {
91 U32 rand32 = *src;
92 rand32 *= PRIME1;
93 rand32 ^= PRIME2;
94 rand32 = RDG_rotl32(rand32, 13);
95 *src = rand32;
96 return rand32;
97 }
98
99
/* RDG_fillLiteralDistrib() :
   Fills the LTSIZE entries of `lt` with consecutive runs of byte values.
   Each run covers the fraction `ld` of the entries still unassigned, plus one entry,
   so earlier symbols receive longer runs than later ones.
   Symbols start at '0' and cycle within ['(' , '}'].
   When ld == 0.0, every run has length 1 and symbols cycle through all values 0..255. */
static void RDG_fillLiteralDistrib(litDistribTable lt, double ld)
{
    const int fullRange = (ld == 0.0);
    const BYTE firstChar = (BYTE)(fullRange ? 0 : '(');
    const BYTE lastChar  = (BYTE)(fullRange ? 255 : '}');
    BYTE symbol = (BYTE)(fullRange ? 0 : '0');
    U32 pos = 0;

    while (pos < LTSIZE)
    {
        const U32 remaining = LTSIZE - pos;
        U32 runLength = (U32)((double)remaining * ld) + 1;
        U32 stop;

        /* never run past the end of the table */
        if (runLength + pos > LTSIZE) runLength = remaining;

        for (stop = pos + runLength; pos < stop; pos++)
            lt[pos] = symbol;

        /* next symbol, wrapping back to the start of the allowed range */
        symbol++;
        if (symbol > lastChar) symbol = firstChar;
    }
}
124
125
RDG_genChar(U32 * seed,const litDistribTable lt)126 static BYTE RDG_genChar(U32* seed, const litDistribTable lt)
127 {
128 U32 id = RDG_rand(seed) & LTMASK;
129 return (lt[id]);
130 }
131
132
133 #define RDG_DICTSIZE (32 KB)
134 #define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767)
135 #define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
RDG_genBlock(void * buffer,size_t buffSize,size_t prefixSize,double matchProba,litDistribTable lt,unsigned * seedPtr)136 void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr)
137 {
138 BYTE* buffPtr = (BYTE*)buffer;
139 const U32 matchProba32 = (U32)(32768 * matchProba);
140 size_t pos = prefixSize;
141 U32* seed = seedPtr;
142
143 /* special case */
144 while (matchProba >= 1.0)
145 {
146 size_t size0 = RDG_rand(seed) & 3;
147 size0 = (size_t)1 << (16 + size0 * 2);
148 size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/
149 if (buffSize < pos + size0)
150 {
151 memset(buffPtr+pos, 0, buffSize-pos);
152 return;
153 }
154 memset(buffPtr+pos, 0, size0);
155 pos += size0;
156 buffPtr[pos-1] = RDG_genChar(seed, lt);
157 }
158
159 /* init */
160 if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1;
161
162 /* Generate compressible data */
163 while (pos < buffSize)
164 {
165 /* Select : Literal (char) or Match (within 32K) */
166 if (RDG_RAND15BITS < matchProba32)
167 {
168 /* Copy (within 32K) */
169 size_t match;
170 size_t d;
171 int length = RDG_RANDLENGTH + 4;
172 U32 offset = RDG_RAND15BITS + 1;
173 if (offset > pos) offset = (U32)pos;
174 match = pos - offset;
175 d = pos + length;
176 if (d > buffSize) d = buffSize;
177 while (pos < d) buffPtr[pos++] = buffPtr[match++];
178 }
179 else
180 {
181 /* Literal (noise) */
182 size_t d;
183 size_t length = RDG_RANDLENGTH;
184 d = pos + length;
185 if (d > buffSize) d = buffSize;
186 while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt);
187 }
188 }
189 }
190
191
RDG_genBuffer(void * buffer,size_t size,double matchProba,double litProba,unsigned seed)192 void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
193 {
194 litDistribTable lt;
195 if (litProba==0.0) litProba = matchProba / 4.5;
196 RDG_fillLiteralDistrib(lt, litProba);
197 RDG_genBlock(buffer, size, 0, matchProba, lt, &seed);
198 }
199
200
201 #define RDG_BLOCKSIZE (128 KB)
RDG_genOut(unsigned long long size,double matchProba,double litProba,unsigned seed)202 void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
203 {
204 BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
205 U64 total = 0;
206 size_t genBlockSize = RDG_BLOCKSIZE;
207 litDistribTable lt;
208
209 /* init */
210 if (litProba==0.0) litProba = matchProba / 4.5;
211 RDG_fillLiteralDistrib(lt, litProba);
212 SET_BINARY_MODE(stdout);
213
214 /* Generate dict */
215 RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed);
216
217 /* Generate compressible data */
218 while (total < size)
219 {
220 RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed);
221 if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
222 total += genBlockSize;
223 fwrite(buff, 1, genBlockSize, stdout);
224 /* update dict */
225 memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
226 }
227 }
228