1 /*
2 * file_input_raw
3 *
4 * See copyright in copyright.h and the accompanying file COPYING
5 *
6 */
7
8 #include <dieharder/libdieharder.h>
9
/*
 * This is a wrapper for getting random numbers in RAW (binary integer)
 * format from a file.  A raw file has no headers -- it is presumed to be
 * a pure bit stream.  We therefore cannot read a count of values from the
 * file itself: for a regular file the number of unsigned ints is derived
 * from the file size, values are read one at a time and counted as we go,
 * and the file is rewound when that count is reached.  In this way we can
 * figure out if e.g. a compressed file is sufficiently "random" to make
 * it likely that the compression is good and so on.
 */
18
19 static unsigned long int file_input_raw_get (void *vstate);
20 static double file_input_raw_get_double (void *vstate);
21 static void file_input_raw_set (void *vstate, unsigned long int s);
22
/*
 * The file_input_state_t struct contains the data maintained on the
 * operation of the file_input rng, and can be accessed via
 * rng->state->whatever
 *
 *  fp          is the file pointer
 *  flen        is the number of rands in the file (filecount)
 *  rptr        is a count of rands returned since the last rewind
 *  rtot        is a count of rands returned since the file was opened or
 *              it was deliberately reset
 *  rewind_cnt  is a count of how many times the file was rewound since
 *              its last open
 *
 * file_input_state_t is currently defined in libdieharder.h so that it can
 * be shared with the other file-input generator(s).
 */
39
file_input_raw_get(void * vstate)40 static unsigned long int file_input_raw_get(void *vstate)
41 {
42
43 file_input_state_t *state = (file_input_state_t *) vstate;
44 unsigned int iret;
45
46 /*
47 * Check that the file is open (via file_input_raw_set()).
48 */
49 if(state->fp != NULL) {
50
51 /*
52 * Read in the next random number from the file
53 */
54 if(fread(&iret,sizeof(uint),1,state->fp) != 1){
55 fprintf(stderr,"# file_input_raw(): Error. This cannot happen.\n");
56 exit(0);
57 }
58
59 /*
60 * Success. iret is presumably valid and ready to return. Increment the
61 * counter of rands read so far.
62 */
63 state->rptr++;
64 state->rtot++;
65 if(verbose){
66 fprintf(stdout,"# file_input() %u: %u/%u -> %u\n",(uint)state->rtot,(uint)state->rptr,(uint)state->flen,(uint)iret);
67 }
68
69 /*
70 * This (with seed s == 0) basically rewinds the file and resets
71 * state->rptr to 0, but rtot keeps running,
72 */
73 if(state->flen && state->rptr == state->flen){
74 /*
75 * Reset/rewind the file
76 */
77 file_input_raw_set(vstate, 0);
78 }
79 return(iret);
80 } else {
81 fprintf(stderr,"Error: %s not open. Exiting.\n", filename);
82 exit(0);
83 }
84
85 }
86
/*
 * file_input_raw_get_double() returns the next value from the file scaled
 * into the half-open interval [0,1).
 *
 * file_input_raw_get() yields values in 0..UINT_MAX, so the divisor is
 * UINT_MAX + 1 (computed in double to avoid integer wrap-around); dividing
 * by UINT_MAX itself would return exactly 1.0 for the largest input.
 */
static double file_input_raw_get_double(void *vstate)
{
  return file_input_raw_get(vstate) / ((double) UINT_MAX + 1.0);
}
91
92
93 /*
94 * file_input_raw_set() is very simple. If the file hasn't been opened
95 * yet, it opens it and sets flen and rptr to zero. Otherwise it
96 * rewinds it and sets rptr to zero. Typically it is only called one
97 * time per file by the user, although it will be called once per read
98 * page by file_input_raw_get().
99 */
100
file_input_raw_set(void * vstate,unsigned long int s)101 static void file_input_raw_set (void *vstate, unsigned long int s)
102 {
103
104 static uint first=1;
105 struct stat sbuf;
106
107 file_input_state_t *state = (file_input_state_t *) vstate;
108
109 if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
110 fprintf(stdout,"# file_input_raw(): entering file_input_raw_set\n");
111 fprintf(stdout,"# file_input_raw(): state->fp = %p, seed = %lu\n",(void*) state->fp,s);
112 }
113
114 /*
115 * Get and set the file length, check to make sure the file exists,
116 * whatever...
117 */
118 if(first){
119 if(verbose){
120 fprintf(stdout,"# file_input_raw(): entering file_input_raw_set 1st call.\n");
121 }
122
123 /*
124 * This clears an obscure bug in FreeBSD reported by Lucius Windschuh,
125 * lwindschuh@googlemail.com, I think. Otherwise it should be
126 * harmless. It just initializes state->fp to 0 so that the file
127 * correctly opens later.
128 */
129 state->fp = NULL;
130
131 if(stat(filename, &sbuf)){
132 if(errno == EBADF){
133 fprintf(stderr,"# file_input_raw(): Error -- file descriptor %s bad.\n",filename);
134 exit(0);
135 }
136 }
137 /*
138 * Is this a regular file? If so, turn its byte length into a 32 bit uint
139 * length.
140 */
141 if(S_ISREG(sbuf.st_mode)){
142 /*
143 * sbuf.st_size should be type off_t, which is automatically u_int64_t
144 * if FILE_OFFSET_BITS is set to 64, which it is. So this should be
145 * able to manage large file sizes. Similarly, in the struct
146 * file_input_state_t flen should be type off_t. This means that
147 * filecount has to be off_t as well.
148 */
149 state->flen = sbuf.st_size/sizeof(uint);
150 filecount = state->flen;
151 if (filecount < 16) {
152 fprintf(stderr,"# file_input_raw(): Error -- file %s is too small.\n",filename);
153 exit(0);
154 }
155 } else if (S_ISDIR(sbuf.st_mode)){
156 fprintf(stderr,"# file_input_raw(): Error -- path %s is a directory.\n",filename);
157 exit(0);
158 } else {
159 /*
160 * This is neither a file nor a directory, so we will not
161 * even try to seek.
162 */
163 state->flen = 0;
164 }
165
166 /*
167 * This segment is executed only one time when the file is FIRST opened.
168 */
169 first = 0;
170 }
171
172 /*
173 * We use the "seed" to determine whether or not to reopen or
174 * rewind. A seed s == 0 for an open file means rewind; a seed
175 * of anything else forces a close (resetting rewind_cnt) followed
176 * by a reopen.
177 */
178 if(state->fp && s ) {
179 if(verbose == D_FILE_INPUT || verbose == D_ALL){
180 fprintf(stdout,"# file_input(): Closing/reopening/resetting %s\n",filename);
181 }
182 fclose(state->fp);
183 state->fp = NULL;
184 }
185
186 if (state->fp == NULL){
187 if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
188 fprintf(stdout,"# file_input_raw(): Opening %s\n", filename);
189 }
190
191 /*
192 * If we get here, the file exists, is a regular file, and we know its
193 * length. We can now open it. The test catches all other conditions
194 * that might keep the file from reading, e.g. permissions.
195 */
196 if ((state->fp = fopen(filename,"r")) == NULL) {
197 fprintf(stderr,"# file_input_raw(): Error: Cannot open %s, exiting.\n", filename);
198 exit(0);
199 }
200
201 /*
202 * OK, so if we get here, the file is open.
203 */
204 if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
205 fprintf(stdout,"# file_input_raw(): Opened %s for the first time.\n", filename);
206 fprintf(stdout,"# file_input_raw(): state->fp is %8p, file contains %u unsigned integers.\n",(void*) state->fp,(uint)state->flen);
207 }
208 state->rptr = 0; /* No rands read yet */
209 /*
210 * We only reset the entire file if there is a nonzero seed passed in.
211 * This clears both rtot and rewind_cnt in addition to rptr.
212 */
213 if(s) {
214 state->rtot = 0;
215 state->rewind_cnt = 0;
216 }
217
218 } else {
219 /*
220 * Rewinding seriously reduces the size of the space being explored.
221 * On the other hand, bombing a test also sucks, especially in a long
222 * -a(ll) run. Therefore we rewind every time our file pointer reaches
223 * the end of the file or call gsl_rng_set(rng,0).
224 */
225 if(state->flen && state->rptr >= state->flen){
226 rewind(state->fp);
227 state->rptr = 0;
228 state->rewind_cnt++;
229 if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
230 fprintf(stderr,"# file_input_raw(): Rewinding %s at rtot = %u\n", filename,(uint) state->rtot);
231 fprintf(stderr,"# file_input_raw(): Rewind count = %u, resetting rptr = %u\n",state->rewind_cnt,(uint) state->rptr);
232 }
233 } else {
234 return;
235 }
236
237 }
238
239 }
240
241 static const gsl_rng_type file_input_raw_type =
242 {"file_input_raw", /* name */
243 UINT_MAX, /* RAND_MAX */
244 0, /* RAND_MIN */
245 sizeof (file_input_state_t),
246 &file_input_raw_set,
247 &file_input_raw_get,
248 &file_input_raw_get_double};
249
250 const gsl_rng_type *gsl_rng_file_input_raw = &file_input_raw_type;
251