1 /*
2  * file_input_raw
3  *
4  * See copyright in copyright.h and the accompanying file COPYING
5  *
6  */
7 
8 #include <dieharder/libdieharder.h>
9 
/*
 * This is a wrapper for getting random numbers in RAW (binary integer)
 * format from a file.  A raw file has no headers -- it is presumed to be
 * a pure bit stream.  We therefore cannot read a count from the file
 * itself: for a regular file the number of unsigned ints it holds is
 * derived from its size as reported by stat(), and the values are then
 * read one unsigned int at a time, counting as we go and rewinding when
 * the end of the file is reached.  In this way we can figure out if e.g.
 * a compressed file is sufficiently "random" to make it likely that the
 * compression is good and so on.
 */
18 
/*
 * Forward declarations of the three generator callbacks defined later in
 * this file.
 */
static unsigned long file_input_raw_get(void *vstate);
static double file_input_raw_get_double(void *vstate);
static void file_input_raw_set(void *vstate, unsigned long s);
22 
/*
 * The file_input_state_t struct contains the data maintained on the
 * operation of the file_input rng, and can be accessed via
 * rng->state->whatever
 *
 *   fp is the file pointer
 *   flen is the number of rands in the file (filecount), or 0 if the
 *      input is not a regular file
 *   rptr is a count of rands returned since last rewind
 *   rtot is a count of rands returned since the file was opened or it
 *      was deliberately reset.
 *   rewind_cnt is a count of how many times the file was rewound since
 *      its last open.
 *
 * file_input_state_t is currently defined in libdieharder.h so that it
 * can be shared between this file and the other file_input generator(s).
 */
39 
file_input_raw_get(void * vstate)40 static unsigned long int file_input_raw_get(void *vstate)
41 {
42 
43  file_input_state_t *state = (file_input_state_t *) vstate;
44  unsigned int iret;
45 
46  /*
47   * Check that the file is open (via file_input_raw_set()).
48   */
49  if(state->fp != NULL) {
50 
51    /*
52     * Read in the next random number from the file
53     */
54    if(fread(&iret,sizeof(uint),1,state->fp) != 1){
55      fprintf(stderr,"# file_input_raw(): Error.  This cannot happen.\n");
56      exit(0);
57    }
58 
59    /*
60     * Success. iret is presumably valid and ready to return.  Increment the
61     * counter of rands read so far.
62     */
63    state->rptr++;
64    state->rtot++;
65    if(verbose){
66      fprintf(stdout,"# file_input() %u: %u/%u -> %u\n",(uint)state->rtot,(uint)state->rptr,(uint)state->flen,(uint)iret);
67    }
68 
69    /*
70     * This (with seed s == 0) basically rewinds the file and resets
71     * state->rptr to 0, but rtot keeps running,
72     */
73    if(state->flen && state->rptr == state->flen){
74      /*
75       * Reset/rewind the file
76       */
77      file_input_raw_set(vstate, 0);
78    }
79    return(iret);
80  } else {
81    fprintf(stderr,"Error: %s not open.  Exiting.\n", filename);
82    exit(0);
83  }
84 
85 }
86 
/*
 * file_input_raw_get_double() returns the next value from the file scaled
 * to a double.
 *
 *   vstate  pointer to the generator's file_input_state_t
 *
 * The integer obtained from file_input_raw_get() is divided by
 * UINT_MAX + 1 rather than UINT_MAX, so the result lies in [0,1): an
 * input equal to UINT_MAX maps to a value strictly below 1.0.
 */
static double file_input_raw_get_double (void *vstate)
{
  return file_input_raw_get (vstate) / ((double) UINT_MAX + 1.0);
}
91 
92 
/*
 * file_input_raw_set() is very simple.  On its first call it stats the
 * file to determine flen.  If the file isn't open (or a nonzero seed
 * forces it to be closed), it opens it and sets rptr to zero.  Otherwise
 * it rewinds it and sets rptr to zero, but only once rptr has reached
 * flen.  Typically it is only called one time per file by the user,
 * although it will also be called by file_input_raw_get() each time the
 * end of the file is reached.
 */
100 
file_input_raw_set(void * vstate,unsigned long int s)101 static void file_input_raw_set (void *vstate, unsigned long int s)
102 {
103 
104  static uint first=1;
105  struct stat sbuf;
106 
107  file_input_state_t *state = (file_input_state_t *) vstate;
108 
109  if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
110    fprintf(stdout,"# file_input_raw(): entering file_input_raw_set\n");
111    fprintf(stdout,"# file_input_raw(): state->fp = %p, seed = %lu\n",(void*) state->fp,s);
112  }
113 
114  /*
115   * Get and set the file length, check to make sure the file exists,
116   * whatever...
117   */
118  if(first){
119    if(verbose){
120      fprintf(stdout,"# file_input_raw(): entering file_input_raw_set 1st call.\n");
121    }
122 
123    /*
124     * This clears an obscure bug in FreeBSD reported by Lucius Windschuh,
125     * lwindschuh@googlemail.com, I think.  Otherwise it should be
126     * harmless.  It just initializes state->fp to 0 so that the file
127     * correctly opens later.
128     */
129    state->fp = NULL;
130 
131    if(stat(filename, &sbuf)){
132      if(errno == EBADF){
133        fprintf(stderr,"# file_input_raw(): Error -- file descriptor %s bad.\n",filename);
134        exit(0);
135      }
136    }
137    /*
138     * Is this a regular file?  If so, turn its byte length into a 32 bit uint
139     * length.
140     */
141    if(S_ISREG(sbuf.st_mode)){
142      /*
143       * sbuf.st_size should be type off_t, which is automatically u_int64_t
144       * if FILE_OFFSET_BITS is set to 64, which it is.  So this should be
145       * able to manage large file sizes.   Similarly, in the struct
146       * file_input_state_t flen should be type off_t.  This means that
147       * filecount has to be off_t as well.
148       */
149      state->flen = sbuf.st_size/sizeof(uint);
150      filecount = state->flen;
151      if (filecount < 16) {
152        fprintf(stderr,"# file_input_raw(): Error -- file %s is too small.\n",filename);
153        exit(0);
154      }
155    } else if (S_ISDIR(sbuf.st_mode)){
156      fprintf(stderr,"# file_input_raw(): Error -- path %s is a directory.\n",filename);
157      exit(0);
158    } else {
159       /*
160        * This is neither a file nor a directory, so we will not
161        * even try to seek.
162        */
163      state->flen = 0;
164    }
165 
166    /*
167     * This segment is executed only one time when the file is FIRST opened.
168     */
169    first = 0;
170  }
171 
172  /*
173   * We use the "seed" to determine whether or not to reopen or
174   * rewind.  A seed s == 0 for an open file means rewind; a seed
175   * of anything else forces a close (resetting rewind_cnt) followed
176   * by a reopen.
177   */
178  if(state->fp && s ) {
179    if(verbose == D_FILE_INPUT || verbose == D_ALL){
180      fprintf(stdout,"# file_input(): Closing/reopening/resetting %s\n",filename);
181    }
182    fclose(state->fp);
183    state->fp = NULL;
184  }
185 
186  if (state->fp == NULL){
187    if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
188      fprintf(stdout,"# file_input_raw(): Opening %s\n", filename);
189    }
190 
191    /*
192     * If we get here, the file exists, is a regular file, and we know its
193     * length.  We can now open it.  The test catches all other conditions
194     * that might keep the file from reading, e.g. permissions.
195     */
196    if ((state->fp = fopen(filename,"r")) == NULL) {
197      fprintf(stderr,"# file_input_raw(): Error: Cannot open %s, exiting.\n", filename);
198      exit(0);
199    }
200 
201    /*
202     * OK, so if we get here, the file is open.
203     */
204    if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
205      fprintf(stdout,"# file_input_raw(): Opened %s for the first time.\n", filename);
206      fprintf(stdout,"# file_input_raw(): state->fp is %8p, file contains %u unsigned integers.\n",(void*) state->fp,(uint)state->flen);
207    }
208    state->rptr = 0;  /* No rands read yet */
209    /*
210     * We only reset the entire file if there is a nonzero seed passed in.
211     * This clears both rtot and rewind_cnt in addition to rptr.
212     */
213    if(s) {
214      state->rtot = 0;
215      state->rewind_cnt = 0;
216    }
217 
218  } else {
219    /*
220     * Rewinding seriously reduces the size of the space being explored.
221     * On the other hand, bombing a test also sucks, especially in a long
222     * -a(ll) run.  Therefore we rewind every time our file pointer reaches
223     * the end of the file or call gsl_rng_set(rng,0).
224     */
225    if(state->flen && state->rptr >= state->flen){
226      rewind(state->fp);
227      state->rptr = 0;
228      state->rewind_cnt++;
229      if(verbose == D_FILE_INPUT_RAW || verbose == D_ALL){
230        fprintf(stderr,"# file_input_raw(): Rewinding %s at rtot = %u\n", filename,(uint) state->rtot);
231        fprintf(stderr,"# file_input_raw(): Rewind count = %u, resetting rptr = %u\n",state->rewind_cnt,(uint) state->rptr);
232      }
233    } else {
234      return;
235    }
236 
237  }
238 
239 }
240 
241 static const gsl_rng_type file_input_raw_type =
242 {"file_input_raw",                        /* name */
243  UINT_MAX,                    /* RAND_MAX */
244  0,                           /* RAND_MIN */
245  sizeof (file_input_state_t),
246  &file_input_raw_set,
247  &file_input_raw_get,
248  &file_input_raw_get_double};
249 
250 const gsl_rng_type *gsl_rng_file_input_raw = &file_input_raw_type;
251