1 /* Saving history in a line-based input stream
2 *
3 * Contents:
4 * 1. The <ESL_RECORDER> object
5 * 2. Using the <ESL_RECORDER>
6 * 3. Internal (static) functions
7 * 4. Benchmark driver
8 * 5. Unit tests
9 * 6. Test driver
10 * 7. Examples
11 */
12 #include "esl_config.h"
13
14 #include <string.h>
15
16 #include "easel.h"
17 #include "esl_recorder.h"
18
19 static void linearray_reverse(ESL_RECORDER *rc, int pos, int n);
20 static int recorder_new_baseline(ESL_RECORDER *rc, int newbase);
21
22 /*****************************************************************
23 * 1. The <ESL_RECORDER> object
24 *****************************************************************/
25
26 /* Function: esl_recorder_Create()
27 * Synopsis: Create an <ESL_RECORDER>.
28 * Incept: SRE, Fri Dec 25 16:27:40 2009 [Casa de Gatos]
29 *
30 * Purpose: Allocate a new <ESL_RECORDER> that will read
31 * line-by-line from input stream <fp>, saving
32 * a history of up to <maxlines> lines.
33 *
34 * Returns: pointer to the new <ESL_RECORDER> on success.
35 *
36 * Throws: <NULL> on allocation failure.
37 */
38 ESL_RECORDER *
esl_recorder_Create(FILE * fp,int maxlines)39 esl_recorder_Create(FILE *fp, int maxlines)
40 {
41 ESL_RECORDER *rc = NULL;
42 int i;
43 int status;
44
45 ESL_ALLOC(rc, sizeof(ESL_RECORDER));
46 rc->fp = fp;
47 rc->line = NULL;
48 rc->nalloc = maxlines;
49 rc->lalloc = NULL;
50 rc->offset = NULL;
51 rc->nread = 0;
52 rc->ncurr = 0;
53 rc->baseline = 0;
54 rc->markline = -1;
55
56 ESL_ALLOC(rc->line, sizeof(char *) * rc->nalloc);
57 for (i = 0; i < rc->nalloc; i++) rc->line[i] = NULL;
58
59 ESL_ALLOC(rc->lalloc, sizeof(int) * rc->nalloc);
60 for (i = 0; i < rc->nalloc; i++) rc->lalloc[i] = 0;
61
62 ESL_ALLOC(rc->offset, sizeof(off_t) * rc->nalloc);
63 for (i = 0; i < rc->nalloc; i++) rc->offset[i] = 0;
64
65 return rc;
66
67 ERROR:
68 esl_recorder_Destroy(rc);
69 return NULL;
70 }
71
72 /* Function: esl_recorder_ResizeTo()
73 * Synopsis: Reallocate an <ESL_RECORDER> for a new <maxlines>
74 * Incept: SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos]
75 *
76 * Purpose: Reallocate the <ESL_RECORDER> <rc> to have a new
77 * window size <maxlines>.
78 *
79 * The new <maxlines> may be more or less than the previous
80 * window size for <rc>.
81 *
82 * Returns: <eslOK> on success.
83 *
84 * Throws: <eslEMEM> if (re-)allocation fails.
85 *
86 * <eslEINVAL> if the recorder has a marked line (for start
87 * of a block) and you try to shrink it so much that that
88 * marked line would be lost.
89 *
90 * <eslEINCONCEIVABLE> on any baseline resetting problem;
91 * this would have to be an internal error in the module.
92 *
93 * Note: We may have to repermute the line array, and reset its
94 * baseline, as follows.
95 *
96 * In the growth case: if the line array is out of order
97 * (circularly permuted) we must straighten it out, which
98 * means resetting the baseline.
99 * i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x;
100 * simple reallocation to 3 1 2 x x x doesn't work,
101 * next read would make 3 4 2 x x x.
102 *
103 * In the shrinkage case: if the line array is in use beyond the
104 * new array size, we set a new baseline to keep as much of the
105 * old array as possible.
106 *
107 * i.e. for 6->3
108 * 1 2 3 x x x -> 1 2 3
109 * 1 2 3 4 x x -> 2 3 4 with new baseline=2.
110 * 4 5 0 1 2 3 -> 3 4 5 with new baseline=3
111 */
112 int
esl_recorder_ResizeTo(ESL_RECORDER * rc,int new_maxlines)113 esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines)
114 {
115 int idx;
116 int newbase;
117 void *tmp;
118 int minlines;
119 int status;
120
121 if (new_maxlines == rc->nalloc) return eslOK;
122
123 if (new_maxlines > rc->nalloc) /* growth case */
124 {
125 if ((rc->nread - rc->baseline) / rc->nalloc != 0) /* array is permuted; reorder it */
126 {
127 newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
128 status = recorder_new_baseline(rc, newbase);
129 if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
130 }
131 }
132 else /* shrinkage case */
133 {
134 /* check that the marked line (if any) will stay in window */
135 if (rc->markline >= 0)
136 {
137 minlines = rc->nread - rc->markline;
138 if (new_maxlines < minlines)
139 ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line");
140 }
141 /* check that current line will stay in window */
142 minlines = rc->nread - rc->ncurr + 1;
143 if (new_maxlines < minlines)
144 ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line");
145
146 if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */
147 {
148 newbase = rc->nread - new_maxlines;
149 status = recorder_new_baseline(rc, newbase);
150 if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
151 }
152
153 for (idx = new_maxlines; idx < rc->nalloc; idx++)
154 if (rc->line[idx]) free(rc->line[idx]);
155 }
156
157 ESL_RALLOC(rc->line, tmp, sizeof(char *) * new_maxlines);
158 ESL_RALLOC(rc->lalloc, tmp, sizeof(int) * new_maxlines);
159 ESL_RALLOC(rc->offset, tmp, sizeof(off_t) * new_maxlines);
160 for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */
161 {
162 rc->line[idx] = NULL;
163 rc->lalloc[idx] = 0;
164 rc->offset[idx] = 0;
165 }
166 rc->nalloc = new_maxlines;
167 return eslOK;
168
169 ERROR:
170 return status;
171 }
172
173 /* Function: esl_recorder_GetFirst()
174 * Synopsis: Returns the earliest linenumber stored.
175 * Incept: SRE, Sat Jan 2 13:18:38 2010 [Zaragoza]
176 *
177 * Purpose: Returns the earliest line number that is
178 * stored in the recorder <rc>.
179 */
180 int
esl_recorder_GetFirst(ESL_RECORDER * rc)181 esl_recorder_GetFirst(ESL_RECORDER *rc)
182 {
183 return (ESL_MAX(rc->baseline, rc->nread-rc->nalloc));
184 }
185
186 /* Function: esl_recorder_GetLast()
187 * Synopsis: Returns the furthest linenumber stored.
188 * Incept: SRE, Sat Jan 2 13:19:45 2010 [Zaragoza]
189 *
190 * Purpose: Returns the furthest line number that is
191 * stored in the recorder <rc> -- the furthest
192 * we have read into the input stream so far.
193 * (This is not necessarily the current
194 * position in the stream, if we have repositioned.)
195 */
196 int
esl_recorder_GetLast(ESL_RECORDER * rc)197 esl_recorder_GetLast(ESL_RECORDER *rc)
198 {
199 return (rc->nread-1);
200 }
201
202 /* Function: esl_recorder_GetCurrent()
203 * Synopsis: Returns the current line number.
204 * Incept: SRE, Sat Jan 2 13:21:13 2010 [Zaragoza]
205 *
206 * Purpose: Returns the current line number -- the
207 * line number most recently returned by
208 * a call to <esl_recorder_Read()).
209 */
210 int
esl_recorder_GetCurrent(ESL_RECORDER * rc)211 esl_recorder_GetCurrent(ESL_RECORDER *rc)
212 {
213 return (rc->ncurr-1);
214 }
215
216 /* Function: esl_recorder_GetNext()
217 * Synopsis: Returns the next line number.
218 * Incept: SRE, Sat Jan 2 13:21:13 2010 [Zaragoza]
219 *
220 * Purpose: Returns the next line number that would
221 * be read by a call to <esl_recorder_Read()).
222 */
223 int
esl_recorder_GetNext(ESL_RECORDER * rc)224 esl_recorder_GetNext(ESL_RECORDER *rc)
225 {
226 return (rc->ncurr);
227 }
228
229
230
231
232 /* Function: esl_recorder_Destroy()
233 * Synopsis: Frees an <ESL_RECORDER>.
234 * Incept: SRE, Fri Dec 25 16:30:14 2009 [Casa de Gatos]
235 *
236 * Purpose: Frees the <ESL_RECORDER> <rc>.
237 *
238 * Returns: (void).
239 */
240 void
esl_recorder_Destroy(ESL_RECORDER * rc)241 esl_recorder_Destroy(ESL_RECORDER *rc)
242 {
243 int i;
244
245 if (rc == NULL) return;
246
247 if (rc->offset) free(rc->offset);
248 if (rc->lalloc) free(rc->lalloc);
249 if (rc->line) {
250 for (i = 0; i < rc->nalloc; i++)
251 if (rc->line[i]) free(rc->line[i]);
252 free(rc->line);
253 }
254 free(rc);
255 return;
256 }
257 /*--------------- end, <ESL_RECORDER> object --------------------*/
258
259
260
261
262 /*****************************************************************
263 * 2. Using the <ESL_RECORDER>
264 *****************************************************************/
265
266 /* Function: esl_recorder_Read()
267 * Synopsis: Read next line of a stream through an <ESL_RECORDER>.
268 * Incept: SRE, Fri Dec 25 16:31:00 2009 [Casa de Gatos]
269 *
270 * Purpose: Read the next line of the input stream that the
271 * <ESL_RECORDER> <rc> is recording. Return a ptr to
272 * it in <*opt_line>. Note that the <ESL_RECORDER>
273 * deals with allocation and freeing of this line;
274 * if caller wants to keep it for something, it must
275 * make a copy immediately, because subsequent calls
276 * to <esl_recorder_*> functions may overwrite these
277 * internal memory buffers.
278 *
279 * Returns: <eslOK> on success.
280 * <eslEOF> if no more lines exist in the stream.
281 *
282 * Throws: <eslEMEM> on an allocation failure.
283 */
284 int
esl_recorder_Read(ESL_RECORDER * rc,char ** opt_line)285 esl_recorder_Read(ESL_RECORDER *rc, char **opt_line)
286 {
287 int idx = (rc->ncurr - rc->baseline) % rc->nalloc; /* index of line to read, in wrapped coords */
288 int status;
289
290 /* if currline <= lastline, we already have the line recorded;
291 * else we need to read a new one from <fp> */
292 if (rc->ncurr >= rc->nread)
293 {
294 /* if reading a new line would overwrite our marked start, grow */
295 if ( rc->markline >= 0 &&
296 ((rc->ncurr - rc->baseline) % rc->nalloc == ((rc->markline - rc->baseline) % rc->nalloc)))
297 {
298 int xtra = ESL_MAX(3, (rc->nalloc / 3));
299 status = esl_recorder_ResizeTo(rc, rc->nalloc + xtra);
300 if (status) goto ERROR;
301 idx = (rc->ncurr - rc->baseline) % rc->nalloc;
302 }
303
304 rc->offset[idx] = ftello(rc->fp);
305 status = esl_fgets(&(rc->line[idx]), &(rc->lalloc[idx]), rc->fp);
306 if (status) goto ERROR;
307 rc->nread++;
308 }
309
310 rc->ncurr++;
311 if (opt_line) *opt_line = rc->line[idx];
312 return eslOK;
313
314 ERROR:
315 if (opt_line) *opt_line = NULL;
316 return status;
317 }
318
319
320 /* Function: esl_recorder_Position()
321 * Synopsis: Reset the recorder to a new starting line position.
322 * Incept: SRE, Mon Dec 28 10:25:22 2009 [Casa de Gatos]
323 *
324 * Purpose: Reset the recorder <rc> to a new line position <linenumber>,
325 * starting from 0. The next call to <esl_recorder_Read()>
326 * will read this line.
327 *
328 * The <linenumber> can be ahead of the furthest line read
329 * by the recorder so far, in which case it calls
330 * <esl_recorder_Read()> until it reaches the proper
331 * position. This can result in a return code of <eslEOF>,
332 * if no such line exists in the stream.
333 *
334 * If the <linenumber> falls before (outside) the
335 * recorder's history window, an <eslEINVAL> exception is
336 * thrown.
337 *
338 * Returns: <eslOK> on success.
339 * <eslEOF> if <linenumber> is larger than current position
340 * in file, and the stream ends before line <linenumber> is
341 * reached.
342 *
343 * Throws: <eslEMEM> on allocation failure; this can only happen
344 * if <linenumber> is larger than current position in
345 * file, forcing <esl_recorder_Read()> calls to reach that
346 * line.
347 */
348 int
esl_recorder_Position(ESL_RECORDER * rc,int linenumber)349 esl_recorder_Position(ESL_RECORDER *rc, int linenumber)
350 {
351 /* The recorder stores lines MAX(baseline,<nread-nalloc>)..<nread>-1 */
352 int line0 = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
353 int status;
354
355 if (linenumber < line0)
356 ESL_EXCEPTION(eslEINVAL, "recorder's window is past that line");
357
358 if (linenumber >= rc->nread) {
359 while (rc->nread < linenumber)
360 if ((status = esl_recorder_Read(rc, NULL)) != eslOK) return status;
361 }
362
363 rc->ncurr = linenumber;
364 return eslOK;
365 }
366
367 /* Function: esl_recorder_MarkBlock()
368 * Synopsis: Mark first line to be saved in a block.
369 * Incept: SRE, Fri Jan 1 11:13:53 2010 [Magallon]
370 *
371 * Purpose: Mark line number <markline> (0..N-1) in a file being read
372 * through the <ESL_RECORDER> <rc> as the first line in a
373 * block of lines to be parsed later, when the end of
374 * the block is found.
375 *
376 * This mark makes sure that the <ESL_RECORDER> will keep
377 * the entire block of lines in memory, starting at or
378 * before the mark. When a mark is active,
379 * <esl_recorder_Read()> will reallocate and grow the
380 * recorder as necessary, rather than overwriting the mark.
381 *
382 * Returns: <eslOK> on success.
383 *
384 * Throws: <eslEINVAL> if the <markline> has already passed out
385 * of the recorder's memory.
386 */
387 int
esl_recorder_MarkBlock(ESL_RECORDER * rc,int markline)388 esl_recorder_MarkBlock(ESL_RECORDER *rc, int markline)
389 {
390 int line0 = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
391
392 if (markline < line0) ESL_EXCEPTION(eslEINVAL, "recorder window already passed marked line");
393 rc->markline = markline;
394 return eslOK;
395 }
396
397
398 /* Function: esl_recorder_UnmarkBlock()
399 * Synopsis: Remove a marked start of a block.
400 * Incept: SRE, Fri Jan 1 12:47:32 2010 [Magallon]
401 *
402 * Purpose: Release the mark in the <ESL_RECORDER> <rc>, if any.
403 *
404 * The recorder will no longer reallocate and grow to keep
405 * the marked line in memory.
406 *
407 * Returns: <eslOK> on success.
408 *
409 * Throws: (no abnormal error conditions)
410 */
411 int
esl_recorder_UnmarkBlock(ESL_RECORDER * rc)412 esl_recorder_UnmarkBlock(ESL_RECORDER *rc)
413 {
414 rc->markline = -1;
415 return eslOK;
416 }
417
418
419 /* Function: esl_recorder_GetBlock()
420 * Synopsis: Get a block of lines from a recorder, starting at the mark.
421 * Incept: SRE, Fri Jan 1 12:50:33 2010 [Magallon]
422 *
423 * Purpose: Get pointers into the internal memory arrays of the
424 * recorder <rc>, starting at the marked start of a block
425 * and ending at the most recently read line <rc->ncurr-1>,
426 * so you can parse a block of lines.
427 *
428 * Because these pointers are internally managed by the
429 * recorder <rc>, they should not be freed or reallocated
430 * or things like that. You should also avoid calling any
431 * <esl_recorder_*()> functions until you're done accessing
432 * these data, in case a function call alters the internal
433 * state of the object.
434 *
435 * If you do something that changes the contents of the
436 * lines (like strtok()'ing them), those changes will be
437 * preserved -- if you want to leave the original recorder
438 * data untouched and you need a temporary working copy of
439 * the data, you should make that copy yourself.
440 *
441 * Args: opt_lines : ptr to array of lines, indexed [0..*opt_nlines-1];
442 * starting with line <rc->markline> and ending with
443 * <rc->ncurr-1>, in order.
444 * opt_lalloc : array of memory allocations for each line
445 * opt_offset : array of offsets into input stream for start of each line
446 * opt_nlines : number of lines (minimally) valid in these arrays,
447 * starting from the mark and ending at the most recent
448 * line read.
449 *
450 * Returns: <eslOK> on success.
451 *
452 * Throws: (no abnormal error conditions)
453 */
454 int
esl_recorder_GetBlock(ESL_RECORDER * rc,char *** opt_lines,int ** opt_lalloc,off_t ** opt_offset,int * opt_nlines)455 esl_recorder_GetBlock(ESL_RECORDER *rc, char ***opt_lines, int **opt_lalloc, off_t **opt_offset, int *opt_nlines)
456 {
457 int idx0, idx1;
458 int status;
459
460 /* Everything from the markline to ncurr-1 must be in order and not
461 * permuted. If it isn't in proper order, then reorder the recorder
462 * to have ncurr-1 in last array position.
463 */
464 idx0 = (rc->markline - rc->baseline) % rc->nalloc;
465 idx1 = (rc->ncurr-1 - rc->baseline) % rc->nalloc;
466 if (idx0 > idx1)
467 {
468 if ((status = recorder_new_baseline(rc, rc->ncurr-rc->nalloc)) != eslOK) goto ERROR;
469 idx0 = (rc->markline - rc->baseline) % rc->nalloc;
470 }
471
472 if (opt_lines) *opt_lines = rc->line + idx0;
473 if (opt_lalloc) *opt_lalloc = rc->lalloc + idx0;
474 if (opt_offset) *opt_offset = rc->offset + idx0;
475 if (opt_nlines) *opt_nlines = rc->ncurr - rc->markline;
476 return eslOK;
477
478 ERROR:
479 if (opt_lines) *opt_lines = NULL;
480 if (opt_lalloc) *opt_lalloc = NULL;
481 if (opt_offset) *opt_offset = NULL;
482 if (opt_nlines) *opt_nlines = 0;
483 return status;
484 }
485 /*----------------- end, using ESL_RECORDER ---------------------*/
486
487
488
489
490 /*****************************************************************
491 * 3. Internal (static) functions
492 *****************************************************************/
493
494 /* linearray_reverse()
495 * In-place O(N) reversal of a subsection of the line array data,
496 * starting at i=pos, for n positions.
497 */
498 static void
linearray_reverse(ESL_RECORDER * rc,int pos,int n)499 linearray_reverse(ESL_RECORDER *rc, int pos, int n)
500 {
501 int i;
502 char *tmps;
503 int tmpi;
504 off_t tmpo;
505
506 /* the line array itself */
507 for (i = 0; i < n/2; i++)
508 {
509 tmps = rc->line[pos+n-i-1];
510 rc->line[pos+n-i-1] = rc->line[pos+i];
511 rc->line[pos+i] = tmps;
512 }
513
514 /* the line allocation array */
515 for (i = 0; i < n/2; i++)
516 {
517 tmpi = rc->lalloc[pos+n-i-1];
518 rc->lalloc[pos+n-i-1] = rc->lalloc[pos+i];
519 rc->lalloc[pos+i] = tmpi;
520 }
521
522 /* the offset array */
523 for (i = 0; i < n/2; i++)
524 {
525 tmpo = rc->offset[pos+n-i-1];
526 rc->offset[pos+n-i-1] = rc->offset[pos+i];
527 rc->offset[pos+i] = tmpo;
528 }
529 }
530
531 /* recorder_new_baseline()
532 * SRE, Fri Jan 1 09:00:55 2010 [Zaragoza]
533 *
534 * Set a ESL_RECORDER <rc> to a new baseline <newbase>, [0..N-1],
535 * greater than the previous baseline in the recorder.
536 *
537 * In general, must succeed, returning <eslOK>. If new baseline
538 * is <= old one, throws <eslEINVAL>, but you shouldn't do that.
539 *
540 * This is done in place in O(1) memory (no addiional or temporary
541 * allocation) and O(nalloc) time, using a trick: we can redo any
542 * circular permutation by no more than four in-place substring
543 * reversals:
544 *
545 * 456|123 -> 654|321 -> 65|4321 -> 56|1234
546 * (reversals) (new brkpt) (reversals)
547 *
548 * Some possible cases (all examples nalloc=7)
549 * 0 1 2 3 4 x x nread=5 [still filling the circle]
550 * 0 1 2 3 4 5 6 nread=7 [full, no wrapping yet]
551 * 7 8 2 3 4 5 6 nread=9 [wrapped]
552 * 7 8 9 10 11 12 13 nread=14 [back in order]
553 * 2 3 4 x x x x nread=5 baseline=2
554 * 2 3 4 5 6 7 8 nread=9 baseline=2
555 * 9 10 4 5 6 7 8 nread=11 baseline=2
556 * 9 10 11 12 13 14 15 nread=16 baseline=2
557 *
558 * By reversing the two substrings (lengths n1 and n2), we now have a
559 * complete string in reverse order. Our examples now look like:
560 * n1 n2
561 * 5 0 4 3 2 1 0 x x
562 * 7 0 6 5 4 3 2 1 0
563 * 2 5 8 7 6 5 4 3 2
564 * 7 0 13 12 11 10 9 8 7
565 * 3 0 4 3 2 x x x x
566 * 7 0 8 7 6 5 4 3 2
567 * 2 5 10 9 8 7 6 5 4
568 * 7 0 15 14 13 12 11 10 9
569 *
570 * After reversing two substrings calculated under the new
571 * baseline, our work is done. For example, if newbase=1,
572 * our first set of examples would look like:
573 * n1 n2
574 * 4 0 1 2 3 4 [0 x x]
575 * 6 0 1 2 3 4 5 6 [0]
576 * 1 6 8 2 3 4 5 6 7
577 * 6 1 8 9 10 11 12 13 7
578 * and for newbase=3, the second set look like:
579 * 2 0 3 4 [2 x x x x]
580 * 6 0 3 4 5 6 7 8 [2]
581 * 1 6 10 4 5 6 7 8 9
582 * 6 1 10 11 12 13 14 15 9
583 */
584 static int
recorder_new_baseline(ESL_RECORDER * rc,int newbase)585 recorder_new_baseline(ESL_RECORDER *rc, int newbase)
586 {
587 int n1, n2;
588
589 if (newbase < rc->baseline) ESL_EXCEPTION(eslEINVAL, "new baseline must be > old one");
590 if (newbase == rc->baseline) return eslOK;
591
592 n1 = (rc->nread - rc->baseline) % rc->nalloc;
593 n2 = ESL_MIN(rc->nread-rc->baseline, rc->nalloc) - n1;
594
595 if (n1>1) linearray_reverse(rc, 0, n1);
596 if (n2>1) linearray_reverse(rc, n1, n2);
597
598 n1 = (rc->nread - newbase) % rc->nalloc;
599 n2 = ESL_MIN(rc->nread - newbase, rc->nalloc) - n1;
600
601 if (n1>1) linearray_reverse(rc, 0, n1);
602 if (n2>1) linearray_reverse(rc, n1, n2);
603
604 rc->baseline = newbase;
605 return eslOK;
606 }
607 /*----------------- end, internal functions ---------------------*/
608
609
610
611
612
613 /*****************************************************************
614 * 4. Benchmark driver
615 *****************************************************************/
616 #ifdef eslRECORDER_BENCHMARK
617 /* gcc -O2 -std=gnu99 -DeslRECORDER_BENCHMARK -o esl_recorder_benchmark -I. esl_recorder.c esl_stopwatch.c esl_getopts.c easel.c
618 */
619 #include <stdio.h>
620
621 #include "easel.h"
622 #include "esl_getopts.h"
623 #include "esl_recorder.h"
624 #include "esl_stopwatch.h"
625
626 static ESL_OPTIONS options[] = {
627 /* name type default env range toggles reqs incomp help docgroup*/
628 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
629 { "-N", eslARG_INT, "1000", NULL, "n>0", NULL, NULL, NULL, "set recorder window size in lines", 0 },
630 { 0,0,0,0,0,0,0,0 },
631 };
632 static char usage[] = "[-options] <filename>";
633 static char banner[] = "benchmarking speed of ESL_RECORDER reading";
634
635 int
main(int argc,char ** argv)636 main(int argc, char **argv)
637 {
638 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
639 ESL_STOPWATCH *w = esl_stopwatch_Create();
640 ESL_RECORDER *rc = NULL;
641 char *filename = esl_opt_GetArg(go, 1);
642 int N = esl_opt_GetInteger(go, "-N");
643 FILE *fp = NULL;
644 char *buf = NULL;
645 int balloc = 0;
646 int status;
647
648 if ((fp = fopen(filename, "r")) == NULL) esl_fatal("no such file %s\n", filename);
649 rc = esl_recorder_Create(fp, N);
650 esl_stopwatch_Start(w);
651 while ((status = esl_recorder_Read(rc, &buf)) == eslOK);
652 esl_recorder_Destroy(rc);
653 fclose(fp);
654
655 esl_stopwatch_Stop(w);
656 esl_stopwatch_Display(stdout, w, "recorder time: ");
657
658 if ((fp = fopen(filename, "r")) == NULL) esl_fatal("no such file %s\n", filename);
659 esl_stopwatch_Start(w);
660 while ((status = esl_fgets(&buf, &balloc, fp)) == eslOK);
661 free(buf);
662 fclose(fp);
663
664 esl_stopwatch_Stop(w);
665 esl_stopwatch_Display(stdout, w, "esl_fgets() time: ");
666
667 esl_stopwatch_Destroy(w);
668 esl_getopts_Destroy(go);
669 return 0;
670 }
671 #endif /*eslRECORDER_BENCHMARK*/
672 /*---------------- end, benchmark driver ------------------------*/
673
674
675
676 /*****************************************************************
677 * 5. Unit tests
678 *****************************************************************/
679 #ifdef eslRECORDER_TESTDRIVE
680 #include "esl_random.h"
681
682 static void
generate_testfile(ESL_RANDOMNESS * rng,char * tmpfile,int * is_data,int nlines)683 generate_testfile(ESL_RANDOMNESS *rng, char *tmpfile, int *is_data, int nlines)
684 {
685 char *msg = "esl_recorder:: test file generator failed";
686 FILE *fp = NULL;
687 int in_block = esl_rnd_Roll(rng, 2); /* TRUE | FALSE */
688 int nblock = 1 + esl_rnd_Roll(rng, 10); /* 1..10 */
689 int i;
690
691 if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg);
692 for (i = 0; i < nlines; i++)
693 {
694 is_data[i] = in_block ? TRUE : FALSE;
695 fprintf(fp, "%c%d\n", (in_block ? '#' : ' '), i);
696 if (--nblock == 0) {
697 in_block = ! in_block;
698 nblock = 1 + esl_rnd_Roll(rng, 10); /* 1..10 */
699 }
700 }
701 fclose(fp);
702 }
703
704 static void
utest_basic(char * tmpfile,int N)705 utest_basic(char *tmpfile, int N)
706 {
707 char *msg = "esl_recorder:: basic unit test failed";
708 ESL_RECORDER *rc = NULL;
709 FILE *fp = NULL;
710 int i;
711 char *buf;
712
713 if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg);
714 if ((rc = esl_recorder_Create(fp, N)) == NULL) esl_fatal(msg);
715 for (i = 0; i < N; i++)
716 {
717 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
718 if (atoi(buf+1) != i) esl_fatal(msg);
719 }
720 if (esl_recorder_Read(rc, &buf) != eslEOF) esl_fatal(msg);
721
722 if (buf != NULL) esl_fatal(msg);
723
724 if (esl_recorder_Position(rc, 0) != eslOK) esl_fatal(msg);
725 for (i = 0; i < N; i++)
726 {
727 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
728 if (atoi(buf+1) != i) esl_fatal(msg);
729 }
730 if (esl_recorder_Read(rc, &buf) != eslEOF) esl_fatal(msg);
731
732 fclose(fp);
733 esl_recorder_Destroy(rc);
734 }
735
736
737 static void
utest_grow(char * tmpfile,int N)738 utest_grow(char *tmpfile, int N)
739 {
740 char *msg = "esl_recorder:: grow unit test failed";
741 ESL_RECORDER *rc = NULL;
742 FILE *fp = NULL;
743 int i;
744 char *buf;
745
746 if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg);
747 if ((rc = esl_recorder_Create(fp, 3)) == NULL) esl_fatal(msg);
748 for (i = 0; i < 4; i++)
749 {
750 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
751 if (atoi(buf+1) != i) esl_fatal(msg);
752 }
753 /* recorder has now wrapped: contains lines 3 1 2 */
754
755 if (esl_recorder_Position(rc, 0) != eslEINVAL) esl_fatal(msg);
756 if (esl_recorder_Position(rc, 1) != eslOK) esl_fatal(msg);
757 if (esl_recorder_ResizeTo(rc, 6) != eslOK) esl_fatal(msg);
758 /* now 1 2 3 x x x */
759
760 for (i = 1; i < N; i++)
761 {
762 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
763 if (atoi(buf+1) != i) esl_fatal(msg);
764 }
765 if (esl_recorder_Read(rc, &buf) != eslEOF) esl_fatal(msg);
766
767 fclose(fp);
768 esl_recorder_Destroy(rc);
769 }
770
771 static void
utest_grow2(char * tmpfile,int N)772 utest_grow2(char *tmpfile, int N)
773 {
774 char *msg = "esl_recorder:: grow2 unit test failed";
775 ESL_RECORDER *rc = NULL;
776 FILE *fp = NULL;
777 int i;
778 char *buf;
779
780 if (N < 5) esl_fatal(msg); /* need at least this for this test */
781
782 if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg);
783 if ((rc = esl_recorder_Create(fp, 3)) == NULL) esl_fatal(msg);
784 for (i = 0; i < 4; i++)
785 {
786 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
787 if (atoi(buf+1) != i) esl_fatal(msg);
788 }
789 /* recorder has now wrapped: contains lines 3 1 2 */
790
791 if (esl_recorder_ResizeTo(rc, 6) != eslOK) esl_fatal(msg);
792 /* recorder should now have reset baseline: 1 2 3 x x x */
793
794 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
795 if (atoi(buf+1) != 4) esl_fatal(msg);
796 /* now it should have 1 2 3 4 x x (and not 3 4 2 x x x, or x 1 2 3 4 x, for example) */
797
798 if (esl_recorder_Position(rc, 0) != eslEINVAL) esl_fatal(msg);
799 if (esl_recorder_Position(rc, 1) != eslOK) esl_fatal(msg);
800 for (i = 1; i < N; i++)
801 {
802 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
803 if (atoi(buf+1) != i) esl_fatal(msg);
804 }
805 if (esl_recorder_Read(rc, &buf) != eslEOF) esl_fatal(msg);
806
807 fclose(fp);
808 esl_recorder_Destroy(rc);
809 }
810
811 static void
utest_shrink(char * tmpfile,int N)812 utest_shrink(char *tmpfile, int N)
813 {
814 char *msg = "esl_recorder:: shrink unit test failed";
815 ESL_RECORDER *rc = NULL;
816 FILE *fp = NULL;
817 int i;
818 char *buf;
819
820 if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg);
821 if ((rc = esl_recorder_Create(fp, 6)) == NULL) esl_fatal(msg);
822 for (i = 0; i < 7; i++)
823 {
824 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
825 if (atoi(buf+1) != i) esl_fatal(msg);
826 }
827 /* recorder has now wrapped: contains lines 6 1 2 3 4 5 */
828
829 if (esl_recorder_ResizeTo(rc, 3) != eslOK) esl_fatal(msg);
830 /* now it's 6 4 5 */
831 if (esl_recorder_Position(rc, 4) != eslOK) esl_fatal(msg);
832
833 for (i = 4; i < N; i++)
834 {
835 if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
836 if (atoi(buf+1) != i) esl_fatal(msg);
837 }
838 if (esl_recorder_Read(rc, &buf) != eslEOF) esl_fatal(msg);
839
840 fclose(fp);
841 esl_recorder_Destroy(rc);
842 }
843
844 static void
utest_block(ESL_RANDOMNESS * rng,char * tmpfile,int * is_data,int N)845 utest_block(ESL_RANDOMNESS *rng, char *tmpfile, int *is_data, int N)
846 {
847 char *msg = "esl_recorder:: block unit test failed";
848 ESL_RECORDER *rc = NULL;
849 FILE *fp = NULL;
850 int linenumber = 0; /* where we should be in the file */
851 int max_reposition = 2;
852 int max_realloc = 2;
853 int *nseen1 = NULL; /* # of times we Read() each line */
854 int *nseen2 = NULL; /* # of times we see each line in a block */
855 int minalloc;
856 int roll;
857 char *buf;
858 char **block;
859 int from;
860 int n,i;
861 int status = eslOK;
862
863 if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg);
864 roll = 1+esl_rnd_Roll(rng, N+1); /* 1..N+1 */
865 if ((rc = esl_recorder_Create(fp, roll)) == NULL) esl_fatal(msg);
866
867 if ((nseen1 = malloc(sizeof(int) * N)) == NULL) esl_fatal(msg);
868 if ((nseen2 = malloc(sizeof(int) * N)) == NULL) esl_fatal(msg);
869 for (i = 0; i < N; i++) nseen1[i] = 0;
870 for (i = 0; i < N; i++) nseen2[i] = 0;
871
872 while (status == eslOK)
873 {
874 /* skip nondata lines (no # prefix) */
875 do {
876 if (esl_recorder_Read(rc, &buf) == eslEOF) goto DONE;
877 if (atoi(buf+1) != linenumber) esl_fatal(msg);
878 if (esl_recorder_GetCurrent(rc) != linenumber) esl_fatal(msg);
879 nseen1[linenumber]++;
880 linenumber++;
881 } while (*buf != '#');
882
883 /* read block */
884 from = esl_recorder_GetCurrent(rc);
885 esl_recorder_MarkBlock(rc, from);
886 do {
887 if ((status = esl_recorder_Read(rc, &buf)) == eslEOF) break;
888 if (atoi(buf+1) != linenumber) esl_fatal(msg);
889 if (esl_recorder_GetCurrent(rc) != linenumber) esl_fatal(msg);
890 nseen1[linenumber]++;
891 linenumber++;
892 } while (*buf == '#');
893
894 /* get the block */
895 esl_recorder_GetBlock(rc, &block, NULL, NULL, &n);
896 if (status == eslOK) n--;
897
898 /* check the block */
899 for (i = 0; i < n; i++)
900 {
901 if (atoi(block[i]+1) != from+i) esl_fatal(msg);
902 nseen2[from+i]++;
903 }
904
905 /* unmark it */
906 esl_recorder_UnmarkBlock(rc);
907
908 /* some fraction of the time, reposition randomly */
909 if (status == eslOK && max_reposition && (roll = esl_rnd_Roll(rng, 5)) == 0)
910 {
911 linenumber = esl_recorder_GetFirst(rc) +
912 esl_rnd_Roll(rng, esl_recorder_GetLast(rc) - esl_recorder_GetFirst(rc) + 1);
913 if (esl_recorder_Position(rc, linenumber) != eslOK) esl_fatal(msg);
914 max_reposition--;
915 }
916
917 /* some fraction of the time, shrink the allocation */
918 if (status == eslOK && max_realloc && (roll = esl_rnd_Roll(rng, 5)) == 0)
919 {
920 /* must keep at least nread-ncurr+1 lines, to keep curr line in window */
921 minalloc = rc->nread-rc->ncurr+1;
922 roll = minalloc + esl_rnd_Roll(rng, rc->nalloc-minalloc+1);
923 if (esl_recorder_ResizeTo(rc, roll) != eslOK) esl_fatal(msg);
924 max_realloc--;
925 }
926 }
927
928 DONE:
929 /* we're EOF. We should be sitting on the last line. */
930 if (esl_recorder_GetCurrent(rc) != N-1) esl_fatal(msg);
931
932 /* We should have Read() every line at least once. */
933 for (i = 0; i < N; i++)
934 if (! nseen1[i]) esl_fatal(msg);
935
936 /* In reading blocks, we should have seen each "data" line at least
937 * once; non-data lines, not at all.
938 */
939 for (i = 0; i < N; i++) {
940 if ( is_data[i] && ! nseen2[i]) esl_fatal(msg);
941 if (! is_data[i] && nseen2[i]) esl_fatal(msg);
942 }
943
944 fclose(fp);
945 esl_recorder_Destroy(rc);
946 free(nseen1);
947 free(nseen2);
948 }
949
950 #endif /*eslRECORDER_TESTDRIVE*/
951 /*------------------- end, unit tests ---------------------------*/
952
953
954 /*****************************************************************
955 * 6. Test driver
956 *****************************************************************/
957 #ifdef eslRECORDER_TESTDRIVE
958 /* gcc -O2 -std=gnu99 -DeslRECORDER_TESTDRIVE -o esl_recorder_utest -I. esl_recorder.c esl_stopwatch.c esl_getopts.c esl_random.c easel.c -lm
959 */
960 #include <stdio.h>
961
962 #include "easel.h"
963 #include "esl_getopts.h"
964 #include "esl_random.h"
965 #include "esl_recorder.h"
966
967 static ESL_OPTIONS options[] = {
968 /* name type default env range toggles reqs incomp help docgroup*/
969 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
970 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
971 { "-v", eslARG_NONE, NULL, NULL, NULL, NULL, NULL, NULL, "more verbose output", 0 },
972 { "-N", eslARG_INT, "200", NULL, NULL, NULL, NULL, NULL, "number of lines per test file", 0 },
973 { "-F", eslARG_INT, "100", NULL, NULL, NULL, NULL, NULL, "number of test files", 0 },
974
975 { 0,0,0,0,0,0,0,0 },
976 };
977 static char usage[] = "[-options] <filename>";
978 static char banner[] = "test driver for ESL_RECORDER";
979
980 int
main(int argc,char ** argv)981 main(int argc, char **argv)
982 {
983 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
984 ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
985 char template[13]= "esltmpXXXXXX";
986 char tmpfile[13];
987 int N = esl_opt_GetInteger(go, "-N");
988 int nfiles = esl_opt_GetInteger(go, "-F");
989 int *is_data = malloc(sizeof(int) * N);
990
991 esl_exception_SetHandler(&esl_nonfatal_handler);
992
993 if (esl_opt_GetBoolean(go, "-v")) {
994 printf("random number seed: %" PRIu32 "\n", esl_randomness_GetSeed(rng));
995 }
996
997 while (nfiles--)
998 {
999 strcpy(tmpfile, template);
1000 generate_testfile(rng, tmpfile, is_data, N);
1001
1002 utest_basic (tmpfile, N);
1003 utest_grow (tmpfile, N);
1004 utest_grow2 (tmpfile, N);
1005 utest_shrink(tmpfile, N);
1006 utest_block (rng, tmpfile, is_data, N);
1007
1008 remove(tmpfile);
1009 }
1010
1011 free(is_data);
1012 esl_getopts_Destroy(go);
1013 esl_randomness_Destroy(rng);
1014
1015 printf("ok\n");
1016 return 0;
1017 }
1018 #endif /*eslRECORDER_TESTDRIVE*/
1019 /*------------------- end, test driver --------------------------*/
1020
1021
1022
1023 /*****************************************************************
1024 * 7. Examples
1025 *****************************************************************/
1026 #ifdef eslRECORDER_EXAMPLE
1027 /* gcc -g -Wall -std=gnu99 -DeslRECORDER_EXAMPLE -o esl_recorder_example -I. esl_recorder.c easel.c
1028 */
1029
1030 #include "easel.h"
1031 #include "esl_recorder.h"
1032
1033 int
main(int argc,char ** argv)1034 main(int argc, char **argv)
1035 {
1036 FILE *fp = stdin;
1037 ESL_RECORDER *rc = esl_recorder_Create(fp, 10);
1038 char *line;
1039 int i;
1040 int status;
1041
1042 printf("\nFirst time:\n");
1043 for (i = 0; i < 10; i++)
1044 {
1045 if ((status = esl_recorder_Read(rc, &line)) != eslOK) break; /* watch for EOF */
1046 fputs(line, stdout);
1047 }
1048
1049 esl_recorder_Position(rc, 0); /* rewind to start */
1050
1051 printf("\nOne more time:\n");
1052 for (i = 0; i < 10; i++)
1053 {
1054 if ((status = esl_recorder_Read(rc, &line)) != eslOK) break;
1055 fputs(line, stdout);
1056 }
1057
1058 esl_recorder_Destroy(rc);
1059 return 0;
1060 }
1061 #endif /*eslRECORDER_EXAMPLE*/
1062
1063
1064 #ifdef eslRECORDER_EXAMPLE2
1065 /* gcc -g -Wall -std=gnu99 -DeslRECORDER_EXAMPLE2 -o esl_recorder_example2 -I. esl_recorder.c easel.c
1066 */
1067
1068 #include <stdio.h>
1069 #include <ctype.h>
1070
1071 #include "easel.h"
1072 #include "esl_recorder.h"
1073
1074 static int
is_data(char * s)1075 is_data(char *s)
1076 {
1077 if (*s == '#') return TRUE;
1078 // for (; *s; s++) if (! isspace(*s)) return TRUE;
1079 return FALSE;
1080 }
1081
1082 int
main(int argc,char ** argv)1083 main(int argc, char **argv)
1084 {
1085 FILE *fp = fopen(argv[1], "r");
1086 ESL_RECORDER *rc = esl_recorder_Create(fp, 10);
1087 char *line;
1088 char **block;
1089 int n;
1090 int nblocks=0;
1091 int i;
1092 int status = eslOK;
1093
1094 while (status == eslOK)
1095 {
1096 /* skip lines without # */
1097 do {
1098 if (esl_recorder_Read(rc, &line) == eslEOF) goto DONE;
1099 } while (! is_data(line));
1100
1101 /* read block */
1102 esl_recorder_MarkBlock(rc, esl_recorder_GetCurrent(rc));
1103 do {
1104 status = esl_recorder_Read(rc, &line);
1105 } while (status == eslOK && is_data(line));
1106
1107 /* get the block */
1108 esl_recorder_GetBlock(rc, &block, NULL, NULL, &n);
1109 nblocks++;
1110
1111 /* if we EOF'ed, n lines of block ended with the EOF;
1112 * else, last line was a blank line
1113 */
1114 if (status == eslOK) n--;
1115
1116 /* show it (exclusive of the trailing blank line */
1117 printf("BLOCK %d\n", nblocks);
1118 for (i = 0; i < n; i++)
1119 printf("line %4d: %s", i+1, block[i]);
1120 printf("\n\n");
1121
1122 /* unmark it */
1123 esl_recorder_UnmarkBlock(rc);
1124 }
1125
1126 DONE:
1127 esl_recorder_Destroy(rc);
1128 fclose(fp);
1129 return 0;
1130 }
1131 #endif /*eslRECORDER_EXAMPLE2*/
1132 /*------------------ end, example main() ------------------------*/
1133
1134