1 /* Saving history in a line-based input stream
2  *
3  * Contents:
4  *   1. The <ESL_RECORDER> object
5  *   2. Using the <ESL_RECORDER>
6  *   3. Internal (static) functions
7  *   4. Benchmark driver
8  *   5. Unit tests
9  *   6. Test driver
10  *   7. Examples
11  */
12 #include "esl_config.h"
13 
14 #include <string.h>
15 
16 #include "easel.h"
17 #include "esl_recorder.h"
18 
19 static void linearray_reverse(ESL_RECORDER *rc, int pos, int n);
20 static int  recorder_new_baseline(ESL_RECORDER *rc, int newbase);
21 
22 /*****************************************************************
23  * 1. The <ESL_RECORDER> object
24  *****************************************************************/
25 
26 /* Function:  esl_recorder_Create()
27  * Synopsis:  Create an <ESL_RECORDER>.
28  * Incept:    SRE, Fri Dec 25 16:27:40 2009 [Casa de Gatos]
29  *
30  * Purpose:   Allocate a new <ESL_RECORDER> that will read
31  *            line-by-line from input stream <fp>, saving
32  *            a history of up to <maxlines> lines.
33  *
34  * Returns:   pointer to the new <ESL_RECORDER> on success.
35  *
36  * Throws:    <NULL> on allocation failure.
37  */
38 ESL_RECORDER *
esl_recorder_Create(FILE * fp,int maxlines)39 esl_recorder_Create(FILE *fp, int maxlines)
40 {
41   ESL_RECORDER *rc = NULL;
42   int           i;
43   int           status;
44 
45   ESL_ALLOC(rc, sizeof(ESL_RECORDER));
46   rc->fp         = fp;
47   rc->line       = NULL;
48   rc->nalloc     = maxlines;
49   rc->lalloc     = NULL;
50   rc->offset     = NULL;
51   rc->nread      = 0;
52   rc->ncurr      = 0;
53   rc->baseline   = 0;
54   rc->markline   = -1;
55 
56   ESL_ALLOC(rc->line,   sizeof(char *) * rc->nalloc);
57   for (i = 0; i < rc->nalloc; i++) rc->line[i]   = NULL;
58 
59   ESL_ALLOC(rc->lalloc, sizeof(int)    * rc->nalloc);
60   for (i = 0; i < rc->nalloc; i++) rc->lalloc[i] = 0;
61 
62   ESL_ALLOC(rc->offset, sizeof(off_t)  * rc->nalloc);
63   for (i = 0; i < rc->nalloc; i++) rc->offset[i] = 0;
64 
65   return rc;
66 
67  ERROR:
68   esl_recorder_Destroy(rc);
69   return NULL;
70 }
71 
72 /* Function:  esl_recorder_ResizeTo()
73  * Synopsis:  Reallocate an <ESL_RECORDER> for a new <maxlines>
74  * Incept:    SRE, Fri Dec 25 17:02:46 2009 [Casa de Gatos]
75  *
76  * Purpose:   Reallocate the <ESL_RECORDER> <rc> to have a new
77  *            window size <maxlines>.
78  *
79  *            The new <maxlines> may be more or less than the previous
80  *            window size for <rc>.
81  *
82  * Returns:   <eslOK> on success.
83  *
84  * Throws:    <eslEMEM> if (re-)allocation fails.
85  *
86  *            <eslEINVAL> if the recorder has a marked line (for start
87  *            of a block) and you try to shrink it so much that that
88  *            marked line would be lost.
89  *
90  *            <eslEINCONCEIVABLE> on any baseline resetting problem;
91  *            this would have to be an internal error in the module.
92  *
93  * Note:      We may have to repermute the line array, and reset its
94  *            baseline, as follows.
95  *
96  *            In the growth case: if the line array is out of order
97  *            (circularly permuted) we must straighten it out, which
98  *            means resetting the baseline.
99  *            i.e. to grow 3 1 2 to nalloc=6, we need 1 2 3 x x x;
100  *            simple reallocation to 3 1 2 x x x doesn't work,
101  *            next read would make 3 4 2 x x x.
102  *
103  *            In the shrinkage case: if the line array is in use beyond the
104  *            new array size, we set a new baseline to keep as much of the
105  *            old array as possible.
106  *
107  *            i.e. for 6->3
108  *            1 2 3 x x x -> 1 2 3
109  *            1 2 3 4 x x -> 2 3 4 with new baseline=2.
110  *            4 5 0 1 2 3 -> 3 4 5 with new baseline=3
111  */
112 int
esl_recorder_ResizeTo(ESL_RECORDER * rc,int new_maxlines)113 esl_recorder_ResizeTo(ESL_RECORDER *rc, int new_maxlines)
114 {
115   int   idx;
116   int   newbase;
117   void *tmp;
118   int   minlines;
119   int   status;
120 
121   if (new_maxlines == rc->nalloc) return eslOK;
122 
123   if (new_maxlines > rc->nalloc) /* growth case */
124     {
125       if ((rc->nread - rc->baseline) / rc->nalloc != 0)	/* array is permuted; reorder it */
126 	{
127 	  newbase = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
128 	  status = recorder_new_baseline(rc, newbase);
129 	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
130 	}
131     }
132   else 				/* shrinkage case */
133     {
134       /* check that the marked line (if any) will stay in window */
135       if (rc->markline >= 0)
136 	{
137 	  minlines = rc->nread - rc->markline;
138 	  if (new_maxlines < minlines)
139 	    ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing marked line");
140 	}
141       /* check that current line will stay in window */
142       minlines = rc->nread - rc->ncurr + 1;
143       if (new_maxlines < minlines)
144 	ESL_EXCEPTION(eslEINVAL, "can't shrink that far without losing current line");
145 
146       if (rc->nread - rc->baseline > new_maxlines) /* baseline needs to move up */
147 	{
148 	  newbase = rc->nread - new_maxlines;
149 	  status = recorder_new_baseline(rc, newbase);
150 	  if (status) ESL_EXCEPTION(eslEINCONCEIVABLE, "baseline reset failed unexpectedly");
151 	}
152 
153       for (idx = new_maxlines; idx < rc->nalloc; idx++)
154 	if (rc->line[idx]) free(rc->line[idx]);
155     }
156 
157   ESL_RALLOC(rc->line,   tmp, sizeof(char *) * new_maxlines);
158   ESL_RALLOC(rc->lalloc, tmp, sizeof(int)    * new_maxlines);
159   ESL_RALLOC(rc->offset, tmp, sizeof(off_t)  * new_maxlines);
160   for (idx = rc->nalloc; idx < new_maxlines; idx++) /* no-op in shrinkage case */
161     {
162       rc->line[idx]   = NULL;
163       rc->lalloc[idx] = 0;
164       rc->offset[idx] = 0;
165     }
166   rc->nalloc = new_maxlines;
167   return eslOK;
168 
169  ERROR:
170   return status;
171 }
172 
173 /* Function:  esl_recorder_GetFirst()
174  * Synopsis:  Returns the earliest linenumber stored.
175  * Incept:    SRE, Sat Jan  2 13:18:38 2010 [Zaragoza]
176  *
177  * Purpose:   Returns the earliest line number that is
178  *            stored in the recorder <rc>.
179  */
180 int
esl_recorder_GetFirst(ESL_RECORDER * rc)181 esl_recorder_GetFirst(ESL_RECORDER *rc)
182 {
183   return (ESL_MAX(rc->baseline, rc->nread-rc->nalloc));
184 }
185 
186 /* Function:  esl_recorder_GetLast()
187  * Synopsis:  Returns the furthest linenumber stored.
188  * Incept:    SRE, Sat Jan  2 13:19:45 2010 [Zaragoza]
189  *
190  * Purpose:   Returns the furthest line number that is
191  *            stored in the recorder <rc> -- the furthest
192  *            we have read into the input stream so far.
193  *            (This is not necessarily the current
194  *            position in the stream, if we have repositioned.)
195  */
196 int
esl_recorder_GetLast(ESL_RECORDER * rc)197 esl_recorder_GetLast(ESL_RECORDER *rc)
198 {
199   return (rc->nread-1);
200 }
201 
202 /* Function:  esl_recorder_GetCurrent()
203  * Synopsis:  Returns the current line number.
204  * Incept:    SRE, Sat Jan  2 13:21:13 2010 [Zaragoza]
205  *
206  * Purpose:   Returns the current line number -- the
207  *            line number most recently returned by
208  *            a call to <esl_recorder_Read()).
209  */
210 int
esl_recorder_GetCurrent(ESL_RECORDER * rc)211 esl_recorder_GetCurrent(ESL_RECORDER *rc)
212 {
213   return (rc->ncurr-1);
214 }
215 
216 /* Function:  esl_recorder_GetNext()
217  * Synopsis:  Returns the next line number.
218  * Incept:    SRE, Sat Jan  2 13:21:13 2010 [Zaragoza]
219  *
220  * Purpose:   Returns the next line number that would
221  *            be read by a call to <esl_recorder_Read()).
222  */
223 int
esl_recorder_GetNext(ESL_RECORDER * rc)224 esl_recorder_GetNext(ESL_RECORDER *rc)
225 {
226   return (rc->ncurr);
227 }
228 
229 
230 
231 
232 /* Function:  esl_recorder_Destroy()
233  * Synopsis:  Frees an <ESL_RECORDER>.
234  * Incept:    SRE, Fri Dec 25 16:30:14 2009 [Casa de Gatos]
235  *
236  * Purpose:   Frees the <ESL_RECORDER> <rc>.
237  *
238  * Returns:   (void).
239  */
240 void
esl_recorder_Destroy(ESL_RECORDER * rc)241 esl_recorder_Destroy(ESL_RECORDER *rc)
242 {
243   int i;
244 
245   if (rc == NULL) return;
246 
247   if (rc->offset) free(rc->offset);
248   if (rc->lalloc) free(rc->lalloc);
249   if (rc->line) {
250     for (i = 0; i < rc->nalloc; i++)
251       if (rc->line[i]) free(rc->line[i]);
252     free(rc->line);
253   }
254   free(rc);
255   return;
256 }
257 /*--------------- end, <ESL_RECORDER> object --------------------*/
258 
259 
260 
261 
262 /*****************************************************************
263  * 2. Using the <ESL_RECORDER>
264  *****************************************************************/
265 
266 /* Function:  esl_recorder_Read()
267  * Synopsis:  Read next line of a stream through an <ESL_RECORDER>.
268  * Incept:    SRE, Fri Dec 25 16:31:00 2009 [Casa de Gatos]
269  *
270  * Purpose:   Read the next line of the input stream that the
271  *            <ESL_RECORDER> <rc> is recording. Return a ptr to
272  *            it in <*opt_line>. Note that the <ESL_RECORDER>
273  *            deals with allocation and freeing of this line;
274  *            if caller wants to keep it for something, it must
275  *            make a copy immediately, because subsequent calls
276  *            to <esl_recorder_*> functions may overwrite these
277  *            internal memory buffers.
278  *
279  * Returns:   <eslOK> on success.
280  *            <eslEOF> if no more lines exist in the stream.
281  *
282  * Throws:    <eslEMEM> on an allocation failure.
283  */
284 int
esl_recorder_Read(ESL_RECORDER * rc,char ** opt_line)285 esl_recorder_Read(ESL_RECORDER *rc, char **opt_line)
286 {
287   int idx = (rc->ncurr - rc->baseline) % rc->nalloc;     /* index of line to read, in wrapped coords */
288   int status;
289 
290   /* if currline <= lastline, we already have the line recorded;
291    * else we need to read a new one from <fp> */
292   if (rc->ncurr >= rc->nread)
293     {
294       /* if reading a new line would overwrite our marked start, grow */
295       if ( rc->markline >= 0 &&
296 	   ((rc->ncurr - rc->baseline) % rc->nalloc == ((rc->markline - rc->baseline) % rc->nalloc)))
297 	{
298 	  int xtra = ESL_MAX(3, (rc->nalloc / 3));
299 	  status = esl_recorder_ResizeTo(rc, rc->nalloc + xtra);
300 	  if (status) goto ERROR;
301 	  idx = (rc->ncurr - rc->baseline) % rc->nalloc;
302 	}
303 
304       rc->offset[idx] = ftello(rc->fp);
305       status = esl_fgets(&(rc->line[idx]), &(rc->lalloc[idx]), rc->fp);
306       if (status) goto ERROR;
307       rc->nread++;
308     }
309 
310   rc->ncurr++;
311   if (opt_line) *opt_line = rc->line[idx];
312   return eslOK;
313 
314  ERROR:
315   if (opt_line) *opt_line = NULL;
316   return status;
317 }
318 
319 
320 /* Function:  esl_recorder_Position()
321  * Synopsis:  Reset the recorder to a new starting line position.
322  * Incept:    SRE, Mon Dec 28 10:25:22 2009 [Casa de Gatos]
323  *
324  * Purpose:   Reset the recorder <rc> to a new line position <linenumber>,
325  *            starting from 0. The next call to <esl_recorder_Read()>
326  *            will read this line.
327  *
328  *            The <linenumber> can be ahead of the furthest line read
329  *            by the recorder so far, in which case it calls
330  *            <esl_recorder_Read()> until it reaches the proper
331  *            position. This can result in a return code of <eslEOF>,
332  *            if no such line exists in the stream.
333  *
334  *            If the <linenumber> falls before (outside) the
335  *            recorder's history window, an <eslEINVAL> exception is
336  *            thrown.
337  *
338  * Returns:   <eslOK> on success.
339  *            <eslEOF> if <linenumber> is larger than current position
340  *            in file, and the stream ends before line <linenumber> is
341  *            reached.
342  *
343  * Throws:    <eslEMEM> on allocation failure; this can only happen
344  *            if <linenumber> is larger than current position in
345  *            file, forcing <esl_recorder_Read()> calls to reach that
346  *            line.
347  */
348 int
esl_recorder_Position(ESL_RECORDER * rc,int linenumber)349 esl_recorder_Position(ESL_RECORDER *rc, int linenumber)
350 {
351   /* The recorder stores lines MAX(baseline,<nread-nalloc>)..<nread>-1 */
352   int line0  = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
353   int status;
354 
355   if (linenumber < line0)
356     ESL_EXCEPTION(eslEINVAL, "recorder's window is past that line");
357 
358   if (linenumber >= rc->nread) {
359     while (rc->nread < linenumber)
360       if ((status = esl_recorder_Read(rc, NULL)) != eslOK) return status;
361   }
362 
363   rc->ncurr = linenumber;
364   return eslOK;
365 }
366 
367 /* Function:  esl_recorder_MarkBlock()
368  * Synopsis:  Mark first line to be saved in a block.
369  * Incept:    SRE, Fri Jan  1 11:13:53 2010 [Magallon]
370  *
371  * Purpose:   Mark line number <markline> (0..N-1) in a file being read
372  *            through the <ESL_RECORDER> <rc> as the first line in a
373  *            block of lines to be parsed later, when the end of
374  *            the block is found.
375  *
376  *            This mark makes sure that the <ESL_RECORDER> will keep
377  *            the entire block of lines in memory, starting at or
378  *            before the mark. When a mark is active,
379  *            <esl_recorder_Read()> will reallocate and grow the
380  *            recorder as necessary, rather than overwriting the mark.
381  *
382  * Returns:   <eslOK> on success.
383  *
384  * Throws:    <eslEINVAL> if the <markline> has already passed out
385  *            of the recorder's memory.
386  */
387 int
esl_recorder_MarkBlock(ESL_RECORDER * rc,int markline)388 esl_recorder_MarkBlock(ESL_RECORDER *rc, int markline)
389 {
390   int line0 = ESL_MAX(rc->baseline, rc->nread - rc->nalloc);
391 
392   if (markline < line0) ESL_EXCEPTION(eslEINVAL, "recorder window already passed marked line");
393   rc->markline = markline;
394   return eslOK;
395 }
396 
397 
398 /* Function:  esl_recorder_UnmarkBlock()
399  * Synopsis:  Remove a marked start of a block.
400  * Incept:    SRE, Fri Jan  1 12:47:32 2010 [Magallon]
401  *
402  * Purpose:   Release the mark in the <ESL_RECORDER> <rc>, if any.
403  *
404  *            The recorder will no longer reallocate and grow to keep
405  *            the marked line in memory.
406  *
407  * Returns:   <eslOK> on success.
408  *
409  * Throws:    (no abnormal error conditions)
410  */
411 int
esl_recorder_UnmarkBlock(ESL_RECORDER * rc)412 esl_recorder_UnmarkBlock(ESL_RECORDER *rc)
413 {
414   rc->markline = -1;
415   return eslOK;
416 }
417 
418 
419 /* Function:  esl_recorder_GetBlock()
420  * Synopsis:  Get a block of lines from a recorder, starting at the mark.
421  * Incept:    SRE, Fri Jan  1 12:50:33 2010 [Magallon]
422  *
423  * Purpose:   Get pointers into the internal memory arrays of the
424  *            recorder <rc>, starting at the marked start of a block
425  *            and ending at the most recently read line <rc->ncurr-1>,
426  *            so you can parse a block of lines.
427  *
428  *            Because these pointers are internally managed by the
429  *            recorder <rc>, they should not be freed or reallocated
430  *            or things like that. You should also avoid calling any
431  *            <esl_recorder_*()> functions until you're done accessing
432  *            these data, in case a function call alters the internal
433  *            state of the object.
434  *
435  *            If you do something that changes the contents of the
436  *            lines (like strtok()'ing them), those changes will be
437  *            preserved -- if you want to leave the original recorder
438  *            data untouched and you need a temporary working copy of
439  *            the data, you should make that copy yourself.
440  *
441  * Args:      opt_lines  : ptr to array of lines, indexed [0..*opt_nlines-1];
442  *                         starting with line <rc->markline> and ending with
443  *                         <rc->ncurr-1>, in order.
444  *            opt_lalloc : array of memory allocations for each line
445  *            opt_offset : array of offsets into input stream for start of each line
446  *            opt_nlines : number of lines (minimally) valid in these arrays,
447  *                         starting from the mark and ending at the most recent
448  *                         line read.
449  *
450  * Returns:   <eslOK> on success.
451  *
452  * Throws:    (no abnormal error conditions)
453  */
454 int
esl_recorder_GetBlock(ESL_RECORDER * rc,char *** opt_lines,int ** opt_lalloc,off_t ** opt_offset,int * opt_nlines)455 esl_recorder_GetBlock(ESL_RECORDER *rc, char ***opt_lines, int **opt_lalloc, off_t **opt_offset, int *opt_nlines)
456 {
457   int idx0, idx1;
458   int status;
459 
460   /* Everything from the markline to ncurr-1 must be in order and not
461    * permuted.  If it isn't in proper order, then reorder the recorder
462    * to have ncurr-1 in last array position.
463    */
464   idx0 = (rc->markline - rc->baseline) % rc->nalloc;
465   idx1 = (rc->ncurr-1  - rc->baseline) % rc->nalloc;
466   if (idx0 > idx1)
467     {
468       if ((status = recorder_new_baseline(rc, rc->ncurr-rc->nalloc)) != eslOK) goto ERROR;
469       idx0 = (rc->markline - rc->baseline) % rc->nalloc;
470     }
471 
472   if (opt_lines)  *opt_lines  = rc->line   + idx0;
473   if (opt_lalloc) *opt_lalloc = rc->lalloc + idx0;
474   if (opt_offset) *opt_offset = rc->offset + idx0;
475   if (opt_nlines) *opt_nlines = rc->ncurr  - rc->markline;
476   return eslOK;
477 
478  ERROR:
479   if (opt_lines)  *opt_lines  = NULL;
480   if (opt_lalloc) *opt_lalloc = NULL;
481   if (opt_offset) *opt_offset = NULL;
482   if (opt_nlines) *opt_nlines = 0;
483   return status;
484 }
485 /*----------------- end, using ESL_RECORDER ---------------------*/
486 
487 
488 
489 
490 /*****************************************************************
491  * 3. Internal (static) functions
492  *****************************************************************/
493 
494 /* linearray_reverse()
495  * In-place O(N) reversal of a subsection of the line array data,
496  * starting at i=pos, for n positions.
497  */
498 static void
linearray_reverse(ESL_RECORDER * rc,int pos,int n)499 linearray_reverse(ESL_RECORDER *rc, int pos, int n)
500 {
501   int    i;
502   char  *tmps;
503   int    tmpi;
504   off_t  tmpo;
505 
506   /* the line array itself */
507   for (i = 0; i < n/2; i++)
508     {
509       tmps                = rc->line[pos+n-i-1];
510       rc->line[pos+n-i-1] = rc->line[pos+i];
511       rc->line[pos+i]     = tmps;
512     }
513 
514   /* the line allocation array */
515   for (i = 0; i < n/2; i++)
516     {
517       tmpi                  = rc->lalloc[pos+n-i-1];
518       rc->lalloc[pos+n-i-1] = rc->lalloc[pos+i];
519       rc->lalloc[pos+i]     = tmpi;
520     }
521 
522   /* the offset array */
523   for (i = 0; i < n/2; i++)
524     {
525       tmpo                  = rc->offset[pos+n-i-1];
526       rc->offset[pos+n-i-1] = rc->offset[pos+i];
527       rc->offset[pos+i]     = tmpo;
528     }
529 }
530 
531 /* recorder_new_baseline()
532  * SRE, Fri Jan  1 09:00:55 2010 [Zaragoza]
533  *
534  * Set a ESL_RECORDER <rc> to a new baseline <newbase>, [0..N-1],
535  * greater than the previous baseline in the recorder.
536  *
537  * In general, must succeed, returning <eslOK>. If new baseline
538  * is <= old one, throws <eslEINVAL>, but you shouldn't do that.
539  *
540  * This is done in place in O(1) memory (no addiional or temporary
541  * allocation) and O(nalloc) time, using a trick: we can redo any
542  * circular permutation by no more than four in-place substring
543  * reversals:
544  *
545  *  456|123 ->  654|321 -> 65|4321 -> 56|1234
546  *      (reversals)  (new brkpt) (reversals)
547  *
548  * Some possible cases (all examples nalloc=7)
549  *   0  1  2  3  4  x  x   nread=5  [still filling the circle]
550  *   0  1  2  3  4  5  6   nread=7  [full, no wrapping yet]
551  *   7  8  2  3  4  5  6   nread=9  [wrapped]
552  *   7  8  9 10 11 12 13   nread=14 [back in order]
553  *   2  3  4  x  x  x  x   nread=5  baseline=2
554  *   2  3  4  5  6  7  8   nread=9  baseline=2
555  *   9 10  4  5  6  7  8   nread=11 baseline=2
556  *   9 10 11 12 13 14 15   nread=16 baseline=2
557  *
558  * By reversing the two substrings (lengths n1 and n2), we now have a
559  * complete string in reverse order. Our examples now look like:
560  *   n1 n2
561  *   5   0     4  3  2  1  0  x  x
562  *   7   0     6  5  4  3  2  1  0
563  *   2   5     8  7  6  5  4  3  2
564  *   7   0    13 12 11 10  9  8  7
565  *   3   0     4  3  2  x  x  x  x
566  *   7   0     8  7  6  5  4  3  2
567  *   2   5    10  9  8  7  6  5  4
568  *   7   0    15 14 13 12 11 10  9
569  *
570  * After reversing two substrings calculated under the new
571  * baseline, our work is done. For example, if newbase=1,
572  * our first set of examples would look like:
573  *   n1 n2
574  *   4   0     1  2  3  4 [0  x  x]
575  *   6   0     1  2  3  4  5  6 [0]
576  *   1   6     8  2  3  4  5  6  7
577  *   6   1     8  9 10 11 12 13  7
578  * and for newbase=3, the second set look like:
579  *   2   0     3  4 [2  x  x  x  x]
580  *   6   0     3  4  5  6  7  8 [2]
581  *   1   6    10  4  5  6  7  8  9
582  *   6   1    10 11 12 13 14 15  9
583  */
584 static int
recorder_new_baseline(ESL_RECORDER * rc,int newbase)585 recorder_new_baseline(ESL_RECORDER *rc, int newbase)
586 {
587   int n1, n2;
588 
589   if (newbase < rc->baseline)  ESL_EXCEPTION(eslEINVAL, "new baseline must be > old one");
590   if (newbase == rc->baseline) return eslOK;
591 
592   n1 = (rc->nread - rc->baseline) % rc->nalloc;
593   n2 = ESL_MIN(rc->nread-rc->baseline, rc->nalloc) - n1;
594 
595   if (n1>1) linearray_reverse(rc,    0, n1);
596   if (n2>1) linearray_reverse(rc,   n1, n2);
597 
598   n1 = (rc->nread - newbase) % rc->nalloc;
599   n2 = ESL_MIN(rc->nread - newbase, rc->nalloc) - n1;
600 
601   if (n1>1) linearray_reverse(rc,  0, n1);
602   if (n2>1) linearray_reverse(rc, n1, n2);
603 
604   rc->baseline = newbase;
605   return eslOK;
606 }
607 /*----------------- end, internal functions ---------------------*/
608 
609 
610 
611 
612 
613 /*****************************************************************
614  * 4. Benchmark driver
615  *****************************************************************/
616 #ifdef eslRECORDER_BENCHMARK
617 /* gcc -O2 -std=gnu99 -DeslRECORDER_BENCHMARK -o esl_recorder_benchmark -I. esl_recorder.c esl_stopwatch.c esl_getopts.c easel.c
618  */
619 #include <stdio.h>
620 
621 #include "easel.h"
622 #include "esl_getopts.h"
623 #include "esl_recorder.h"
624 #include "esl_stopwatch.h"
625 
626 static ESL_OPTIONS options[] = {
627   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
628   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
629   { "-N",        eslARG_INT,    "1000", NULL, "n>0", NULL,  NULL, NULL, "set recorder window size in lines",                0 },
630   { 0,0,0,0,0,0,0,0 },
631 };
632 static char usage[]  = "[-options] <filename>";
633 static char banner[] = "benchmarking speed of ESL_RECORDER reading";
634 
635 int
main(int argc,char ** argv)636 main(int argc, char **argv)
637 {
638   ESL_GETOPTS   *go        = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
639   ESL_STOPWATCH *w         = esl_stopwatch_Create();
640   ESL_RECORDER  *rc        = NULL;
641   char          *filename  = esl_opt_GetArg(go, 1);
642   int            N         = esl_opt_GetInteger(go, "-N");
643   FILE          *fp        = NULL;
644   char          *buf       = NULL;
645   int            balloc    = 0;
646   int            status;
647 
648   if ((fp = fopen(filename, "r")) == NULL) esl_fatal("no such file %s\n", filename);
649   rc = esl_recorder_Create(fp, N);
650   esl_stopwatch_Start(w);
651   while ((status = esl_recorder_Read(rc, &buf)) == eslOK);
652   esl_recorder_Destroy(rc);
653   fclose(fp);
654 
655   esl_stopwatch_Stop(w);
656   esl_stopwatch_Display(stdout, w, "recorder time:    ");
657 
658   if ((fp = fopen(filename, "r")) == NULL) esl_fatal("no such file %s\n", filename);
659   esl_stopwatch_Start(w);
660   while ((status = esl_fgets(&buf, &balloc, fp)) == eslOK);
661   free(buf);
662   fclose(fp);
663 
664   esl_stopwatch_Stop(w);
665   esl_stopwatch_Display(stdout, w, "esl_fgets() time: ");
666 
667   esl_stopwatch_Destroy(w);
668   esl_getopts_Destroy(go);
669   return 0;
670 }
671 #endif /*eslRECORDER_BENCHMARK*/
672 /*---------------- end, benchmark driver ------------------------*/
673 
674 
675 
676 /*****************************************************************
677  * 5. Unit tests
678  *****************************************************************/
679 #ifdef eslRECORDER_TESTDRIVE
680 #include "esl_random.h"
681 
682 static void
generate_testfile(ESL_RANDOMNESS * rng,char * tmpfile,int * is_data,int nlines)683 generate_testfile(ESL_RANDOMNESS *rng, char *tmpfile, int *is_data, int nlines)
684 {
685   char *msg      = "esl_recorder:: test file generator failed";
686   FILE *fp       = NULL;
687   int   in_block = esl_rnd_Roll(rng, 2);      /* TRUE | FALSE */
688   int   nblock   = 1 + esl_rnd_Roll(rng, 10); /* 1..10        */
689   int   i;
690 
691   if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg);
692   for (i = 0; i < nlines; i++)
693     {
694       is_data[i] = in_block ? TRUE : FALSE;
695       fprintf(fp, "%c%d\n", (in_block ? '#' : ' '), i);
696       if (--nblock == 0) {
697 	in_block = ! in_block;
698 	nblock   = 1 + esl_rnd_Roll(rng, 10); /* 1..10 */
699       }
700     }
701   fclose(fp);
702 }
703 
704 static void
utest_basic(char * tmpfile,int N)705 utest_basic(char *tmpfile, int N)
706 {
707   char         *msg         = "esl_recorder:: basic unit test failed";
708   ESL_RECORDER *rc          = NULL;
709   FILE         *fp          = NULL;
710   int           i;
711   char         *buf;
712 
713   if ((fp = fopen(tmpfile, "r"))        == NULL) esl_fatal(msg);
714   if ((rc = esl_recorder_Create(fp, N)) == NULL) esl_fatal(msg);
715   for (i = 0; i < N; i++)
716     {
717       if (esl_recorder_Read(rc, &buf)  != eslOK) esl_fatal(msg);
718       if (atoi(buf+1) != i)                      esl_fatal(msg);
719     }
720   if (esl_recorder_Read(rc, &buf)     != eslEOF) esl_fatal(msg);
721 
722   if (buf != NULL)                               esl_fatal(msg);
723 
724   if (esl_recorder_Position(rc, 0)     != eslOK) esl_fatal(msg);
725   for (i = 0; i < N; i++)
726     {
727       if (esl_recorder_Read(rc, &buf)  != eslOK) esl_fatal(msg);
728       if (atoi(buf+1) != i)                      esl_fatal(msg);
729     }
730   if (esl_recorder_Read(rc, &buf)     != eslEOF) esl_fatal(msg);
731 
732   fclose(fp);
733   esl_recorder_Destroy(rc);
734 }
735 
736 
737 static void
utest_grow(char * tmpfile,int N)738 utest_grow(char *tmpfile, int N)
739 {
740   char         *msg         = "esl_recorder:: grow unit test failed";
741   ESL_RECORDER *rc          = NULL;
742   FILE         *fp          = NULL;
743   int           i;
744   char         *buf;
745 
746   if ((fp = fopen(tmpfile, "r"))        == NULL) esl_fatal(msg);
747   if ((rc = esl_recorder_Create(fp, 3)) == NULL) esl_fatal(msg);
748   for (i = 0; i < 4; i++)
749     {
750       if (esl_recorder_Read(rc, &buf) != eslOK)  esl_fatal(msg);
751       if (atoi(buf+1) != i)                      esl_fatal(msg);
752     }
753   /* recorder has now wrapped: contains lines 3 1 2 */
754 
755   if (esl_recorder_Position(rc, 0) != eslEINVAL) esl_fatal(msg);
756   if (esl_recorder_Position(rc, 1) != eslOK)     esl_fatal(msg);
757   if (esl_recorder_ResizeTo(rc, 6) != eslOK)     esl_fatal(msg);
758   /* now 1 2 3 x x x */
759 
760   for (i = 1; i < N; i++)
761     {
762       if (esl_recorder_Read(rc, &buf) != eslOK)  esl_fatal(msg);
763       if (atoi(buf+1) != i)                      esl_fatal(msg);
764     }
765   if (esl_recorder_Read(rc, &buf) != eslEOF)     esl_fatal(msg);
766 
767   fclose(fp);
768   esl_recorder_Destroy(rc);
769 }
770 
771 static void
utest_grow2(char * tmpfile,int N)772 utest_grow2(char *tmpfile, int N)
773 {
774   char         *msg         = "esl_recorder:: grow2 unit test failed";
775   ESL_RECORDER *rc          = NULL;
776   FILE         *fp          = NULL;
777   int           i;
778   char         *buf;
779 
780   if (N < 5) esl_fatal(msg);	/* need at least this for this test */
781 
782   if ((fp = fopen(tmpfile, "r"))        == NULL) esl_fatal(msg);
783   if ((rc = esl_recorder_Create(fp, 3)) == NULL) esl_fatal(msg);
784   for (i = 0; i < 4; i++)
785     {
786       if (esl_recorder_Read(rc, &buf) != eslOK)  esl_fatal(msg);
787       if (atoi(buf+1) != i)                      esl_fatal(msg);
788     }
789   /* recorder has now wrapped: contains lines 3 1 2 */
790 
791   if (esl_recorder_ResizeTo(rc, 6)   != eslOK)   esl_fatal(msg);
792   /* recorder should now have reset baseline: 1 2 3 x x x */
793 
794   if (esl_recorder_Read(rc, &buf) != eslOK)      esl_fatal(msg);
795   if (atoi(buf+1) != 4)                          esl_fatal(msg);
796   /* now it should have 1 2 3 4 x x (and not 3 4 2 x x x, or x 1 2 3 4 x, for example) */
797 
798   if (esl_recorder_Position(rc, 0) != eslEINVAL) esl_fatal(msg);
799   if (esl_recorder_Position(rc, 1) != eslOK)     esl_fatal(msg);
800   for (i = 1; i < N; i++)
801     {
802       if (esl_recorder_Read(rc, &buf) != eslOK)  esl_fatal(msg);
803       if (atoi(buf+1) != i)                      esl_fatal(msg);
804     }
805   if (esl_recorder_Read(rc, &buf) != eslEOF)     esl_fatal(msg);
806 
807   fclose(fp);
808   esl_recorder_Destroy(rc);
809 }
810 
811 static void
utest_shrink(char * tmpfile,int N)812 utest_shrink(char *tmpfile, int N)
813 {
814   char         *msg         = "esl_recorder:: shrink unit test failed";
815   ESL_RECORDER *rc          = NULL;
816   FILE         *fp          = NULL;
817   int           i;
818   char         *buf;
819 
820   if ((fp = fopen(tmpfile, "r"))        == NULL) esl_fatal(msg);
821   if ((rc = esl_recorder_Create(fp, 6)) == NULL) esl_fatal(msg);
822   for (i = 0; i < 7; i++)
823     {
824       if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
825       if (atoi(buf+1) != i)                     esl_fatal(msg);
826     }
827   /* recorder has now wrapped: contains lines 6 1 2 3 4 5 */
828 
829   if (esl_recorder_ResizeTo(rc, 3) != eslOK)    esl_fatal(msg);
830   /* now it's 6 4 5 */
831   if (esl_recorder_Position(rc, 4) != eslOK)    esl_fatal(msg);
832 
833   for (i = 4; i < N; i++)
834     {
835       if (esl_recorder_Read(rc, &buf) != eslOK) esl_fatal(msg);
836       if (atoi(buf+1) != i)                     esl_fatal(msg);
837     }
838   if (esl_recorder_Read(rc, &buf) != eslEOF)    esl_fatal(msg);
839 
840   fclose(fp);
841   esl_recorder_Destroy(rc);
842 }
843 
844 static void
utest_block(ESL_RANDOMNESS * rng,char * tmpfile,int * is_data,int N)845 utest_block(ESL_RANDOMNESS *rng, char *tmpfile, int *is_data, int N)
846 {
847   char         *msg            = "esl_recorder:: block unit test failed";
848   ESL_RECORDER *rc             = NULL;
849   FILE         *fp             = NULL;
850   int           linenumber     = 0; /* where we should be in the file */
851   int           max_reposition = 2;
852   int           max_realloc    = 2;
853   int          *nseen1         = NULL; /* # of times we Read() each line */
854   int          *nseen2         = NULL; /* # of times we see each line in a block */
855   int           minalloc;
856   int           roll;
857   char         *buf;
858   char        **block;
859   int           from;
860   int           n,i;
861   int           status         = eslOK;
862 
863   if ((fp = fopen(tmpfile, "r"))           == NULL) esl_fatal(msg);
864   roll = 1+esl_rnd_Roll(rng, N+1);	/* 1..N+1 */
865   if ((rc = esl_recorder_Create(fp, roll)) == NULL) esl_fatal(msg);
866 
867   if ((nseen1  = malloc(sizeof(int) * N))   == NULL) esl_fatal(msg);
868   if ((nseen2  = malloc(sizeof(int) * N))   == NULL) esl_fatal(msg);
869   for (i = 0; i < N; i++) nseen1[i]  = 0;
870   for (i = 0; i < N; i++) nseen2[i]  = 0;
871 
872   while (status == eslOK)
873     {
874       /* skip nondata lines (no # prefix) */
875       do {
876 	if (esl_recorder_Read(rc, &buf) == eslEOF)     goto DONE;
877 	if (atoi(buf+1)                 != linenumber) esl_fatal(msg);
878 	if (esl_recorder_GetCurrent(rc) != linenumber) esl_fatal(msg);
879 	nseen1[linenumber]++;
880 	linenumber++;
881       } while (*buf != '#');
882 
883       /* read block */
884       from = esl_recorder_GetCurrent(rc);
885       esl_recorder_MarkBlock(rc, from);
886       do {
887 	if ((status = esl_recorder_Read(rc, &buf)) == eslEOF)   break;
888 	if (atoi(buf+1)                 != linenumber) esl_fatal(msg);
889 	if (esl_recorder_GetCurrent(rc) != linenumber) esl_fatal(msg);
890 	nseen1[linenumber]++;
891 	linenumber++;
892       } while (*buf == '#');
893 
894       /* get the block */
895       esl_recorder_GetBlock(rc, &block, NULL, NULL, &n);
896       if (status == eslOK) n--;
897 
898       /* check the block */
899       for (i = 0; i < n; i++)
900 	{
901 	  if (atoi(block[i]+1) != from+i) esl_fatal(msg);
902 	  nseen2[from+i]++;
903 	}
904 
905       /* unmark it */
906       esl_recorder_UnmarkBlock(rc);
907 
908       /* some fraction of the time, reposition randomly */
909       if (status == eslOK && max_reposition && (roll = esl_rnd_Roll(rng, 5)) == 0)
910 	{
911 	  linenumber = esl_recorder_GetFirst(rc) +
912 	    esl_rnd_Roll(rng, esl_recorder_GetLast(rc) - esl_recorder_GetFirst(rc) + 1);
913 	  if (esl_recorder_Position(rc, linenumber) != eslOK) esl_fatal(msg);
914 	  max_reposition--;
915 	}
916 
917       /* some fraction of the time, shrink the allocation */
918       if (status == eslOK && max_realloc && (roll = esl_rnd_Roll(rng, 5)) == 0)
919 	{
920 	  /* must keep at least nread-ncurr+1 lines, to keep curr line in window */
921 	  minalloc = rc->nread-rc->ncurr+1;
922 	  roll = minalloc + esl_rnd_Roll(rng, rc->nalloc-minalloc+1);
923 	  if (esl_recorder_ResizeTo(rc, roll) != eslOK) esl_fatal(msg);
924 	  max_realloc--;
925 	}
926     }
927 
928  DONE:
929   /* we're EOF. We should be sitting on the last line. */
930   if (esl_recorder_GetCurrent(rc) != N-1) esl_fatal(msg);
931 
932   /* We should have Read() every line at least once. */
933   for (i = 0; i < N; i++)
934     if (! nseen1[i]) esl_fatal(msg);
935 
936   /* In reading blocks, we should have seen each "data" line at least
937    * once; non-data lines, not at all.
938    */
939   for (i = 0; i < N; i++) {
940     if (  is_data[i] && ! nseen2[i]) esl_fatal(msg);
941     if (! is_data[i] &&   nseen2[i]) esl_fatal(msg);
942   }
943 
944   fclose(fp);
945   esl_recorder_Destroy(rc);
946   free(nseen1);
947   free(nseen2);
948 }
949 
950 #endif /*eslRECORDER_TESTDRIVE*/
951 /*------------------- end, unit tests ---------------------------*/
952 
953 
954 /*****************************************************************
955  * 6. Test driver
956  *****************************************************************/
957 #ifdef eslRECORDER_TESTDRIVE
958 /* gcc -O2 -std=gnu99 -DeslRECORDER_TESTDRIVE -o esl_recorder_utest -I. esl_recorder.c esl_stopwatch.c esl_getopts.c esl_random.c easel.c -lm
959  */
960 #include <stdio.h>
961 
962 #include "easel.h"
963 #include "esl_getopts.h"
964 #include "esl_random.h"
965 #include "esl_recorder.h"
966 
967 static ESL_OPTIONS options[] = {
968   /* name      type      default  env  range toggles reqs incomp  help                         docgroup*/
969   { "-h",  eslARG_NONE, FALSE,  NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
970   { "-s",  eslARG_INT,    "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>",        0 },
971   { "-v",  eslARG_NONE,   NULL, NULL, NULL, NULL, NULL, NULL, "more verbose output",                  0 },
972   { "-N",  eslARG_INT,   "200", NULL, NULL, NULL, NULL, NULL, "number of lines per test file",        0 },
973   { "-F",  eslARG_INT,   "100", NULL, NULL, NULL, NULL, NULL, "number of test files",                 0 },
974 
975   { 0,0,0,0,0,0,0,0 },
976 };
977 static char usage[]  = "[-options] <filename>";
978 static char banner[] = "test driver for ESL_RECORDER";
979 
980 int
main(int argc,char ** argv)981 main(int argc, char **argv)
982 {
983   ESL_GETOPTS    *go          = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
984   ESL_RANDOMNESS *rng         = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
985   char            template[13]= "esltmpXXXXXX";
986   char            tmpfile[13];
987   int             N           = esl_opt_GetInteger(go, "-N");
988   int             nfiles      = esl_opt_GetInteger(go, "-F");
989   int            *is_data     = malloc(sizeof(int) * N);
990 
991   esl_exception_SetHandler(&esl_nonfatal_handler);
992 
993   if (esl_opt_GetBoolean(go, "-v")) {
994     printf("random number seed: %" PRIu32 "\n", esl_randomness_GetSeed(rng));
995   }
996 
997   while (nfiles--)
998     {
999       strcpy(tmpfile, template);
1000       generate_testfile(rng, tmpfile, is_data, N);
1001 
1002       utest_basic (tmpfile, N);
1003       utest_grow  (tmpfile, N);
1004       utest_grow2 (tmpfile, N);
1005       utest_shrink(tmpfile, N);
1006       utest_block (rng, tmpfile, is_data, N);
1007 
1008       remove(tmpfile);
1009     }
1010 
1011   free(is_data);
1012   esl_getopts_Destroy(go);
1013   esl_randomness_Destroy(rng);
1014 
1015   printf("ok\n");
1016   return 0;
1017 }
1018 #endif /*eslRECORDER_TESTDRIVE*/
1019 /*------------------- end, test driver --------------------------*/
1020 
1021 
1022 
1023 /*****************************************************************
1024  * 7. Examples
1025  *****************************************************************/
1026 #ifdef eslRECORDER_EXAMPLE
1027 /* gcc -g -Wall -std=gnu99 -DeslRECORDER_EXAMPLE -o esl_recorder_example -I. esl_recorder.c easel.c
1028  */
1029 
1030 #include "easel.h"
1031 #include "esl_recorder.h"
1032 
1033 int
main(int argc,char ** argv)1034 main(int argc, char **argv)
1035 {
1036   FILE         *fp   = stdin;
1037   ESL_RECORDER *rc   = esl_recorder_Create(fp, 10);
1038   char         *line;
1039   int           i;
1040   int           status;
1041 
1042   printf("\nFirst time:\n");
1043   for (i = 0; i < 10; i++)
1044     {
1045       if ((status = esl_recorder_Read(rc, &line)) != eslOK) break; /* watch for EOF */
1046       fputs(line, stdout);
1047     }
1048 
1049   esl_recorder_Position(rc, 0);	/* rewind to start */
1050 
1051   printf("\nOne more time:\n");
1052   for (i = 0; i < 10; i++)
1053     {
1054       if ((status = esl_recorder_Read(rc, &line)) != eslOK) break;
1055       fputs(line, stdout);
1056     }
1057 
1058   esl_recorder_Destroy(rc);
1059   return 0;
1060 }
1061 #endif /*eslRECORDER_EXAMPLE*/
1062 
1063 
1064 #ifdef eslRECORDER_EXAMPLE2
1065 /* gcc -g -Wall -std=gnu99 -DeslRECORDER_EXAMPLE2 -o esl_recorder_example2 -I. esl_recorder.c easel.c
1066  */
1067 
1068 #include <stdio.h>
1069 #include <ctype.h>
1070 
1071 #include "easel.h"
1072 #include "esl_recorder.h"
1073 
1074 static int
is_data(char * s)1075 is_data(char *s)
1076 {
1077   if (*s == '#') return TRUE;
1078   //  for (; *s; s++) if (! isspace(*s)) return TRUE;
1079   return FALSE;
1080 }
1081 
1082 int
main(int argc,char ** argv)1083 main(int argc, char **argv)
1084 {
1085   FILE         *fp   = fopen(argv[1], "r");
1086   ESL_RECORDER *rc   = esl_recorder_Create(fp, 10);
1087   char         *line;
1088   char        **block;
1089   int           n;
1090   int           nblocks=0;
1091   int           i;
1092   int           status = eslOK;
1093 
1094   while (status == eslOK)
1095     {
1096       /* skip lines without # */
1097       do {
1098 	if (esl_recorder_Read(rc, &line) == eslEOF) goto DONE;
1099       } while (! is_data(line));
1100 
1101       /* read block */
1102       esl_recorder_MarkBlock(rc, esl_recorder_GetCurrent(rc));
1103       do {
1104 	status = esl_recorder_Read(rc, &line);
1105       } while (status == eslOK && is_data(line));
1106 
1107       /* get the block */
1108       esl_recorder_GetBlock(rc, &block, NULL, NULL, &n);
1109       nblocks++;
1110 
1111       /* if we EOF'ed, n lines of block ended with the EOF;
1112        * else, last line was a blank line
1113        */
1114       if (status == eslOK) n--;
1115 
1116       /* show it (exclusive of the trailing blank line */
1117       printf("BLOCK %d\n", nblocks);
1118       for (i = 0; i < n; i++)
1119 	printf("line %4d: %s", i+1, block[i]);
1120       printf("\n\n");
1121 
1122       /* unmark it */
1123       esl_recorder_UnmarkBlock(rc);
1124     }
1125 
1126  DONE:
1127   esl_recorder_Destroy(rc);
1128   fclose(fp);
1129   return 0;
1130 }
1131 #endif /*eslRECORDER_EXAMPLE2*/
1132 /*------------------ end, example main() ------------------------*/
1133 
1134