1 /*
2  * Copyright (c) 2004-2005, 2007, 2010, 2013 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *    Institute nor the names of its contributors may be used to endorse
18  *    or promote products derived from this software without specific
19  *    prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Author(s): Simon Dear, James Bonfield, Rodger Staden, John Taylor
36  *
37  * Copyright (c) 1994-1999, 2001-2003 MEDICAL RESEARCH COUNCIL
38  * All rights reserved
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions are met:
42  *
43  *    1 Redistributions of source code must retain the above copyright notice,
44  *      this list of conditions and the following disclaimer.
45  *
46  *    2 Redistributions in binary form must reproduce the above copyright
47  *      notice, this list of conditions and the following disclaimer in
48  *      the documentation and/or other materials provided with the
49  *      distribution.
50  *
51  *    3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
52  *      MOLECULAR BIOLOGY nor the names of its contributors may be used
53  *      to endorse or promote products derived from this software without
54  *      specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
57  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
60  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 /*
70  * Copyright (c) Medical Research Council 1994. All rights reserved.
71  *
72  * Permission to use, copy, modify and distribute this software and its
73  * documentation for any purpose is hereby granted without fee, provided that
74  * this copyright and notice appears in all copies.
75  *
76  * This file was written by James Bonfield, Simon Dear, Rodger Staden,
77  * as part of the Staden Package at the MRC Laboratory of Molecular
78  * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
79  *
80  * MRC disclaims all warranties with regard to this software.
81  */
82 
83 /*
84  * File: expFileIO.c
85  * Version:
86  *
87  * Description: Routines for reading and writing to experiment files.
88  *
89  * 1. Opening experiment files
90  * 2. Reading information from an experiment file
91  * 3. Appending to experiment files
92  * 4. Closing an opened experiment file
93  *
94  * Created:
95  * Updated:
96  *
97  */
98 
99 /*
100  * Tag format:
101  *
102  * 0        10
103  * |----.----|-
104  * TG   TYPE S position..length
105  * TG        One or more comment lines starting at character position 10
106  * TG        Each line represents a line of tag.
107  * TG         Extra indentation is simply added to the comment.
108  *
109  * Where S is the strand, either "+", "-", or "=" (both).
110  * Eg:
111  *
112  * TG   COMM = 100..110
113  * TG        This comment contains
114  * TG          several lines.
115  *
116  * So the above is a COMMent tag on both strands from bases 100 to 110
117  * inclusive containing the annotation
118  * "This comment contains\n  several lines.\n"
119  *
120  * This is written using exp_put_str giving the multi line string:
121  * "COMM = 100..110\nThis comment contains\n  several lines."
122  *
123  * (ie the indentation is added by the experiment file format, not by the
124  *  calling routines. Similarly this indentation is stripped out again when
125  *  reading back.)
126  */
127 
128 #ifdef HAVE_CONFIG_H
129 #include "io_lib_config.h"
130 #endif
131 
132 #include <stdio.h>
133 #include <string.h> /* IMPORT: strdup (hopefully!) */
134 #include <ctype.h>
135 
136 /* 6/1/99 johnt - includes needed for Visual C++ */
137 #ifdef _MSC_VER
138 #  include <io.h>
139 #  include <fcntl.h>
140 #endif
141 
142 #include "io_lib/expFileIO.h"
143 #include "io_lib/xalloc.h"
144 #include "io_lib/misc.h"
145 #include "io_lib/stdio_hack.h"
146 
147 /* Fixup for broken SunOS 4.x systems */
148 #ifndef FOPEN_MAX
149 #define FOPEN_MAX 20
150 #endif
151 
152 static int exp_check_eid_read(Exp_info *e,int id);
153 
154 /*************************************************************
155  * Line types for experiment file
156  *************************************************************/
157 
158 char eflt_feature_ids[MAXIMUM_EFLTS][MAXIMUM_EFLT_LENGTH+1] = {
159     "CF", /*  0 cloning vector sequence file */
160     "CN", /*  1 clone name */
161     "CS", /*  2 cloning vector sequence present in sequence */
162     "CV", /*  3 cloning vector type */
163     "DR", /*  4 direction of read */
164     "DT", /*  5 date of experiment */
165     "EN", /*  6 experiment name */
166     "EX", /*  7 experimental notes */
167     "FM", /*  8 sequencing vector fragmentation method */
168     "LN", /*  9 local format trace file name */
169     "LT", /* 10 local format trace file type */
170     "MC", /* 11 machine on which experiment ran */
171     "MN", /* 12 machine generated trace file name */
172     "MT", /* 13 machine generated trace file type */
173     "OP", /* 14 operator */
174     "PN", /* 15 primer name */
175     "QR", /* 16 poor quality sequence present at right (3') end */
176     "SC", /* 17 sequencing vector cloning site */
177     "SF", /* 18 sequencing vector sequence file */
178     "SI", /* 19 sequencing vector insertion length */
179     "SL", /* 20 sequencing vector present at left (5') end */
180     "SP", /* 21 sequencing vector primer site (relative to cloning site) */
181     "SQ", /* 22 sequence */
182     "SR", /* 23 sequencing vector present at right (3') end */
183     "ST", /* 24 strands */
184     "SV", /* 25 sequencing vector type */
185     "TN", /* 26 template name */
186     "QL", /* 27 poor quality sequence present at left (5') end */
187     "PS", /* 28 processing status */
188     "CC", /* 29 comments */
189     "SS", /* 30 sequence to screen against */
190     /* added 27-May-93 */
191     "TG", /* 31 gel tag line */
192     "ID", /* 32 identifier */
193     /* added 24-Sep-93 */
194     "AQ", /* 33 average quality measure */
195     /* added 15-Oct-93 */
196     "PR", /* 34 primer type */
197     "LI", /* 35 subclone library (mtd) */
198     "LE", /* 36 subclone library entry (well) */
199     /* added 19-Apr-94 */
200     "TC", /* 37 contig tag line */
201     "AC", /* 38 accession number */
202     /* added 11-Nov-94 */
203     "BC", /* 39 base calling software */
204     "ON", /* 40 original base numbers (positions) */
205     "AV", /* 41 accuracy (quality) values */
206     "PC", /* 42 position in contig */
207     "SE", /* 43 sense, whether it is complemented */
208     /* added 5-4-95 */
209     "CL", /* 44 cloning vector left end*/
210     "CR", /* 45 cloning vector right end*/
211     "AP", /* 46 assembly position */
212     "CH", /* 47 special chemistry used (eg taq) */
213     "PD", /* 48 primer data - the sequence of a primer */
214     "WT", /* 49 wild type trace */
215     "NT", /* 50 note */
216     "GD", /* 51 Gap4 database file */
217     "WL", /* 52 wildtype trace left clip point */
218     "WR", /* 53 wildtype trace right clip point */
219     "FT", /* 54 EMBL format feature table */
220     "LG"  /* 55 LiGation: an amalgamation of LI and LE */
221 };
222 
223 
224 
225 
226 /*************************************************************
227  * Output/update lines
228  *************************************************************/
229 
/*
 * Write one experiment file line: the line type identifier left-justified
 * in a 5 character field, followed by the entry text and a newline.
 *
 * Returns 0 for success, 1 if the write failed.
 */
static int exp_print_line_(FILE *fp, char *eflt, char *entry)
{
    int written = fprintf(fp, "%-5s%s\n", eflt, entry);

    return written < 0 ? 1 : 0;
}
241 
/*
 * Write the i'th stored entry of line type 'eflt' as a single experiment
 * file line.
 *
 * Returns 0 for success, 1 if the write failed.
 */
int exp_print_line(FILE *fp, Exp_info *e, int eflt, int i)
{
    char *id    = eflt_feature_ids[eflt];
    char *value = arr(char *, e->entries[eflt], i);

    return exp_print_line_(fp, id, value);
}
252 
253 /*
254  * Outputs a multi-line experiment file line.
255  * Continuation lines are automatically added by adding 5 characters of extra
256  * indentation at the start of each continuation.
257  *
258  * returns -1 for failure, 0 for success.
259  */
exp_print_mline(FILE * fp,Exp_info * e,int eflt,int i)260 int exp_print_mline(FILE *fp, Exp_info *e, int eflt, int i) {
261     char *p, *c;
262 
263     p = arr(char *, e->entries[eflt], i);
264 
265     /* first line */
266     if ((c = strchr(p, '\n')))
267 	*c = '\0';
268     if (-1 == exp_print_line_(fp, eflt_feature_ids[eflt], p))
269 	return -1;
270 
271     while (c) {
272 	*c = '\n';
273 	p = c+1;
274 
275 	if ((c = strchr(p, '\n'))) {
276 	    *c = '\0';
277 	}
278 
279 	if (-1 == fprintf(fp, "%-10s%s\n", eflt_feature_ids[eflt], p))
280 	    return -1;
281     }
282 
283     return 0;
284 }
285 
286 
/*
 * Write the i'th stored entry of line type 'eflt' as a sequence record.
 *
 * The identifier is written first; the sequence follows on subsequent
 * lines of 60 characters, each line starting with 4 spaces and each group
 * of 10 characters preceded by one further space. The record is closed
 * with a line containing "//".
 *
 * Returns 0 for success, 1 if any write failed.
 */
int exp_print_seq(FILE *fp, Exp_info *e, int eflt, int i)
{
    char *seq = arr(char *, e->entries[eflt], i);
    int n, pos;

    if (fprintf(fp, "%-5s", eflt_feature_ids[eflt]) < 0)
        return 1;

    n = strlen(seq);
    for (pos = 0; pos < n; pos++) {
        /* start a new output line every 60 characters */
        if (pos % 60 == 0 && fprintf(fp, "\n    ") < 0)
            return 1;

        /* separate blocks of 10 characters */
        if (pos % 10 == 0 && fprintf(fp, " ") < 0)
            return 1;

        if (fprintf(fp, "%c", seq[pos]) < 0)
            return 1;
    }

    return fprintf(fp, "\n//\n") < 0 ? 1 : 0;
}
306 
exp_get_feature_index(char * e)307 int exp_get_feature_index(char *e)
308 {
309     int i;
310     for (i = 0; i < MAXIMUM_EFLTS; i++) {
311 	if (eflt_feature_ids[i][0] == e[0] &&
312 	    eflt_feature_ids[i][1] == e[1])
313 	    return i;
314     }
315 
316     return -1;
317 }
318 
319 
320 /*************************************************************
321  * Utility routines
322  *************************************************************/
323 
/*
 * Formats a start/end pair in 'range format' ("start..end") into str.
 * No length check is made; str must be large enough for two decimal
 * integers plus the ".." separator and terminating nul.
 *
 * Returns str.
 */
char *exp_create_range(char *str, int start, int end) {
    (void)sprintf(str, "%d..%d", start, end);

    return str;
}
332 
/*
 * Parses a range string of the form "start..end" into *start and *end.
 *
 * Returns 0 when both values were read and 1 otherwise. On failure *start
 * may still have been updated if only the first value could be parsed.
 */
int exp_extract_range(char *str, int *start, int *end) {
    int fields = sscanf(str, "%d..%d", start, end);

    return fields == 2 ? 0 : 1;
}
340 
exp_create_info(void)341 Exp_info *exp_create_info(void)
342 /*
343  * Allocate space for new experiment file information
344  */
345 {
346     Exp_info *new;
347     int i;
348 
349     new = (Exp_info *)xmalloc(sizeof(Exp_info));
350     if (new != NULL) {
351 	for(i=0; i< MAXIMUM_EFLTS ; i++) {
352 	    new->Nentries[i] = 0;
353 	    new->entries[i] = ArrayCreate(sizeof(char *), 1/*one entry*/);
354 	}
355 	new->fp = NULL;
356     }
357 
358     return new;
359 }
360 
361 
/*
 * Free an experiment file structure and everything it holds: each stored
 * entry string, the per line type arrays, and the structure itself.
 * If a file is still attached (e->fp) it is closed.
 *
 * Passing NULL_Exp_info is a no-op.
 */
void exp_destroy_info(Exp_info *e)
{
    int type, n;

    if (e == NULL_Exp_info)
        return;

    for (type = 0; type < MAXIMUM_EFLTS; type++) {
        Array list = e->entries[type];

        for (n = 0; n < e->Nentries[type]; n++) {
            char *str = arr(char *, list, n);

            if (str != NULL)
                xfree(str);
        }
        ArrayDestroy(list);
    }

    if (e->fp != NULL)
        fclose(e->fp);

    xfree(e);
}
380 
381 
382 
383 
384 /*
385  * Read from file a sequence, discarding all white space til a // is
386  * encountered
387  */
exp_read_sequence(FILE * fp)388 static char *exp_read_sequence(FILE *fp)
389 {
390     char *seq = NULL;
391     size_t seq_len = 0, seq_alloc;
392     char line[EXP_FILE_LINE_LENGTH+1];
393     char *l;
394     static int valid_char[256], init = 0;
395 
396     /* Initialise lookup tables for efficiency later on.*/
397     if (!init) {
398 	int i;
399 	for (i = 0; i < 256; i++) {
400 	    if (i < 128 && !isspace(i) && !isdigit(i) && !iscntrl(i))
401 		valid_char[i] = 1;
402 	    else
403 		valid_char[i] = 0;
404 	}
405 	init = 1;
406     }
407 
408     /* Initialise memory */
409     seq_alloc = EXP_FILE_LINE_LENGTH * 8;
410     seq = (char *)xmalloc(seq_alloc);
411     if (NULL == seq)
412 	return NULL;
413     seq[0] = '\0';
414 
415     /* Reading line by line, until we get "//" */
416     l = fgets(line,EXP_FILE_LINE_LENGTH,fp);
417     while (l!= NULL && strncmp(l,"//",2)) {
418 	char *a, *b;
419 
420 	/* make sure the seq buffer is large enough */
421 	if (seq_len + EXP_FILE_LINE_LENGTH + 1 > seq_alloc) {
422 	    seq_alloc *= 2;
423 	    if (NULL == (seq = (char *)xrealloc(seq, seq_alloc)))
424 		return NULL;
425 	}
426 
427 	/* copy to seq, stripping spaces on the fly */
428 	for(a=line, b = &seq[seq_len]; *a; a++)
429 	    if (valid_char[(unsigned char)*a])
430 		*b++ = *a;
431 	*b = '\0';
432 	seq_len = b-seq;
433 
434 	l = fgets(line,EXP_FILE_LINE_LENGTH,fp);
435     }
436 
437     /* Shrink the allocated string to reduce memory usage */
438     seq = (char *)xrealloc(seq, seq_len + 1);
439 
440     return seq;
441 }
442 
443 
444 /*
445  * Converts the opos[] array into a char array.
446  * In doing so this shrinks the data size by using a .. notation.
447  * No check is made that buf is large enough. It is recommended that buf is
448  * allocated to 5*len which covers the worst case (for sequences less that
449  * 9999 bases long).
450  *
451  * Note that on older systems sprintf may return the first argument rather
452  * than the number of characters written.
453  * For this reason we have to do the counting ourselves.
454  */
opos2str(int2 * opos,int len,char * buf)455 char *opos2str(int2 *opos, int len, char *buf) {
456     int i, st, f, dir = 0;
457     char *r = buf, *rs = buf;
458 
459     f = opos[st = 0];
460     for (i = 1; i < len; f=opos[i++]) {
461 	if (dir == 0) {
462 	    if (opos[i] == f+1)
463 		dir=1;
464 	    else if (opos[i] == f-1)
465 		dir=-1;
466 	}
467 
468 	if (dir && opos[i] != f + dir) {
469 	    if (st != i-1)
470 		sprintf(buf, "%d..%d ", opos[st], opos[i-1]);
471 	    else
472 		sprintf(buf, "%d ", opos[st]);
473 	    st = i;
474 	    dir = 0;
475 
476 	    buf += strlen(buf);
477 
478 	} else if (dir == 0) {
479 	    sprintf(buf, "%d ", f);
480 
481 	    st = i;
482 	    buf += strlen(buf);
483 	}
484 
485 	if (buf - rs > 60) {
486 	    *buf++ = '\n';
487 	    *buf = '\0';
488 	    rs = buf - 6;
489 	}
490     }
491 
492     if (st != i-1)
493 	sprintf(buf, "%d..%d", opos[st], opos[i-1]);
494     else
495 	sprintf(buf, "%d", opos[st]);
496 
497     return r;
498 }
499 
500 
501 
502 /*
503  * Expands from the character string .. notation to the opos[] array, up to
504  * a maximum of len elements in opos[].
505  *
506  * Returns the length of the opos array.
507  */
str2opos(int2 * opos,int len,char * buf)508 int str2opos(int2 *opos, int len, char *buf) {
509     /* int i, n1, n2, st, en, m, j = 0; */
510     int i, j = 0, st, en;
511     char *cp;
512 
513     while (j < len && *buf) {
514 	st = strtol(buf, &cp, 10);
515 	if (buf == cp) {
516 	    buf++;
517 	    continue;
518 	}
519 	buf = cp;
520 	if (buf[0] == '.' && buf[1] == '.') {
521 	    en = strtol(buf += 2, &cp, 10);
522 	    if (buf == cp) {
523 		opos[j++] = st;
524 		buf++;
525 		continue;
526 	    }
527 	    buf = cp;
528 
529 	    if (en >= st)
530 		for (i = st; i <= en && j < len; i++)
531 		    opos[j++] = i;
532 	    else
533 		for (i = st; i >= en && j < len; i--)
534 		    opos[j++] = i;
535 	} else {
536 	    opos[j++] = st;
537 	}
538     }
539 
540     return j;
541 }
542 
543 
544 /*
545  * Converts the accuracy value string (AV) to the confidence array up to
546  * a maximum of len elements in conf[].
547  *
548  * The AV string is of format:
549  * "x y z ..." where x, y and z are confidence values for the first three
550  * called bases. Or:
551  * "a,b,c,d e,f,g,h i,j,k,l ..." where the 4-tuples represent the four
552  * confidence values for each base.
553  *
554  * Returns: number of confidence values read, or -1 for error.
555  */
str2conf(int1 * conf,int len,char * buf)556 int str2conf(int1 *conf, int len, char *buf) {
557     int ind = 0;
558 
559     while (*buf && ind < len) {
560 	char *new_buf;
561 	int val1;
562 
563 	val1 = strtol(buf, &new_buf, 10);
564 	if (new_buf == buf)
565 	    break;
566 
567 	if (*new_buf == ',') {
568 	    fprintf(stderr, "4-tuple system is currently unsupported\n");
569 	    return -1;
570 	}
571 
572 	conf[ind++] = val1;
573 	buf = new_buf;
574     }
575 
576     return ind;
577 }
578 
579 
580 /*
581  * Converts the confidence array to the accuracy value string (AV).
582  *
583  * Note no memory overrun checks are performed on buf. It is recommended
584  * that it is allocated to 4*len (worst case of "255 " for each base) plus.
585  * a couple of terminating newline and null plus another byte per 15 values
586  * to allow for the 60-char line length.
587  * For ease, allocating to 5*len+2 is more than sufficient.
588  *
589  * Returns the buf argument.
590  */
conf2str(int1 * conf,int len,char * buf)591 char *conf2str(int1 *conf, int len, char *buf) {
592     int i;
593     char *ret = buf, *rs = buf;
594 
595     for (i = 0; i < len; i++) {
596 	sprintf(buf, "%d ", conf[i]);
597 	buf += strlen(buf);
598 
599 	if (buf - rs > 60) {
600 	    *buf++ = '\n';
601 	    *buf = '\0';
602 	    rs = buf - 6;
603 	}
604     }
605 
606     return ret;
607 }
608 
609 /*************************************************************
610  * Main C interface routines
611  *************************************************************/
612 
613 
614 /*
615  * Closes an experiment file (if open), but does not free it.
616  */
exp_close(Exp_info * e)617 void exp_close(Exp_info *e) {
618     if (e->fp) {
619 	fclose(e->fp);
620 	e->fp = NULL;
621     }
622 }
623 
624 
exp_read_info(char * file)625 Exp_info *exp_read_info(char *file)
626 /*
627  * Read in an experiment file and return handle
628  */
629 {
630     Exp_info *e;
631     FILE *fp;
632 
633     /*
634      * open for read
635      */
636     if ((fp = fopen(file,"r"))==NULL) {
637 	return NULL_Exp_info;
638     }
639 
640     e = exp_fread_info(fp);
641     fclose(fp);
642 
643     if (NULL_Exp_info == e) {
644 	return NULL_Exp_info;
645     }
646 
647     /*
648      * reopen for appending
649      */
650     e->fp = fopen(file,"a");
651 
652     return e;
653 
654 }
655 
656 
657 /*
658  * Read in an experiment file and return handle
659  */
exp_fread_info(FILE * fp)660 Exp_info *exp_fread_info(FILE *fp)
661 {
662     Exp_info *e;
663     char line[EXP_FILE_LINE_LENGTH+1];
664     char *aline;
665     int alloced_length = EXP_FILE_LINE_LENGTH+1;
666     int apos, len;
667     int last_entry = -1;
668     size_t entry_len = 0;
669 
670     e = exp_create_info();
671 
672 
673     /*
674      * No longer has an effect due to mFILE already being loaded. Ifdef not
675      * triggered under mingw anyway.
676      */
677 #ifdef _WIN32
678     /* 6/1/99 johnt - need to ensure text mode to translate \r\n to \n */
679     /* _setmode(fileno(fp),_O_TEXT); */
680     mfascii(fp);
681 #endif
682 
683     /*
684      * open for read, set this temporarily in this function. Should be NULL
685      * when exiting as this isn't our file pointer to own, but the destroy
686      * function does attempt to automatically close it.
687      */
688     e->fp = fp;
689 
690     if (NULL == (aline = (char *)xmalloc(alloced_length)))
691 	return NULL;
692 
693     if (e != NULL_Exp_info) {
694 	int at_end = 0;
695 
696 	for(;;) {
697 	    char *c;
698 	    int entry;
699 
700 	    /* Read into aline, joining and allocating as necessary */
701 	    apos = 0;
702 	    do {
703 		if (fgets(line,EXP_FILE_LINE_LENGTH,e->fp) == NULL) {
704 		    at_end = 1;
705 		    break;
706 		}
707 
708 		len = strlen(line);
709 		if (apos + len >= alloced_length) {
710 		    alloced_length *= 2;
711 		    if (NULL == (aline = (char *)xrealloc(aline,
712 							  alloced_length))) {
713 			e->fp = NULL;
714 			return NULL;
715 		    }
716 		}
717 
718 		strcpy(aline+apos, line);
719 		apos += len;
720 	    } while (line[len-1] != '\n');
721 
722 	    if (at_end)
723 		break;
724 
725 	    /*
726 	     * zero terminate first argument
727 	     * set c to point to second argument
728 	     *
729 	     * FIXME: c should point to character 6 always. Indentation is
730 	     * important when considering continuation lines.
731 	     */
732 	    for (c=aline;*c && !isspace(*c); c++) ;
733 	    if (*c) {
734 		*c++ = '\0';
735 		for (;*c && isspace(*c); c++) ;
736 	    }
737 
738 	    entry = exp_get_feature_index(aline);
739 	    if (entry >= 0) {
740 		/*
741 		 * Tag lines may be split over multiple lines. If we have no
742 		 * tag type then we append to the existing tag.
743 		 */
744 		if (entry == last_entry &&
745 		    (int)(c-aline) >= 10/* continuation lines */
746 		    && (entry == EFLT_TG || entry == EFLT_TC ||
747 			entry == EFLT_ON || entry == EFLT_AV ||
748 			entry == EFLT_NT || entry == EFLT_FT)) {
749 		    char *en;
750 		    size_t l1, l2;
751 
752 		    /*
753 		     * Extend our current line by the appropriate amount
754 		     */
755 		    if( exp_check_eid_read(e,entry) )
756 			return NULL;
757 		    en = exp_get_entry(e,entry);
758 		    l1 = entry_len;
759 		    l2 = strlen(&aline[10]);
760 
761 		    if (NULL == (en = exp_get_entry(e, entry) =
762 				 (char *)xrealloc(en, l1 + l2 + 1))) {
763 			e->fp = NULL;
764 			return NULL;
765 		    }
766 
767 
768 		    /*
769 		     * Append the new line (without the \n char)
770 		     */
771 		    en[l1] = '\n';
772 		    aline[l2+9] = '\0';
773 		    strcpy(&en[l1+1], &aline[10]);
774 
775 		    entry_len += l2;
776 		} else {
777 		    /*
778 		     * Increment number of entries for line type entry
779 		     * This will force exp_get_entry() to return pointer to
780 		     * next free element in array
781 		     */
782 		    (void)ArrayRef(e->entries[entry],e->Nentries[entry]++);
783 
784 		    if (entry == EFLT_SQ)
785 			exp_get_entry(e,entry) = exp_read_sequence(e->fp);
786 		    else {
787 			char *eoln = strchr(c,'\n');
788 			int i;
789 
790 			if (eoln!=NULL) *eoln='\0';
791 
792 			if (entry == EFLT_LT)
793 			    for (i=3; isspace(c[i]) && i >= 0; c[i--]='\0');
794 
795 			exp_get_entry(e,entry) = (char *)strdup(c);
796 			entry_len = strlen(c);
797 		    }
798 		}
799 	    }
800 
801 	    last_entry = entry;
802 	}
803     }
804 
805     e->fp = NULL;
806     xfree(aline);
807 
808     return e;
809 }
810 
/*
 * Check that the handle and line type are a valid combination for reading:
 * the handle exists, the id is within range, at least one entry of that
 * type has been stored and the type has a non-empty identifier.
 *
 * Returns 0 when the entry may be read, 1 otherwise.
 */
static int exp_check_eid_read(Exp_info *e,int id)
{
    if (e == NULL)
        return 1;

    if (id < 0 || id >= MAXIMUM_EFLTS)
        return 1;

    if (e->Nentries[id] == 0)
        return 1;

    return eflt_feature_ids[id][0] == '\0';
}
825 
/*
 * Check that the handle and line type are a valid combination for writing:
 * the handle exists, the id is within range, a file is attached to the
 * handle and the type has a non-empty identifier.
 *
 * Returns 0 when the entry may be written, 1 otherwise.
 */
static int exp_check_eid_write(Exp_info *e,int id)
{
    if (e == NULL)
        return 1;

    if (id < 0 || id >= MAXIMUM_EFLTS)
        return 1;

    if (e->fp == NULL)
        return 1;

    return eflt_feature_ids[id][0] == '\0';
}
838 
839 
840 
841 
842 
843 
/*
 * Get the current entry for line type id, converted to an integer with
 * atoi(), and store it in *val.
 *
 * returns:
 *    0 - success
 *    1 - no entry (*val is left untouched)
 */
int exp_get_int(Exp_info *e, int id, int *val)
{
    char *text;

    if (exp_check_eid_read(e, id) != 0)
        return 1;

    text = exp_get_entry(e, id);
    *val = atoi(text);

    return 0;
}
856 
857 
/*
 * Get the integer pair ("from..to") for entry id
 *
 * returns:
 *    0 - success, *from and *to are both set
 *    1 - no entry, or the entry is not a valid range. In the latter case
 *        *from may have been modified but the pair must not be used.
 */
int exp_get_rng(Exp_info *e, int id, int *from, int *to)
{
    if ( exp_check_eid_read(e,id) ) return 1;

    /*
     * Report a malformed range rather than claiming success with
     * *from / *to possibly left unset.
     */
    if ( exp_extract_range(exp_get_entry(e,id), from, to) ) return 1;

    return 0;
}
871 
872 
873 
/*
 * Copy the current entry for line type id into s using strncpy() with a
 * limit of s_l characters. As with strncpy(), s is padded with nul bytes
 * when the entry is shorter than s_l and is NOT nul terminated when the
 * entry is s_l characters or longer.
 *
 * returns:
 *    0 - success
 *    1 - no entry (s is left untouched)
 */
int exp_get_str(Exp_info *e, int id, char *s, f_implicit s_l)
{
    const char *src;

    if (exp_check_eid_read(e, id) != 0)
        return 1;

    src = exp_get_entry(e, id);
    strncpy(s, src, s_l);

    return 0;
}
887 
888 
/*
 * Store a copy of the first len characters of s as a new entry of line
 * type id and write it to the experiment file attached to e.
 *
 * SQ entries are written as a sequence record; TG, TC, ON, AV, NT and FT
 * entries as multi-line records; everything else as a single line.
 *
 * The caller is expected to have validated e and id (see
 * exp_check_eid_write()).
 *
 * returns:
 *    0 - success
 *    non-zero - no update (bad length or out of memory, in which case no
 *               entry is added), or the write failed
 */
static int exp_append_str(Exp_info *e, int id, char *s, int len)
{
    char *copy;
    int slot;

    if (len < 0)
        return 1;

    /*
     * Take the copy before registering a new entry, so that running out
     * of memory cannot leave a NULL entry behind.
     */
    copy = (char *)xmalloc(len+1);
    if (NULL == copy)
        return 1;
    strncpy(copy, s, len);
    copy[len] = '\0';

    /* Bump the entry count; exp_get_entry() now refers to the new slot */
    (void)ArrayRef(e->entries[id],e->Nentries[id]++);
    exp_get_entry(e,id) = copy;
    slot = e->Nentries[id]-1;

    if ( id == EFLT_SQ )
        return exp_print_seq(e->fp,e,id,slot);
    else if (id == EFLT_TG || id == EFLT_TC ||
             id == EFLT_ON || id == EFLT_AV ||
             id == EFLT_NT || id == EFLT_FT)
        return exp_print_mline(e->fp,e,id,slot);
    else
        return exp_print_line(e->fp,e,id,slot);
}
911 
912 
/*
 * Append the integer *val, formatted in decimal, as a new entry of line
 * type id, and write it to the experiment file.
 *
 * returns:
 *    0 - success
 *    1 - no update
 */
int exp_put_int(Exp_info *e, int id, int *val)
{
    char text[EXP_FILE_LINE_LENGTH];

    if (exp_check_eid_write(e, id) != 0)
        return 1;

    sprintf(text, "%d", *val);

    return exp_append_str(e, id, text, strlen(text));
}
926 
927 
/*
 * Append the integer pair *from, *to, formatted as a "from..to" range, as
 * a new entry of line type id, and write it to the experiment file.
 *
 * returns:
 *    0 - success
 *    1 - no update
 */
int exp_put_rng(Exp_info *e, int id, int *from, int *to)
{
    char text[EXP_FILE_LINE_LENGTH];
    char *range;

    if (exp_check_eid_write(e, id) != 0)
        return 1;

    range = exp_create_range(text, *from, *to);

    return exp_append_str(e, id, range, strlen(range));
}
943 
944 
945 
/*
 * Append the first s_l characters of s as a new entry of line type id,
 * and write it to the experiment file.
 *
 * returns:
 *    0 - success
 *    1 - no update
 */
int exp_put_str(Exp_info *e, int id, char *s, f_implicit s_l)
{
    if (exp_check_eid_write(e, id) != 0)
        return 1;

    return exp_append_str(e, id, s, s_l);
}
957 
958 
959 /*************************************************************
960  * FORTRAN INTERFACE
961  *************************************************************/
962 
963 
964 
965 static int init_done = 0;
966 static int NHandles = 0;
967 static Exp_info **Handles = NULL;
968 
initialise(void)969 static int initialise(void)
970 {
971     int i;
972 
973     if (init_done) return 0;
974     init_done++;
975 
976     NHandles = FOPEN_MAX;
977 
978     if ( (Handles = (Exp_info **)xmalloc(sizeof(Exp_info *) * NHandles)) == NULL) {
979 	NHandles = 0;
980 	return 1;
981     }
982 
983     for (i=0; i<NHandles; i++) Handles[i] = NULL;
984 
985     return 0;
986 }
987 
988 
get_free_handle(void)989 static int get_free_handle(void)
990 /*
991  * find a free entry in the Exp array
992  * returns -1 if there is none
993  */
994 {
995     int i;
996 
997     (void) initialise();
998 
999     if (!NHandles) return -1; /* no slots! */
1000     for (i=0; i<NHandles && Handles[i]!=NULL; i++) ;
1001     return (i==NHandles)?-1:i;
1002 }
1003 
1004 
/*
 * Validate a FORTRAN handle. Handles are 1-based, so the valid range is
 * 1 to NHandles inclusive. Whether the slot is actually in use is not
 * checked here.
 *
 * Returns 0 if the handle is within range, 1 if it is NULL or out of range.
 */
static int check_handle(f_int *handle)
{
    int h;

    if (handle == NULL)
        return 1;

    h = (int) (*handle);

    return h <= 0 || h > NHandles;
}
1011 
1012 
1013 
expopn_(char * fn,f_implicit fn_l)1014 f_int expopn_(char *fn, f_implicit fn_l)
1015 /*
1016  * FORTRAN interface to exp_open_file()
1017  */
1018 {
1019     char cfn[1025];
1020     int handle;
1021 
1022     if ( (handle = get_free_handle()) >= 0 ) {
1023 	f2cstr(fn,fn_l,cfn,1024);
1024 	Handles[handle] = exp_read_info(cfn);
1025     }
1026 
1027     return (f_int) (handle+1);
1028 }
1029 
1030 
1031 
expkil_(f_int * handle)1032 f_proc_ret expkil_(f_int *handle)
1033 /*
1034  * FORTRAN interface to exp_destroy_info
1035  */
1036 {
1037     Exp_info *e;
1038     if ( check_handle(handle) ) f_proc_return();
1039     e = (Exp_info *) Handles[(int)(*handle)-1];
1040 
1041     exp_destroy_info(e);
1042 
1043     Handles[(int)(*handle)-1] = NULL;
1044     *handle = 0;
1045 
1046     f_proc_return();
1047 }
1048 
/*
 * FORTRAN interface to exp_get_int
 *
 * Returns 1 for an out of range handle, otherwise whatever exp_get_int()
 * returns.
 */
f_int expri_(f_int *handle, f_int *id, f_int *val)
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_get_int(Handles[slot], (int)*id, (int *)val);
}
1060 
1061 
/*
 * FORTRAN interface to exp_get_rng
 *
 * Returns 1 for an out of range handle, otherwise whatever exp_get_rng()
 * returns.
 */
f_int exprr_(f_int *handle, f_int *id, f_int *from, f_int *to)
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_get_rng(Handles[slot], (int)*id, (int *)from, (int *)to);
}
1074 
1075 /* ARGSUSED */
exprsa_(f_int * handle,f_int * id,char * s,f_int * max_len,f_implicit s_l)1076 f_int exprsa_(f_int *handle, f_int *id, char *s, f_int *max_len, f_implicit s_l)
1077 /*
1078  * FORTRAN interface to exp_get_str workalike
1079  * NOTE: for use with FORTRAN CHARACTER arrays instead CHARACTER strings
1080  */
1081 {
1082     Exp_info *e;
1083     if ( check_handle(handle) ) return 1;
1084     e = (Exp_info *) Handles[(int)(*handle)-1];
1085 
1086     if ( exp_check_eid_read(e,*id) ) return 1;
1087     c2fstr(exp_get_entry(e,*id),(int)*max_len,s,(int)*max_len);
1088     return 0;
1089 }
1090 
1091 
f_int exprs_(f_int *handle, f_int *id, char *s, f_implicit s_l)
/*
 * FORTRAN interface: exp_get_str workalike for CHARACTER strings
 * (as opposed to CHARACTER arrays; see exprsa_ for those).
 *
 * The entry returned by exp_get_entry() for *id is copied into s with
 * c2fstr(), using the implicit length s_l for both lengths.
 *
 * Returns 1 if check_handle() rejects the handle or exp_check_eid_read()
 * is non-zero for *id; 0 once the copy has been made.
 */
{
    Exp_info *info;

    if (check_handle(handle))
        return 1;

    info = (Exp_info *) Handles[(int)(*handle) - 1];
    if (exp_check_eid_read(info, *id))
        return 1;

    c2fstr(exp_get_entry(info, *id), s_l, s, s_l);
    return 0;
}
1106 
1107 
f_int expwi_(f_int *handle, f_int *id, f_int *val)
/*
 * FORTRAN interface to exp_put_int().
 *
 * Returns 1 when check_handle() rejects the handle; otherwise returns
 * whatever exp_put_int() returns for the Exp_info in Handles[*handle - 1],
 * entry type *id and the value pointed to by val.
 */
{
    Exp_info *info;

    if (check_handle(handle))
        return 1;

    info = (Exp_info *) Handles[(int)(*handle) - 1];
    return exp_put_int(info, (int)*id, (int *)val);
}
1119 
1120 
f_int expwr_(f_int *handle, f_int *id, f_int *from, f_int *to)
/*
 * FORTRAN interface to exp_put_rng().
 *
 * Returns 1 when check_handle() rejects the handle; otherwise returns
 * whatever exp_put_rng() returns for the Exp_info in Handles[*handle - 1],
 * entry type *id and the two values pointed to by from / to.
 */
{
    Exp_info *info;

    if (check_handle(handle))
        return 1;

    info = (Exp_info *) Handles[(int)(*handle) - 1];
    return exp_put_rng(info, (int)*id, (int *)from, (int *)to);
}
1132 
1133 
1134 /* ARGSUSED */
expwsa_(f_int * handle,f_int * id,char * s,f_int * max_len,f_implicit s_l)1135 f_int expwsa_(f_int *handle, f_int *id, char *s, f_int *max_len, f_implicit s_l)
1136 /*
1137  * FORTRAN interface to exp_put_str workalike
1138  * NOTE: for use with FORTRAN CHARACTER arrays instead CHARACTER strings
1139  */
1140 {
1141     Exp_info *e;
1142     char buf[EXP_FILE_LINE_LENGTH];
1143     if ( check_handle(handle) ) return 1;
1144     e = (Exp_info *) Handles[(int)(*handle)-1];
1145 
1146 
1147     if ( exp_check_eid_write(e,*id) ) return 1;
1148     /* don't allow multi-line entries to be written */
1149     if (*id == EFLT_SQ ) return 1;
1150     f2cstr(s,(int)*max_len,buf,sizeof(buf));
1151     return exp_append_str(e,*id,buf,strlen(buf));
1152 
1153 }
1154 
f_int expws_(f_int *handle, f_int *id, char *s, f_implicit s_l)
/*
 * FORTRAN interface: exp_put_str workalike for CHARACTER strings
 * (as opposed to CHARACTER arrays; see expwsa_ for those).
 *
 * The s_l characters of s are converted to a C string with f2cstr() and
 * appended to entry type *id with exp_append_str().
 *
 * Returns 1 if check_handle() rejects the handle, if exp_check_eid_write()
 * is non-zero for *id, or if *id is EFLT_SQ; otherwise returns the result
 * of exp_append_str().
 */
{
    char buf[EXP_FILE_LINE_LENGTH];
    Exp_info *e;

    if ( check_handle(handle) ) return 1;
    e = (Exp_info *) Handles[(int)(*handle)-1];

    if ( exp_check_eid_write(e,*id) ) return 1;
    /* don't allow multi-line entries to be written */
    if (*id == EFLT_SQ ) return 1;

    /*
     * Keep the last byte of buf for the terminator. expopn_ in this file
     * gives f2cstr a limit one smaller than its buffer, which suggests
     * f2cstr may store the NUL at index max_c; passing sizeof(buf) itself
     * could then write one byte past the end of buf.
     */
    f2cstr(s,s_l,buf,(int)sizeof(buf)-1);

    /*
     * Pass the length of the converted C string, as expwsa_ does. The raw
     * FORTRAN length s_l describes s, not buf, and can be larger than the
     * text actually held in buf (or than buf itself).
     */
    return exp_append_str(e,*id,buf,strlen(buf));
}
1173 
1174 /*
1175  * FORTRAN interface to exp_create_range()
1176  */
expcr_(char * str,f_int * start,f_int * end,f_implicit str_l)1177 void expcr_(char *str, f_int *start, f_int *end, f_implicit str_l) {
1178     exp_create_range(str, *start, *end);
1179     c2fstr(str, str_l, str, str_l);
1180 
1181     f_proc_return();
1182 }
1183 
1184 /*
1185  * FORTRAN interface to exp_extract_range()
1186  */
1187 /* ARGSUSED */
exper_(char * str,f_int * start,f_int * end,f_implicit str_l)1188 f_int exper_(char *str, f_int *start, f_int *end, f_implicit str_l) {
1189     return exp_extract_range(str, start, end);
1190 }
1191 
1192 
1193 
1194 
1195 /*************************************************************
1196  * Go for it!
1197  *************************************************************/
1198 
/*
 * Print entries of type eflt from e to fp using exp_print_line().
 *
 * all != 0: every entry, index 0 up to Nentries[eflt]-1, in order.
 * all == 0: only the last entry (index Nentries[eflt]-1), and nothing at
 *           all when there are no entries of this type.
 */
static void print_line(FILE *fp, Exp_info *e, int eflt, int all)
{
    int idx;

    if (!all) {
        if (e->Nentries[eflt] > 0)
            exp_print_line(fp, e, eflt, e->Nentries[eflt] - 1);
        return;
    }

    for (idx = 0; idx < e->Nentries[eflt]; idx++)
        exp_print_line(fp, e, eflt, idx);
}
1208 
1209 
/*
 * Print entries of type eflt from e to fp using exp_print_mline().
 *
 * all != 0: every entry, index 0 up to Nentries[eflt]-1, in order.
 * all == 0: only the last entry (index Nentries[eflt]-1), and nothing at
 *           all when there are no entries of this type.
 */
static void print_mline(FILE *fp, Exp_info *e, int eflt, int all)
{
    int idx;

    if (!all) {
        if (e->Nentries[eflt] > 0)
            exp_print_mline(fp, e, eflt, e->Nentries[eflt] - 1);
        return;
    }

    for (idx = 0; idx < e->Nentries[eflt]; idx++)
        exp_print_mline(fp, e, eflt, idx);
}
1219 
1220 
1221 
/*
 * Print the last entry of type eflt from e to fp using exp_print_seq().
 * Does nothing when there are no entries of that type.
 */
static void print_seq(FILE *fp, Exp_info *e, int eflt)
{
    if (e->Nentries[eflt] <= 0)
        return;

    exp_print_seq(fp, e, eflt, e->Nentries[eflt] - 1);
}
1227 
1228 
1229 
1230 
/*
 * Print the experiment file held in e to fp.
 *
 * Record types are emitted in the fixed order given by the table below.
 * Each row names the record type, whether it goes through print_mline()
 * (multi != 0) or print_line() (multi == 0), and the `all' flag passed on:
 * non-zero prints every entry of that type, zero prints only the last one.
 * The EFLT_SQ record is printed last, through print_seq().
 */
void exp_print_file(FILE *fp, Exp_info *e)
{
    static const struct {
        int eflt;
        int multi;
        int all;
    } layout[] = {
        { EFLT_ID, 0, 0 },
        { EFLT_AC, 0, 0 },
        { EFLT_EN, 0, 0 },

        { EFLT_CC, 0, 1 },
        { EFLT_EX, 0, 1 },
        { EFLT_PS, 0, 1 },

        { EFLT_LN, 0, 0 },
        { EFLT_LT, 0, 0 },

        { EFLT_CF, 0, 0 },
        { EFLT_CV, 0, 0 },
        { EFLT_CS, 0, 0 },
        { EFLT_CL, 0, 0 },
        { EFLT_CR, 0, 0 },

        { EFLT_SF, 0, 0 },
        { EFLT_SV, 0, 0 },
        { EFLT_SI, 0, 0 },
        { EFLT_SC, 0, 0 },
        { EFLT_SP, 0, 0 },
        { EFLT_PD, 0, 0 },
        { EFLT_FM, 0, 0 },
        { EFLT_SL, 0, 0 },
        { EFLT_SR, 0, 0 },

        { EFLT_QL, 0, 0 },
        { EFLT_QR, 0, 0 },

        { EFLT_TG, 1, 1 },
        { EFLT_TC, 1, 1 },
        { EFLT_NT, 1, 1 },

        { EFLT_CN, 0, 0 },
        { EFLT_TN, 0, 0 },
        { EFLT_PN, 0, 0 },
        { EFLT_PR, 0, 0 },
        { EFLT_LI, 0, 0 },
        { EFLT_LE, 0, 0 },
        { EFLT_CH, 0, 0 },

        { EFLT_ON, 1, 0 },
        { EFLT_AQ, 0, 0 },
        { EFLT_AV, 1, 0 },

        { EFLT_DR, 0, 0 },
        { EFLT_SE, 0, 0 },
        { EFLT_PC, 0, 0 },
        { EFLT_AP, 0, 0 },
        { EFLT_ST, 0, 0 },

        { EFLT_DT, 0, 0 },
        { EFLT_MC, 0, 0 },
        { EFLT_MN, 0, 0 },
        { EFLT_MT, 0, 0 },
        { EFLT_OP, 0, 1 },
        { EFLT_BC, 0, 0 },
        { EFLT_SS, 0, 0 },

        { EFLT_WT, 0, 0 },
        { EFLT_WL, 0, 0 },
        { EFLT_WR, 0, 0 },

        { EFLT_FT, 1, 1 }
    };
    size_t row;

    for (row = 0; row < sizeof(layout) / sizeof(layout[0]); row++) {
        if (layout[row].multi)
            print_mline(fp, e, layout[row].eflt, layout[row].all);
        else
            print_line(fp, e, layout[row].eflt, layout[row].all);
    }

    print_seq(fp, e, EFLT_SQ);
}
1301 
1302 
1303 /*
1304  * Allocate an set a new experiment file entry
1305  */
exp_set_entry(Exp_info * e,int eflt,char * str)1306 char *exp_set_entry(Exp_info *e, int eflt, char *str) {
1307     char *s;
1308     size_t l;
1309 
1310     if (NULL == ArrayRef(e->entries[eflt], e->Nentries[eflt]))
1311 	return NULL;
1312     else
1313 	e->Nentries[eflt]++;
1314 
1315     l = strlen(str);
1316     if (NULL == (s = exp_get_entry(e, eflt) = (char *)xmalloc(l+1))) {
1317 	e->Nentries[eflt]--;
1318 	return NULL;
1319     }
1320     strcpy(s, str);
1321 
1322     return s;
1323 }
1324