1 /*
2 * Copyright (c) 2004-2005, 2007, 2010, 2013 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Author(s): Simon Dear, James Bonfield, Rodger Staden, John Taylor
36 *
37 * Copyright (c) 1994-1999, 2001-2003 MEDICAL RESEARCH COUNCIL
38 * All rights reserved
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions are met:
42 *
43 * 1 Redistributions of source code must retain the above copyright notice,
44 * this list of conditions and the following disclaimer.
45 *
46 * 2 Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in
48 * the documentation and/or other materials provided with the
49 * distribution.
50 *
51 * 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
52 * MOLECULAR BIOLOGY nor the names of its contributors may be used
53 * to endorse or promote products derived from this software without
54 * specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
57 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
60 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) Medical Research Council 1994. All rights reserved.
71 *
72 * Permission to use, copy, modify and distribute this software and its
73 * documentation for any purpose is hereby granted without fee, provided that
74 * this copyright and notice appears in all copies.
75 *
76 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
77 * as part of the Staden Package at the MRC Laboratory of Molecular
78 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
79 *
80 * MRC disclaims all warranties with regard to this software.
81 */
82
83 /*
84 * File: expFileIO.c
85 * Version:
86 *
87 * Description: Routines for reading and writing to experiment files.
88 *
89 * 1. Opening experiment files
90 * 2. Reading information from an experiment file
91 * 3. Appending to experiment files
92 * 4. Closing an opened experiment file
93 *
94 * Created:
95 * Updated:
96 *
97 */
98
99 /*
100 * Tag format:
101 *
102 * 0 10
103 * |----.----|-
104 * TG TYPE S position..length
105 * TG One or more comment lines starting at character position 10
106 * TG Each line represents a line of tag.
107 * TG Extra indentation is simply added to the comment.
108 *
109 * Where S is the strand, either "+", "-", or "=" (both).
110 * Eg:
111 *
112 * TG COMM = 100..110
113 * TG This comment contains
114 * TG several lines.
115 *
116 * So the above is a COMMent tag on both strands from bases 100 to 110
117 * inclusive containing the annotation
118 * "This comment contains\n several lines.\n"
119 *
120 * This is written using exp_put_str giving the multi line string:
121 * "COMM = 100..110\nThis comment contains\n several lines."
122 *
123 * (ie the indentation is added by the experiment file format, not by the
124 * calling routines. Similarly this indentation is stripped out again when
125 * reading back.)
126 */
127
128 #ifdef HAVE_CONFIG_H
129 #include "io_lib_config.h"
130 #endif
131
132 #include <stdio.h>
133 #include <string.h> /* IMPORT: strdup (hopefully!) */
134 #include <ctype.h>
135
136 /* 6/1/99 johnt - includes needed for Visual C++ */
137 #ifdef _MSC_VER
138 # include <io.h>
139 # include <fcntl.h>
140 #endif
141
142 #include "io_lib/expFileIO.h"
143 #include "io_lib/xalloc.h"
144 #include "io_lib/misc.h"
145 #include "io_lib/stdio_hack.h"
146
147 /* Fixup for broken SunOS 4.x systems */
148 #ifndef FOPEN_MAX
149 #define FOPEN_MAX 20
150 #endif
151
152 static int exp_check_eid_read(Exp_info *e,int id);
153
154 /*************************************************************
155 * Line types for experiment file
156 *************************************************************/
157
/*
 * Table of two-letter experiment file line type identifiers, indexed by
 * line type number (the number shown in each trailing comment).
 * Slots beyond the last initialiser are zero-filled, i.e. hold an empty
 * identifier; exp_check_eid_read()/exp_check_eid_write() treat an empty
 * identifier as an invalid line type.
 */
char eflt_feature_ids[MAXIMUM_EFLTS][MAXIMUM_EFLT_LENGTH+1] = {
    "CF", /* 0 cloning vector sequence file */
    "CN", /* 1 clone name */
    "CS", /* 2 cloning vector sequence present in sequence */
    "CV", /* 3 cloning vector type */
    "DR", /* 4 direction of read */
    "DT", /* 5 date of experiment */
    "EN", /* 6 experiment name */
    "EX", /* 7 experimental notes */
    "FM", /* 8 sequencing vector fragmentation method */
    "LN", /* 9 local format trace file name */
    "LT", /* 10 local format trace file type */
    "MC", /* 11 machine on which experiment ran */
    "MN", /* 12 machine generated trace file name */
    "MT", /* 13 machine generated trace file type */
    "OP", /* 14 operator */
    "PN", /* 15 primer name */
    "QR", /* 16 poor quality sequence present at right (3') end */
    "SC", /* 17 sequencing vector cloning site */
    "SF", /* 18 sequencing vector sequence file */
    "SI", /* 19 sequencing vector insertion length */
    "SL", /* 20 sequencing vector present at left (5') end */
    "SP", /* 21 sequencing vector primer site (relative to cloning site) */
    "SQ", /* 22 sequence */
    "SR", /* 23 sequencing vector present at right (3') end */
    "ST", /* 24 strands */
    "SV", /* 25 sequencing vector type */
    "TN", /* 26 template name */
    "QL", /* 27 poor quality sequence present at left (5') end */
    "PS", /* 28 processing status */
    "CC", /* 29 comments */
    "SS", /* 30 sequence to screen against */
    /* added 27-May-93 */
    "TG", /* 31 gel tag line */
    "ID", /* 32 identifier */
    /* added 24-Sep-93 */
    "AQ", /* 33 average quality measure */
    /* added 15-Oct-93 */
    "PR", /* 34 primer type */
    "LI", /* 35 subclone library (mtd) */
    "LE", /* 36 subclone library entry (well) */
    /* added 19-Apr-94 */
    "TC", /* 37 contig tag line */
    "AC", /* 38 accession number */
    /* added 11-Nov-94 */
    "BC", /* 39 base calling software */
    "ON", /* 40 original base numbers (positions) */
    "AV", /* 41 accuracy (quality) values */
    "PC", /* 42 position in contig */
    "SE", /* 43 sense, whether it is complemented */
    /* added 5-4-95 */
    "CL", /* 44 cloning vector left end*/
    "CR", /* 45 cloning vector right end*/
    "AP", /* 46 assembly position */
    "CH", /* 47 special chemistry used (eg taq) */
    "PD", /* 48 primer data - the sequence of a primer */
    "WT", /* 49 wild type trace */
    "NT", /* 50 note */
    "GD", /* 51 Gap4 database file */
    "WL", /* 52 wildtype trace left clip point */
    "WR", /* 53 wildtype trace right clip point */
    "FT", /* 54 EMBL format feature table */
    "LG" /* 55 LiGation: an amalgamation of LI and LE */
};
222
223
224
225
226 /*************************************************************
227 * Output/update lines
228 *************************************************************/
229
static int exp_print_line_(FILE *fp, char *eflt, char *entry)
/*
 * Writes a single experiment file line to fp: the line type identifier
 * eflt, left-justified and padded to 5 characters, followed by entry and
 * a newline.
 *
 * Returns 0 for success, 1 if fprintf() reported an error.
 */
{
    int written = fprintf(fp, "%-5s%s\n", eflt, entry);

    return (written < 0) ? 1 : 0;
}
241
int exp_print_line(FILE *fp, Exp_info *e, int eflt, int i)
/*
 * Writes entry number i of line type eflt to fp as one experiment file
 * line, prefixed by that line type's identifier.
 *
 * Returns the result of exp_print_line_(): 0 for success, non-zero if the
 * write failed.
 */
{
    char *id = eflt_feature_ids[eflt];
    char *text = arr(char *, e->entries[eflt], i);

    return exp_print_line_(fp, id, text);
}
252
253 /*
254 * Outputs a multi-line experiment file line.
255 * Continuation lines are automatically added by adding 5 characters of extra
256 * indentation at the start of each continuation.
257 *
258 * returns -1 for failure, 0 for success.
259 */
exp_print_mline(FILE * fp,Exp_info * e,int eflt,int i)260 int exp_print_mline(FILE *fp, Exp_info *e, int eflt, int i) {
261 char *p, *c;
262
263 p = arr(char *, e->entries[eflt], i);
264
265 /* first line */
266 if ((c = strchr(p, '\n')))
267 *c = '\0';
268 if (-1 == exp_print_line_(fp, eflt_feature_ids[eflt], p))
269 return -1;
270
271 while (c) {
272 *c = '\n';
273 p = c+1;
274
275 if ((c = strchr(p, '\n'))) {
276 *c = '\0';
277 }
278
279 if (-1 == fprintf(fp, "%-10s%s\n", eflt_feature_ids[eflt], p))
280 return -1;
281 }
282
283 return 0;
284 }
285
286
int exp_print_seq(FILE *fp, Exp_info *e, int eflt, int i)
/*
 * Writes entry number i of line type eflt to fp as a sequence block:
 * the identifier on its own, then the characters of the entry with a
 * line break before every 60th character and a separator before every
 * 10th, and finally a terminating "//" line.
 *
 * Returns 0 for success, 1 as soon as any write fails.
 */
{
    const char *bases;
    int pos, total;

    if (fprintf(fp, "%-5s", eflt_feature_ids[eflt]) < 0)
        return 1;

    bases = arr(char *, e->entries[eflt], i);
    total = strlen(bases);

    for (pos = 0; pos < total; pos++) {
        /* start a new output line every 60 characters */
        if (pos % 60 == 0 && fprintf(fp, "\n ") < 0)
            return 1;
        /* separate groups of 10 characters */
        if (pos % 10 == 0 && fprintf(fp, " ") < 0)
            return 1;
        if (fprintf(fp, "%c", bases[pos]) < 0)
            return 1;
    }

    if (fprintf(fp, "\n//\n") < 0)
        return 1;

    return 0;
}
306
exp_get_feature_index(char * e)307 int exp_get_feature_index(char *e)
308 {
309 int i;
310 for (i = 0; i < MAXIMUM_EFLTS; i++) {
311 if (eflt_feature_ids[i][0] == e[0] &&
312 eflt_feature_ids[i][1] == e[1])
313 return i;
314 }
315
316 return -1;
317 }
318
319
320 /*************************************************************
321 * Utility routines
322 *************************************************************/
323
/*
 * Formats the pair (start, end) into str using the experiment file
 * 'range format', "start..end".
 *
 * str must be large enough for both numbers, the ".." and the
 * terminating NUL; no bounds check is made.
 *
 * Returns str.
 */
char *exp_create_range(char *str, int start, int end) {
    (void)sprintf(str, "%d..%d", start, end);

    return str;
}
332
/*
 * Parses a range string of the form "start..end" into *start and *end.
 *
 * Returns 0 if both numbers were read and 1 otherwise. When parsing
 * fails after the first number, *start may already have been written.
 */
int exp_extract_range(char *str, int *start, int *end) {
    int fields = sscanf(str, "%d..%d", start, end);

    return (fields == 2) ? 0 : 1;
}
340
exp_create_info(void)341 Exp_info *exp_create_info(void)
342 /*
343 * Allocate space for new experiment file information
344 */
345 {
346 Exp_info *new;
347 int i;
348
349 new = (Exp_info *)xmalloc(sizeof(Exp_info));
350 if (new != NULL) {
351 for(i=0; i< MAXIMUM_EFLTS ; i++) {
352 new->Nentries[i] = 0;
353 new->entries[i] = ArrayCreate(sizeof(char *), 1/*one entry*/);
354 }
355 new->fp = NULL;
356 }
357
358 return new;
359 }
360
361
void exp_destroy_info(Exp_info *e)
/*
 * Releases an experiment file structure: frees every non-NULL entry
 * string of every line type, destroys the per-type arrays, closes e->fp
 * if it is set and finally frees e itself.
 *
 * Does nothing when e is NULL_Exp_info.
 */
{
    int type, n;

    if (e == NULL_Exp_info)
        return;

    for (type = 0; type < MAXIMUM_EFLTS; type++) {
        Array list = e->entries[type];

        for (n = 0; n < e->Nentries[type]; n++) {
            char *text = arr(char *, list, n);

            if (text != NULL)
                xfree(text);
        }
        ArrayDestroy(list);
    }

    if (e->fp != NULL)
        fclose(e->fp);

    xfree(e);
}
380
381
382
383
384 /*
385 * Read from file a sequence, discarding all white space til a // is
386 * encountered
387 */
exp_read_sequence(FILE * fp)388 static char *exp_read_sequence(FILE *fp)
389 {
390 char *seq = NULL;
391 size_t seq_len = 0, seq_alloc;
392 char line[EXP_FILE_LINE_LENGTH+1];
393 char *l;
394 static int valid_char[256], init = 0;
395
396 /* Initialise lookup tables for efficiency later on.*/
397 if (!init) {
398 int i;
399 for (i = 0; i < 256; i++) {
400 if (i < 128 && !isspace(i) && !isdigit(i) && !iscntrl(i))
401 valid_char[i] = 1;
402 else
403 valid_char[i] = 0;
404 }
405 init = 1;
406 }
407
408 /* Initialise memory */
409 seq_alloc = EXP_FILE_LINE_LENGTH * 8;
410 seq = (char *)xmalloc(seq_alloc);
411 if (NULL == seq)
412 return NULL;
413 seq[0] = '\0';
414
415 /* Reading line by line, until we get "//" */
416 l = fgets(line,EXP_FILE_LINE_LENGTH,fp);
417 while (l!= NULL && strncmp(l,"//",2)) {
418 char *a, *b;
419
420 /* make sure the seq buffer is large enough */
421 if (seq_len + EXP_FILE_LINE_LENGTH + 1 > seq_alloc) {
422 seq_alloc *= 2;
423 if (NULL == (seq = (char *)xrealloc(seq, seq_alloc)))
424 return NULL;
425 }
426
427 /* copy to seq, stripping spaces on the fly */
428 for(a=line, b = &seq[seq_len]; *a; a++)
429 if (valid_char[(unsigned char)*a])
430 *b++ = *a;
431 *b = '\0';
432 seq_len = b-seq;
433
434 l = fgets(line,EXP_FILE_LINE_LENGTH,fp);
435 }
436
437 /* Shrink the allocated string to reduce memory usage */
438 seq = (char *)xrealloc(seq, seq_len + 1);
439
440 return seq;
441 }
442
443
444 /*
445 * Converts the opos[] array into a char array.
446 * In doing so this shrinks the data size by using a .. notation.
447 * No check is made that buf is large enough. It is recommended that buf is
448 * allocated to 5*len which covers the worst case (for sequences less that
449 * 9999 bases long).
450 *
451 * Note that on older systems sprintf may return the first argument rather
452 * than the number of characters written.
453 * For this reason we have to do the counting ourselves.
454 */
opos2str(int2 * opos,int len,char * buf)455 char *opos2str(int2 *opos, int len, char *buf) {
456 int i, st, f, dir = 0;
457 char *r = buf, *rs = buf;
458
459 f = opos[st = 0];
460 for (i = 1; i < len; f=opos[i++]) {
461 if (dir == 0) {
462 if (opos[i] == f+1)
463 dir=1;
464 else if (opos[i] == f-1)
465 dir=-1;
466 }
467
468 if (dir && opos[i] != f + dir) {
469 if (st != i-1)
470 sprintf(buf, "%d..%d ", opos[st], opos[i-1]);
471 else
472 sprintf(buf, "%d ", opos[st]);
473 st = i;
474 dir = 0;
475
476 buf += strlen(buf);
477
478 } else if (dir == 0) {
479 sprintf(buf, "%d ", f);
480
481 st = i;
482 buf += strlen(buf);
483 }
484
485 if (buf - rs > 60) {
486 *buf++ = '\n';
487 *buf = '\0';
488 rs = buf - 6;
489 }
490 }
491
492 if (st != i-1)
493 sprintf(buf, "%d..%d", opos[st], opos[i-1]);
494 else
495 sprintf(buf, "%d", opos[st]);
496
497 return r;
498 }
499
500
501
502 /*
503 * Expands from the character string .. notation to the opos[] array, up to
504 * a maximum of len elements in opos[].
505 *
506 * Returns the length of the opos array.
507 */
str2opos(int2 * opos,int len,char * buf)508 int str2opos(int2 *opos, int len, char *buf) {
509 /* int i, n1, n2, st, en, m, j = 0; */
510 int i, j = 0, st, en;
511 char *cp;
512
513 while (j < len && *buf) {
514 st = strtol(buf, &cp, 10);
515 if (buf == cp) {
516 buf++;
517 continue;
518 }
519 buf = cp;
520 if (buf[0] == '.' && buf[1] == '.') {
521 en = strtol(buf += 2, &cp, 10);
522 if (buf == cp) {
523 opos[j++] = st;
524 buf++;
525 continue;
526 }
527 buf = cp;
528
529 if (en >= st)
530 for (i = st; i <= en && j < len; i++)
531 opos[j++] = i;
532 else
533 for (i = st; i >= en && j < len; i--)
534 opos[j++] = i;
535 } else {
536 opos[j++] = st;
537 }
538 }
539
540 return j;
541 }
542
543
544 /*
545 * Converts the accuracy value string (AV) to the confidence array up to
546 * a maximum of len elements in conf[].
547 *
548 * The AV string is of format:
549 * "x y z ..." where x, y and z are confidence values for the first three
550 * called bases. Or:
551 * "a,b,c,d e,f,g,h i,j,k,l ..." where the 4-tuples represent the four
552 * confidence values for each base.
553 *
554 * Returns: number of confidence values read, or -1 for error.
555 */
str2conf(int1 * conf,int len,char * buf)556 int str2conf(int1 *conf, int len, char *buf) {
557 int ind = 0;
558
559 while (*buf && ind < len) {
560 char *new_buf;
561 int val1;
562
563 val1 = strtol(buf, &new_buf, 10);
564 if (new_buf == buf)
565 break;
566
567 if (*new_buf == ',') {
568 fprintf(stderr, "4-tuple system is currently unsupported\n");
569 return -1;
570 }
571
572 conf[ind++] = val1;
573 buf = new_buf;
574 }
575
576 return ind;
577 }
578
579
580 /*
581 * Converts the confidence array to the accuracy value string (AV).
582 *
583 * Note no memory overrun checks are performed on buf. It is recommended
584 * that it is allocated to 4*len (worst case of "255 " for each base) plus.
585 * a couple of terminating newline and null plus another byte per 15 values
586 * to allow for the 60-char line length.
587 * For ease, allocating to 5*len+2 is more than sufficient.
588 *
589 * Returns the buf argument.
590 */
conf2str(int1 * conf,int len,char * buf)591 char *conf2str(int1 *conf, int len, char *buf) {
592 int i;
593 char *ret = buf, *rs = buf;
594
595 for (i = 0; i < len; i++) {
596 sprintf(buf, "%d ", conf[i]);
597 buf += strlen(buf);
598
599 if (buf - rs > 60) {
600 *buf++ = '\n';
601 *buf = '\0';
602 rs = buf - 6;
603 }
604 }
605
606 return ret;
607 }
608
609 /*************************************************************
610 * Main C interface routines
611 *************************************************************/
612
613
614 /*
615 * Closes an experiment file (if open), but does not free it.
616 */
exp_close(Exp_info * e)617 void exp_close(Exp_info *e) {
618 if (e->fp) {
619 fclose(e->fp);
620 e->fp = NULL;
621 }
622 }
623
624
exp_read_info(char * file)625 Exp_info *exp_read_info(char *file)
626 /*
627 * Read in an experiment file and return handle
628 */
629 {
630 Exp_info *e;
631 FILE *fp;
632
633 /*
634 * open for read
635 */
636 if ((fp = fopen(file,"r"))==NULL) {
637 return NULL_Exp_info;
638 }
639
640 e = exp_fread_info(fp);
641 fclose(fp);
642
643 if (NULL_Exp_info == e) {
644 return NULL_Exp_info;
645 }
646
647 /*
648 * reopen for appending
649 */
650 e->fp = fopen(file,"a");
651
652 return e;
653
654 }
655
656
657 /*
658 * Read in an experiment file and return handle
659 */
exp_fread_info(FILE * fp)660 Exp_info *exp_fread_info(FILE *fp)
661 {
662 Exp_info *e;
663 char line[EXP_FILE_LINE_LENGTH+1];
664 char *aline;
665 int alloced_length = EXP_FILE_LINE_LENGTH+1;
666 int apos, len;
667 int last_entry = -1;
668 size_t entry_len = 0;
669
670 e = exp_create_info();
671
672
673 /*
674 * No longer has an effect due to mFILE already being loaded. Ifdef not
675 * triggered under mingw anyway.
676 */
677 #ifdef _WIN32
678 /* 6/1/99 johnt - need to ensure text mode to translate \r\n to \n */
679 /* _setmode(fileno(fp),_O_TEXT); */
680 mfascii(fp);
681 #endif
682
683 /*
684 * open for read, set this temporarily in this function. Should be NULL
685 * when exiting as this isn't our file pointer to own, but the destroy
686 * function does attempt to automatically close it.
687 */
688 e->fp = fp;
689
690 if (NULL == (aline = (char *)xmalloc(alloced_length)))
691 return NULL;
692
693 if (e != NULL_Exp_info) {
694 int at_end = 0;
695
696 for(;;) {
697 char *c;
698 int entry;
699
700 /* Read into aline, joining and allocating as necessary */
701 apos = 0;
702 do {
703 if (fgets(line,EXP_FILE_LINE_LENGTH,e->fp) == NULL) {
704 at_end = 1;
705 break;
706 }
707
708 len = strlen(line);
709 if (apos + len >= alloced_length) {
710 alloced_length *= 2;
711 if (NULL == (aline = (char *)xrealloc(aline,
712 alloced_length))) {
713 e->fp = NULL;
714 return NULL;
715 }
716 }
717
718 strcpy(aline+apos, line);
719 apos += len;
720 } while (line[len-1] != '\n');
721
722 if (at_end)
723 break;
724
725 /*
726 * zero terminate first argument
727 * set c to point to second argument
728 *
729 * FIXME: c should point to character 6 always. Indentation is
730 * important when considering continuation lines.
731 */
732 for (c=aline;*c && !isspace(*c); c++) ;
733 if (*c) {
734 *c++ = '\0';
735 for (;*c && isspace(*c); c++) ;
736 }
737
738 entry = exp_get_feature_index(aline);
739 if (entry >= 0) {
740 /*
741 * Tag lines may be split over multiple lines. If we have no
742 * tag type then we append to the existing tag.
743 */
744 if (entry == last_entry &&
745 (int)(c-aline) >= 10/* continuation lines */
746 && (entry == EFLT_TG || entry == EFLT_TC ||
747 entry == EFLT_ON || entry == EFLT_AV ||
748 entry == EFLT_NT || entry == EFLT_FT)) {
749 char *en;
750 size_t l1, l2;
751
752 /*
753 * Extend our current line by the appropriate amount
754 */
755 if( exp_check_eid_read(e,entry) )
756 return NULL;
757 en = exp_get_entry(e,entry);
758 l1 = entry_len;
759 l2 = strlen(&aline[10]);
760
761 if (NULL == (en = exp_get_entry(e, entry) =
762 (char *)xrealloc(en, l1 + l2 + 1))) {
763 e->fp = NULL;
764 return NULL;
765 }
766
767
768 /*
769 * Append the new line (without the \n char)
770 */
771 en[l1] = '\n';
772 aline[l2+9] = '\0';
773 strcpy(&en[l1+1], &aline[10]);
774
775 entry_len += l2;
776 } else {
777 /*
778 * Increment number of entries for line type entry
779 * This will force exp_get_entry() to return pointer to
780 * next free element in array
781 */
782 (void)ArrayRef(e->entries[entry],e->Nentries[entry]++);
783
784 if (entry == EFLT_SQ)
785 exp_get_entry(e,entry) = exp_read_sequence(e->fp);
786 else {
787 char *eoln = strchr(c,'\n');
788 int i;
789
790 if (eoln!=NULL) *eoln='\0';
791
792 if (entry == EFLT_LT)
793 for (i=3; isspace(c[i]) && i >= 0; c[i--]='\0');
794
795 exp_get_entry(e,entry) = (char *)strdup(c);
796 entry_len = strlen(c);
797 }
798 }
799 }
800
801 last_entry = entry;
802 }
803 }
804
805 e->fp = NULL;
806 xfree(aline);
807
808 return e;
809 }
810
static int exp_check_eid_read(Exp_info *e,int id)
/*
 * Checks that (e, id) can be read from: e must be non-NULL, id must be a
 * line type in range with a non-empty identifier, and at least one entry
 * of that type must exist.
 *
 * Returns 0 when the combination is usable, 1 when it is not.
 */
{
    if (e == NULL)
        return 1;
    if (id < 0 || id >= MAXIMUM_EFLTS)
        return 1;
    if (e->Nentries[id] == 0)
        return 1;
    if (eflt_feature_ids[id][0] == '\0')
        return 1;

    return 0;
}
825
static int exp_check_eid_write(Exp_info *e,int id)
/*
 * Checks that (e, id) can be written to: e must be non-NULL and have an
 * open file attached, and id must be a line type in range with a
 * non-empty identifier.
 *
 * Returns 0 when the combination is usable, 1 when it is not.
 */
{
    if (e == NULL)
        return 1;
    if (id < 0 || id >= MAXIMUM_EFLTS)
        return 1;
    if (e->fp == NULL)
        return 1;
    if (eflt_feature_ids[id][0] == '\0')
        return 1;

    return 0;
}
838
839
840
841
842
843
int exp_get_int(Exp_info *e, int id, int *val)
/*
 * Converts the most recent entry of line type id to an integer with
 * atoi() and stores it in *val.
 * returns:
 * 0 - success
 * 1 - no entry (or invalid e/id); *val is left untouched
 */
{
    char *text;

    if (exp_check_eid_read(e, id))
        return 1;

    text = exp_get_entry(e, id);
    *val = atoi(text);

    return 0;
}
856
857
int exp_get_rng(Exp_info *e, int id, int *from, int *to)
/*
 * Parses the most recent entry of line type id as a "from..to" range and
 * stores the two values in *from and *to.
 * returns:
 * 0 - an entry exists (the result of parsing it is not checked, so
 *     *from / *to may be unchanged if the entry is not a valid range)
 * 1 - no entry (or invalid e/id)
 */
{
    char *text;

    if (exp_check_eid_read(e, id))
        return 1;

    text = exp_get_entry(e, id);
    (void)exp_extract_range(text, from, to);

    return 0;
}
871
872
873
int exp_get_str(Exp_info *e, int id, char *s, f_implicit s_l)
/*
 * Copies the most recent entry of line type id into s using
 * strncpy(s, entry, s_l). As with strncpy() itself, s is not
 * NUL-terminated when the entry is s_l characters or longer.
 * returns:
 * 0 - success
 * 1 - no entry (or invalid e/id); s is left untouched
 */
{
    char *src;

    if (exp_check_eid_read(e, id))
        return 1;

    src = exp_get_entry(e, id);
    strncpy(s, src, s_l);

    return 0;
}
887
888
static int exp_append_str(Exp_info *e, int id, char *s, int len)
/*
 * Adds a new entry of line type id holding the first len characters of s
 * (fewer if s is shorter) and writes it to the experiment file e->fp.
 * SQ entries are written as a sequence block; TG, TC, ON, AV, NT and FT
 * entries as multi-line entries; everything else as a single line.
 *
 * The caller must already have validated e and id for writing.
 *
 * returns:
 * 0 - success
 * non-zero - no update: len is negative or memory could not be allocated
 *            (1; in both cases no entry is added), or the write failed
 *            (whatever the print routine returned)
 */
{
    char *copy;

    if (len < 0)
        return 1;

    /* Take the copy first so that a failure leaves e untouched */
    copy = (char *)xmalloc((size_t)len + 1);
    if (copy == NULL)
        return 1;
    strncpy(copy, s, len);
    copy[len] = '\0';

    (void)ArrayRef(e->entries[id],e->Nentries[id]++);
    exp_get_entry(e,id) = copy;

    if ( id == EFLT_SQ )
        return exp_print_seq(e->fp,e,id,e->Nentries[id]-1);
    else if (id == EFLT_TG || id == EFLT_TC ||
             id == EFLT_ON || id == EFLT_AV ||
             id == EFLT_NT || id == EFLT_FT)
        return exp_print_mline(e->fp,e,id,e->Nentries[id]-1);
    else
        return exp_print_line(e->fp,e,id,e->Nentries[id]-1);
}
911
912
int exp_put_int(Exp_info *e, int id, int *val)
/*
 * Formats *val in decimal and appends it as a new entry of line type id
 * to the experiment file.
 * returns:
 * 0 - success
 * 1 - no update
 */
{
    char text[EXP_FILE_LINE_LENGTH];

    if (exp_check_eid_write(e, id))
        return 1;

    sprintf(text, "%d", *val);

    return exp_append_str(e, id, text, strlen(text));
}
926
927
int exp_put_rng(Exp_info *e, int id, int *from, int *to)
/*
 * Formats the pair (*from, *to) as a "from..to" range and appends it as a
 * new entry of line type id to the experiment file.
 * returns:
 * 0 - success
 * 1 - no update
 */
{
    char text[EXP_FILE_LINE_LENGTH];

    if (exp_check_eid_write(e, id))
        return 1;

    (void)exp_create_range(text, *from, *to);

    return exp_append_str(e, id, text, strlen(text));
}
943
944
945
int exp_put_str(Exp_info *e, int id, char *s, f_implicit s_l)
/*
 * Appends the first s_l characters of s as a new entry of line type id
 * to the experiment file.
 * returns:
 * 0 - success
 * 1 - no update
 */
{
    if (exp_check_eid_write(e, id))
        return 1;

    return exp_append_str(e, id, s, s_l);
}
957
958
959 /*************************************************************
960 * FORTRAN INTERFACE
961 *************************************************************/
962
963
964
/* Non-zero once initialise() has been entered for the first time. */
static int init_done = 0;
/* Number of slots in Handles[]; 0 before initialise() or if its allocation failed. */
static int NHandles = 0;
/*
 * Table of open experiment files for the FORTRAN interface. A FORTRAN
 * handle h refers to Handles[h-1]; a NULL slot is free.
 */
static Exp_info **Handles = NULL;
968
initialise(void)969 static int initialise(void)
970 {
971 int i;
972
973 if (init_done) return 0;
974 init_done++;
975
976 NHandles = FOPEN_MAX;
977
978 if ( (Handles = (Exp_info **)xmalloc(sizeof(Exp_info *) * NHandles)) == NULL) {
979 NHandles = 0;
980 return 1;
981 }
982
983 for (i=0; i<NHandles; i++) Handles[i] = NULL;
984
985 return 0;
986 }
987
988
get_free_handle(void)989 static int get_free_handle(void)
990 /*
991 * find a free entry in the Exp array
992 * returns -1 if there is none
993 */
994 {
995 int i;
996
997 (void) initialise();
998
999 if (!NHandles) return -1; /* no slots! */
1000 for (i=0; i<NHandles && Handles[i]!=NULL; i++) ;
1001 return (i==NHandles)?-1:i;
1002 }
1003
1004
static int check_handle(f_int *handle)
/*
 * Validates a FORTRAN handle: it must be non-NULL and its value must lie
 * in the range 1..NHandles.
 *
 * Returns 0 when the handle is in range, 1 when it is not. Whether the
 * slot it refers to is actually in use is not checked here.
 */
{
    int h;

    if (handle == NULL)
        return 1;

    h = (int) (*handle);
    if (h <= 0 || h > NHandles)
        return 1;

    return 0;
}
1011
1012
1013
expopn_(char * fn,f_implicit fn_l)1014 f_int expopn_(char *fn, f_implicit fn_l)
1015 /*
1016 * FORTRAN interface to exp_open_file()
1017 */
1018 {
1019 char cfn[1025];
1020 int handle;
1021
1022 if ( (handle = get_free_handle()) >= 0 ) {
1023 f2cstr(fn,fn_l,cfn,1024);
1024 Handles[handle] = exp_read_info(cfn);
1025 }
1026
1027 return (f_int) (handle+1);
1028 }
1029
1030
1031
expkil_(f_int * handle)1032 f_proc_ret expkil_(f_int *handle)
1033 /*
1034 * FORTRAN interface to exp_destroy_info
1035 */
1036 {
1037 Exp_info *e;
1038 if ( check_handle(handle) ) f_proc_return();
1039 e = (Exp_info *) Handles[(int)(*handle)-1];
1040
1041 exp_destroy_info(e);
1042
1043 Handles[(int)(*handle)-1] = NULL;
1044 *handle = 0;
1045
1046 f_proc_return();
1047 }
1048
f_int expri_(f_int *handle, f_int *id, f_int *val)
/*
 * FORTRAN interface to exp_get_int(): reads the integer entry of line
 * type *id into *val.
 *
 * Returns 0 for success, 1 for an out-of-range handle or missing entry.
 */
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_get_int(Handles[slot], (int)*id, (int *)val);
}
1060
1061
f_int exprr_(f_int *handle, f_int *id, f_int *from, f_int *to)
/*
 * FORTRAN interface to exp_get_rng(): reads the range entry of line type
 * *id into *from and *to.
 *
 * Returns 0 for success, 1 for an out-of-range handle or missing entry.
 */
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_get_rng(Handles[slot], (int)*id, (int *)from, (int *)to);
}
1074
1075 /* ARGSUSED */
exprsa_(f_int * handle,f_int * id,char * s,f_int * max_len,f_implicit s_l)1076 f_int exprsa_(f_int *handle, f_int *id, char *s, f_int *max_len, f_implicit s_l)
1077 /*
1078 * FORTRAN interface to exp_get_str workalike
1079 * NOTE: for use with FORTRAN CHARACTER arrays instead CHARACTER strings
1080 */
1081 {
1082 Exp_info *e;
1083 if ( check_handle(handle) ) return 1;
1084 e = (Exp_info *) Handles[(int)(*handle)-1];
1085
1086 if ( exp_check_eid_read(e,*id) ) return 1;
1087 c2fstr(exp_get_entry(e,*id),(int)*max_len,s,(int)*max_len);
1088 return 0;
1089 }
1090
1091
f_int exprs_(f_int *handle, f_int *id, char *s, f_implicit s_l)
/*
 * FORTRAN interface to exp_get_str workalike: converts the most recent
 * entry of line type *id to a FORTRAN string of s_l characters in s.
 * NOTE: for use with FORTRAN CHARACTER strings instead of CHARACTER arrays
 *
 * Returns 0 for success, 1 for an out-of-range handle or missing entry.
 */
{
    Exp_info *e;

    if (check_handle(handle))
        return 1;
    e = Handles[(int)(*handle) - 1];

    if (exp_check_eid_read(e, *id))
        return 1;

    c2fstr(exp_get_entry(e,*id), s_l, s, s_l);

    return 0;
}
1106
1107
f_int expwi_(f_int *handle, f_int *id, f_int *val)
/*
 * FORTRAN interface to exp_put_int(): appends *val as a new entry of
 * line type *id.
 *
 * Returns 0 for success, 1 for an out-of-range handle or no update.
 */
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_put_int(Handles[slot], (int)*id, (int *)val);
}
1119
1120
f_int expwr_(f_int *handle, f_int *id, f_int *from, f_int *to)
/*
 * FORTRAN interface to exp_put_rng(): appends the range *from..*to as a
 * new entry of line type *id.
 *
 * Returns 0 for success, 1 for an out-of-range handle or no update.
 */
{
    int slot;

    if (check_handle(handle))
        return 1;

    slot = (int)(*handle) - 1;

    return exp_put_rng(Handles[slot], (int)*id, (int *)from, (int *)to);
}
1132
1133
1134 /* ARGSUSED */
expwsa_(f_int * handle,f_int * id,char * s,f_int * max_len,f_implicit s_l)1135 f_int expwsa_(f_int *handle, f_int *id, char *s, f_int *max_len, f_implicit s_l)
1136 /*
1137 * FORTRAN interface to exp_put_str workalike
1138 * NOTE: for use with FORTRAN CHARACTER arrays instead CHARACTER strings
1139 */
1140 {
1141 Exp_info *e;
1142 char buf[EXP_FILE_LINE_LENGTH];
1143 if ( check_handle(handle) ) return 1;
1144 e = (Exp_info *) Handles[(int)(*handle)-1];
1145
1146
1147 if ( exp_check_eid_write(e,*id) ) return 1;
1148 /* don't allow multi-line entries to be written */
1149 if (*id == EFLT_SQ ) return 1;
1150 f2cstr(s,(int)*max_len,buf,sizeof(buf));
1151 return exp_append_str(e,*id,buf,strlen(buf));
1152
1153 }
1154
f_int expws_(f_int *handle, f_int *id, char *s, f_implicit s_l)
/*
 * FORTRAN interface to exp_put_str workalike: converts the FORTRAN string
 * s (s_l characters) to a C string and appends it as a new entry of line
 * type *id.
 * NOTE: for use with FORTRAN CHARACTER strings instead of CHARACTER arrays
 *
 * Returns 0 for success, non-zero for an out-of-range handle, a handle or
 * line type that cannot be written to, an SQ line type, or a failed
 * append.
 */
{
    char buf[EXP_FILE_LINE_LENGTH];
    Exp_info *e;
    if ( check_handle(handle) ) return 1;
    e = (Exp_info *) Handles[(int)(*handle)-1];


    if ( exp_check_eid_write(e,*id) ) return 1;
    /* don't allow sequence (SQ) entries to be written */
    if (*id == EFLT_SQ ) return 1;
    /*
     * Leave room for the terminating NUL, as expopn_() does with its
     * 1025 byte buffer and a limit of 1024.
     */
    f2cstr(s,s_l,buf,sizeof(buf)-1);
    return exp_append_str(e,*id,buf,s_l);
}
1173
1174 /*
1175 * FORTRAN interface to exp_create_range()
1176 */
expcr_(char * str,f_int * start,f_int * end,f_implicit str_l)1177 void expcr_(char *str, f_int *start, f_int *end, f_implicit str_l) {
1178 exp_create_range(str, *start, *end);
1179 c2fstr(str, str_l, str, str_l);
1180
1181 f_proc_return();
1182 }
1183
1184 /*
1185 * FORTRAN interface to exp_extract_range()
1186 */
1187 /* ARGSUSED */
exper_(char * str,f_int * start,f_int * end,f_implicit str_l)1188 f_int exper_(char *str, f_int *start, f_int *end, f_implicit str_l) {
1189 return exp_extract_range(str, start, end);
1190 }
1191
1192
1193
1194
1195 /*************************************************************
1196 * Go for it!
1197 *************************************************************/
1198
/*
 * Print entries of type eflt from e to fp using exp_print_line().
 *
 * all != 0 : every entry, index 0 upwards.
 * all == 0 : only the last entry (index Nentries-1), if there is one.
 */
static void print_line(FILE *fp, Exp_info *e, int eflt, int all)
{
    int idx;

    if (!all) {
        if (e->Nentries[eflt] > 0)
            exp_print_line(fp, e, eflt, e->Nentries[eflt] - 1);
        return;
    }

    idx = 0;
    while (idx < e->Nentries[eflt]) {
        exp_print_line(fp, e, eflt, idx);
        idx++;
    }
}
1208
1209
/*
 * Print entries of type eflt from e to fp using exp_print_mline().
 *
 * all != 0 : every entry, index 0 upwards.
 * all == 0 : only the last entry (index Nentries-1), if there is one.
 */
static void print_mline(FILE *fp, Exp_info *e, int eflt, int all)
{
    int idx;

    if (!all) {
        if (e->Nentries[eflt] > 0)
            exp_print_mline(fp, e, eflt, e->Nentries[eflt] - 1);
        return;
    }

    idx = 0;
    while (idx < e->Nentries[eflt]) {
        exp_print_mline(fp, e, eflt, idx);
        idx++;
    }
}
1219
1220
1221
/*
 * Print the last entry of type eflt from e to fp using exp_print_seq().
 * Does nothing when no entry of that type is stored.
 */
static void print_seq(FILE *fp, Exp_info *e, int eflt)
{
    if (e->Nentries[eflt] <= 0)
        return;

    exp_print_seq(fp, e, eflt, e->Nentries[eflt] - 1);
}
1227
1228
1229
1230
/*
 * Write the contents of e to fp, one entry type after another in a
 * fixed order.
 *
 * The order and the printing mode of every type are given by the
 * table below. Each row names the entry type, the helper that prints
 * it (print_line, print_mline or print_seq) and whether all stored
 * entries of that type are printed (1) or only the last one (0).
 * The sequence (SQ) comes last.
 */
void exp_print_file(FILE *fp, Exp_info *e)
{
    enum { OUT_LINE, OUT_MLINE, OUT_SEQ };

    static const struct {
        int eflt;   /* entry type */
        int how;    /* OUT_LINE, OUT_MLINE or OUT_SEQ */
        int all;    /* passed on as the 'all' argument; unused for OUT_SEQ */
    } layout[] = {
        { EFLT_ID, OUT_LINE,  0 },
        { EFLT_AC, OUT_LINE,  0 },
        { EFLT_EN, OUT_LINE,  0 },

        { EFLT_CC, OUT_LINE,  1 },
        { EFLT_EX, OUT_LINE,  1 },
        { EFLT_PS, OUT_LINE,  1 },

        { EFLT_LN, OUT_LINE,  0 },
        { EFLT_LT, OUT_LINE,  0 },

        { EFLT_CF, OUT_LINE,  0 },
        { EFLT_CV, OUT_LINE,  0 },
        { EFLT_CS, OUT_LINE,  0 },
        { EFLT_CL, OUT_LINE,  0 },
        { EFLT_CR, OUT_LINE,  0 },

        { EFLT_SF, OUT_LINE,  0 },
        { EFLT_SV, OUT_LINE,  0 },
        { EFLT_SI, OUT_LINE,  0 },
        { EFLT_SC, OUT_LINE,  0 },
        { EFLT_SP, OUT_LINE,  0 },
        { EFLT_PD, OUT_LINE,  0 },
        { EFLT_FM, OUT_LINE,  0 },
        { EFLT_SL, OUT_LINE,  0 },
        { EFLT_SR, OUT_LINE,  0 },

        { EFLT_QL, OUT_LINE,  0 },
        { EFLT_QR, OUT_LINE,  0 },

        { EFLT_TG, OUT_MLINE, 1 },
        { EFLT_TC, OUT_MLINE, 1 },
        { EFLT_NT, OUT_MLINE, 1 },

        { EFLT_CN, OUT_LINE,  0 },
        { EFLT_TN, OUT_LINE,  0 },
        { EFLT_PN, OUT_LINE,  0 },
        { EFLT_PR, OUT_LINE,  0 },
        { EFLT_LI, OUT_LINE,  0 },
        { EFLT_LE, OUT_LINE,  0 },
        { EFLT_CH, OUT_LINE,  0 },

        { EFLT_ON, OUT_MLINE, 0 },
        { EFLT_AQ, OUT_LINE,  0 },
        { EFLT_AV, OUT_MLINE, 0 },

        { EFLT_DR, OUT_LINE,  0 },
        { EFLT_SE, OUT_LINE,  0 },
        { EFLT_PC, OUT_LINE,  0 },
        { EFLT_AP, OUT_LINE,  0 },
        { EFLT_ST, OUT_LINE,  0 },

        { EFLT_DT, OUT_LINE,  0 },
        { EFLT_MC, OUT_LINE,  0 },
        { EFLT_MN, OUT_LINE,  0 },
        { EFLT_MT, OUT_LINE,  0 },
        { EFLT_OP, OUT_LINE,  1 },
        { EFLT_BC, OUT_LINE,  0 },
        { EFLT_SS, OUT_LINE,  0 },

        { EFLT_WT, OUT_LINE,  0 },
        { EFLT_WL, OUT_LINE,  0 },
        { EFLT_WR, OUT_LINE,  0 },

        { EFLT_FT, OUT_MLINE, 1 },

        { EFLT_SQ, OUT_SEQ,   0 }
    };
    size_t row;

    for (row = 0; row < sizeof(layout) / sizeof(layout[0]); row++) {
        switch (layout[row].how) {
        case OUT_LINE:
            print_line(fp, e, layout[row].eflt, layout[row].all);
            break;
        case OUT_MLINE:
            print_mline(fp, e, layout[row].eflt, layout[row].all);
            break;
        default: /* OUT_SEQ */
            print_seq(fp, e, layout[row].eflt);
            break;
        }
    }
}
1301
1302
1303 /*
1304 * Allocate an set a new experiment file entry
1305 */
exp_set_entry(Exp_info * e,int eflt,char * str)1306 char *exp_set_entry(Exp_info *e, int eflt, char *str) {
1307 char *s;
1308 size_t l;
1309
1310 if (NULL == ArrayRef(e->entries[eflt], e->Nentries[eflt]))
1311 return NULL;
1312 else
1313 e->Nentries[eflt]++;
1314
1315 l = strlen(str);
1316 if (NULL == (s = exp_get_entry(e, eflt) = (char *)xmalloc(l+1))) {
1317 e->Nentries[eflt]--;
1318 return NULL;
1319 }
1320 strcpy(s, str);
1321
1322 return s;
1323 }
1324