1 static char rcsid[] = "$Id: iit_store.c 222736 2020-05-29 15:48:02Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #ifdef HAVE_UNISTD_H
9 #include <unistd.h>
10 #endif
11 #ifdef HAVE_SYS_TYPES_H
12 #include <sys/types.h>
13 #endif
14 #include <string.h> /* For strlen */
15 #include <strings.h> /* For rindex */
16 #include <ctype.h>
17 #include <math.h> /* For qsort and NAN */
18 #include "bool.h"
19 #include "types.h"
20 #include "assert.h"
21 #include "mem.h"
22 #include "fopen.h"
23 #include "getline.h"
24
25 #include "list.h"
26 #include "doublelist.h"
27 #include "univinterval.h"
28 #include "interval.h"
29 #include "table.h"
30 #include "tableint.h"
31 #include "chrom.h"
32 #include "iit-write-univ.h"
33 #include "iit-write.h"
34 #include "getopt.h"
35
/* Fallback for <math.h> implementations that do not provide the NAN macro.
   A second "#ifndef NAN" guard used to follow this one; it could never be
   active once NAN is defined here, and its body was not valid C, so it has
   been dropped. */
#ifndef NAN
#define NAN nan("")
#endif
43
44
/* debug(x) keeps its argument only in builds compiled with DEBUG defined;
   otherwise the wrapped statement expands to nothing. */
45 #ifdef DEBUG
46 #define debug(x) x
47 #else
48 #define debug(x)
49 #endif
50
/* LINELENGTH is referenced only inside an "#if 0" region in the visible part
   of this file.  MONITOR_INTERVAL is the number of FASTA entries parse_fasta
   reads between progress messages on stderr. */
51 #define LINELENGTH 8192
52 #define MONITOR_INTERVAL 100000 /* 100 thousand entries */
53
54 /************************************************************************
55 * Program options
56 ************************************************************************/
57
/* Option state.  Each variable starts at its default below and is overwritten
   by the option-parsing switch in main(). */
58 static char *outputfile = NULL;
59 static bool univ_format_p = false; /* IIT_write_univ used for chromosome.iit file */
60 static bool gff3_format_p = false;
61 static char *labelid = "ID";
62 static bool fieldsp = false;
63 static bool acc_only_p = false;
64 static char iit_version = 0;
65
66 static Sorttype_T divsort = CHROM_SORT;
/* NOTE(review): mitochondrial_string is not referenced in the visible part of
   this file -- confirm where it is used. */
67 static char *mitochondrial_string = NULL;
68
69
/* Long-option table for getopt_long().  The fourth field is the value
   getopt_long() returns for that option; an entry with 0 there
   (--accession-only) is identified by name in "case 0" of main().
   NOTE(review): the optstring given to getopt_long() in main() is
   "o:1FGl:v:s:", which does not list 'V' or '?', so those two appear to be
   reachable only through their long names -- confirm. */
70 static struct option long_options[] = {
71 /* Input options */
72 {"output", required_argument, 0, 'o'}, /* outputfile */
73 {"univformat", no_argument, 0, '1'}, /* univ_format_p */
74 {"accession-only", no_argument, 0, 0}, /* acc_only_p */
75 {"fields", no_argument, 0, 'F'}, /* fieldsp */
76 {"gff", no_argument, 0, 'G'}, /* gff3_format_p */
77 {"label", required_argument, 0, 'l'}, /* labelid */
78 {"iitversion", required_argument, 0, 'v'}, /* iit_version */
79 {"sort", required_argument, 0, 's'}, /* divsort */
80
81 /* Help options */
82 {"version", no_argument, 0, 'V'}, /* print_program_version */
83 {"help", no_argument, 0, '?'}, /* print_program_usage */
84 {0, 0, 0, 0}
85 };
86
87 static void
print_program_version()88 print_program_version () {
89 fprintf(stdout,"\n");
90 fprintf(stdout,"iit_store: indexing utility for Interval Index Trees\n");
91 fprintf(stdout,"Part of GMAP package, version %s\n",PACKAGE_VERSION);
92 fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
93 fprintf(stdout,"Contact: twu@gene.com\n");
94 fprintf(stdout,"\n");
95 return;
96 }
97
/* Writes the full usage/help text to stdout.
   The text is assembled from adjacent string literals, one per output line,
   so that source indentation can never leak into the message.
   Corrections relative to the previous text: the long name of -1 is
   --univformat (the name registered in long_options), "an if possible" is
   now "and if possible", a missing period was added, and 2^32 is given as
   4294967296. */
static void
print_program_usage () {
  fputs("Usage: iit_store [OPTIONS...] -o outputfile inputfile, or\n"
        "cat inputfile | iit_store [OPTIONS...] -o outputfile\n"
        "where\n"
        "outputfile is the desired filename for the iit file\n"
        "(.iit will be added as a suffix if necessary), and\n"
        "inputfile is in either FASTA or GFF3 format, as described below.\n"
        "\n"
        "Options\n"
        "-o, --output=STRING Name of output iit file\n"
        "-1, --univformat Old format for intervals:\n"
        "<start> <optional end> <optional div> <optional type>\n"
        "--accession-only Process only the first word of each FASTA header, and ignore the rest of the line\n"
        "-F, --fields Annotation consists of separate fields\n"
        "-G, --gff Parse input file in gff3 format\n"
        "-l, --label=STRING For gff input, the feature attribute to use (default is ID)\n"
        "\n"
        "-s, --sort=STRING Sorting of divisions: none, alpha, numeric-alpha, or chrom (default)\n"
        "numeric-alpha: chr1 chr1_random chr2 chr10 chr10_random chrM chrUn chrX chrY\n"
        "chrom: chr1 chr2 chr10 chrX chrY chrM chr1_random chr10_random chrUn\n"
        "\n"
        "Note 1: For sorting purposes, any initial 'chr' will be ignored\n"
        "Note 2: For chrom, X, Y, M, MT (or chrX, chrY, and so on) are special\n"
        "\n"
        "-v, --iitversion=STRING Desired iit version for output iit\n"
        "(default = 0, which means latest version)\n"
        "\n"
        "-V, --version Show version\n"
        "-?, --help Show this help message\n"
        "\n"
        "\n"
        "Description of input format:\n"
        "\n"
        "The FASTA format for input files should be\n"
        "\n"
        ">label [interval [type]] [/value=value]\n"
        "optional_annotation (which may be zero, one, or multiple lines)\n"
        "\n"
        "where intervals have one of the following forms:\n"
        "div:start..end\n"
        "div:start\n"
        "start..end\n"
        "start\n"
        "and a given type, numeric value, or both is optional. A numeric value\n"
        "allows intervals to be searched by a range of values using iit_get.\n"
        "If the interval is omitted, then it is assumed to be label:1..n,\n"
        "where n is the length of the sequence. This allows for storage and retrieval\n"
        "of sequences in FASTA files. If you specify --accession-only, then it is\n"
        "assumed that you are not providing intervals, and all information in the FASTA\n"
        "header other than the first word (accession) will be ignored.\n"
        "\n"
        "Intervals may have directions. To indicate a forward direction,\n"
        "the start coordinate should be less than the end coordinate.\n"
        "To indicate a reverse direction, the start coordinate should be\n"
        "greater than the end coordinate. If they are the same, then no\n"
        "direction is implied. If no end coordinate is given, the end\n"
        "coordinate is assumed to be the same as the start coordinate.\n"
        "\n"
        "For example, the label may be a sequence accession, with the div representing\n"
        "a chromosome, and the type representing an additional piece of information.\n"
        "A header might therefore look like\n"
        "\n"
        ">NM_004448 17:35138441..35109780 refseq\n"
        "\n"
        "which indicates an interval on chromosome 17 in the reverse direction,\n"
        "and of type refseq.\n"
        "\n"
        "If the -F flag is provided, IIT files may store annotation for each interval\n"
        "as separate fields. The input must contain the names of the fields, one per\n"
        "line, before the first interval header. Each interval then contains annotation\n"
        "corresponding to each field, one value per line.\n"
        "\n"
        "The GFF3 format requires the -G flag and optionally the -l flag.\n"
        "The iit_store program will parse the chromosome from column 1, the start\n"
        "coordinate from column 4, the end coordinate from column 5, the strand\n"
        "from column 7, and if possible, the label from column 9. The -l flag\n"
        "will indicate which feature from column 9 to retrieve, such as ID, Name,\n"
        "or Parent. Appropriate choice of label may be helpful later on, because\n"
        "the iit_get program can retrieve information by label, as well as by\n"
        "coordinates.\n"
        "\n"
        "Limitations: Start and end coordinates must be non-negative integers, and are\n"
        "limited to the domain of a 64-bit quantity, which means coordinates must be\n"
        "less than 2^64. If your machine is a 32-bit machine, coordinates must be less\n"
        "than 2^32 = 4294967296.\n"
        "\n"
        "See also: iit_get, iit_dump\n",
        stdout);
  return;
}
190
191 /* Empties contents of lines */
192 static char *
concatenate_lines(List_T lines,int content_size)193 concatenate_lines (List_T lines, int content_size) {
194 char *string, *temp;
195 List_T l;
196
197 string = (char *) CALLOC(content_size+1,sizeof(char));
198 for (l = lines; l; l = List_next(l)) {
199 temp = (char *) List_head(l);
200 strcat(string,temp);
201 FREE(temp);
202 }
203
204 /* Keep last return
205 if (string[content_size-1] == '\n') {
206 string[content_size-1] = '\0';
207 }
208 */
209
210 return string;
211 }
212
213
214
215 /* Note that isnumber is a function in ctype.h on some systems */
216 static bool
isnumberp(Univcoord_T * result,char * string)217 isnumberp (Univcoord_T *result, char *string) {
218 char *p = string;
219
220 *result = 0U;
221 while (*p != '\0') {
222 if (*p == ',') {
223 /* Skip commas */
224 } else if (!isdigit((int) *p)) {
225 return false;
226 } else {
227 *result = (*result) * 10 + (*p - '0');
228 }
229 p++;
230 }
231 return true;
232 }
233
234 static bool
isrange(Univcoord_T * start,Univcoord_T * end,char * string)235 isrange (Univcoord_T *start, Univcoord_T *end, char *string) {
236 bool result;
237 Univcoord_T length;
238 char *copy, *startstring, *endstring;
239
240 copy = (char *) CALLOC(strlen(string)+1,sizeof(char));
241 strcpy(copy,string);
242
243 if (index(copy,'.')) {
244 startstring = strtok(copy,"..");
245 endstring = strtok(NULL,"..");
246 result = (isnumberp(&(*start),startstring) && isnumberp(&(*end),endstring));
247 FREE(copy);
248 return result;
249
250 } else if (index(copy,'+')) {
251 startstring = strtok(copy,"+");
252 endstring = strtok(NULL,"+");
253 if (!isnumberp(&(*start),startstring)) {
254 result = false;
255 } else if (endstring[0] == '-' && isnumberp(&length,&(endstring[1]))) {
256 *end = (*start) - length;
257 result = true;
258 } else if (!isnumberp(&length,endstring)) {
259 result = false;
260 } else {
261 *end = (*start) + length;
262 result = true;
263 }
264 FREE(copy);
265 return result;
266
267 } else if (index(copy,'-')) {
268 /* Old notation */
269 startstring = strtok(copy,"--");
270 endstring = strtok(NULL,"--");
271 result = (isnumberp(&(*start),startstring) && isnumberp(&(*end),endstring));
272 FREE(copy);
273 return result;
274
275 } else {
276 FREE(copy);
277 return false;
278 }
279 }
280
281
282 /* Example: >A X:1..10 red. Here, A is a label, 1 and 10 are start and end, X is a div, and red is a type. */
283 /* Other variants: >A 1..10 red, or >A 1..10 */
284 static char *
scan_header_div(int * labellength,bool * seenp,List_T * divlist,List_T * typelist,Tableint_T div_seenp,Tableint_T typetable,bool * valuep,double * value,char ** label,Univcoord_T * start,Univcoord_T * end,int * type,char ** restofheader,char * header,int line_length)285 scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelist, Tableint_T div_seenp, Tableint_T typetable,
286 bool *valuep, double *value, char **label, Univcoord_T *start, Univcoord_T *end, int *type,
287 char **restofheader, char *header, int line_length) {
288 char *divstring = NULL, *coords, *copy, *acc, *query, *tag, *typestring, *p;
289 char *valueptr;
290
291 *seenp = false;
292 acc = (char *) MALLOC((line_length+1)*sizeof(char));
293 query = (char *) MALLOC((line_length+1)*sizeof(char));
294 tag = (char *) MALLOC((line_length+1)*sizeof(char));
295
296 if (sscanf(header,">%s",acc) < 1) {
297 fprintf(stderr,"Error parsing %s. Expecting a FASTA type header with a label, optional coords (as <div>:<number>..<number>), and optional tag.\n",header);
298 exit(9);
299 } else {
300 *labellength = strlen(acc);
301 *label = (char *) MALLOC(((*labellength)+1)*sizeof(char));
302 strcpy(*label,acc);
303 }
304
305 if (acc_only_p == true || sscanf(header,">%s %s",acc,query) < 2) {
306 /* Treat query as acc:1..n */
307 divstring = (char *) MALLOC(((*labellength)+1)*sizeof(char));
308 strcpy(divstring,acc);
309
310 if (Tableint_get(div_seenp,(void *) divstring) == 0) {
311 debug(printf("Entering new div %s.\n",divstring));
312 Tableint_put(div_seenp,(void *) divstring,(int) true);
313 copy = (char *) MALLOC((strlen(divstring)+1)*sizeof(char));
314 strcpy(copy,divstring);
315 *divlist = List_push(*divlist,copy);
316 *seenp = false;
317 } else {
318 fprintf(stderr,"Error parsing %s. No interval given, and saw duplicate labels\n",header);
319 exit(9);
320 }
321 coords = (char *) NULL;
322
323 } else if (!index(query,':')) {
324 debug(printf("Query %s has no div.\n",query));
325 divstring = (char *) CALLOC(1,sizeof(char));
326 divstring[0] = '\0';
327 coords = query;
328
329 } else {
330 debug(printf("Parsed query %s into ",query));
331 p = strtok(query,":");
332
333 divstring = (char *) MALLOC((strlen(p)+1)*sizeof(char));
334 strcpy(divstring,p);
335
336 if (Tableint_get(div_seenp,(void *) divstring) == 0) {
337 debug(printf("Entering new div %s.\n",divstring));
338 Tableint_put(div_seenp,(void *) divstring,(int) true);
339 copy = (char *) MALLOC((strlen(divstring)+1)*sizeof(char));
340 strcpy(copy,divstring);
341 *divlist = List_push(*divlist,copy);
342 *seenp = false;
343 } else {
344 *seenp = true;
345 }
346
347 coords = strtok(NULL,":");
348 debug(printf("div %s and coords %s\n",divstring,coords));
349 }
350
351 if (coords == NULL) {
352 /* fprintf(stderr,"Error parsing %s. Expecting coords (as <div>:<number>..<number>)\n",query); */
353 /* fprintf(stderr,"Problematic line was: %s\n",header); */
354 /* exit(9); */
355 *start = 1;
356 *end = 0; /* Need to assign later */
357
358 } else if (isnumberp(&(*start),coords)) {
359 debug(printf(" and coords %s as a number\n",coords));
360 *end = *start;
361 } else if (isrange(&(*start),&(*end),coords)) {
362 debug(printf(" and coords %s as a range starting at %llu and ending at %llu\n",
363 coords,(unsigned long long) *start,(unsigned long long) *end));
364 } else {
365 fprintf(stderr,"Error parsing %s:%s. Expecting coords (as <div>:<number>..<number>). Or specify --accession-only to ignore the second field\n",
366 query,coords);
367 fprintf(stderr,"Problematic line was: %s\n",header);
368 exit(9);
369 }
370
371 if ((valueptr = strstr(header,"/value=")) == NULL) {
372 *value = NAN;
373 } else {
374 /* Note: Not checking for any errors */
375 *valuep = true;
376 valueptr += strlen("/value=");
377 *value = atof(valueptr);
378 }
379
380 if (acc_only_p == true || sscanf(header,">%s %s %s",acc,query,tag) < 3) {
381 *type = 0;
382 *restofheader = (char *) NULL;
383
384 } else if (!strncmp(tag,"/value=",strlen("/value="))) {
385 *type = 0;
386
387 /* Get rest of header */
388 p = header;
389 while (!isspace(*p)) p++; /* accession */
390 while (isspace(*p)) p++;
391
392 while (!isspace(*p)) p++; /* coords */
393 while (isspace(*p)) p++;
394
395 if (*p == '\0') {
396 *restofheader = (char *) NULL;
397 } else {
398 *restofheader = (char *) MALLOC((strlen(p)+1)*sizeof(char));
399 strcpy(*restofheader,p);
400 }
401
402 } else {
403 if ((*type = Tableint_get(typetable,(void *) tag)) == 0) {
404 /* Store types as 1-based */
405 *type = Tableint_length(typetable) + 1;
406 typestring = (char *) MALLOC((strlen(tag)+1)*sizeof(char));
407 strcpy(typestring,tag);
408 Tableint_put(typetable,typestring,*type);
409 *typelist = List_push(*typelist,typestring);
410 /* debug(printf("Entering new type %s.\n",typestring)); */
411 }
412
413 /* Get rest of header */
414 p = header;
415 while (!isspace(*p)) p++; /* accession */
416 while (isspace(*p)) p++;
417
418 while (!isspace(*p)) p++; /* coords */
419 while (isspace(*p)) p++;
420
421 while (*p != '\0' && !isspace(*p)) p++; /* tag */
422 while (*p != '\0' && isspace(*p)) p++;
423
424 if (*p == '\0') {
425 *restofheader = (char *) NULL;
426 } else {
427 *restofheader = (char *) MALLOC((strlen(p)+1)*sizeof(char));
428 strcpy(*restofheader,p);
429 }
430 }
431
432 FREE(tag);
433 FREE(query);
434 FREE(acc);
435
436 return divstring;
437 }
438
439
440
441 /* Example: >A 1 10 red. Here, A is a label, 1 and 10 are start and end, and red is a type. */
442 static void
scan_header_univ(int * labellength,List_T * typelist,Tableint_T typetable,bool * valuep,double * value,char ** label,Univcoord_T * start,Univcoord_T * end,int * type,char * header,int line_length)443 scan_header_univ (int *labellength, List_T *typelist, Tableint_T typetable,
444 bool *valuep, double *value, char **label, Univcoord_T *start, Univcoord_T *end, int *type, char *header,
445 int line_length) {
446 char *acc, *typestring, *p, *ptr;
447 char *valueptr;
448 int nscanned;
449
450 acc = (char *) MALLOC((line_length+1)*sizeof(char));
451
452 nscanned = sscanf(header,">%s %llu %llu",acc,&(*start),&(*end));
453 if (nscanned < 3) {
454 fprintf(stderr,"Error parsing %s. Expecting a FASTA type header with a label, two coordinates, and optional tag.\n",header);
455 exit(9);
456 } else {
457 if ((valueptr = strstr(header,"/value=")) == NULL) {
458 *value = NAN;
459 } else {
460 /* Note: Not checking for any errors */
461 *valuep = true;
462 valueptr += strlen("/value=");
463 *value = atof(valueptr);
464 }
465
466 *labellength = strlen(acc);
467 *label = (char *) MALLOC((*labellength+1)*sizeof(char));
468 strcpy(*label,acc);
469
470 p = header;
471 while (!isspace((int) *p)) { p++; } /* First word (label) */
472 while (isspace((int) *p)) { p++; } /* First space */
473 while (!isspace((int) *p)) { p++; } /* Second word (start coord) */
474 while (isspace((int) *p)) { p++; } /* Second space */
475 while (!isspace((int) *p)) { p++; } /* Third word (end coord) */
476 while (*p != '\0' && isspace((int) *p)) { p++; } /* Third space */
477
478 if (*p == '\0') {
479 *type = 0; /* Empty type string */
480 } else {
481 while (*p != '\0' && isspace((int) *p)) { p++; } /* Fourth space */
482 if (*p == '\0') {
483 *type = 0;
484 } else if (!strncmp(p,"/value=",strlen("/value="))) {
485 *type = 0;
486 } else {
487 if ((ptr = rindex(p,'\n')) != NULL) {
488 while (isspace((int) *ptr)) { ptr--; } /* Erase empty space */
489 ptr++;
490 *ptr = '\0';
491 }
492
493 if ((*type = Tableint_get(typetable,(void *) p)) == 0) {
494 /* Store types as 1-based */
495 *type = Tableint_length(typetable) + 1;
496 typestring = (char *) CALLOC(strlen(p)+1,sizeof(char));
497 strcpy(typestring,p);
498 Tableint_put(typetable,typestring,*type);
499 *typelist = List_push(*typelist,typestring);
500 /* debug(printf("Entering new type %s.\n",typestring)); */
501 }
502 }
503 }
504 }
505
506 FREE(acc);
507
508 return;
509 }
510
511 static List_T
parse_fieldlist(char * firstchar,FILE * fp)512 parse_fieldlist (char *firstchar, FILE *fp) {
513 List_T fieldlist = NULL;
514 char *line, *fieldname;
515 int line_length;
516
517 while (!feof(fp) && (*firstchar = fgetc(fp)) != '>') {
518 if (*firstchar != EOF) {
519 line = Getline_wlength(&line_length,fp);
520 fieldname = (char *) MALLOC((line_length+2)*sizeof(char));
521 fieldname[0] = *firstchar;
522 strcpy(&(fieldname[1]),line);
523
524 fieldlist = List_push(fieldlist,fieldname);
525 }
526 }
527
528 return List_reverse(fieldlist);
529 }
530
531
532 static void
parse_fasta(bool * valuep,Univcoord_T * max_coordinate,Univcoord_T * label_totallength,Univcoord_T * annot_totallength,List_T * divlist,List_T * typelist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,FILE * fp,Tableint_T div_seenp,Tableint_T typetable,char firstchar)533 parse_fasta (bool *valuep, Univcoord_T *max_coordinate, Univcoord_T *label_totallength, Univcoord_T *annot_totallength,
534 List_T *divlist, List_T *typelist, Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
535 FILE *fp, Tableint_T div_seenp, Tableint_T typetable, char firstchar) {
536 char *header, *line, *divstring, *label, *restofheader = NULL, *tempstring;
537 int line_length;
538 double value;
539 Univcoord_T start, end;
540 List_T lines, d;
541 /* content_size includes restofheader, whereas sequence_length does not */
542 int labellength, content_size, sequence_length, type, nentries;
543 bool seenp;
544
545 /* *max_coordinate = 0; */
546 *label_totallength = 0;
547 *annot_totallength = 0;
548
549 if (feof(fp)) {
550 return;
551
552 } else if (firstchar == '\0') {
553 header = Getline_wlinefeed(&line_length,fp);
554 } else {
555 line = Getline_wlinefeed(&line_length,fp);
556
557 header = (char *) malloc((line_length+2)*sizeof(char));
558 header[0] = firstchar;
559 strcpy(&(header[1]),line);
560 FREE(line);
561 line_length += 1;
562 }
563 if (univ_format_p == true) {
564 scan_header_univ(&labellength,&(*typelist),typetable,&(*valuep),&value,&label,&start,&end,&type,
565 header,line_length);
566 seenp = false;
567 divstring = (char *) CALLOC(1,sizeof(char));
568 divstring[0] = '\0';
569 restofheader = (char *) NULL;
570 } else {
571 divstring = scan_header_div(&labellength,&seenp,&(*divlist),&(*typelist),
572 div_seenp,typetable,&(*valuep),&value,&label,&start,&end,&type,
573 &restofheader,header,line_length);
574 }
575 FREE(header);
576
577 *max_coordinate = start;
578 if (end > *max_coordinate) {
579 *max_coordinate = end;
580 }
581
582 Table_put(valuetable,(void *) divstring,
583 Doublelist_push(Table_get(valuetable,(void *) divstring),value));
584
585 *label_totallength = labellength;
586 Table_put(labeltable,(void *) divstring,
587 List_push(Table_get(labeltable,(void *) divstring),label));
588
589 lines = NULL;
590 content_size = sequence_length = 0;
591 if (restofheader != NULL) {
592 lines = List_push(lines,(void *) restofheader);
593 content_size += strlen(restofheader);
594 }
595
596 nentries = 1; /* Because we already processed the first entry above */
597 while ((line = Getline_wlinefeed(&line_length,fp)) != NULL) {
598 if (line[0] == '>') {
599 if (++nentries % MONITOR_INTERVAL == 0) {
600 fprintf(stderr,"Read %d entries in FASTA file...\n",nentries);
601 }
602
603 /* Store as Univinterval_T now, but may need to change to Interval_T later */
604 if (end == 0) {
605 /* No coordinates given, so assume that the annotation represents a sequence with coords 1..length(annotation) */
606 if (sequence_length == 0) {
607 start = end = 0;
608 } else if ((end = sequence_length - 1) > *max_coordinate) {
609 /* fprintf(stderr,"Assigning %llu to end\n",end); */
610 *max_coordinate = end;
611 }
612 }
613 Table_put(intervaltable,(void *) divstring,
614 List_push(Table_get(intervaltable,(void *) divstring),
615 (void *) Univinterval_new(start,end,type)));
616
617 lines = List_reverse(lines);
618 if (restofheader == NULL && content_size > 0) {
619 tempstring = (char *) CALLOC(2,sizeof(char));
620 tempstring[0] = '\n';
621 tempstring[1] = '\0';
622 lines = List_push(lines,tempstring);
623 content_size += 1;
624 }
625 *annot_totallength += content_size;
626
627 Table_put(annottable,(void *) divstring,
628 List_push(Table_get(annottable,(void *) divstring),
629 (void *) concatenate_lines(lines,content_size)));
630 List_free(&lines);
631
632 if (seenp == true) {
633 FREE(divstring);
634 }
635 if (univ_format_p == true) {
636 scan_header_univ(&labellength,&(*typelist),typetable,
637 &(*valuep),&value,&label,&start,&end,&type,line,line_length);
638 seenp = false;
639 divstring = (char *) CALLOC(1,sizeof(char));
640 divstring[0] = '\0';
641 restofheader = (char *) NULL;
642 } else {
643 divstring = scan_header_div(&labellength,&seenp,&(*divlist),&(*typelist),div_seenp,typetable,
644 &(*valuep),&value,&label,&start,&end,&type,&restofheader,line,line_length);
645 }
646 if (start > *max_coordinate) {
647 *max_coordinate = start;
648 }
649 if (end > *max_coordinate) {
650 *max_coordinate = end;
651 }
652
653 Table_put(valuetable,(void *) divstring,
654 Doublelist_push(Table_get(valuetable,(void *) divstring),value));
655
656 *label_totallength += labellength;
657 Table_put(labeltable,(void *) divstring,
658 List_push(Table_get(labeltable,(void *) divstring),label));
659
660 lines = NULL;
661 content_size = sequence_length = 0;
662 if (restofheader != NULL) {
663 lines = List_push(lines,(void *) restofheader);
664 content_size += strlen(restofheader);
665 }
666
667 FREE(line);
668
669 } else {
670 lines = List_push(lines,(void *) line);
671 content_size += line_length;
672 sequence_length += line_length;
673 }
674
675 }
676 fprintf(stderr,"Finished reading FASTA file -- total entries: %d\n",nentries);
677
678 /* Store as Univinterval_T now, but may need to change later */
679 if (end == 0) {
680 /* No coordinates given, so assume that the annotation represents a sequence with coords 1..length(annotation) */
681 if (sequence_length == 0) {
682 start = end = 0;
683 } else if ((end = sequence_length - 1) > *max_coordinate) {
684 fprintf(stderr,"Assigning %llu to end\n",end);
685 *max_coordinate = end;
686 }
687 }
688 Table_put(intervaltable,(void *) divstring,
689 List_push(Table_get(intervaltable,(void *) divstring),
690 (void *) Univinterval_new(start,end,type)));
691
692 lines = List_reverse(lines);
693 if (restofheader == NULL && content_size > 0) {
694 tempstring = (char *) CALLOC(2,sizeof(char));
695 tempstring[0] = '\n';
696 tempstring[1] = '\0';
697 lines = List_push(lines,tempstring);
698 content_size += 1;
699 }
700 *annot_totallength += content_size;
701 Table_put(annottable,(void *) divstring,
702 List_push(Table_get(annottable,(void *) divstring),
703 (void *) concatenate_lines(lines,content_size)));
704 List_free(&lines);
705
706 if (seenp == true) {
707 FREE(divstring);
708 }
709
710 fprintf(stderr,"Maximum coordinate: %llu\n",(unsigned long long) *max_coordinate);
711 fprintf(stderr,"Total label length: %llu + %d separators\n",(unsigned long long) *label_totallength,nentries);
712 fprintf(stderr,"Total annotation length: %llu + %d separators\n",(unsigned long long) *annot_totallength,nentries);
713 *label_totallength += nentries;
714 *annot_totallength += nentries;
715
716 /* Reverse all lists */
717 fprintf(stderr,"Saw %d distinct divisions/chromosomes\n",List_length(*divlist)-1);
718 *divlist = List_reverse(*divlist);
719
720 fprintf(stderr,"Saw %d distinct tags/types\n",List_length(*typelist));
721 *typelist = List_reverse(*typelist);
722
723 for (d = *divlist; d != NULL; d = List_next(d)) {
724 divstring = (char *) List_head(d);
725 Table_put(intervaltable,(void *) divstring,
726 List_reverse((List_T) Table_get(intervaltable,(void *) divstring)));
727 Table_put(valuetable,(void *) divstring,
728 Doublelist_reverse((Doublelist_T) Table_get(valuetable,(void *) divstring)));
729 Table_put(labeltable,(void *) divstring,
730 List_reverse((List_T) Table_get(labeltable,(void *) divstring)));
731 Table_put(annottable,(void *) divstring,
732 List_reverse((List_T) Table_get(annottable,(void *) divstring)));
733 }
734
735 return;
736 }
737
738
/* Splits line in place at tab characters and stores pointers to the first
   maxfields non-empty fields in columns[].  Returns the number of entries
   stored, which is 0 when line has no field at all or maxfields is not
   positive.

   line is modified: strtok() overwrites the tab after each returned field
   with a NUL.  Consecutive tabs count as a single separator. */
static int
assign_columns (char **columns, char *line, int maxfields) {
  char *token;
  int nfields = 0;

  if (maxfields <= 0) {
    return 0;
  }

  for (token = strtok(line,"\t"); token != NULL; token = strtok(NULL,"\t")) {
    columns[nfields++] = token;
    if (nfields >= maxfields) {
      break;
    }
  }
  return nfields;
}
750
751
/* Zero-based positions, within the columns[] array that assign_columns()
   fills, of the GFF3 fields parse_gff3() reads.  GFF3_COLUMNS is the size of
   that array. */
752 #define CHRCOLUMN 0
753 #define STARTCOLUMN 3
754 #define ENDCOLUMN 4
755 #define STRANDCOLUMN 6
756 #define FEATURECOLUMN 8
757 #define GFF3_COLUMNS 9
758
/* Finds, in a ';'-separated attribute string, the first attribute that starts
   with labelstr (for example "ID=") and returns a pointer to its value.
   labelstrlen is strlen(labelstr); lineno is used only in the error message.

   Modifies feature: strtok() replaces separators with NULs, and for a quoted
   value the closing quotation mark is overwritten so that the returned
   string excludes both quotes.  The returned pointer points into feature.

   Returns NULL when no attribute matches, including when feature has no
   attribute at all.  An opening quotation mark without a closing one prints
   a message to stderr and calls exit(9). */
static char *
gff3_feature_id (char *feature, char *labelstr, int labelstrlen, int lineno) {
  char *token, *value, *p;

  for (token = strtok(feature,";"); token != NULL; token = strtok(NULL,";")) {
    if (strncmp(token,labelstr,labelstrlen) != 0) {
      continue;
    }

    value = &(token[labelstrlen]);
    if (value[0] != '"') {
      return value;
    }

    /* Quotation marks */
    value = &(value[1]);
    if ((p = strrchr(value,'"')) == NULL) {
      fprintf(stderr,"Error in line %d: Saw no matching quotation in %s\n",lineno,token);
      exit(9);
    }
    *p = '\0';
    return value;
  }

  return NULL;
}
802
/* Returns true when line is empty or consists only of whitespace characters,
   as classified by isspace(); false otherwise. */
static bool
empty_line_p (char *line) {
  char *p;

  for (p = line; *p != '\0'; p++) {
    /* Cast is required: passing a negative char to isspace is undefined */
    if (!isspace((unsigned char) *p)) {
      return false;
    }
  }
  return true;
}
816
817 static void
parse_gff3(List_T * divlist,Table_T intervaltable,Table_T labeltable,Table_T annottable,FILE * fp,Tableint_T div_seenp)818 parse_gff3 (List_T *divlist, Table_T intervaltable, Table_T labeltable, Table_T annottable,
819 FILE *fp, Tableint_T div_seenp) {
820 char *line, Space[1000], *columns[GFF3_COLUMNS];
821 char *divstring, *label, *chr, *idptr;
822 List_T d;
823 Univcoord_T start, end;
824 int nfields, lineno = 0, row = 0, labelstrlen;
825 char strandchar;
826 char *labelstr;
827
828 labelstr = (char *) CALLOC(strlen(labelid) + strlen("=") + 1,sizeof(char));
829 sprintf(labelstr,"%s=",labelid);
830 labelstrlen = strlen(labelstr);
831
832 while ((line = Getline(fp)) != NULL) {
833 lineno++;
834 if (line[0] == '#') {
835 /* Skip comment */
836 FREE(line);
837
838 } else if (empty_line_p(line) == true) {
839 /* Skip empty line */
840 FREE(line);
841
842 } else {
843 #if 0
844 if ((p = rindex(line,'\n')) == NULL) {
845 fprintf(stderr,"Line exceeds maximum length of %d\n",LINELENGTH);
846 exit(9);
847 } else {
848 *p = '\0';
849 }
850 #endif
851
852 nfields = assign_columns(columns,line,GFF3_COLUMNS); /* destroys line */
853
854 if (nfields < GFF3_COLUMNS-1) {
855 /* Subract 1 to allow for an empty feature column */
856 fprintf(stderr,"Skipping line %d with only %d fields: %s\n",lineno,nfields,line);
857 FREE(line);
858
859 } else {
860 chr = columns[CHRCOLUMN];
861 divstring = (char *) CALLOC(strlen(chr)+1,sizeof(char));
862 sprintf(divstring,"%s",chr);
863
864 if ((strandchar = columns[STRANDCOLUMN][0]) == '+') {
865 start = atof(columns[STARTCOLUMN]);
866 end = atof(columns[ENDCOLUMN]);
867 } else if (strandchar == '-') {
868 start = atof(columns[ENDCOLUMN]);
869 end = atof(columns[STARTCOLUMN]);
870 } else if (strandchar == '.' || strandchar == '?') {
871 start = atof(columns[STARTCOLUMN]);
872 end = atof(columns[ENDCOLUMN]);
873 } else {
874 start = atof(columns[STARTCOLUMN]);
875 end = atof(columns[ENDCOLUMN]);
876 }
877
878 if (Tableint_get(div_seenp,(void *) divstring) == 0) {
879 Tableint_put(div_seenp,(void *) divstring,(int) true);
880 *divlist = List_push(*divlist,divstring);
881 }
882
883 /* Store Univinterval_T now, but may need to change later */
884 Table_put(intervaltable,(void *) divstring,
885 List_push(Table_get(intervaltable,(void *) divstring),
886 (void *) Univinterval_new(start,end,/*type*/0)));
887
888 if (nfields <= FEATURECOLUMN) {
889 sprintf(Space,"gff.%d",row);
890 label = (char *) MALLOC((strlen(Space)+1)*sizeof(char));
891 strcpy(label,Space);
892 } else if ((idptr = gff3_feature_id(columns[FEATURECOLUMN],labelstr,labelstrlen,lineno)) == NULL) {
893 sprintf(Space,"gff.%d",row);
894 label = (char *) MALLOC((strlen(Space)+1)*sizeof(char));
895 strcpy(label,Space);
896 } else {
897 label = (char *) MALLOC((strlen(idptr)+1)*sizeof(char));
898 strcpy(label,idptr);
899 }
900 Table_put(labeltable,(void *) divstring,
901 List_push(Table_get(labeltable,(void *) divstring),label));
902 Table_put(annottable,(void *) divstring,
903 List_push(Table_get(annottable,(void *) divstring),line));
904
905 row++;
906 }
907 }
908 }
909
910 *divlist = List_reverse(*divlist);
911
912 for (d = *divlist; d != NULL; d = List_next(d)) {
913 divstring = (char *) List_head(d);
914 Table_put(intervaltable,(void *) divstring,
915 List_reverse((List_T) Table_get(intervaltable,(void *) divstring)));
916 Table_put(labeltable,(void *) divstring,
917 List_reverse((List_T) Table_get(labeltable,(void *) divstring)));
918 Table_put(annottable,(void *) divstring,
919 List_reverse((List_T) Table_get(annottable,(void *) divstring)));
920 }
921
922 FREE(labelstr);
923
924 return;
925 }
926
927
/* Explicit prototype for getopt() when compiling in strict ANSI mode.
   NOTE(review): presumably needed because the system headers do not declare
   getopt() under __STRICT_ANSI__ -- confirm against the target platforms. */
928 #ifdef __STRICT_ANSI__
929 int getopt (int argc, char *const argv[], const char *optstring);
930 #endif
931
932 int
main(int argc,char * argv[])933 main (int argc, char *argv[]) {
934 char *inputfile = NULL, *iitfile, *tempstring, *divstring, *typestring, *p;
935 char firstchar;
936 List_T d, l, templist = NULL, divlist = NULL, typelist = NULL, fieldlist = NULL;
937 Doublelist_T valuelist;
938 List_T newlist;
939 FILE *fp;
940 Univinterval_T univinterval;
941 Interval_T interval;
942 Tableint_T div_seenp, typetable;
943 Table_T intervaltable, labeltable, valuetable, annottable;
944 Chrom_T *chroms = NULL;
945 int n_proper_divs = 0, i;
946 bool coord_values_8p, label_pointers_8p, annot_pointers_8p, valuep = false;
947 Univcoord_T order;
948 Univcoord_T max_coordinate, label_totallength, annot_totallength;
949
950 int opt;
951 extern int optind;
952 extern char *optarg;
953 int long_option_index = 0;
954 const char *long_name;
955
956 while ((opt = getopt_long(argc,argv,"o:1FGl:v:s:",
957 long_options,&long_option_index)) != -1) {
958 switch (opt) {
959
960 case 0:
961 long_name = long_options[long_option_index].name;
962 if (!strcmp(long_name,"accession-only")) {
963 acc_only_p = true;
964 } else {
965 /* Shouldn't reach here */
966 fprintf(stderr,"Don't recognize option %s. For usage, run 'iit_store --help'",long_name);
967 return 9;
968 }
969 break;
970
971 case 'o': outputfile = optarg; break;
972 case '1': univ_format_p = true; break;
973 case 'F': fieldsp = true; break;
974 case 'G': gff3_format_p = true; break;
975 case 'l': labelid = optarg; break;
976 case 'v': iit_version = atoi(optarg); break;
977 case 's':
978 if (!strcmp(optarg,"none")) {
979 divsort = NO_SORT;
980 } else if (!strcmp(optarg,"alpha")) {
981 divsort = ALPHA_SORT;
982 } else if (!strcmp(optarg,"numeric-alpha")) {
983 divsort = NUMERIC_ALPHA_SORT;
984 } else if (!strcmp(optarg,"chrom")) {
985 divsort = CHROM_SORT;
986 } else {
987 fprintf(stderr,"Don't recognize sort type %s. Allowed values are none, alpha, or chrom.",optarg);
988 exit(9);
989 }
990 break;
991 case 'V': print_program_version(); exit(0);
992 case '?': print_program_usage(); exit(0);
993 default: exit(9);
994 }
995 }
996 argc -= optind;
997 argv += optind;
998
999 if (outputfile == NULL) {
1000 fprintf(stderr,"Need to specify an output file with the -o flag\n");
1001 exit(9);
1002 } else if (iit_version > IIT_LATEST_VERSION_NOVALUES && iit_version > IIT_LATEST_VERSION_VALUES) {
1003 fprintf(stderr,"version %d requested, but this program can write only up to version %d or %d\n",
1004 iit_version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
1005 exit(9);
1006 }
1007
1008 if (argc < 1) {
1009 fp = stdin;
1010 } else {
1011 inputfile = argv[0];
1012 fp = FOPEN_READ_TEXT(inputfile);
1013 if (!fp) {
1014 fprintf(stderr,"Can't open file %s\n",inputfile);
1015 exit(9);
1016 }
1017 }
1018
1019 div_seenp = Tableint_new(100,Table_string_compare,Table_string_hash);
1020 typetable = Tableint_new(100,Table_string_compare,Table_string_hash);
1021 intervaltable = Table_new(100,Table_string_compare,Table_string_hash);
1022 valuetable = Table_new(100,Table_string_compare,Table_string_hash);
1023 labeltable = Table_new(100,Table_string_compare,Table_string_hash);
1024 annottable = Table_new(100,Table_string_compare,Table_string_hash);
1025
1026 /* The zeroth div is empty */
1027 divstring = (char *) CALLOC(1,sizeof(char));
1028 divstring[0] = '\0';
1029 divlist = List_push(NULL,divstring);
1030
1031 /* The zeroth type is empty */
1032 typestring = (char *) CALLOC(1,sizeof(char));
1033 typestring[0] = '\0';
1034 typelist = List_push(NULL,typestring);
1035
1036 if (univ_format_p == true) {
1037 typestring = (char *) MALLOC((strlen("circular")+1)*sizeof(char));
1038 strcpy(typestring,"circular");
1039 Tableint_put(typetable,typestring,/*type*/1);
1040 }
1041
1042 if (gff3_format_p == true) {
1043 parse_gff3(&divlist,intervaltable,labeltable,annottable,fp,div_seenp);
1044 } else {
1045 fieldlist = parse_fieldlist(&firstchar,fp);
1046 parse_fasta(&valuep,&max_coordinate,&label_totallength,&annot_totallength,
1047 &divlist,&typelist,intervaltable,valuetable,labeltable,annottable,
1048 fp,div_seenp,typetable,firstchar);
1049 }
1050
1051 if (inputfile != NULL) {
1052 fclose(fp);
1053 }
1054
1055 if (univ_format_p == true) {
1056 iit_version = 1;
1057 typestring = (char *) MALLOC((strlen("circular")+1)*sizeof(char));
1058 strcpy(typestring,"circular");
1059 typelist = List_push(typelist,typestring);
1060 typelist = List_reverse(typelist);
1061
1062 } else if (iit_version == 0 && List_length(divlist) == 1) {
1063 /* No divs other than NULL */
1064 fprintf(stderr,"No divs/chromosomes provided, so storing as IIT version 1\n");
1065 iit_version = 1;
1066 }
1067
1068 coord_values_8p = false;
1069 label_pointers_8p = false;
1070 annot_pointers_8p = false;
1071 #ifdef HAVE_64_BIT
1072 if (gff3_format_p == true) {
1073 coord_values_8p = false;
1074 } else if (max_coordinate > 4294967295U) {
1075 coord_values_8p = true;
1076 }
1077 if (iit_version == 0) {
1078 if (gff3_format_p == true) {
1079 label_pointers_8p = false;
1080 } else if (label_totallength > 4294967295U) {
1081 label_pointers_8p = true;
1082 }
1083 if (gff3_format_p == true) {
1084 annot_pointers_8p = false;
1085 } else if (annot_totallength > 4294967295U) {
1086 annot_pointers_8p = true;
1087 }
1088 if (valuep == true) {
1089 iit_version = IIT_LATEST_VERSION_VALUES;
1090 } else {
1091 iit_version = IIT_LATEST_VERSION_NOVALUES;
1092 }
1093
1094 } else if (iit_version == 4) {
1095 if (label_totallength > 4294967295U || annot_totallength > 4294967295U) {
1096 /* Both pointer types have to match */
1097 label_pointers_8p = true;
1098 annot_pointers_8p = true;
1099 }
1100
1101 } else if (iit_version <= 3) {
1102 if (label_totallength > 4294967295U || annot_totallength > 4294967295U) {
1103 fprintf(stderr,"Need 8-byte pointers, which requires you to specify a version of 4 or greater\n");
1104 exit(9);
1105 }
1106 }
1107 #else
1108 if (iit_version == 0) {
1109 if (valuep == true) {
1110 iit_version = IIT_LATEST_VERSION_VALUES;
1111 } else {
1112 iit_version = IIT_LATEST_VERSION_NOVALUES;
1113 }
1114 }
1115 #endif
1116
1117 if (iit_version == 1) {
1118 /* Will use Univinterval_T objects, which may print as UINT8 or UINT4 */
1119 } else if (coord_values_8p == true) {
1120 fprintf(stderr,"Cannot have large coordinates, except for chromosome IIT files\n");
1121 exit(9);
1122 } else {
1123 /* Convert all Univinterval_T objects to Interval_T objects */
1124
1125 for (d = divlist; d != NULL; d = List_next(d)) {
1126 divstring = (char *) List_head(d);
1127 templist = (List_T) Table_get(intervaltable,(void *) divstring);
1128 newlist = (List_T) NULL;
1129 for (l = templist; l != NULL; l = List_next(l)) {
1130 univinterval = (Univinterval_T) List_head(l);
1131 if (Univinterval_sign(univinterval) < 0) {
1132 newlist = List_push(newlist,
1133 (void *) Interval_new(Univinterval_high(univinterval),
1134 Univinterval_low(univinterval),
1135 Univinterval_type(univinterval)));
1136 } else {
1137 newlist = List_push(newlist,
1138 (void *) Interval_new(Univinterval_low(univinterval),
1139 Univinterval_high(univinterval),
1140 Univinterval_type(univinterval)));
1141 }
1142 Univinterval_free(&univinterval);
1143 }
1144 Table_put(intervaltable,(void *) divstring,(void *) List_reverse(newlist));
1145 List_free(&templist);
1146 }
1147 }
1148
1149
1150 /* Figure out name of iit file */
1151 if (strlen(outputfile) < 4) {
1152 iitfile = (char *) CALLOC(strlen(outputfile)+strlen(".iit")+1,sizeof(char));
1153 sprintf(iitfile,"%s.iit",outputfile);
1154 } else {
1155 p = &(outputfile[strlen(outputfile)]);
1156 p -= 4;
1157 if (!strcmp(p,".iit")) {
1158 iitfile = (char *) CALLOC(strlen(outputfile)+1,sizeof(char));
1159 strcpy(iitfile,outputfile);
1160 } else {
1161 iitfile = (char *) CALLOC(strlen(outputfile)+strlen(".iit")+1,sizeof(char));
1162 sprintf(iitfile,"%s.iit",outputfile);
1163 }
1164 }
1165
1166 order = 0;
1167 if ((n_proper_divs = List_length(divlist) - 1) > 0) {
1168 chroms = (Chrom_T *) CALLOC(n_proper_divs,sizeof(Chrom_T));
1169 for (l = divlist, i = 0; l != NULL; l = List_next(l)) {
1170 tempstring = (char *) List_head(l);
1171 if (tempstring[0] == '\0') {
1172 /* FREE(tempstring); -- Causes invalid read later in table_string_compare */
1173 } else {
1174 chroms[i++] = Chrom_from_string(tempstring,mitochondrial_string,order++,
1175 /*circularp*/false,/*alt_scaffold_start*/0,/*alt_scaffold_end*/0);
1176 }
1177 }
1178 }
1179
1180 #if 0
1181 /* Need to have these existing for the IIT_write command below */
1182 for (l = divlist; l != NULL; l = List_next(l)) {
1183 divstring = (char *) List_head(l);
1184 FREE(divstring);
1185 }
1186 #endif
1187 List_free(&divlist);
1188
1189 switch (divsort) {
1190 case NO_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_order); break;
1191 case ALPHA_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_alpha); break;
1192 case NUMERIC_ALPHA_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_numeric_alpha); break;
1193 case CHROM_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_chrom); break;
1194 default: fprintf(stderr,"Don't recognize divsort type %d\n",divsort); abort();
1195 }
1196
1197 /* The zeroth div is empty */
1198 divstring = (char *) CALLOC(1,sizeof(char));
1199 divstring[0] = '\0';
1200 divlist = List_push(NULL,divstring);
1201
1202 for (i = 0; i < n_proper_divs; i++) {
1203 divlist = List_push(divlist,Chrom_string(chroms[i]));
1204 }
1205 divlist = List_reverse(divlist);
1206
1207 #if 0
1208 /* Causes invalid reads later on */
1209 for (i = 0; i < n_proper_divs; i++) {
1210 Chrom_free(&(chroms[i]));
1211 }
1212 #endif
1213
1214 FREE(chroms);
1215
1216
1217 if (iit_version == 1) {
1218 IIT_write_univ(iitfile,divlist,typelist,intervaltable,labeltable,annottable,
1219 coord_values_8p,label_pointers_8p,annot_pointers_8p);
1220 } else if (valuep == false) {
1221 IIT_write(iitfile,divlist,typelist,fieldlist,intervaltable,/*valuetable*/NULL,labeltable,annottable,
1222 divsort,iit_version,label_pointers_8p,annot_pointers_8p);
1223 } else {
1224 IIT_write(iitfile,divlist,typelist,fieldlist,intervaltable,valuetable,labeltable,annottable,
1225 divsort,iit_version,label_pointers_8p,annot_pointers_8p);
1226 }
1227 FREE(iitfile);
1228
1229 for (d = divlist; d != NULL; d = List_next(d)) {
1230 divstring = (char *) List_head(d);
1231
1232 templist = (List_T) Table_get(annottable,(void *) divstring);
1233 for (l = templist; l != NULL; l = List_next(l)) {
1234 tempstring = (char *) List_head(l);
1235 FREE(tempstring);
1236 }
1237 List_free(&templist);
1238
1239 valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
1240 Doublelist_free(&valuelist);
1241
1242 templist = (List_T) Table_get(labeltable,(void *) divstring);
1243 for (l = templist; l != NULL; l = List_next(l)) {
1244 tempstring = (char *) List_head(l);
1245 FREE(tempstring);
1246 }
1247 List_free(&templist);
1248
1249 templist = (List_T) Table_get(intervaltable,(void *) divstring);
1250 if (iit_version == 1) {
1251 for (l = templist; l != NULL; l = List_next(l)) {
1252 univinterval = (Univinterval_T) List_head(l);
1253 Univinterval_free(&univinterval);
1254 }
1255 } else {
1256 for (l = templist; l != NULL; l = List_next(l)) {
1257 interval = (Interval_T) List_head(l);
1258 Interval_free(&interval);
1259 }
1260 }
1261 List_free(&templist);
1262
1263 }
1264
1265
1266 Table_free(&intervaltable);
1267 Table_free(&valuetable);
1268 Table_free(&labeltable);
1269 Table_free(&annottable);
1270
1271 for (l = fieldlist; l != NULL; l = List_next(l)) {
1272 tempstring = (char *) List_head(l);
1273 FREE(tempstring);
1274 }
1275 List_free(&fieldlist);
1276
1277 for (l = typelist; l != NULL; l = List_next(l)) {
1278 tempstring = (char *) List_head(l);
1279 FREE(tempstring);
1280 }
1281 List_free(&typelist);
1282
1283 for (l = divlist; l != NULL; l = List_next(l)) {
1284 tempstring = (char *) List_head(l);
1285 FREE(tempstring);
1286 }
1287 List_free(&divlist);
1288
1289 Tableint_free(&typetable);
1290 Tableint_free(&div_seenp);
1291
1292 return 0;
1293 }
1294