1 static char rcsid[] = "$Id: iit_store.c 222736 2020-05-29 15:48:02Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #ifdef HAVE_UNISTD_H
9 #include <unistd.h>
10 #endif
11 #ifdef HAVE_SYS_TYPES_H
12 #include <sys/types.h>
13 #endif
14 #include <string.h>		/* For strlen */
15 #include <strings.h>		/* For rindex */
16 #include <ctype.h>
17 #include <math.h>		/* For qsort and NAN */
18 #include "bool.h"
19 #include "types.h"
20 #include "assert.h"
21 #include "mem.h"
22 #include "fopen.h"
23 #include "getline.h"
24 
25 #include "list.h"
26 #include "doublelist.h"
27 #include "univinterval.h"
28 #include "interval.h"
29 #include "table.h"
30 #include "tableint.h"
31 #include "chrom.h"
32 #include "iit-write-univ.h"
33 #include "iit-write.h"
34 #include "getopt.h"
35 
/* Fallback for <math.h> implementations that do not provide the NAN
   macro.  nan("") yields a quiet NaN; it is used below as the value of
   entries that carry no /value= field. */
#ifndef NAN
#define NAN nan("")
#endif
43 
44 
/* debug(x) expands to its argument when DEBUG is defined at compile
   time, and to nothing otherwise, so tracing statements wrapped in it
   disappear from normal builds. */
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif

/* LINELENGTH is not referenced by any active code in the visible part
   of this file. */
#define LINELENGTH 8192
/* parse_fasta prints a progress message to stderr each time the number
   of entries read reaches a multiple of this value. */
#define MONITOR_INTERVAL 100000 /* 100 thousand entries */
53 
54 /************************************************************************
55  *   Program options
56  ************************************************************************/
57 
/* File-scope option state.  The option-parsing code that assigns these
   is outside the visible part of the file; the notes below describe how
   the visible code reads them and what the usage text says. */
static char *outputfile = NULL;	/* -o: name of the output iit file */
static bool univ_format_p = false; /* IIT_write_univ used for chromosome.iit file */
static bool gff3_format_p = false; /* -G: input is in gff3 format */
static char *labelid = "ID";	/* -l: gff3 attribute used as the label; parse_gff3 searches for "<labelid>=" */
static bool fieldsp = false;	/* -F: annotation consists of separate fields */
static bool acc_only_p = false;	/* --accession-only: scan_header_div uses only the first word of each header */
static char iit_version = 0;	/* -v: per the usage text, 0 means the latest version */

static Sorttype_T divsort = CHROM_SORT;	/* -s: ordering of divisions; chrom is the documented default */
static char *mitochondrial_string = NULL; /* NOTE(review): not read in the visible code; presumably names the mitochondrial chromosome for chrom sorting -- confirm */
68 
69 
/* Long-option table for getopt_long.  Each entry gives the long name,
   whether it takes an argument, a flag pointer (always 0 here), and the
   value returned when the option is seen.  The trailing comment on each
   entry names the variable or function the option controls. */
static struct option long_options[] = {
  /* Input options */
  {"output", required_argument, 0, 'o'}, /* outputfile */
  {"univformat", no_argument, 0, '1'}, /* univ_format_p */
  /* Return value 0: this option has no single-letter equivalent, so it
     has to be recognized by its long name */
  {"accession-only", no_argument, 0, 0},     /* acc_only_p */
  {"fields", no_argument, 0, 'F'}, /* fieldsp */
  {"gff", no_argument, 0, 'G'}, /* gff3_format_p */
  {"label", required_argument, 0, 'l'}, /* labelid */
  {"iitversion", required_argument, 0, 'v'}, /* iit_version */
  {"sort", required_argument, 0, 's'}, /* divsort */

  /* Help options */
  {"version", no_argument, 0, 'V'}, /* print_program_version */
  {"help", no_argument, 0, '?'}, /* print_program_usage */
  {0, 0, 0, 0}			/* Terminator required by getopt_long */
};
86 
87 static void
print_program_version()88 print_program_version () {
89   fprintf(stdout,"\n");
90   fprintf(stdout,"iit_store: indexing utility for Interval Index Trees\n");
91   fprintf(stdout,"Part of GMAP package, version %s\n",PACKAGE_VERSION);
92   fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
93   fprintf(stdout,"Contact: twu@gene.com\n");
94   fprintf(stdout,"\n");
95   return;
96 }
97 
/* Writes the full usage message to stdout: the command line synopsis,
   the option list, and a description of the FASTA-style and GFF3 input
   formats.  The long option names shown here must match the entries in
   long_options. */
static void
print_program_usage () {
  fprintf(stdout,"\
Usage: iit_store [OPTIONS...] -o outputfile inputfile, or\n\
       cat inputfile | iit_store [OPTIONS...] -o outputfile\n\
where\n\
   outputfile is the desired filename for the iit file\n\
       (.iit will be added as a suffix if necessary), and\n\
   inputfile is in either FASTA or GFF3 format, as described below.\n\
\n\
Options\n\
  -o, --output=STRING       Name of output iit file\n\
  -1, --univformat          Old format for intervals:\n\
                             <start> <optional end> <optional div> <optional type>\n\
  --accession-only          Process only the first word of each FASTA header, and ignore the rest of the line\n\
  -F, --fields              Annotation consists of separate fields\n\
  -G, --gff                 Parse input file in gff3 format\n\
  -l, --label=STRING        For gff input, the feature attribute to use (default is ID)\n\
\n\
  -s, --sort=STRING         Sorting of divisions: none, alpha, numeric-alpha, or chrom (default)\n\
                numeric-alpha: chr1 chr1_random chr2 chr10 chr10_random chrM chrUn chrX chrY\n\
                        chrom: chr1 chr2 chr10 chrX chrY chrM chr1_random chr10_random chrUn\n\
\n\
                               Note 1: For sorting purposes, any initial 'chr' will be ignored\n\
                               Note 2: For chrom, X, Y, M, MT (or chrX, chrY, and so on) are special\n\
\n\
  -v, --iitversion=STRING   Desired iit version for output iit\n\
                            (default = 0, which means latest version)\n\
\n\
  -V, --version             Show version\n\
  -?, --help                Show this help message\n\
\n\
\n\
Description of input format:\n\
\n\
The FASTA format for input files should be\n\
\n\
    >label [interval [type]] [/value=value]\n\
    optional_annotation (which may be zero, one, or multiple lines)\n\
\n\
where intervals have one of the following forms:\n\
   div:start..end\n\
   div:start\n\
   start..end\n\
   start\n\
and a given type, numeric value, or both is optional.  A numeric value\n\
allows intervals to be searched by a range of values using iit_get.\n\
If the interval is omitted, then it is assumed to be label:1..n,\n\
where n is the length of the sequence.  This allows for storage and retrieval\n\
of sequences in FASTA files.  If you specify --accession-only, then it is\n\
assumed that you are not providing intervals, and all information in the FASTA\n\
header other than the first word (accession) will be ignored.\n\
\n\
Intervals may have directions.  To indicate a forward direction,\n\
the start coordinate should be less than the end coordinate.\n\
To indicate a reverse direction, the start coordinate should be\n\
greater than the end coordinate. If they are the same, then no\n\
direction is implied.  If no end coordinate is given, the end\n\
coordinate is assumed to be the same as the start coordinate.\n\
\n\
For example, the label may be a sequence accession, with the div representing\n\
a chromosome, and the type representing an additional piece of information.\n\
A header might therefore look like\n\
\n\
    >NM_004448 17:35138441..35109780 refseq\n\
\n\
which indicates an interval on chromosome 17 in the reverse direction,\n\
and of type refseq.\n\
\n\
If the -F flag is provided, IIT files may store annotation for each interval\n\
as separate fields.  The input must contain the names of the fields, one per\n\
line, before the first interval header.  Each interval then contains annotation\n\
corresponding to each field, one value per line.\n\
\n\
The GFF3 format requires the -G flag and optionally the -l flag.\n\
The iit_store program will parse the chromosome from column 1, the start\n\
coordinate from column 4, the end coordinate from column 5, the strand\n\
from column 7, and if possible, the label from column 9.  The -l flag\n\
will indicate which feature from column 9 to retrieve, such as ID, Name,\n\
or Parent.  Appropriate choice of label may be helpful later on, because\n\
the iit_get program can retrieve information by label, as well as by\n\
coordinates.\n\
\n\
Limitations: Start and end coordinates must be non-negative integers, and are\n\
limited to the domain of a 64-bit quantity, which means coordinates must be\n\
less than 2^64.  If your machine is a 32-bit machine, coordinates must be less\n\
than 2^32 = 4294967296.\n\
\n\
See also: iit_get, iit_dump\n\
");
  return;
}
190 
191 /* Empties contents of lines */
192 static char *
concatenate_lines(List_T lines,int content_size)193 concatenate_lines (List_T lines, int content_size) {
194   char *string, *temp;
195   List_T l;
196 
197   string = (char *) CALLOC(content_size+1,sizeof(char));
198   for (l = lines; l; l = List_next(l)) {
199     temp = (char *) List_head(l);
200     strcat(string,temp);
201     FREE(temp);
202   }
203 
204   /* Keep last return
205   if (string[content_size-1] == '\n') {
206     string[content_size-1] = '\0';
207   }
208   */
209 
210   return string;
211 }
212 
213 
214 
215 /* Note that isnumber is a function in ctype.h on some systems */
216 static bool
isnumberp(Univcoord_T * result,char * string)217 isnumberp (Univcoord_T *result, char *string) {
218   char *p = string;
219 
220   *result = 0U;
221   while (*p != '\0') {
222     if (*p == ',') {
223       /* Skip commas */
224     } else if (!isdigit((int) *p)) {
225       return false;
226     } else {
227       *result = (*result) * 10 + (*p - '0');
228     }
229     p++;
230   }
231   return true;
232 }
233 
234 static bool
isrange(Univcoord_T * start,Univcoord_T * end,char * string)235 isrange (Univcoord_T *start, Univcoord_T *end, char *string) {
236   bool result;
237   Univcoord_T length;
238   char *copy, *startstring, *endstring;
239 
240   copy = (char *) CALLOC(strlen(string)+1,sizeof(char));
241   strcpy(copy,string);
242 
243   if (index(copy,'.')) {
244     startstring = strtok(copy,"..");
245     endstring = strtok(NULL,"..");
246     result = (isnumberp(&(*start),startstring) && isnumberp(&(*end),endstring));
247     FREE(copy);
248     return result;
249 
250   } else if (index(copy,'+')) {
251     startstring = strtok(copy,"+");
252     endstring = strtok(NULL,"+");
253     if (!isnumberp(&(*start),startstring)) {
254       result = false;
255     } else if (endstring[0] == '-' && isnumberp(&length,&(endstring[1]))) {
256       *end = (*start) - length;
257       result = true;
258     } else if (!isnumberp(&length,endstring)) {
259       result = false;
260     } else {
261       *end = (*start) + length;
262       result = true;
263     }
264     FREE(copy);
265     return result;
266 
267   } else if (index(copy,'-')) {
268     /* Old notation */
269     startstring = strtok(copy,"--");
270     endstring = strtok(NULL,"--");
271     result = (isnumberp(&(*start),startstring) && isnumberp(&(*end),endstring));
272     FREE(copy);
273     return result;
274 
275   } else {
276     FREE(copy);
277     return false;
278   }
279 }
280 
281 
282 /* Example: >A X:1..10 red.  Here, A is a label, 1 and 10 are start and end, X is a div, and red is a type. */
283 /* Other variants: >A 1..10 red, or >A 1..10 */
284 static char *
scan_header_div(int * labellength,bool * seenp,List_T * divlist,List_T * typelist,Tableint_T div_seenp,Tableint_T typetable,bool * valuep,double * value,char ** label,Univcoord_T * start,Univcoord_T * end,int * type,char ** restofheader,char * header,int line_length)285 scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelist, Tableint_T div_seenp, Tableint_T typetable,
286 		 bool *valuep, double *value, char **label, Univcoord_T *start, Univcoord_T *end, int *type,
287 		 char **restofheader, char *header, int line_length) {
288   char *divstring = NULL, *coords, *copy, *acc, *query, *tag, *typestring, *p;
289   char *valueptr;
290 
291   *seenp = false;
292   acc = (char *) MALLOC((line_length+1)*sizeof(char));
293   query = (char *) MALLOC((line_length+1)*sizeof(char));
294   tag = (char *) MALLOC((line_length+1)*sizeof(char));
295 
296   if (sscanf(header,">%s",acc) < 1) {
297     fprintf(stderr,"Error parsing %s.  Expecting a FASTA type header with a label, optional coords (as <div>:<number>..<number>), and optional tag.\n",header);
298     exit(9);
299   } else {
300     *labellength = strlen(acc);
301     *label = (char *) MALLOC(((*labellength)+1)*sizeof(char));
302     strcpy(*label,acc);
303   }
304 
305   if (acc_only_p == true || sscanf(header,">%s %s",acc,query) < 2) {
306     /* Treat query as acc:1..n */
307     divstring = (char *) MALLOC(((*labellength)+1)*sizeof(char));
308     strcpy(divstring,acc);
309 
310     if (Tableint_get(div_seenp,(void *) divstring) == 0) {
311       debug(printf("Entering new div %s.\n",divstring));
312       Tableint_put(div_seenp,(void *) divstring,(int) true);
313       copy = (char *) MALLOC((strlen(divstring)+1)*sizeof(char));
314       strcpy(copy,divstring);
315       *divlist = List_push(*divlist,copy);
316       *seenp = false;
317     } else {
318       fprintf(stderr,"Error parsing %s.  No interval given, and saw duplicate labels\n",header);
319       exit(9);
320     }
321     coords = (char *) NULL;
322 
323   } else if (!index(query,':')) {
324     debug(printf("Query %s has no div.\n",query));
325     divstring = (char *) CALLOC(1,sizeof(char));
326     divstring[0] = '\0';
327     coords = query;
328 
329   } else {
330     debug(printf("Parsed query %s into ",query));
331     p = strtok(query,":");
332 
333     divstring = (char *) MALLOC((strlen(p)+1)*sizeof(char));
334     strcpy(divstring,p);
335 
336     if (Tableint_get(div_seenp,(void *) divstring) == 0) {
337       debug(printf("Entering new div %s.\n",divstring));
338       Tableint_put(div_seenp,(void *) divstring,(int) true);
339       copy = (char *) MALLOC((strlen(divstring)+1)*sizeof(char));
340       strcpy(copy,divstring);
341       *divlist = List_push(*divlist,copy);
342       *seenp = false;
343     } else {
344       *seenp = true;
345     }
346 
347     coords = strtok(NULL,":");
348     debug(printf("div %s and coords %s\n",divstring,coords));
349   }
350 
351   if (coords == NULL) {
352     /* fprintf(stderr,"Error parsing %s.  Expecting coords (as <div>:<number>..<number>)\n",query); */
353     /* fprintf(stderr,"Problematic line was: %s\n",header); */
354     /* exit(9); */
355     *start = 1;
356     *end = 0;			/* Need to assign later */
357 
358   } else if (isnumberp(&(*start),coords)) {
359     debug(printf("  and coords %s as a number\n",coords));
360     *end = *start;
361   } else if (isrange(&(*start),&(*end),coords)) {
362     debug(printf("  and coords %s as a range starting at %llu and ending at %llu\n",
363 		 coords,(unsigned long long) *start,(unsigned long long) *end));
364   } else {
365     fprintf(stderr,"Error parsing %s:%s.  Expecting coords (as <div>:<number>..<number>).  Or specify --accession-only to ignore the second field\n",
366 	    query,coords);
367     fprintf(stderr,"Problematic line was: %s\n",header);
368     exit(9);
369   }
370 
371   if ((valueptr = strstr(header,"/value=")) == NULL) {
372     *value = NAN;
373   } else {
374     /* Note: Not checking for any errors */
375     *valuep = true;
376     valueptr += strlen("/value=");
377     *value = atof(valueptr);
378   }
379 
380   if (acc_only_p == true || sscanf(header,">%s %s %s",acc,query,tag) < 3) {
381     *type = 0;
382     *restofheader = (char *) NULL;
383 
384   } else if (!strncmp(tag,"/value=",strlen("/value="))) {
385     *type = 0;
386 
387     /* Get rest of header */
388     p = header;
389     while (!isspace(*p)) p++;	/* accession */
390     while (isspace(*p)) p++;
391 
392     while (!isspace(*p)) p++;	/* coords */
393     while (isspace(*p)) p++;
394 
395     if (*p == '\0') {
396       *restofheader = (char *) NULL;
397     } else {
398       *restofheader = (char *) MALLOC((strlen(p)+1)*sizeof(char));
399       strcpy(*restofheader,p);
400     }
401 
402   } else {
403     if ((*type = Tableint_get(typetable,(void *) tag)) == 0) {
404       /* Store types as 1-based */
405       *type = Tableint_length(typetable) + 1;
406       typestring = (char *) MALLOC((strlen(tag)+1)*sizeof(char));
407       strcpy(typestring,tag);
408       Tableint_put(typetable,typestring,*type);
409       *typelist = List_push(*typelist,typestring);
410       /* debug(printf("Entering new type %s.\n",typestring)); */
411     }
412 
413     /* Get rest of header */
414     p = header;
415     while (!isspace(*p)) p++;	/* accession */
416     while (isspace(*p)) p++;
417 
418     while (!isspace(*p)) p++;	/* coords */
419     while (isspace(*p)) p++;
420 
421     while (*p != '\0' && !isspace(*p)) p++;	/* tag */
422     while (*p != '\0' && isspace(*p)) p++;
423 
424     if (*p == '\0') {
425       *restofheader = (char *) NULL;
426     } else {
427       *restofheader = (char *) MALLOC((strlen(p)+1)*sizeof(char));
428       strcpy(*restofheader,p);
429     }
430   }
431 
432   FREE(tag);
433   FREE(query);
434   FREE(acc);
435 
436   return divstring;
437 }
438 
439 
440 
441 /* Example: >A 1 10 red.  Here, A is a label, 1 and 10 are start and end, and red is a type. */
442 static void
scan_header_univ(int * labellength,List_T * typelist,Tableint_T typetable,bool * valuep,double * value,char ** label,Univcoord_T * start,Univcoord_T * end,int * type,char * header,int line_length)443 scan_header_univ (int *labellength, List_T *typelist, Tableint_T typetable,
444 		  bool *valuep, double *value, char **label, Univcoord_T *start, Univcoord_T *end, int *type, char *header,
445 		  int line_length) {
446   char *acc, *typestring, *p, *ptr;
447   char *valueptr;
448   int nscanned;
449 
450   acc = (char *) MALLOC((line_length+1)*sizeof(char));
451 
452   nscanned = sscanf(header,">%s %llu %llu",acc,&(*start),&(*end));
453   if (nscanned < 3) {
454     fprintf(stderr,"Error parsing %s.  Expecting a FASTA type header with a label, two coordinates, and optional tag.\n",header);
455     exit(9);
456   } else {
457     if ((valueptr = strstr(header,"/value=")) == NULL) {
458       *value = NAN;
459     } else {
460       /* Note: Not checking for any errors */
461       *valuep = true;
462       valueptr += strlen("/value=");
463       *value = atof(valueptr);
464     }
465 
466     *labellength = strlen(acc);
467     *label = (char *) MALLOC((*labellength+1)*sizeof(char));
468     strcpy(*label,acc);
469 
470     p = header;
471     while (!isspace((int) *p)) { p++; } /* First word (label) */
472     while (isspace((int) *p)) { p++; } /* First space */
473     while (!isspace((int) *p)) { p++; } /* Second word (start coord) */
474     while (isspace((int) *p)) { p++; } /* Second space */
475     while (!isspace((int) *p)) { p++; } /* Third word (end coord) */
476     while (*p != '\0' && isspace((int) *p)) { p++; } /* Third space */
477 
478     if (*p == '\0') {
479       *type = 0;		/* Empty type string */
480     } else {
481       while (*p != '\0' && isspace((int) *p)) { p++; } /* Fourth space */
482       if (*p == '\0') {
483 	*type = 0;
484       } else if (!strncmp(p,"/value=",strlen("/value="))) {
485 	*type = 0;
486       } else {
487 	if ((ptr = rindex(p,'\n')) != NULL) {
488 	  while (isspace((int) *ptr)) { ptr--; } /* Erase empty space */
489 	  ptr++;
490 	  *ptr = '\0';
491 	}
492 
493 	if ((*type = Tableint_get(typetable,(void *) p)) == 0) {
494 	  /* Store types as 1-based */
495 	  *type = Tableint_length(typetable) + 1;
496 	  typestring = (char *) CALLOC(strlen(p)+1,sizeof(char));
497 	  strcpy(typestring,p);
498 	  Tableint_put(typetable,typestring,*type);
499 	  *typelist = List_push(*typelist,typestring);
500 	  /* debug(printf("Entering new type %s.\n",typestring)); */
501 	}
502       }
503     }
504   }
505 
506   FREE(acc);
507 
508   return;
509 }
510 
511 static List_T
parse_fieldlist(char * firstchar,FILE * fp)512 parse_fieldlist (char *firstchar, FILE *fp) {
513   List_T fieldlist = NULL;
514   char *line, *fieldname;
515   int line_length;
516 
517   while (!feof(fp) && (*firstchar = fgetc(fp)) != '>') {
518     if (*firstchar != EOF) {
519       line = Getline_wlength(&line_length,fp);
520       fieldname = (char *) MALLOC((line_length+2)*sizeof(char));
521       fieldname[0] = *firstchar;
522       strcpy(&(fieldname[1]),line);
523 
524       fieldlist = List_push(fieldlist,fieldname);
525     }
526   }
527 
528   return List_reverse(fieldlist);
529 }
530 
531 
532 static void
parse_fasta(bool * valuep,Univcoord_T * max_coordinate,Univcoord_T * label_totallength,Univcoord_T * annot_totallength,List_T * divlist,List_T * typelist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,FILE * fp,Tableint_T div_seenp,Tableint_T typetable,char firstchar)533 parse_fasta (bool *valuep, Univcoord_T *max_coordinate, Univcoord_T *label_totallength, Univcoord_T *annot_totallength,
534 	     List_T *divlist, List_T *typelist, Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
535 	     FILE *fp, Tableint_T div_seenp, Tableint_T typetable, char firstchar) {
536   char *header, *line, *divstring, *label, *restofheader = NULL, *tempstring;
537   int line_length;
538   double value;
539   Univcoord_T start, end;
540   List_T lines, d;
541   /* content_size includes restofheader, whereas sequence_length does not */
542   int labellength, content_size, sequence_length, type, nentries;
543   bool seenp;
544 
545   /* *max_coordinate = 0; */
546   *label_totallength = 0;
547   *annot_totallength = 0;
548 
549   if (feof(fp)) {
550     return;
551 
552   } else if (firstchar == '\0') {
553     header = Getline_wlinefeed(&line_length,fp);
554   } else {
555     line = Getline_wlinefeed(&line_length,fp);
556 
557     header = (char *) malloc((line_length+2)*sizeof(char));
558     header[0] = firstchar;
559     strcpy(&(header[1]),line);
560     FREE(line);
561     line_length += 1;
562   }
563   if (univ_format_p == true) {
564     scan_header_univ(&labellength,&(*typelist),typetable,&(*valuep),&value,&label,&start,&end,&type,
565 		     header,line_length);
566     seenp = false;
567     divstring = (char *) CALLOC(1,sizeof(char));
568     divstring[0] = '\0';
569     restofheader = (char *) NULL;
570   } else {
571     divstring = scan_header_div(&labellength,&seenp,&(*divlist),&(*typelist),
572 				div_seenp,typetable,&(*valuep),&value,&label,&start,&end,&type,
573 				&restofheader,header,line_length);
574   }
575   FREE(header);
576 
577   *max_coordinate = start;
578   if (end > *max_coordinate) {
579     *max_coordinate = end;
580   }
581 
582   Table_put(valuetable,(void *) divstring,
583 	    Doublelist_push(Table_get(valuetable,(void *) divstring),value));
584 
585   *label_totallength = labellength;
586   Table_put(labeltable,(void *) divstring,
587 	    List_push(Table_get(labeltable,(void *) divstring),label));
588 
589   lines = NULL;
590   content_size = sequence_length = 0;
591   if (restofheader != NULL) {
592     lines = List_push(lines,(void *) restofheader);
593     content_size += strlen(restofheader);
594   }
595 
596   nentries = 1;			/* Because we already processed the first entry above */
597   while ((line = Getline_wlinefeed(&line_length,fp)) != NULL) {
598     if (line[0] == '>') {
599       if (++nentries % MONITOR_INTERVAL == 0) {
600 	fprintf(stderr,"Read %d entries in FASTA file...\n",nentries);
601       }
602 
603       /* Store as Univinterval_T now, but may need to change to Interval_T later */
604       if (end == 0) {
605 	/* No coordinates given, so assume that the annotation represents a sequence with coords 1..length(annotation) */
606 	if (sequence_length == 0) {
607 	  start = end = 0;
608 	} else if ((end = sequence_length - 1) > *max_coordinate) {
609 	  /* fprintf(stderr,"Assigning %llu to end\n",end); */
610 	  *max_coordinate = end;
611 	}
612       }
613       Table_put(intervaltable,(void *) divstring,
614 		List_push(Table_get(intervaltable,(void *) divstring),
615 			  (void *) Univinterval_new(start,end,type)));
616 
617       lines = List_reverse(lines);
618       if (restofheader == NULL && content_size > 0) {
619 	tempstring = (char *) CALLOC(2,sizeof(char));
620 	tempstring[0] = '\n';
621 	tempstring[1] = '\0';
622 	lines = List_push(lines,tempstring);
623 	content_size += 1;
624       }
625       *annot_totallength += content_size;
626 
627       Table_put(annottable,(void *) divstring,
628 		List_push(Table_get(annottable,(void *) divstring),
629 			  (void *) concatenate_lines(lines,content_size)));
630       List_free(&lines);
631 
632       if (seenp == true) {
633 	FREE(divstring);
634       }
635       if (univ_format_p == true) {
636 	scan_header_univ(&labellength,&(*typelist),typetable,
637 			 &(*valuep),&value,&label,&start,&end,&type,line,line_length);
638 	seenp = false;
639 	divstring = (char *) CALLOC(1,sizeof(char));
640 	divstring[0] = '\0';
641 	restofheader = (char *) NULL;
642       } else {
643 	divstring = scan_header_div(&labellength,&seenp,&(*divlist),&(*typelist),div_seenp,typetable,
644 				    &(*valuep),&value,&label,&start,&end,&type,&restofheader,line,line_length);
645       }
646       if (start > *max_coordinate) {
647 	*max_coordinate = start;
648       }
649       if (end > *max_coordinate) {
650 	*max_coordinate = end;
651       }
652 
653       Table_put(valuetable,(void *) divstring,
654 		Doublelist_push(Table_get(valuetable,(void *) divstring),value));
655 
656       *label_totallength += labellength;
657       Table_put(labeltable,(void *) divstring,
658 		List_push(Table_get(labeltable,(void *) divstring),label));
659 
660       lines = NULL;
661       content_size = sequence_length = 0;
662       if (restofheader != NULL) {
663 	lines = List_push(lines,(void *) restofheader);
664 	content_size += strlen(restofheader);
665       }
666 
667       FREE(line);
668 
669     } else {
670       lines = List_push(lines,(void *) line);
671       content_size += line_length;
672       sequence_length += line_length;
673     }
674 
675   }
676   fprintf(stderr,"Finished reading FASTA file -- total entries: %d\n",nentries);
677 
678   /* Store as Univinterval_T now, but may need to change later */
679   if (end == 0) {
680     /* No coordinates given, so assume that the annotation represents a sequence with coords 1..length(annotation) */
681     if (sequence_length == 0) {
682       start = end = 0;
683     } else if ((end = sequence_length - 1) > *max_coordinate) {
684       fprintf(stderr,"Assigning %llu to end\n",end);
685       *max_coordinate = end;
686     }
687   }
688   Table_put(intervaltable,(void *) divstring,
689 	    List_push(Table_get(intervaltable,(void *) divstring),
690 		      (void *) Univinterval_new(start,end,type)));
691 
692   lines = List_reverse(lines);
693   if (restofheader == NULL && content_size > 0) {
694     tempstring = (char *) CALLOC(2,sizeof(char));
695     tempstring[0] = '\n';
696     tempstring[1] = '\0';
697     lines = List_push(lines,tempstring);
698     content_size += 1;
699   }
700   *annot_totallength += content_size;
701   Table_put(annottable,(void *) divstring,
702 	    List_push(Table_get(annottable,(void *) divstring),
703 		      (void *) concatenate_lines(lines,content_size)));
704   List_free(&lines);
705 
706   if (seenp == true) {
707     FREE(divstring);
708   }
709 
710   fprintf(stderr,"Maximum coordinate: %llu\n",(unsigned long long) *max_coordinate);
711   fprintf(stderr,"Total label length: %llu + %d separators\n",(unsigned long long) *label_totallength,nentries);
712   fprintf(stderr,"Total annotation length: %llu + %d separators\n",(unsigned long long) *annot_totallength,nentries);
713   *label_totallength += nentries;
714   *annot_totallength += nentries;
715 
716   /* Reverse all lists */
717   fprintf(stderr,"Saw %d distinct divisions/chromosomes\n",List_length(*divlist)-1);
718   *divlist = List_reverse(*divlist);
719 
720   fprintf(stderr,"Saw %d distinct tags/types\n",List_length(*typelist));
721   *typelist = List_reverse(*typelist);
722 
723   for (d = *divlist; d != NULL; d = List_next(d)) {
724     divstring = (char *) List_head(d);
725     Table_put(intervaltable,(void *) divstring,
726 	      List_reverse((List_T) Table_get(intervaltable,(void *) divstring)));
727     Table_put(valuetable,(void *) divstring,
728 	      Doublelist_reverse((Doublelist_T) Table_get(valuetable,(void *) divstring)));
729     Table_put(labeltable,(void *) divstring,
730 	      List_reverse((List_T) Table_get(labeltable,(void *) divstring)));
731     Table_put(annottable,(void *) divstring,
732 	      List_reverse((List_T) Table_get(annottable,(void *) divstring)));
733   }
734 
735   return;
736 }
737 
738 
/* Splits line at tab characters and stores a pointer to each field in
   columns[0..n-1], where n is the return value and is at most
   maxfields.  Fields beyond maxfields are ignored.

   The line is modified in place (tabs are overwritten by strtok), and
   the stored pointers point into it.  Because strtok treats a run of
   tabs as a single separator, empty fields are skipped rather than
   reported.  A line without any field yields 0 and leaves columns
   untouched. */
static int
assign_columns (char **columns, char *line, int maxfields) {
  char *token;
  int nfields = 0;

  for (token = strtok(line,"\t");
       token != NULL && nfields < maxfields;
       token = strtok(NULL,"\t")) {
    columns[nfields++] = token;
  }
  return nfields;
}
750 
751 
/* Zero-based indices into the array of tab-separated fields of a GFF3
   line.  The usage text counts columns from 1, so these correspond to
   columns 1 (chromosome), 4 (start), 5 (end), 7 (strand), and 9
   (feature attributes). */
#define CHRCOLUMN 0
#define STARTCOLUMN 3
#define ENDCOLUMN 4
#define STRANDCOLUMN 6
#define FEATURECOLUMN 8
/* Number of fields in a complete GFF3 line */
#define GFF3_COLUMNS 9
758 
/* Modifies feature */

/* Searches the ';'-separated attributes in feature for the first one
   that begins with labelstr (for example "ID="), and returns a pointer
   to its value, or NULL if no attribute matches.

   labelstrlen must be the length of labelstr.  If the value starts with
   a double quote, the enclosing quotes are removed; a missing closing
   quote is a fatal error, reported with lineno (message to stderr, exit
   status 9).

   The returned pointer points into feature, which is cut into pieces
   by strtok.  The caller must copy the value if it is to outlive the
   buffer. */
static char *
gff3_feature_id (char *feature, char *labelstr, int labelstrlen, int lineno) {
  char *token, *value, *p;

  for (token = strtok(feature,";"); token != NULL; token = strtok(NULL,";")) {
    if (strncmp(token,labelstr,labelstrlen) != 0) {
      continue;
    }

    value = &(token[labelstrlen]);
    if (value[0] != '"') {
      return value;
    }

    /* Quotation marks: step over the opening one, and cut at the last one */
    value = &(value[1]);
    if ((p = strrchr(value,'"')) == NULL) {
      fprintf(stderr,"Error in line %d: Saw no matching quotation in %s\n",lineno,token);
      exit(9);
    }
    *p = '\0';
    return value;
  }

  return NULL;
}
802 
/* Returns true if line is empty or consists only of white space
   characters (as defined by isspace), and false otherwise. */
static bool
empty_line_p (char *line) {
  char *p = line;

  /* The cast is needed because isspace is undefined for negative char
     values */
  while (*p != '\0' && isspace((unsigned char) *p)) {
    p++;
  }
  return (*p == '\0') ? true : false;
}
816 
817 static void
parse_gff3(List_T * divlist,Table_T intervaltable,Table_T labeltable,Table_T annottable,FILE * fp,Tableint_T div_seenp)818 parse_gff3 (List_T *divlist, Table_T intervaltable, Table_T labeltable, Table_T annottable,
819 	    FILE *fp, Tableint_T div_seenp) {
820   char *line, Space[1000], *columns[GFF3_COLUMNS];
821   char *divstring, *label, *chr, *idptr;
822   List_T d;
823   Univcoord_T start, end;
824   int nfields, lineno = 0, row = 0, labelstrlen;
825   char strandchar;
826   char *labelstr;
827 
828   labelstr = (char *) CALLOC(strlen(labelid) + strlen("=") + 1,sizeof(char));
829   sprintf(labelstr,"%s=",labelid);
830   labelstrlen = strlen(labelstr);
831 
832   while ((line = Getline(fp)) != NULL) {
833     lineno++;
834     if (line[0] == '#') {
835       /* Skip comment */
836       FREE(line);
837 
838     } else if (empty_line_p(line) == true) {
839       /* Skip empty line */
840       FREE(line);
841 
842     } else {
843 #if 0
844       if ((p = rindex(line,'\n')) == NULL) {
845 	fprintf(stderr,"Line exceeds maximum length of %d\n",LINELENGTH);
846 	exit(9);
847       } else {
848 	*p = '\0';
849       }
850 #endif
851 
852       nfields = assign_columns(columns,line,GFF3_COLUMNS); /* destroys line */
853 
854       if (nfields < GFF3_COLUMNS-1) {
855 	/* Subract 1 to allow for an empty feature column */
856 	fprintf(stderr,"Skipping line %d with only %d fields: %s\n",lineno,nfields,line);
857 	FREE(line);
858 
859       } else {
860 	chr = columns[CHRCOLUMN];
861 	divstring = (char *) CALLOC(strlen(chr)+1,sizeof(char));
862 	sprintf(divstring,"%s",chr);
863 
864 	if ((strandchar = columns[STRANDCOLUMN][0]) == '+') {
865 	  start = atof(columns[STARTCOLUMN]);
866 	  end = atof(columns[ENDCOLUMN]);
867 	} else if (strandchar == '-') {
868 	  start = atof(columns[ENDCOLUMN]);
869 	  end = atof(columns[STARTCOLUMN]);
870 	} else if (strandchar == '.' || strandchar == '?') {
871 	  start = atof(columns[STARTCOLUMN]);
872 	  end = atof(columns[ENDCOLUMN]);
873 	} else {
874 	  start = atof(columns[STARTCOLUMN]);
875 	  end = atof(columns[ENDCOLUMN]);
876 	}
877 
878 	if (Tableint_get(div_seenp,(void *) divstring) == 0) {
879 	  Tableint_put(div_seenp,(void *) divstring,(int) true);
880 	  *divlist = List_push(*divlist,divstring);
881 	}
882 
883 	/* Store Univinterval_T now, but may need to change later */
884 	Table_put(intervaltable,(void *) divstring,
885 		  List_push(Table_get(intervaltable,(void *) divstring),
886 			    (void *) Univinterval_new(start,end,/*type*/0)));
887 
888 	if (nfields <= FEATURECOLUMN) {
889 	  sprintf(Space,"gff.%d",row);
890 	  label = (char *) MALLOC((strlen(Space)+1)*sizeof(char));
891 	  strcpy(label,Space);
892 	} else if ((idptr = gff3_feature_id(columns[FEATURECOLUMN],labelstr,labelstrlen,lineno)) == NULL) {
893 	  sprintf(Space,"gff.%d",row);
894 	  label = (char *) MALLOC((strlen(Space)+1)*sizeof(char));
895 	  strcpy(label,Space);
896 	} else {
897 	  label = (char *) MALLOC((strlen(idptr)+1)*sizeof(char));
898 	  strcpy(label,idptr);
899 	}
900 	Table_put(labeltable,(void *) divstring,
901 		  List_push(Table_get(labeltable,(void *) divstring),label));
902 	Table_put(annottable,(void *) divstring,
903 		  List_push(Table_get(annottable,(void *) divstring),line));
904 
905 	row++;
906       }
907     }
908   }
909 
910   *divlist = List_reverse(*divlist);
911 
912   for (d = *divlist; d != NULL; d = List_next(d)) {
913     divstring = (char *) List_head(d);
914     Table_put(intervaltable,(void *) divstring,
915 	      List_reverse((List_T) Table_get(intervaltable,(void *) divstring)));
916     Table_put(labeltable,(void *) divstring,
917 	      List_reverse((List_T) Table_get(labeltable,(void *) divstring)));
918     Table_put(annottable,(void *) divstring,
919 	      List_reverse((List_T) Table_get(annottable,(void *) divstring)));
920   }
921 
922   FREE(labelstr);
923 
924   return;
925 }
926 
927 
/* When compiling with __STRICT_ANSI__ defined, declare getopt explicitly
   here (presumably because the system headers do not expose it in that
   mode -- TODO confirm). */
#ifdef __STRICT_ANSI__
int getopt (int argc, char *const argv[], const char *optstring);
#endif
931 
932 int
main(int argc,char * argv[])933 main (int argc, char *argv[]) {
934   char *inputfile = NULL, *iitfile, *tempstring, *divstring, *typestring, *p;
935   char firstchar;
936   List_T d, l, templist = NULL, divlist = NULL, typelist = NULL, fieldlist = NULL;
937   Doublelist_T valuelist;
938   List_T newlist;
939   FILE *fp;
940   Univinterval_T univinterval;
941   Interval_T interval;
942   Tableint_T div_seenp, typetable;
943   Table_T intervaltable, labeltable, valuetable, annottable;
944   Chrom_T *chroms = NULL;
945   int n_proper_divs = 0, i;
946   bool coord_values_8p, label_pointers_8p, annot_pointers_8p, valuep = false;
947   Univcoord_T order;
948   Univcoord_T max_coordinate, label_totallength, annot_totallength;
949 
950   int opt;
951   extern int optind;
952   extern char *optarg;
953   int long_option_index = 0;
954   const char *long_name;
955 
956   while ((opt = getopt_long(argc,argv,"o:1FGl:v:s:",
957 			    long_options,&long_option_index)) != -1) {
958     switch (opt) {
959 
960     case 0:
961       long_name = long_options[long_option_index].name;
962       if (!strcmp(long_name,"accession-only")) {
963 	acc_only_p = true;
964       } else {
965 	/* Shouldn't reach here */
966 	fprintf(stderr,"Don't recognize option %s.  For usage, run 'iit_store --help'",long_name);
967 	return 9;
968       }
969       break;
970 
971     case 'o': outputfile = optarg; break;
972     case '1': univ_format_p = true; break;
973     case 'F': fieldsp = true; break;
974     case 'G': gff3_format_p = true; break;
975     case 'l': labelid = optarg; break;
976     case 'v': iit_version = atoi(optarg); break;
977     case 's':
978       if (!strcmp(optarg,"none")) {
979 	divsort = NO_SORT;
980       } else if (!strcmp(optarg,"alpha")) {
981 	divsort = ALPHA_SORT;
982       } else if (!strcmp(optarg,"numeric-alpha")) {
983 	divsort = NUMERIC_ALPHA_SORT;
984       } else if (!strcmp(optarg,"chrom")) {
985 	divsort = CHROM_SORT;
986       } else {
987 	fprintf(stderr,"Don't recognize sort type %s.  Allowed values are none, alpha, or chrom.",optarg);
988 	exit(9);
989       }
990       break;
991     case 'V': print_program_version(); exit(0);
992     case '?': print_program_usage(); exit(0);
993     default: exit(9);
994     }
995   }
996   argc -= optind;
997   argv += optind;
998 
999   if (outputfile == NULL) {
1000     fprintf(stderr,"Need to specify an output file with the -o flag\n");
1001     exit(9);
1002   } else if (iit_version > IIT_LATEST_VERSION_NOVALUES && iit_version > IIT_LATEST_VERSION_VALUES) {
1003     fprintf(stderr,"version %d requested, but this program can write only up to version %d or %d\n",
1004 	    iit_version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
1005     exit(9);
1006   }
1007 
1008   if (argc < 1) {
1009     fp = stdin;
1010   } else {
1011     inputfile = argv[0];
1012     fp = FOPEN_READ_TEXT(inputfile);
1013     if (!fp) {
1014       fprintf(stderr,"Can't open file %s\n",inputfile);
1015       exit(9);
1016     }
1017   }
1018 
1019   div_seenp = Tableint_new(100,Table_string_compare,Table_string_hash);
1020   typetable = Tableint_new(100,Table_string_compare,Table_string_hash);
1021   intervaltable = Table_new(100,Table_string_compare,Table_string_hash);
1022   valuetable = Table_new(100,Table_string_compare,Table_string_hash);
1023   labeltable = Table_new(100,Table_string_compare,Table_string_hash);
1024   annottable = Table_new(100,Table_string_compare,Table_string_hash);
1025 
1026   /* The zeroth div is empty */
1027   divstring = (char *) CALLOC(1,sizeof(char));
1028   divstring[0] = '\0';
1029   divlist = List_push(NULL,divstring);
1030 
1031   /* The zeroth type is empty */
1032   typestring = (char *) CALLOC(1,sizeof(char));
1033   typestring[0] = '\0';
1034   typelist = List_push(NULL,typestring);
1035 
1036   if (univ_format_p == true) {
1037     typestring = (char *) MALLOC((strlen("circular")+1)*sizeof(char));
1038     strcpy(typestring,"circular");
1039     Tableint_put(typetable,typestring,/*type*/1);
1040   }
1041 
1042   if (gff3_format_p == true) {
1043     parse_gff3(&divlist,intervaltable,labeltable,annottable,fp,div_seenp);
1044   } else {
1045     fieldlist = parse_fieldlist(&firstchar,fp);
1046     parse_fasta(&valuep,&max_coordinate,&label_totallength,&annot_totallength,
1047 		&divlist,&typelist,intervaltable,valuetable,labeltable,annottable,
1048 		fp,div_seenp,typetable,firstchar);
1049   }
1050 
1051   if (inputfile != NULL) {
1052     fclose(fp);
1053   }
1054 
1055   if (univ_format_p == true) {
1056     iit_version = 1;
1057     typestring = (char *) MALLOC((strlen("circular")+1)*sizeof(char));
1058     strcpy(typestring,"circular");
1059     typelist = List_push(typelist,typestring);
1060     typelist = List_reverse(typelist);
1061 
1062   } else if (iit_version == 0 && List_length(divlist) == 1) {
1063     /* No divs other than NULL */
1064     fprintf(stderr,"No divs/chromosomes provided, so storing as IIT version 1\n");
1065     iit_version = 1;
1066   }
1067 
1068   coord_values_8p = false;
1069   label_pointers_8p = false;
1070   annot_pointers_8p = false;
1071 #ifdef HAVE_64_BIT
1072   if (gff3_format_p == true) {
1073     coord_values_8p = false;
1074   } else if (max_coordinate > 4294967295U) {
1075     coord_values_8p = true;
1076   }
1077   if (iit_version == 0) {
1078     if (gff3_format_p == true) {
1079       label_pointers_8p = false;
1080     } else if (label_totallength > 4294967295U) {
1081       label_pointers_8p = true;
1082     }
1083     if (gff3_format_p == true) {
1084       annot_pointers_8p = false;
1085     } else if (annot_totallength > 4294967295U) {
1086       annot_pointers_8p = true;
1087     }
1088     if (valuep == true) {
1089       iit_version = IIT_LATEST_VERSION_VALUES;
1090     } else {
1091       iit_version = IIT_LATEST_VERSION_NOVALUES;
1092     }
1093 
1094   } else if (iit_version == 4) {
1095     if (label_totallength > 4294967295U || annot_totallength > 4294967295U) {
1096       /* Both pointer types have to match */
1097       label_pointers_8p = true;
1098       annot_pointers_8p = true;
1099     }
1100 
1101   } else if (iit_version <= 3) {
1102     if (label_totallength > 4294967295U || annot_totallength > 4294967295U) {
1103       fprintf(stderr,"Need 8-byte pointers, which requires you to specify a version of 4 or greater\n");
1104       exit(9);
1105     }
1106   }
1107 #else
1108   if (iit_version == 0) {
1109     if (valuep == true) {
1110       iit_version = IIT_LATEST_VERSION_VALUES;
1111     } else {
1112       iit_version = IIT_LATEST_VERSION_NOVALUES;
1113     }
1114   }
1115 #endif
1116 
1117   if (iit_version == 1) {
1118     /* Will use Univinterval_T objects, which may print as UINT8 or UINT4 */
1119   } else if (coord_values_8p == true) {
1120     fprintf(stderr,"Cannot have large coordinates, except for chromosome IIT files\n");
1121     exit(9);
1122   } else {
1123     /* Convert all Univinterval_T objects to Interval_T objects */
1124 
1125     for (d = divlist; d != NULL; d = List_next(d)) {
1126       divstring = (char *) List_head(d);
1127       templist = (List_T) Table_get(intervaltable,(void *) divstring);
1128       newlist = (List_T) NULL;
1129       for (l = templist; l != NULL; l = List_next(l)) {
1130 	univinterval = (Univinterval_T) List_head(l);
1131 	if (Univinterval_sign(univinterval) < 0) {
1132 	  newlist = List_push(newlist,
1133 			      (void *) Interval_new(Univinterval_high(univinterval),
1134 						    Univinterval_low(univinterval),
1135 						    Univinterval_type(univinterval)));
1136 	} else {
1137 	  newlist = List_push(newlist,
1138 			      (void *) Interval_new(Univinterval_low(univinterval),
1139 						    Univinterval_high(univinterval),
1140 						    Univinterval_type(univinterval)));
1141 	}
1142 	Univinterval_free(&univinterval);
1143       }
1144       Table_put(intervaltable,(void *) divstring,(void *) List_reverse(newlist));
1145       List_free(&templist);
1146     }
1147   }
1148 
1149 
1150   /* Figure out name of iit file */
1151   if (strlen(outputfile) < 4) {
1152     iitfile = (char *) CALLOC(strlen(outputfile)+strlen(".iit")+1,sizeof(char));
1153     sprintf(iitfile,"%s.iit",outputfile);
1154   } else {
1155     p = &(outputfile[strlen(outputfile)]);
1156     p -= 4;
1157     if (!strcmp(p,".iit")) {
1158       iitfile = (char *) CALLOC(strlen(outputfile)+1,sizeof(char));
1159       strcpy(iitfile,outputfile);
1160     } else {
1161       iitfile = (char *) CALLOC(strlen(outputfile)+strlen(".iit")+1,sizeof(char));
1162       sprintf(iitfile,"%s.iit",outputfile);
1163     }
1164   }
1165 
1166   order = 0;
1167   if ((n_proper_divs = List_length(divlist) - 1) > 0) {
1168     chroms = (Chrom_T *) CALLOC(n_proper_divs,sizeof(Chrom_T));
1169     for (l = divlist, i = 0; l != NULL; l = List_next(l)) {
1170       tempstring = (char *) List_head(l);
1171       if (tempstring[0] == '\0') {
1172 	/* FREE(tempstring); -- Causes invalid read later in table_string_compare */
1173       } else {
1174 	chroms[i++] = Chrom_from_string(tempstring,mitochondrial_string,order++,
1175 					/*circularp*/false,/*alt_scaffold_start*/0,/*alt_scaffold_end*/0);
1176       }
1177     }
1178   }
1179 
1180 #if 0
1181   /* Need to have these existing for the IIT_write command below */
1182   for (l = divlist; l != NULL; l = List_next(l)) {
1183     divstring = (char *) List_head(l);
1184     FREE(divstring);
1185   }
1186 #endif
1187   List_free(&divlist);
1188 
1189   switch (divsort) {
1190   case NO_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_order); break;
1191   case ALPHA_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_alpha); break;
1192   case NUMERIC_ALPHA_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_numeric_alpha); break;
1193   case CHROM_SORT: qsort(chroms,n_proper_divs,sizeof(Chrom_T),Chrom_compare_chrom); break;
1194   default: fprintf(stderr,"Don't recognize divsort type %d\n",divsort); abort();
1195   }
1196 
1197   /* The zeroth div is empty */
1198   divstring = (char *) CALLOC(1,sizeof(char));
1199   divstring[0] = '\0';
1200   divlist = List_push(NULL,divstring);
1201 
1202   for (i = 0; i < n_proper_divs; i++) {
1203     divlist = List_push(divlist,Chrom_string(chroms[i]));
1204   }
1205   divlist = List_reverse(divlist);
1206 
1207 #if 0
1208   /* Causes invalid reads later on */
1209   for (i = 0; i < n_proper_divs; i++) {
1210     Chrom_free(&(chroms[i]));
1211   }
1212 #endif
1213 
1214   FREE(chroms);
1215 
1216 
1217   if (iit_version == 1) {
1218     IIT_write_univ(iitfile,divlist,typelist,intervaltable,labeltable,annottable,
1219 		   coord_values_8p,label_pointers_8p,annot_pointers_8p);
1220   } else if (valuep == false) {
1221     IIT_write(iitfile,divlist,typelist,fieldlist,intervaltable,/*valuetable*/NULL,labeltable,annottable,
1222 	      divsort,iit_version,label_pointers_8p,annot_pointers_8p);
1223   } else {
1224     IIT_write(iitfile,divlist,typelist,fieldlist,intervaltable,valuetable,labeltable,annottable,
1225 	      divsort,iit_version,label_pointers_8p,annot_pointers_8p);
1226   }
1227   FREE(iitfile);
1228 
1229   for (d = divlist; d != NULL; d = List_next(d)) {
1230     divstring = (char *) List_head(d);
1231 
1232     templist = (List_T) Table_get(annottable,(void *) divstring);
1233     for (l = templist; l != NULL; l = List_next(l)) {
1234       tempstring = (char *) List_head(l);
1235       FREE(tempstring);
1236     }
1237     List_free(&templist);
1238 
1239     valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
1240     Doublelist_free(&valuelist);
1241 
1242     templist = (List_T) Table_get(labeltable,(void *) divstring);
1243     for (l = templist; l != NULL; l = List_next(l)) {
1244       tempstring = (char *) List_head(l);
1245       FREE(tempstring);
1246     }
1247     List_free(&templist);
1248 
1249     templist = (List_T) Table_get(intervaltable,(void *) divstring);
1250     if (iit_version == 1) {
1251       for (l = templist; l != NULL; l = List_next(l)) {
1252 	univinterval = (Univinterval_T) List_head(l);
1253 	Univinterval_free(&univinterval);
1254       }
1255     } else {
1256       for (l = templist; l != NULL; l = List_next(l)) {
1257 	interval = (Interval_T) List_head(l);
1258 	Interval_free(&interval);
1259       }
1260     }
1261     List_free(&templist);
1262 
1263   }
1264 
1265 
1266   Table_free(&intervaltable);
1267   Table_free(&valuetable);
1268   Table_free(&labeltable);
1269   Table_free(&annottable);
1270 
1271   for (l = fieldlist; l != NULL; l = List_next(l)) {
1272     tempstring = (char *) List_head(l);
1273     FREE(tempstring);
1274   }
1275   List_free(&fieldlist);
1276 
1277   for (l = typelist; l != NULL; l = List_next(l)) {
1278     tempstring = (char *) List_head(l);
1279     FREE(tempstring);
1280   }
1281   List_free(&typelist);
1282 
1283   for (l = divlist; l != NULL; l = List_next(l)) {
1284     tempstring = (char *) List_head(l);
1285     FREE(tempstring);
1286   }
1287   List_free(&divlist);
1288 
1289   Tableint_free(&typetable);
1290   Tableint_free(&div_seenp);
1291 
1292   return 0;
1293 }
1294