1 static char rcsid[] = "$Id: iit-read.c 222390 2020-04-10 12:44:01Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "iit-read.h"
7 #include "iitdef.h"
8 
9 #ifdef WORDS_BIGENDIAN
10 #include "bigendian.h"
11 #else
12 #include "littleendian.h"
13 #endif
14 
15 #include <stdlib.h>		/* For qsort */
16 #include <string.h>		/* For memset */
17 #include <strings.h>
18 #include <ctype.h>		/* For isspace */
19 #ifdef HAVE_UNISTD_H
20 #include <unistd.h>		/* For mmap on Linux */
21 #endif
22 #ifdef HAVE_SYS_TYPES_H
23 #include <sys/types.h>		/* For open, fstat, and mmap */
24 #endif
25 /* Not sure why this was included
26 #include <sys/param.h>
27 */
28 #ifdef HAVE_FCNTL_H
29 #include <fcntl.h>		/* For open */
30 #endif
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>		/* For open and fstat */
33 #endif
34 #include <sys/mman.h>		/* For mmap and madvise */
35 #include <math.h>		/* For qsort */
36 #include <errno.h>		/* For perror */
37 #include "assert.h"
38 #include "except.h"
39 #include "mem.h"
40 #include "access.h"
41 #include "fopen.h"
42 
43 /* Note: if sizeof(int) or sizeof(unsigned int) are not 4, then the below code is faulty */
44 
45 
46 /* Integer interval tree. */
47 
48 /*
49  * n intervals;
50  *   specified by their indices e[1..n]
51  *   and endpoint-access function:
52  *                low  (e[i])
53  *                high (e[i])
54  *        is_contained (x, e[i])
55  *   eg:
56  *        interval e[i]          ... "[" low (e[i]) "," high (e[i]) ")"
57  *        is_contained (x, e[i]) ... (    (low (e[i]) <= x
58  *                                    and (x < high (e[i]))
59  */
60 
61 /*--------------------------------------------------------------------------*/
62 
63 #ifdef DEBUG
64 #define debug(x) x
65 #else
66 #define debug(x)
67 #endif
68 
69 /* Timing */
70 #ifdef DEBUG1
71 #define debug1(x) x
72 #else
73 #define debug1(x)
74 #endif
75 
76 /* Flanking */
77 #ifdef DEBUG2
78 #define debug2(x) x
79 #else
80 #define debug2(x)
81 #endif
82 
83 /* Binary search */
84 #ifdef DEBUG3
85 #define debug3(x) x
86 #else
87 #define debug3(x)
88 #endif
89 
90 
91 
92 #define T IIT_T
93 
94 static void
file_move_absolute(int fd,size_t offset,size_t objsize,Chrpos_T n)95 file_move_absolute (int fd, size_t offset, size_t objsize, Chrpos_T n) {
96   off_t position = offset + n*objsize;
97 
98   if (lseek(fd,position,SEEK_SET) < 0) {
99     perror("Error in gmap, file_move_label");
100     exit(9);
101   }
102   return;
103 }
104 
105 
/* Opens an IIT file and inspects the first integer stored in it.
   When add_iit_p is true, "<filename>.iit" is tried first and filename
   itself is the fallback.  Returns false when no file can be opened,
   when the first integer cannot be read, or when that integer is 0;
   returns true otherwise.  The file is always closed before returning. */
bool
IIT_universalp (char *filename, bool add_iit_p) {
  char *newfile = NULL;
  FILE *fp;
  int total_nintervals;
  bool result;

  if (add_iit_p != true) {
    if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
      return false;
    }

  } else {
    newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
    sprintf(newfile,"%s.iit",filename);
    if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
      /* From here on, messages refer to the file actually opened */
      filename = newfile;
    } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
      FREE(newfile);
      return false;
    }
  }

  if (FREAD_INT(&total_nintervals,fp) < 1) {
    fprintf(stderr,"IIT file %s appears to be empty\n",filename);
    result = false;
  } else if (total_nintervals == 0) {
    /* Need to use Univ_IIT_read instead */
    result = false;
  } else {
    result = true;
  }

  /* Single cleanup path shared by all three outcomes */
  fclose(fp);
  if (add_iit_p == true) {
    FREE(newfile);
  }
  return result;
}
149 
150 
151 bool
IIT_valuep(T this)152 IIT_valuep (T this) {
153   return this->valuep;
154 }
155 
156 
157 char *
IIT_name(T this)158 IIT_name (T this) {
159   return this->name;
160 }
161 
162 int
IIT_version(T this)163 IIT_version (T this) {
164   return this->version;
165 }
166 
167 int
IIT_total_nintervals(T this)168 IIT_total_nintervals (T this) {
169   return this->total_nintervals;
170 }
171 
172 int
IIT_nintervals(T this,int divno)173 IIT_nintervals (T this, int divno) {
174   return this->nintervals[divno];
175 }
176 
177 
178 int
IIT_ntypes(T this)179 IIT_ntypes (T this) {
180   return this->ntypes;
181 }
182 
183 int
IIT_nfields(T this)184 IIT_nfields (T this) {
185   return this->nfields;
186 }
187 
188 
189 Chrpos_T
IIT_length(T this,int index)190 IIT_length (T this, int index) {
191   Interval_T interval;
192 
193   interval = &(this->intervals[0][index-1]);
194   return Interval_length(interval);
195 }
196 
197 
198 Chrpos_T
IIT_divlength(T this,char * divstring)199 IIT_divlength (T this, char *divstring) {
200   Chrpos_T max = 0U;
201   Interval_T interval;
202   int divno, i;
203 
204   divno = IIT_divint(this,divstring);
205   for (i = 0; i < this->nintervals[divno]; i++) {
206     interval = &(this->intervals[divno][i]);
207     if (Interval_high(interval) > max) {
208       max = Interval_high(interval);
209     }
210   }
211   /* Convert from zero-based coordinate */
212   return max+1U;
213 }
214 
215 
216 /* Assumes intervals are stored using universal coordinates */
217 Chrpos_T
IIT_totallength(T this)218 IIT_totallength (T this) {
219   Chrpos_T max = 0U;
220   Interval_T interval;
221   int divno, i;
222 
223   for (divno = 0; divno < this->ndivs; divno++) {
224     for (i = 0; i < this->nintervals[divno]; i++) {
225       interval = &(this->intervals[divno][i]);
226       if (Interval_high(interval) > max) {
227 	max = Interval_high(interval);
228       }
229     }
230   }
231   /* Convert from zero-based coordinate */
232   return max+1U;
233 }
234 
235 
236 Interval_T
IIT_interval(T this,int index)237 IIT_interval (T this, int index) {
238   assert(index <= this->total_nintervals);
239   return &(this->intervals[0][index-1]); /* Convert to 0-based */
240 }
241 
242 /* Need to use for search on alphas (IIT_get_next and probably IIT_get_flanking) */
243 Interval_T
IIT_interval_for_divno(T this,int divno,int index)244 IIT_interval_for_divno (T this, int divno, int index) {
245   assert(index <= this->nintervals[divno]);
246   return &(this->intervals[divno][index-1]); /* Convert to 0-based */
247 }
248 
249 
250 Chrpos_T
IIT_interval_low(T this,int index)251 IIT_interval_low (T this, int index) {
252   Interval_T interval;
253 
254   assert(index <= this->total_nintervals);
255   interval = &(this->intervals[0][index-1]);
256   return Interval_low(interval);
257 }
258 
259 Chrpos_T
IIT_interval_high(T this,int index)260 IIT_interval_high (T this, int index) {
261   Interval_T interval;
262 
263   assert(index <= this->total_nintervals);
264   interval = &(this->intervals[0][index-1]);
265   return Interval_high(interval);
266 }
267 
268 Chrpos_T
IIT_interval_length(T this,int index)269 IIT_interval_length (T this, int index) {
270   Interval_T interval;
271 
272   assert(index <= this->total_nintervals);
273   interval = &(this->intervals[0][index-1]);
274   return Interval_length(interval);
275 }
276 
277 int
IIT_interval_type(T this,int index)278 IIT_interval_type (T this, int index) {
279   Interval_T interval;
280 
281   assert(index <= this->total_nintervals);
282   interval = &(this->intervals[0][index-1]);
283   return Interval_type(interval);
284 }
285 
286 
287 int
IIT_interval_sign(T this,int index)288 IIT_interval_sign (T this, int index) {
289   Interval_T interval;
290 
291   assert(index <= this->total_nintervals);
292   interval = &(this->intervals[0][index-1]);
293   return Interval_sign(interval);
294 }
295 
296 
297 /* chrhigh is one past the highest position in the chromosome */
298 void
IIT_interval_bounds(Chrpos_T * low,Chrpos_T * high,Chrpos_T * length,T this,int index,int circular_typeint)299 IIT_interval_bounds (Chrpos_T *low, Chrpos_T *high, Chrpos_T *length, T this,
300 		     int index, int circular_typeint) {
301   Interval_T interval;
302 
303   assert(index > 0);
304   assert(index <= this->total_nintervals);
305 
306   interval = &(this->intervals[0][index-1]);
307   *low = Interval_low(interval);
308   *length = Interval_length(interval);
309   if (Interval_type(interval) == circular_typeint) {
310     *high = Interval_high(interval) + 1 + (*length);
311   } else {
312     *high = Interval_high(interval) + 1;
313   }
314   return;
315 }
316 
317 int
IIT_index(T this,int divno,int i)318 IIT_index (T this, int divno, int i) {
319   return this->cum_nintervals[divno] + i + 1; /* 1-based */
320 }
321 
322 
323 
324 /* Note: ndivs includes div "0", so callers should iterate through at i < ndivs */
325 int
IIT_ndivs(T this)326 IIT_ndivs (T this) {
327   return this->ndivs;
328 }
329 
330 /* The iit file has a '\0' after each string, so functions know where
331    it ends */
332 char *
IIT_divstring(T this,int divno)333 IIT_divstring (T this, int divno) {
334   UINT4 start;
335 
336   start = this->divpointers[divno];
337   return &(this->divstrings[start]);
338 }
339 
340 int
IIT_divint(T this,char * divstring)341 IIT_divint (T this, char *divstring) {
342   int i = 0;			/* Actually divstring for divno 0 is NULL */
343   UINT4 start;
344 
345   if (divstring == NULL) {
346     return 0;
347   } else if (divstring[0] == '\0') {
348     return 0;
349   } else {
350     while (i < this->ndivs) {
351       start = this->divpointers[i];
352       if (!strcmp(divstring,&(this->divstrings[start]))) {
353 	return i;
354       }
355       i++;
356     }
357 
358     return -1;
359   }
360 }
361 
362 char *
IIT_divstring_from_index(T this,int index)363 IIT_divstring_from_index (T this, int index) {
364   int divno = 1;
365   UINT4 start;
366 
367   while (divno <= this->ndivs) {
368     /* Checked on existing iit file to confirm we need >= and not > */
369     if (this->cum_nintervals[divno] >= index) {
370       start = this->divpointers[divno-1];
371       return &(this->divstrings[start]);
372     }
373     divno++;
374   }
375 
376   return (char *) NULL;
377 }
378 
379 static int
IIT_divint_from_index(T this,int index)380 IIT_divint_from_index (T this, int index) {
381   int divno = 1;
382 
383   while (divno <= this->ndivs) {
384     /* Checked on existing iit file to confirm we need >= and not > */
385     if (this->cum_nintervals[divno] >= index) {
386       return divno-1;
387     }
388     divno++;
389   }
390 
391   return -1;
392 }
393 
394 
395 /* The iit file has a '\0' after each string, so functions know where
396    it ends */
397 char *
IIT_typestring(T this,int type)398 IIT_typestring (T this, int type) {
399   UINT4 start;
400 
401   start = this->typepointers[type];
402   return &(this->typestrings[start]);
403 }
404 
405 int
IIT_typeint(T this,char * typestring)406 IIT_typeint (T this, char *typestring) {
407   int i = 0;
408   UINT4 start;
409 
410   while (i < this->ntypes) {
411     start = this->typepointers[i];
412     if (!strcmp(typestring,&(this->typestrings[start]))) {
413       return i;
414     }
415     i++;
416   }
417 
418   return -1;
419 }
420 
421 char *
IIT_fieldstring(T this,int fieldint)422 IIT_fieldstring (T this, int fieldint) {
423   UINT4 start;
424 
425   start = this->fieldpointers[fieldint];
426   return &(this->fieldstrings[start]);
427 }
428 
429 int
IIT_fieldint(T this,char * fieldstring)430 IIT_fieldint (T this, char *fieldstring) {
431   int i = 0;
432   UINT4 start;
433 
434   while (i < this->nfields) {
435     start = this->fieldpointers[i];
436     if (!strcmp(fieldstring,&(this->fieldstrings[start]))) {
437       return i;
438     }
439     i++;
440   }
441 
442   return -1;
443 }
444 
445 
446 char *
IIT_label(T this,int index,bool * allocp)447 IIT_label (T this, int index, bool *allocp) {
448   int recno;
449 #ifdef HAVE_64_BIT
450   UINT8 start;
451 #else
452   UINT4 start;
453 #endif
454 
455   recno = index - 1; /* Convert to 0-based */
456 
457 #ifdef WORDS_BIGENDIAN
458 #ifdef HAVE_64_BIT
459   if (this->label_pointers_8p == true) {
460     start = Bigendian_convert_uint8(this->labelpointers8[recno]);
461   } else {
462     start = (UINT8) Bigendian_convert_uint(this->labelpointers[recno]);
463   }
464 #else
465   start = Bigendian_convert_uint(this->labelpointers[recno]);
466 #endif
467 #else
468 #ifdef HAVE_64_BIT
469   if (this->label_pointers_8p == true) {
470     start = this->labelpointers8[recno];
471   } else {
472     start = (UINT8) this->labelpointers[recno];
473   }
474 #else
475   start = this->labelpointers[recno];
476 #endif
477 #endif
478   *allocp = false;
479   return &(this->labels[start]);
480 }
481 
482 
483 static char EMPTY_STRING[1] = {'\0'};
484 
485 /* The iit file has a '\0' after each string, so functions know where
486    it ends */
487 /* Note: annotation itself is never allocated */
488 char *
IIT_annotation(char ** restofheader,T this,int index,bool * alloc_header_p)489 IIT_annotation (char **restofheader, T this, int index, bool *alloc_header_p) {
490   int recno;
491   char *annotation, *p;
492   int len;
493 #ifdef HAVE_64_BIT
494   UINT8 start;
495 #else
496   UINT4 start;
497 #endif
498 
499 
500   recno = index - 1; /* Convert to 0-based */
501 #ifdef WORDS_BIGENDIAN
502 #ifdef HAVE_64_BIT
503   if (this->annot_pointers_8p == true) {
504     start = Bigendian_convert_uint8(this->annotpointers8[recno]);
505   } else {
506     start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
507   }
508 #else
509   start = Bigendian_convert_uint(this->annotpointers[recno]);
510 #endif
511 #else
512 #ifdef HAVE_64_BIT
513   if (this->annot_pointers_8p == true) {
514     start = this->annotpointers8[recno];
515   } else {
516     start = (UINT8) this->annotpointers[recno];
517   }
518 #else
519   start = this->annotpointers[recno];
520 #endif
521 #endif
522 
523   if (this->version <= 4) {
524     *restofheader = EMPTY_STRING;
525 
526     *alloc_header_p = false;
527     return &(this->annotations[start]);
528   } else {
529     /* Versions 5 and higher include rest of header with
530        annotation.  Don't return initial '\n', unless annotation is empty */
531     annotation = &(this->annotations[start]);
532     if (annotation[0] == '\0') {
533       *restofheader = annotation; /* Both are empty strings */
534 
535       *alloc_header_p = false;
536       return annotation;
537 
538     } else if (annotation[0] == '\n') {
539       *restofheader = EMPTY_STRING;
540 
541       *alloc_header_p = false;
542       return &(annotation[1]);
543 
544     } else {
545       p = annotation;
546       while (*p != '\0' && *p != '\n') p++;
547       len = (p - annotation)/sizeof(char);
548       *restofheader = (char *) MALLOC((1+len+1)*sizeof(char));
549       *restofheader[0] = ' ';
550       strncpy(&((*restofheader)[1]),annotation,len);
551       (*restofheader)[1+len] = '\0';
552 
553       if (*p == '\n') p++;
554 
555       *alloc_header_p = true;
556       return p;
557     }
558   }
559 }
560 
561 /* The iit file has a '\0' after each string, so functions know where
562    it ends */
563 char
IIT_annotation_firstchar(T this,int index)564 IIT_annotation_firstchar (T this, int index) {
565   int recno;
566 #ifdef HAVE_64_BIT
567   UINT8 start;
568 #else
569   UINT4 start;
570 #endif
571 
572   recno = index - 1; /* Convert to 0-based */
573 
574 #ifdef WORDS_BIGENDIAN
575 #ifdef HAVE_64_BIT
576   if (this->annot_pointers_8p == true) {
577     start = Bigendian_convert_uint8(this->annotpointers8[recno]);
578   } else {
579     start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
580   }
581 #else
582   start = Bigendian_convert_uint(this->annotpointers[recno]);
583 #endif
584 #else
585 #ifdef HAVE_64_BIT
586   if (this->annot_pointers_8p == true) {
587     start = this->annotpointers8[recno];
588   } else {
589     start = (UINT8) this->annotpointers[recno];
590   }
591 #else
592   start = this->annotpointers[recno];
593 #endif
594 #endif
595 
596   return this->annotations[start];
597 }
598 
599 #ifdef HAVE_64_BIT
600 UINT8
601 #else
602 UINT4
603 #endif
IIT_annotation_strlen(T this,int index)604 IIT_annotation_strlen (T this, int index) {
605   int recno;
606 #ifdef HAVE_64_BIT
607   UINT8 start, end;
608 #else
609   UINT4 start, end;
610 #endif
611 
612   recno = index - 1; /* Convert to 0-based */
613 
614 #ifdef WORDS_BIGENDIAN
615 #ifdef HAVE_64_BIT
616   if (this->annot_pointers_8p == true) {
617     start = Bigendian_convert_uint8(this->annotpointers8[recno]);
618     end = Bigendian_convert_uint8(this->annotpointers8[recno+1]);
619   } else {
620     start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
621     end = (UINT8) Bigendian_convert_uint(this->annotpointers[recno+1]);
622   }
623 #else
624   start = Bigendian_convert_uint(this->annotpointers[recno]);
625   end = Bigendian_convert_uint(this->annotpointers[recno+1]);
626 #endif
627 #else
628 #ifdef HAVE_64_BIT
629   if (this->annot_pointers_8p == true) {
630     start = this->annotpointers8[recno];
631     end = this->annotpointers8[recno+1];
632   } else {
633     start = (UINT8) this->annotpointers[recno];
634     end = (UINT8) this->annotpointers[recno+1];
635   }
636 #else
637   start = this->annotpointers[recno];
638   end = this->annotpointers[recno+1];
639 #endif
640 #endif
641 
642   /*
643   if (strlen(&(this->annotations[start])) != (end - start - 1)) {
644     printf("Problem with %s: %d != %u\n",
645     &(this->labels[this->labelpointers[recno]]),strlen(&(this->annotations[start])),end-start-1);
646     abort();
647   } else {
648     printf("Okay %s: %d == %u\n",
649     &(this->labels[this->labelpointers[recno]]),strlen(&(this->annotations[start])),end-start-1);
650   }
651   */
652 
653   return (end - start - 1);	/* Subtract terminal '\0' */
654 }
655 
656 /* Always allocated */
657 char *
IIT_fieldvalue(T this,int index,int fieldint)658 IIT_fieldvalue (T this, int index, int fieldint) {
659   char *fieldvalue, *annotation, *p, *q;
660   int recno, fieldno = 0, fieldlen;
661 #ifdef HAVE_64_BIT
662   UINT8 start;
663 #else
664   UINT4 start;
665 #endif
666   bool allocp;
667 
668   recno = index - 1; /* Convert to 0-based */
669 #ifdef WORDS_BIGENDIAN
670 #ifdef HAVE_64_BIT
671   if (this->annot_pointers_8p == true) {
672     start = Bigendian_convert_uint8(this->annotpointers8[recno]);
673   } else {
674     start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
675   }
676 #else
677   start = Bigendian_convert_uint(this->annotpointers[recno]);
678 #endif
679 #else
680 #ifdef HAVE_64_BIT
681   if (this->annot_pointers_8p == true) {
682     start = this->annotpointers8[recno];
683   } else {
684     start = (UINT8) this->annotpointers[recno];
685   }
686 #else
687   start = this->annotpointers[recno];
688 #endif
689 #endif
690   annotation = &(this->annotations[start]);
691   allocp = false;
692 
693   p = annotation;
694 
695   /* Starting with version 5, annotation should have '\n' from the header line.  */
696   while (*p != '\0' && *p != '\n') p++;
697   if (*p == '\n') p++;
698 
699   while (*p != '\0' && fieldno < fieldint) {
700     if (*p == '\n') {
701       fieldno++;
702     }
703     p++;
704   }
705 
706   if (*p == '\0') {
707     fieldvalue = (char *) CALLOC(1,sizeof(char));
708     fieldvalue[0] = '\0';
709   } else {
710     q = p;
711     while (*q != '\0' && *q != '\n') {
712       q++;
713     }
714     fieldlen = (q - p)/sizeof(char);
715     fieldvalue = (char *) MALLOC((fieldlen+1)*sizeof(char));
716     strncpy(fieldvalue,p,fieldlen);
717     fieldvalue[fieldlen] = '\0';
718   }
719 
720   if (allocp == true) {
721     FREE(annotation);
722   }
723 
724   return fieldvalue;
725 }
726 
727 
728 void
IIT_dump_divstrings(FILE * fp,T this)729 IIT_dump_divstrings (FILE *fp, T this) {
730   int divno;
731   UINT4 start;
732 
733   /* Start with 1, because first divno has no name */
734   for (divno = 1; divno < this->ndivs; divno++) {
735     start = this->divpointers[divno];
736     fprintf(fp,"%s ",&(this->divstrings[start]));
737   }
738   fprintf(fp,"\n");
739 
740   return;
741 }
742 
743 
744 void
IIT_dump_typestrings(FILE * fp,T this)745 IIT_dump_typestrings (FILE *fp, T this) {
746   int type;
747   UINT4 start;
748 
749   for (type = 0; type < this->ntypes; type++) {
750     start = this->typepointers[type];
751     fprintf(fp,"%d\t%s\n",type,&(this->typestrings[start]));
752   }
753   return;
754 }
755 
756 void
IIT_dump_fieldstrings(FILE * fp,T this)757 IIT_dump_fieldstrings (FILE *fp, T this) {
758   int field;
759   UINT4 start;
760 
761   for (field = 0; field < this->nfields; field++) {
762     start = this->fieldpointers[field];
763     fprintf(fp,"%d\t%s\n",field,&(this->fieldstrings[start]));
764   }
765   return;
766 }
767 
768 void
IIT_dump_labels(FILE * fp,T this)769 IIT_dump_labels (FILE *fp, T this) {
770   int i;
771 #ifdef HAVE_64_BIT
772   UINT8 start;
773 #else
774   UINT4 start;
775 #endif
776   char *label;
777 
778   for (i = 0; i < this->total_nintervals; i++) {
779 #ifdef WORDS_BIGENDIAN
780 #ifdef HAVE_64_BIT
781     if (this->label_pointers_8p == true) {
782       start = Bigendian_convert_uint8(this->labelpointers8[i]);
783     } else {
784       start = (UINT8) Bigendian_convert_uint(this->labelpointers[i]);
785     }
786 #else
787     start = Bigendian_convert_uint(this->labelpointers[i]);
788 #endif
789 #else
790 #ifdef HAVE_64_BIT
791     if (this->label_pointers_8p == true) {
792       start = this->labelpointers8[i];
793     } else {
794       start = (UINT8) this->labelpointers[i];
795     }
796 #else
797     start = this->labelpointers[i];
798 #endif
799 #endif
800     label = &(this->labels[start]);
801     fprintf(fp,"%s ",label);
802   }
803   fprintf(fp,"\n");
804   return;
805 }
806 
807 
808 void
IIT_dump(T this,bool sortp)809 IIT_dump (T this, bool sortp) {
810   int divno, i;
811   Interval_T interval;
812   char *divstring;
813   char *labelptr, *annotptr, c;
814   int *matches, nmatches, index;
815   char *label, *annotation, *restofheader;
816   bool allocp;
817 
818   if (sortp == false) {
819     labelptr = this->labels;
820     annotptr = this->annotations;
821   }
822 
823   for (divno = 0; divno < this->ndivs; divno++) {
824     divstring = IIT_divstring(this,divno);
825 
826     if (sortp == true) {
827       if (this->nintervals[divno] > 0) {
828 	matches = IIT_get(&nmatches,this,divstring,/*x*/0,/*y*/-1U,/*sortp*/true);
829 	for (i = 0; i < nmatches; i++) {
830 	  index = matches[i];
831 	  label = IIT_label(this,index,&allocp);
832 	  printf(">%s",label);
833 	  if (allocp == true) {
834 	    FREE(label);
835 	  }
836 
837 	  interval = IIT_interval(this,index);
838 	  if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
839 	    /* No interval */
840 	    printf("\n");
841 	    annotation = IIT_annotation(&restofheader,this,index,&allocp);
842 	    printf("%s",annotation);
843 	    if (allocp == true) {
844 	      FREE(restofheader);
845 	    }
846 
847 	  } else {
848 	    if (divno > 0) {
849 	      /* zeroth divno has empty string */
850 	      printf(" %s:",divstring);
851 	    }
852 
853 	    if (Interval_sign(interval) < 0) {
854 	      printf("%u..%u",Interval_high(interval),Interval_low(interval));
855 	    } else {
856 	      printf("%u..%u",Interval_low(interval),Interval_high(interval));
857 	    }
858 	    if (Interval_type(interval) > 0) {
859 	      printf(" %s",IIT_typestring(this,Interval_type(interval)));
860 	    }
861 
862 	    annotation = IIT_annotation(&restofheader,this,index,&allocp);
863 	    printf("%s\n",restofheader);
864 	    printf("%s",annotation);
865 	    if (allocp == true) {
866 	      FREE(restofheader);
867 	    }
868 	  }
869 	}
870 
871 	FREE(matches);
872       }
873 
874     } else {
875       for (i = 0; i < this->nintervals[divno]; i++) {
876 	printf(">");
877 	while ((c = *labelptr++) != '\0') {
878 	  printf("%c",c);
879 	}
880 	printf(" ");
881 
882 	interval = &(this->intervals[divno][i]);
883 	if (divno <= 0) {
884 	  /* zeroth divno has empty string */
885 	} else if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
886 	  /* Ignore divstring */
887 	} else {
888 	  printf("%s:",divstring);
889 	}
890 
891 	if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
892 	  /* Ignore interval and type */
893 	} else {
894 	  if (Interval_sign(interval) < 0) {
895 	    printf("%u..%u",Interval_high(interval),Interval_low(interval));
896 	  } else {
897 	    printf("%u..%u",Interval_low(interval),Interval_high(interval));
898 	  }
899 	  if (Interval_type(interval) > 0) {
900 	    printf(" %s",IIT_typestring(this,Interval_type(interval)));
901 	  }
902 	}
903 
904 	if (this->version <= 4) {
905 	  printf("\n");
906 	  while ((c = *annotptr++) != '\0') {
907 	    printf("%c",c);
908 	  }
909 	} else {
910 	  /* Versions 5 and higher include rest of header with
911 	     annotation.  Don't print initial '\n', unless annotation is empty */
912 	  if (*annotptr == '\0') {
913 	    printf("\n");
914 	    annotptr++;
915 	  } else if (*annotptr == '\n') {
916 	    /* No rest of header */
917 	    while ((c = *annotptr++) != '\0') {
918 	      printf("%c",c);
919 	    }
920 	  } else {
921 	    printf(" ");
922 	    while ((c = *annotptr++) != '\0') {
923 	      printf("%c",c);
924 	    }
925 	  }
926 	}
927       }
928     }
929   }
930 
931   return;
932 }
933 
934 
935 /* For chromosome.iit file, which is stored in version 1 */
936 void
IIT_dump_simple(T this)937 IIT_dump_simple (T this) {
938   int index = 0, i;
939   Interval_T interval;
940   Chrpos_T startpos, endpos;
941   char *label;
942   bool allocp;
943 
944   for (i = 0; i < this->nintervals[0]; i++) {
945     interval = &(this->intervals[0][i]);
946     label = IIT_label(this,index+1,&allocp);
947     printf("%s\t",label);
948     if (allocp == true) {
949       FREE(label);
950     }
951     startpos = Interval_low(interval);
952     endpos = startpos + Interval_length(interval) - 1U;
953 
954     printf("%u..%u\t",startpos+1U,endpos+1U);
955 
956     printf("%u",Interval_length(interval));
957     if (Interval_type(interval) > 0) {
958       printf("\t%s",IIT_typestring(this,Interval_type(interval)));
959     }
960     printf("\n");
961 
962     index++;
963   }
964 
965   return;
966 }
967 
968 
969 #if 0
970 /* For higher version files, which are divided into divs */
971 void
972 IIT_dump_formatted (T this, bool directionalp) {
973   int divno, index = 0, i;
974   Interval_T interval;
975   Chrpos_T startpos, endpos;
976   char *label, *divstring, firstchar;
977   bool allocp;
978 
979   for (divno = 0; divno < this->ndivs; divno++) {
980     divstring = IIT_divstring(this,divno);
981     for (i = 0; i < this->nintervals[divno]; i++) {
982       interval = &(this->intervals[divno][i]);
983       label = IIT_label(this,index+1,&allocp);
984       printf("%s\t",label);
985       if (allocp == true) {
986 	FREE(label);
987       }
988       startpos = Interval_low(interval);
989       endpos = startpos + Interval_length(interval) - 1U;
990 
991       if (divno > 0) {
992 	printf("%s:",divstring);
993       }
994       if (directionalp == false) {
995 	printf("%u..%u\t",startpos+1U,endpos+1U);
996       } else if (this->version <= 1) {
997 	firstchar = IIT_annotation_firstchar(this,index+1);
998 	if (firstchar == '-') {
999 	  printf("%u..%u\t",endpos+1U,startpos+1U);
1000 	} else {
1001 	  printf("%u..%u\t",startpos+1U,endpos+1U);
1002 	}
1003       } else {
1004 	if (Interval_sign(interval) < 0) {
1005 	  printf("%u..%u\t",endpos+1U,startpos+1U);
1006 	} else {
1007 	  printf("%u..%u\t",startpos+1U,endpos+1U);
1008 	}
1009       }
1010 
1011       printf("%u",Interval_length(interval));
1012       if (Interval_type(interval) > 0) {
1013 	printf("\t%s",IIT_typestring(this,Interval_type(interval)));
1014       }
1015       printf("\n");
1016 
1017       index++;
1018     }
1019   }
1020 
1021   return;
1022 }
1023 #endif
1024 
1025 
1026 #if 0
/* qsort comparator for unsigned int values in ascending order:
   returns -1, 0 or +1 when *x is less than, equal to or greater than *y. */
static int
uint_cmp (const void *x, const void *y) {
  const unsigned int lhs = * (const unsigned int *) x;
  const unsigned int rhs = * (const unsigned int *) y;

  /* Difference of two 0/1 comparisons: no overflow, unlike lhs - rhs */
  return (lhs > rhs) - (lhs < rhs);
}
1040 
1041 /* Need to work on */
1042 UINT4 *
1043 IIT_transitions (int **signs, int *nedges, T this) {
1044   UINT4 *edges, *starts, *ends;
1045   int nintervals, i, j, k;
1046   Interval_T interval;
1047   Uintlist_T startlist = NULL, endlist = NULL;
1048 
1049   for (i = 0; i < this->nintervals; i++) {
1050     interval = &(this->intervals[i]);
1051     startlist = Uintlist_push(startlist,Interval_low(interval));
1052     endlist = Uintlist_push(endlist,Interval_high(interval));
1053   }
1054 
1055   if (Uintlist_length(startlist) == 0) {
1056     edges = (unsigned int *) NULL;
1057     *signs = (int *) NULL;
1058     *nedges = 0;
1059   } else {
1060     starts = Uintlist_to_array(&nintervals,startlist);
1061     ends = Uintlist_to_array(&nintervals,endlist);
1062     qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
1063     qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);
1064 
1065     *nedges = nintervals+nintervals;
1066     *signs = (int *) CALLOC(*nedges,sizeof(int));
1067     edges = (unsigned int *) CALLOC(*nedges,sizeof(unsigned int));
1068     i = j = k = 0;
1069     while (i < nintervals && j < nintervals) {
1070       if (starts[i] <= ends[j]) {
1071 	(*signs)[k] = +1;
1072 	edges[k++] = starts[i++];
1073       } else {
1074 	(*signs)[k] = -1;
1075 	edges[k++] = ends[j++];
1076       }
1077     }
1078     while (i < nintervals) {
1079       (*signs)[k] = +1;
1080       edges[k++] = starts[i++];
1081     }
1082     while (j < nintervals) {
1083       (*signs)[k] = -1;
1084       edges[k++] = ends[j++];
1085     }
1086 
1087     FREE(ends);
1088     FREE(starts);
1089   }
1090 
1091   Uintlist_free(&endlist);
1092   Uintlist_free(&startlist);
1093 
1094   return edges;
1095 }
1096 
1097 UINT4 *
1098 IIT_transitions_subset (int **signs, int *nedges, T this, int *indices, int nindices) {
1099   UINT4 *edges, *starts, *ends;
1100   int nintervals, i, j, k;
1101   Interval_T interval;
1102   Uintlist_T startlist = NULL, endlist = NULL;
1103 
1104   for (k = 0; k < nindices; k++) {
1105     i = indices[k] - 1;
1106     interval = &(this->intervals[i]);
1107     startlist = Uintlist_push(startlist,Interval_low(interval));
1108     endlist = Uintlist_push(endlist,Interval_high(interval));
1109   }
1110 
1111   if (Uintlist_length(startlist) == 0) {
1112     edges = (unsigned int *) NULL;
1113     *signs = (int *) NULL;
1114     *nedges = 0;
1115   } else {
1116     starts = Uintlist_to_array(&nintervals,startlist);
1117     ends = Uintlist_to_array(&nintervals,endlist);
1118     qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
1119     qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);
1120 
1121     *nedges = nintervals+nintervals;
1122     *signs = (int *) CALLOC(*nedges,sizeof(int));
1123     edges = (unsigned int *) CALLOC(*nedges,sizeof(unsigned int));
1124     i = j = k = 0;
1125     while (i < nintervals && j < nintervals) {
1126       if (starts[i] <= ends[j]) {
1127 	(*signs)[k] = +1;
1128 	edges[k++] = starts[i++];
1129       } else {
1130 	(*signs)[k] = -1;
1131 	edges[k++] = ends[j++];
1132       }
1133     }
1134     while (i < nintervals) {
1135       (*signs)[k] = +1;
1136       edges[k++] = starts[i++];
1137     }
1138     while (j < nintervals) {
1139       (*signs)[k] = -1;
1140       edges[k++] = ends[j++];
1141     }
1142 
1143     FREE(ends);
1144     FREE(starts);
1145   }
1146 
1147   Uintlist_free(&endlist);
1148   Uintlist_free(&startlist);
1149 
1150   return edges;
1151 }
1152 #endif
1153 
1154 
1155 /* For IIT versions <= 2.  Previously sorted by Chrom_compare, but now
1156    we assume that chromosomes are represented by divs, which are
1157    pre-sorted by iit_store. */
1158 #if 0
/* qsort comparator for an array of C strings (char *).  qsort hands the
   comparator pointers to the array elements, so each argument is really
   a char ** and must be dereferenced once before calling strcmp.
   Returns <0, 0, or >0 as *x sorts before, equal to, or after *y. */
static int
string_compare (const void *x, const void *y) {
  const char *a = * (char * const *) x;
  const char *b = * (char * const *) y;

  return strcmp(a,b);
}
1166 
1167 static int *
1168 sort_matches_by_type (T this, int *matches, int nmatches, bool alphabetizep) {
1169   int *sorted;
1170   int type, index, i, j, k = 0, t;
1171   List_T *intervallists;
1172   Interval_T *intervals, interval;
1173   int *matches1, nmatches1, nintervals;
1174   char *typestring;
1175   char **strings;
1176 
1177   if (nmatches == 0) {
1178     return (int *) NULL;
1179   } else {
1180     sorted = (int *) CALLOC(nmatches,sizeof(int));
1181   }
1182 
1183   intervallists = (List_T *) CALLOC(this->ntypes,sizeof(List_T));
1184   for (i = 0; i < nmatches; i++) {
1185     index = matches[i];
1186     interval = &(this->intervals[0][index-1]);
1187     type = Interval_type(interval);
1188     intervallists[type] = List_push(intervallists[type],(void *) interval);
1189   }
1190 
1191   if (alphabetizep == true) {
1192     strings = (char **) CALLOC(this->ntypes,sizeof(char *));
1193 
1194     for (type = 0; type < this->ntypes; type++) {
1195       typestring = IIT_typestring(this,type);
1196       strings[type] = (char *) CALLOC(strlen(typestring)+1,sizeof(char));
1197       strcpy(strings[type],typestring);
1198     }
1199     qsort(strings,this->ntypes,sizeof(char *),string_compare);
1200   }
1201 
1202   for (t = 0; t < this->ntypes; t++) {
1203     if (alphabetizep == false) {
1204       type = t;
1205       typestring = IIT_typestring(this,type);
1206     } else {
1207       typestring = strings[t];
1208       type = IIT_typeint(this,typestring);
1209     }
1210 
1211     if ((nintervals = List_length(intervallists[type])) > 0) {
1212       intervals = (Interval_T *) List_to_array(intervallists[type],/*end*/NULL);
1213       qsort(intervals,nintervals,sizeof(Interval_T),Interval_cmp);
1214 
1215       i = 0;
1216       while (i < nintervals) {
1217 	interval = intervals[i];
1218 	matches1 = IIT_get_exact_multiple(&nmatches1,this,/*divstring*/NULL,Interval_low(interval),Interval_high(interval),type);
1219 	if (matches1 != NULL) {
1220 	  for (j = 0; j < nmatches1; j++) {
1221 	    sorted[k++] = matches1[j];
1222 	  }
1223 	  i += nmatches1;
1224 	  FREE(matches1);
1225 	}
1226       }
1227 
1228       FREE(intervals);
1229       List_free(&(intervallists[type]));
1230     }
1231 
1232   }
1233 
1234   if (alphabetizep == true) {
1235     for (t = 0; t < this->ntypes; t++) {
1236       FREE(strings[t]);
1237     }
1238     FREE(strings);
1239   }
1240 
1241   FREE(intervallists);
1242   return sorted;
1243 }
1244 #endif
1245 
1246 
1247 /* For IIT versions >= 3.  Assumes that matches are all in the same
1248    div */
1249 static int *
sort_matches_by_position(T this,int * matches,int nmatches)1250 sort_matches_by_position (T this, int *matches, int nmatches) {
1251   int *sorted, index, i;
1252   struct Interval_windex_T *intervals;
1253 
1254   if (nmatches == 0) {
1255     return (int *) NULL;
1256   } else {
1257     intervals = (struct Interval_windex_T *) CALLOC(nmatches,sizeof(struct Interval_windex_T));
1258     for (i = 0; i < nmatches; i++) {
1259       index = intervals[i].index = matches[i];
1260       intervals[i].interval = &(this->intervals[0][index-1]); /* Ignore divno here, because we have offset index */
1261     }
1262     qsort(intervals,nmatches,sizeof(struct Interval_windex_T),Interval_windex_cmp);
1263 
1264     sorted = (int *) CALLOC(nmatches,sizeof(int));
1265     for (i = 0; i < nmatches; i++) {
1266       sorted[i] = intervals[i].index;
1267     }
1268 
1269     FREE(intervals);
1270     return sorted;
1271   }
1272 }
1273 
1274 
1275 
1276 
1277 #if 0
1278 /* Need to work on */
1279 void
1280 IIT_dump_counts (T this, bool alphabetizep) {
1281   int type, divno, index, i, j, k, t;
1282   Interval_T interval;
1283   Uintlist_T *startlists, *endlists;
1284   int *matches, nmatches, nintervals;
1285   unsigned int *starts, *ends, edge;
1286   char *typestring;
1287   Chrom_T *chroms;
1288 
1289   startlists = (Uintlist_T *) CALLOC(this->ntypes,sizeof(Uintlist_T));
1290   endlists = (Uintlist_T *) CALLOC(this->ntypes,sizeof(Uintlist_T));
1291   for (i = 0; i < this->nintervals; i++) {
1292     interval = &(this->intervals[i]);
1293     type = Interval_type(interval);
1294     startlists[type] = Uintlist_push(startlists[type],Interval_low(interval));
1295     endlists[type] = Uintlist_push(endlists[type],Interval_high(interval));
1296   }
1297 
1298   if (alphabetizep == true) {
1299     chroms = (Chrom_T *) CALLOC(this->ntypes,sizeof(Chrom_T));
1300 
1301     for (type = 0; type < this->ntypes; type++) {
1302       typestring = IIT_typestring(this,type);
1303       chroms[type] = Chrom_from_string(typestring,/*mitochondrial_string*/NULL,/*order*/0U,/*circularp*/false,
1304 				       /*alt_scaffold_start*/0,/*alt_scaffold_end*/0);
1305     }
1306     qsort(chroms,this->ntypes,sizeof(Chrom_T),Chrom_compare);
1307   }
1308 
1309   for (t = 0; t < this->ntypes; t++) {
1310     if (alphabetizep == false) {
1311       type = t;
1312       typestring = IIT_typestring(this,type);
1313     } else {
1314       typestring = Chrom_string(chroms[t]); /* Not allocated; do not free */
1315       type = IIT_typeint(this,typestring);
1316     }
1317 
1318     if (Uintlist_length(startlists[type]) > 0) {
1319       starts = Uintlist_to_array(&nintervals,startlists[type]);
1320       ends = Uintlist_to_array(&nintervals,endlists[type]);
1321       qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
1322       qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);
1323 
1324       i = j = 0;
1325       while (i < nintervals || j < nintervals) {
1326 	if (i >= nintervals && j >= nintervals) {
1327 	  /* done */
1328 	  matches = (int *) NULL;
1329 	} else if (i >= nintervals) {
1330 	  /* work on remaining ends */
1331 	  edge = ends[j++];
1332 	  matches = IIT_get_typed(&nmatches,this,edge,edge,type,/*sortp*/false);
1333 	  printf("%s\t%u\tend\t%d",typestring,edge,nmatches);
1334 	  while (j < nintervals && ends[j] == edge) {
1335 	    j++;
1336 	  }
1337 	} else if (j >= nintervals) {
1338 	  /* work on remaining starts */
1339 	  edge = starts[i++];
1340 	  matches = IIT_get_typed(&nmatches,this,edge,edge,type,/*sortp*/false);
1341 	  printf("%s\t%u\tstart\t%d",typestring,edge,nmatches);
1342 	  while (i < nintervals && starts[i] == edge) {
1343 	    i++;
1344 	  }
1345 	} else if (starts[i] <= ends[j]) {
1346 	  edge = starts[i++];
1347 	  matches = IIT_get_typed(&nmatches,this,edge,edge,type,/*sortp*/false);
1348 	  printf("%s\t%u\tstart\t%d",typestring,edge,nmatches);
1349 	  while (i < nintervals && starts[i] == edge) {
1350 	    i++;
1351 	  }
1352 	} else {
1353 	  edge = ends[j++];
1354 	  matches = IIT_get_typed(&nmatches,this,edge,edge,type,/*sortp*/false);
1355 	  printf("%s\t%u\tend\t%d",typestring,edge,nmatches);
1356 	  while (j < nintervals && ends[j] == edge) {
1357 	    j++;
1358 	  }
1359 	}
1360 
1361 	if (matches != NULL) {
1362 	  index = matches[0];
1363 	  label = IIT_label(this,index,&allocp);
1364 	  printf("\t%s",label);
1365 	  if (allocp == true) {
1366 	    FREE(label);
1367 	  }
1368 
1369 	  for (k = 1; k < nmatches; k++) {
1370 	    index = matches[k];
1371 	    label = IIT_label(this,index,&allocp);
1372 	    printf(",%s",label);
1373 	    if (allocp == true) {
1374 	      FREE(label);
1375 	    }
1376 	  }
1377 	  printf("\n");
1378 	  FREE(matches);
1379 	}
1380       }
1381 
1382       Uintlist_free(&(endlists[type]));
1383       Uintlist_free(&(startlists[type]));
1384       FREE(ends);
1385       FREE(starts);
1386     }
1387 
1388   }
1389 
1390   if (alphabetizep == true) {
1391     for (t = 0; t < this->ntypes; t++) {
1392       Chrom_free(&(chroms[t]));
1393     }
1394     FREE(chroms);
1395   }
1396 
1397   FREE(endlists);
1398   FREE(startlists);
1399 
1400   return;
1401 }
1402 #endif
1403 
1404 
1405 /************************************************************************
1406  * For file format, see iit-write.c
1407  ************************************************************************/
1408 
1409 void
IIT_free(T * old)1410 IIT_free (T *old) {
1411   int divno;
1412 
1413   if (*old != NULL) {
1414     if ((*old)->name != NULL) {
1415       FREE((*old)->name);
1416     }
1417 
1418     if ((*old)->access == LOADED) {
1419       /* No need to munmap or free words */
1420 
1421     } else if ((*old)->access == MMAPPED) {
1422 #ifdef HAVE_MMAP
1423       munmap((void *) (*old)->annot_mmap,(*old)->annot_length);
1424       munmap((void *) (*old)->annotpointers_mmap,(*old)->annotpointers_length);
1425       munmap((void *) (*old)->label_mmap,(*old)->label_length);
1426       munmap((void *) (*old)->labelpointers_mmap,(*old)->labelpointers_length);
1427       munmap((void *) (*old)->labelorder_mmap,(*old)->labelorder_length);
1428       if ((*old)->valuep == true) {
1429 	munmap((void *) (*old)->value_mmap,(*old)->value_length);
1430 	munmap((void *) (*old)->valueorder_mmap,(*old)->valueorder_length);
1431       }
1432 #endif
1433       close((*old)->fd);
1434 
1435     } else if ((*old)->access == FILEIO) {
1436       FREE((*old)->annotations);
1437 #ifdef HAVE_64_BIT
1438       if ((*old)->annot_pointers_8p == true) {
1439 	FREE((*old)->annotpointers8);
1440       } else {
1441 	FREE((*old)->annotpointers);
1442       }
1443 #else
1444       FREE((*old)->annotpointers);
1445 #endif
1446       FREE((*old)->labels);
1447 #ifdef HAVE_64_BIT
1448       if ((*old)->label_pointers_8p == true) {
1449 	FREE((*old)->labelpointers8);
1450       } else {
1451 	FREE((*old)->labelpointers);
1452       }
1453 #else
1454       FREE((*old)->labelpointers);
1455 #endif
1456       FREE((*old)->labelorder);
1457       /* close((*old)->fd); -- closed in read_annotations */
1458 
1459       if ((*old)->valuep == true) {
1460 	FREE((*old)->values);
1461 	FREE((*old)->valueorder);
1462       }
1463 
1464     } else if ((*old)->access == ALLOCATED_PRIVATE) {
1465       /* Nothing to close.  IIT must have been created by IIT_new. */
1466 
1467     } else if ((*old)->access == ALLOCATED_SHARED) {
1468       /* Nothing to close.  IIT must have been created by IIT_new. */
1469 
1470     } else {
1471       abort();
1472     }
1473 
1474     if ((*old)->access == LOADED) {
1475       FREE((*old)->intervals);
1476       FREE((*old)->nodes);
1477       FREE((*old)->omegas);
1478       FREE((*old)->sigmas);
1479       if ((*old)->alphas != NULL) {
1480 	FREE((*old)->betas);
1481 	FREE((*old)->alphas);
1482       }
1483 
1484     } else {
1485       if ((*old)->fieldstrings != NULL) {
1486 	FREE((*old)->fieldstrings);
1487       }
1488       FREE((*old)->fieldpointers);
1489       FREE((*old)->typestrings);
1490       FREE((*old)->typepointers);
1491 
1492       FREE((*old)->intervals[0]);
1493       FREE((*old)->intervals);
1494 
1495       for (divno = 0; divno < (*old)->ndivs; divno++) {
1496 	/* Note: we are depending on Mem_free() to check that these are non-NULL */
1497 	FREE((*old)->nodes[divno]);
1498 	FREE((*old)->omegas[divno]);
1499 	FREE((*old)->sigmas[divno]);
1500 	if ((*old)->alphas != NULL) {
1501 	  FREE((*old)->betas[divno]);
1502 	  FREE((*old)->alphas[divno]);
1503 	}
1504       }
1505 
1506       FREE((*old)->nodes);
1507       FREE((*old)->omegas);
1508       FREE((*old)->sigmas);
1509       if ((*old)->alphas != NULL) {
1510 	FREE((*old)->betas);
1511 	FREE((*old)->alphas);
1512       }
1513 
1514       FREE((*old)->divstrings);
1515       FREE((*old)->divpointers);
1516       FREE((*old)->cum_nnodes);
1517       FREE((*old)->nnodes);
1518       FREE((*old)->cum_nintervals);
1519       FREE((*old)->nintervals);
1520     }
1521 
1522     FREE(*old);
1523 
1524   }
1525 
1526   return;
1527 }
1528 
1529 
1530 
/* Advances the position of fp by offset bytes relative to the current
   position.  Uses fseeko when available; otherwise falls back to fseek
   with the offset converted to long.  On a seek failure, prints a
   message to stderr and aborts. */
static void
move_relative (FILE *fp, off_t offset) {
  int status;

#ifdef HAVE_FSEEKO
  status = fseeko(fp,offset,SEEK_CUR);
#else
  status = fseek(fp,(long) offset,SEEK_CUR);
#endif

  if (status < 0) {
    fprintf(stderr,"Error in move_relative, seek\n");
    abort();
  }

  return;
}
1548 
1549 
1550 static size_t
skip_trees(size_t offset,size_t filesize,FILE * fp,char * filename,int skip_ndivs,int skip_nintervals,int skip_nnodes)1551 skip_trees (size_t offset, size_t filesize, FILE *fp, char *filename,
1552 	    int skip_ndivs, int skip_nintervals, int skip_nnodes) {
1553 
1554   size_t skipsize;
1555 
1556   /* 4 is for alphas, betas, sigmas, and omegas */
1557   skipsize = (skip_nintervals + skip_ndivs) * 4 * sizeof(int);
1558   skipsize += skip_nnodes * sizeof(struct FNode_T);
1559 
1560   if ((offset += skipsize) > filesize) {
1561     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_trees %zu, filesize %zu).  Did you generate it using iit_store?\n",
1562 	    filename,offset,filesize);
1563     exit(9);
1564   } else {
1565     move_relative(fp,skipsize);
1566   }
1567 
1568   return offset;
1569 }
1570 
1571 
1572 
1573 static char *
load_tree(char * memory,T new,int divno)1574 load_tree (char *memory, T new, int divno) {
1575 #ifdef DEBUG
1576   int i;
1577 #endif
1578 
1579   if (new->version < 2) {
1580 #if 0
1581     /* Computing only if needed */
1582     compute_flanking(new);
1583 #else
1584     new->alphas[divno] = new->betas[divno] = (int *) NULL;
1585 #endif
1586 
1587   } else {
1588     new->alphas[divno] = (int *) memory;
1589     memory += (new->nintervals[divno]+1) * sizeof(int);
1590 
1591     new->betas[divno] = (int *) memory;
1592     memory += (new->nintervals[divno]+1) * sizeof(int);
1593   }
1594 
1595   new->sigmas[divno] = (int *) memory;
1596   memory += (new->nintervals[divno]+1) * sizeof(int);
1597 
1598   new->omegas[divno] = (int *) memory;
1599   memory += (new->nintervals[divno]+1) * sizeof(int);
1600 
1601   if (new->nnodes[divno] == 0) {
1602     new->nodes[divno] = (struct FNode_T *) NULL;
1603   } else {
1604 #ifdef WORDS_BIGENDIAN
1605     /* Not supported */
1606     abort();
1607 #if 0
1608     new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
1609     for (i = 0; i < new->nnodes[divno]; i++) {
1610       Bigendian_fread_uint(&(new->nodes[divno][i].value),fp);
1611       Bigendian_fread_int(&(new->nodes[divno][i].a),fp);
1612       Bigendian_fread_int(&(new->nodes[divno][i].b),fp);
1613       Bigendian_fread_int(&(new->nodes[divno][i].leftindex),fp);
1614       Bigendian_fread_int(&(new->nodes[divno][i].rightindex),fp);
1615     }
1616 #endif
1617 
1618 #else
1619     if (sizeof(struct FNode_T) == sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int)) {
1620       new->nodes[divno] = (struct FNode_T *) memory;
1621       memory += new->nnodes[divno] * sizeof(struct FNode_T);
1622     } else {
1623       /* Not supported */
1624       abort();
1625 #if 0
1626       for (i = 0; i < new->nnodes[divno]; i++) {
1627 	fread(&(new->nodes[divno][i].value),sizeof(unsigned int),1,fp);
1628 	fread(&(new->nodes[divno][i].a),sizeof(int),1,fp);
1629 	fread(&(new->nodes[divno][i].b),sizeof(int),1,fp);
1630 	fread(&(new->nodes[divno][i].leftindex),sizeof(int),1,fp);
1631 	fread(&(new->nodes[divno][i].rightindex),sizeof(int),1,fp);
1632       }
1633 #endif
1634     }
1635 #endif
1636 
1637     debug(
1638 	  for (i = 0; i < new->nnodes[divno]; i++) {
1639 	    printf("Read node %d %d %d\n",new->nodes[divno][i].value,new->nodes[divno][i].a,new->nodes[divno][i].b);
1640 	  }
1641 	  );
1642   }
1643   debug(printf("\n"));
1644 
1645   return memory;
1646 }
1647 
1648 
1649 
1650 static size_t
read_tree(size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divno)1651 read_tree (size_t offset, size_t filesize, FILE *fp, char *filename, T new, int divno) {
1652   size_t items_read;
1653   int i;
1654 
1655   if (new->version < 2) {
1656 #if 0
1657     /* Computing only if needed */
1658     compute_flanking(new);
1659 #else
1660     new->alphas[divno] = new->betas[divno] = (int *) NULL;
1661 #endif
1662 
1663   } else {
1664     if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1665       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after alphas %zu, filesize %zu).  Did you generate it using iit_store?\n",
1666 	      filename,offset,filesize);
1667       exit(9);
1668     } else {
1669       new->alphas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1670       if ((items_read = FREAD_INTS(new->alphas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1671 	fprintf(stderr,"IIT file %s appears to be truncated.  items_read = %zu\n",
1672 		filename,items_read);
1673 	exit(9);
1674       }
1675     }
1676 
1677     if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1678       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after betas %zu, filesize %zu).  Did you generate it using iit_store?\n",
1679 	      filename,offset,filesize);
1680       exit(9);
1681     } else {
1682       new->betas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1683       if ((items_read = FREAD_INTS(new->betas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1684 	fprintf(stderr,"IIT file %s appears to be truncated.  items_read = %zu\n",filename,items_read);
1685 	exit(9);
1686       }
1687 #if 0
1688       debug(
1689 	    printf("betas[%d]:",divno);
1690 	    for (i = 0; i < new->nintervals[divno]+1; i++) {
1691 	      printf(" %d",new->betas[divno][i]);
1692 	    }
1693 	    printf("\n");
1694 	    );
1695 #endif
1696     }
1697   }
1698 
1699   if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1700     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %zu, filesize %zu).  Did you generate it using iit_store?\n",
1701 	    filename,offset,filesize);
1702     exit(9);
1703   } else {
1704     new->sigmas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1705     if ((items_read = FREAD_INTS(new->sigmas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1706       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
1707       exit(9);
1708     }
1709 #if 0
1710     debug(
1711 	  printf("sigmas[%d]:",divno);
1712 	  for (i = 0; i < new->nintervals[divno]+1; i++) {
1713 	    printf(" %d",new->sigmas[divno][i]);
1714 	  }
1715 	  printf("\n");
1716 	  );
1717 #endif
1718   }
1719 
1720   if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1721     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %zu, filesize %zu).  Did you generate it using iit_store?\n",
1722 	    filename,offset,filesize);
1723     exit(9);
1724   } else {
1725     new->omegas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1726     if ((items_read = FREAD_INTS(new->omegas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1727       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
1728       exit(9);
1729     }
1730 #if 0
1731     debug(
1732 	  printf("omegas[%d]:",divno);
1733 	  for (i = 0; i < new->nintervals[divno]+1; i++) {
1734 	    printf(" %d",new->omegas[divno][i]);
1735 	  }
1736 	  printf("\n");
1737 	  );
1738 #endif
1739   }
1740 
1741   debug(printf("nnodes[%d]: %d\n",divno,new->nnodes[divno]));
1742   if (new->nnodes[divno] == 0) {
1743     new->nodes[divno] = (struct FNode_T *) NULL;
1744   } else {
1745     new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
1746 #ifdef WORDS_BIGENDIAN
1747     for (i = 0; i < new->nnodes[divno]; i++) {
1748       Bigendian_fread_uint(&(new->nodes[divno][i].value),fp);
1749       Bigendian_fread_int(&(new->nodes[divno][i].a),fp);
1750       Bigendian_fread_int(&(new->nodes[divno][i].b),fp);
1751       Bigendian_fread_int(&(new->nodes[divno][i].leftindex),fp);
1752       Bigendian_fread_int(&(new->nodes[divno][i].rightindex),fp);
1753     }
1754     offset += (sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes[divno];
1755 #else
1756     if (sizeof(struct FNode_T) == sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int)) {
1757       offset += sizeof(struct FNode_T)*fread(new->nodes[divno],sizeof(struct FNode_T),new->nnodes[divno],fp);
1758     } else {
1759       for (i = 0; i < new->nnodes[divno]; i++) {
1760 	fread(&(new->nodes[divno][i].value),sizeof(unsigned int),1,fp);
1761 	fread(&(new->nodes[divno][i].a),sizeof(int),1,fp);
1762 	fread(&(new->nodes[divno][i].b),sizeof(int),1,fp);
1763 	fread(&(new->nodes[divno][i].leftindex),sizeof(int),1,fp);
1764 	fread(&(new->nodes[divno][i].rightindex),sizeof(int),1,fp);
1765       }
1766       offset += (sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes[divno];
1767     }
1768 #endif
1769     if (offset > filesize) {
1770       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %zu, filesize %zu).  Did you generate it using iit_store?\n",
1771 	      filename,offset,filesize);
1772       exit(9);
1773     }
1774 
1775 #if 1
1776     debug(
1777 	  for (i = 0; i < new->nnodes[divno]; i++) {
1778 	    printf("Read node %d %d %d\n",new->nodes[divno][i].value,new->nodes[divno][i].a,new->nodes[divno][i].b);
1779 	  }
1780 	  );
1781 #endif
1782 
1783   }
1784   debug(printf("\n"));
1785 
1786   return offset;
1787 }
1788 
1789 
1790 static size_t
skip_intervals(int * skip_nintervals,size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divstart,int divend)1791 skip_intervals (int *skip_nintervals, size_t offset, size_t filesize, FILE *fp, char *filename, T new,
1792 		int divstart, int divend) {
1793   int divno;
1794   size_t skipsize = 0;
1795 
1796   *skip_nintervals = 0;
1797   for (divno = divstart; divno <= divend; divno++) {
1798     *skip_nintervals += new->nintervals[divno];
1799   }
1800   if (new->version >= 2) {
1801     skipsize += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*(*skip_nintervals);
1802   } else {
1803     skipsize += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*(*skip_nintervals);
1804   }
1805 
1806   if ((offset += skipsize) > filesize) {
1807     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_intervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
1808 	    filename,offset,filesize);
1809     exit(9);
1810   } else {
1811     move_relative(fp,skipsize);
1812   }
1813 
1814   return offset;
1815 }
1816 
1817 
1818 static char *
load_intervals(char * memory,T new,int divno)1819 load_intervals (char *memory, T new, int divno) {
1820 
1821 #ifdef WORDS_BIGENDIAN
1822   /* Not supported */
1823   abort();
1824 #if 0
1825   for (i = 0; i < new->nintervals[divno]; i++) {
1826     Bigendian_fread_uint(&(new->intervals[divno][i].low),fp);
1827     Bigendian_fread_uint(&(new->intervals[divno][i].high),fp);
1828     if (new->version >= 2) {
1829       Bigendian_fread_int(&(new->intervals[divno][i].sign),fp);
1830     } else {
1831       new->intervals[divno][i].sign = +1;
1832     }
1833     Bigendian_fread_int(&(new->intervals[divno][i].type),fp);
1834   }
1835   if (new->version >= 2) {
1836     offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1837   } else {
1838     offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1839   }
1840 #endif
1841 
1842 #else
1843   if (new->version >= 2 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int)) {
1844     new->intervals[divno] = (struct Interval_T *) memory;
1845     memory += new->nintervals[divno] * sizeof(struct Interval_T);
1846 
1847   } else if (new->version <= 1 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)) {
1848     new->intervals[divno] = (struct Interval_T *) memory;
1849     memory += new->nintervals[divno] * sizeof(struct Interval_T);
1850 
1851   } else {
1852     /* Not supported */
1853     abort();
1854 
1855   }
1856 #endif
1857 
1858   return memory;
1859 }
1860 
1861 
1862 static size_t
read_intervals(size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divno)1863 read_intervals (size_t offset, size_t filesize, FILE *fp, char *filename, T new, int divno) {
1864   int i;
1865 
1866 #ifdef WORDS_BIGENDIAN
1867   for (i = 0; i < new->nintervals[divno]; i++) {
1868     Bigendian_fread_uint(&(new->intervals[divno][i].low),fp);
1869     Bigendian_fread_uint(&(new->intervals[divno][i].high),fp);
1870     if (new->version >= 2) {
1871       Bigendian_fread_int(&(new->intervals[divno][i].sign),fp);
1872     } else {
1873       new->intervals[divno][i].sign = +1;
1874     }
1875     Bigendian_fread_int(&(new->intervals[divno][i].type),fp);
1876   }
1877   if (new->version >= 2) {
1878     offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1879   } else {
1880     offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1881   }
1882 #else
1883   if (new->version >= 2 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int)) {
1884     offset += sizeof(struct Interval_T)*fread(new->intervals[divno],sizeof(struct Interval_T),new->nintervals[divno],fp);
1885   } else if (new->version <= 1 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)) {
1886     offset += sizeof(struct Interval_T)*fread(new->intervals[divno],sizeof(struct Interval_T),new->nintervals[divno],fp);
1887   } else {
1888     for (i = 0; i < new->nintervals[divno]; i++) {
1889       fread(&(new->intervals[divno][i].low),sizeof(unsigned int),1,fp);
1890       fread(&(new->intervals[divno][i].high),sizeof(unsigned int),1,fp);
1891       if (new->version >= 2) {
1892 	fread(&(new->intervals[divno][i].sign),sizeof(int),1,fp);
1893       } else {
1894 	new->intervals[divno][i].sign = +1;
1895       }
1896       fread(&(new->intervals[divno][i].type),sizeof(int),1,fp);
1897     }
1898     if (new->version >= 2) {
1899       offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1900     } else {
1901       offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1902     }
1903   }
1904 #endif
1905   if (offset > filesize) {
1906     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
1907 	    filename,offset,filesize);
1908     exit(9);
1909   }
1910 
1911   return offset;
1912 }
1913 
1914 
1915 static char *
load_words(char * memory,T new)1916 load_words (char *memory, T new) {
1917   off_t stringlen;
1918 #ifdef DEBUG
1919   int i;
1920 #endif
1921 
1922   new->typepointers = (unsigned int *) memory;
1923   memory += (new->ntypes+1) * sizeof(unsigned int);
1924   debug(
1925 	printf("typepointers:");
1926 	for (i = 0; i < new->ntypes+1; i++) {
1927 	  printf(" %u",new->typepointers[i]);
1928 	}
1929 	printf("\n");
1930 	);
1931 
1932   /* Note: To keep ints aligned, would be better to make stringlen a
1933      multiple of 4, and put a terminating '\0' as needed */
1934   stringlen = new->typepointers[new->ntypes];
1935   if (stringlen == 0) {
1936     new->typestrings = (char *) NULL;
1937   } else {
1938     new->typestrings = (char *) memory;
1939     memory += stringlen * sizeof(char);
1940   }
1941   debug(
1942 	printf("typestrings:\n");
1943 	for (s = 0; s < stringlen; s++) {
1944 	  printf("%c",new->typestrings[s]);
1945 	}
1946 	printf("\n");
1947 	);
1948 
1949   if (new->version < 2) {
1950     new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
1951     new->fieldpointers[0] = '\0';
1952   } else {
1953     new->fieldpointers = (unsigned int *) memory;
1954     memory += (new->nfields+1) * sizeof(unsigned int);
1955   }
1956 
1957   /* Note: To keep ints aligned, would be better to make stringlen a
1958      multiple of 4, and put a terminating '\0' as needed */
1959   stringlen = new->fieldpointers[new->nfields];
1960   if (stringlen == 0) {
1961     new->fieldstrings = (char *) NULL;
1962   } else {
1963     new->fieldstrings = (char *) memory;
1964     memory += stringlen * sizeof(char);
1965   }
1966   debug(
1967 	printf("fieldstrings:\n");
1968 	for (s = 0; s < stringlen; s++) {
1969 	  printf("%c",new->fieldstrings[s]);
1970 	}
1971 	printf("\n");
1972 	);
1973 
1974   if (new->valuep == true) {
1975     debug(printf("Starting load of valueorder offset/length\n"));
1976     /* new->valueorder_offset = offset; -- Needed only for mmap_annotations */
1977     new->valueorder = (int *) memory;
1978     new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
1979     memory += new->valueorder_length;
1980 
1981     debug1(printf("Starting read of value offset/length\n"));
1982     /* new->value_offset = offset; -- Needed only for mmap_annotations */
1983     new->values = (double *) memory;
1984     new->value_length = (size_t) (new->total_nintervals*sizeof(double));
1985     memory += new->value_length;
1986   }
1987 
1988   debug(printf("Starting load of labelorder at %p\n",memory));
1989   /* new->labelorder_offset = offset; -- Needed only for mmap_annotations */
1990   new->labelorder = (int *) memory;
1991   new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
1992   memory += new->labelorder_length;
1993   debug(
1994 	printf("labelorder:\n");
1995 	for (i = 0; i < new->total_nintervals; i++) {
1996 	  printf("%d ",new->labelorder[i]);
1997 	}
1998 	printf("\n");
1999 	);
2000 
2001   debug(printf("Starting load of labelpointer offset/length\n"));
2002   /* new->labelpointers_offset = offset; -- Needed only for mmap_annotations */
2003 #ifdef HAVE_64_BIT
2004   if (new->label_pointers_8p == true) {
2005     new->labelpointers8 = (UINT8 *) memory;
2006     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2007     memory += new->total_nintervals * sizeof(UINT8);
2008     new->label_length = (size_t) * (UINT8 *) memory;
2009     memory += sizeof(UINT8);
2010   } else {
2011     new->labelpointers = (UINT4 *) memory;
2012     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2013     memory += new->total_nintervals * sizeof(UINT4);
2014     new->label_length = (size_t) * (UINT4 *) memory;
2015     memory += sizeof(UINT4);
2016   }
2017 #else
2018   new->labelpointers = (UINT4 *) memory;
2019   new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2020   memory += new->total_nintervals * sizeof(UINT4);
2021   new->label_length = (size_t) * (UINT4 *) memory;
2022   memory += sizeof(UINT4);
2023 #endif
2024 
2025   debug(printf("Starting load of label offset/length\n"));
2026   /* new->label_offset = offset; -- Needed only for mmap_annotations */
2027   new->labels = (char *) memory;
2028   /* new->label_length computed above */
2029   memory += new->label_length;
2030 
2031   debug(printf("Starting load of annotpointers offset/length\n"));
2032   /* new->annotpointers_offset = offset; -- Needed only for mmap_annotations */
2033 #ifdef HAVE_64_BIT
2034   if (new->annot_pointers_8p == true) {
2035     new->annotpointers8 = (UINT8 *) memory;
2036     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2037   } else {
2038     new->annotpointers = (UINT4 *) memory;
2039     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2040   }
2041 #else
2042   new->annotpointers = (UINT4 *) memory;
2043   new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2044 #endif
2045   memory += new->annotpointers_length;
2046 
2047   debug(printf("Starting load of annotations at %p\n",memory));
2048   /* new->annot_offset = offset; -- Needed only for mmap_annotations */
2049   new->annotations = (char *) memory;
2050   /* new->annot_length = filesize - new->annot_offset; -- Needed only for mmap_annotations or read_words */
2051   /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
2052 
2053   return memory;
2054 }
2055 
2056 
2057 
2058 static void
read_words(size_t offset,size_t filesize,FILE * fp,T new)2059 read_words (size_t offset, size_t filesize, FILE *fp, T new) {
2060   size_t stringlen;
2061 #ifdef HAVE_64_BIT
2062   UINT8 length8;
2063 #endif
2064   UINT4 length;
2065 #ifdef DEBUG
2066   int i;
2067 #endif
2068 
2069   new->typepointers = (unsigned int *) CALLOC(new->ntypes+1,sizeof(unsigned int));
2070   offset += sizeof(int)*FREAD_UINTS(new->typepointers,new->ntypes+1,fp);
2071   debug(
2072 	printf("typepointers:");
2073 	for (i = 0; i < new->ntypes+1; i++) {
2074 	  printf(" %u",new->typepointers[i]);
2075 	}
2076 	printf("\n");
2077 	);
2078 
2079   stringlen = new->typepointers[new->ntypes];
2080   if (stringlen == 0) {
2081     new->typestrings = (char *) NULL;
2082   } else {
2083     new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
2084     offset += sizeof(char)*FREAD_CHARS(new->typestrings,stringlen,fp);
2085   }
2086   debug(
2087 	printf("typestrings:\n");
2088 	for (s = 0; s < stringlen; s++) {
2089 	  printf("%c",new->typestrings[s]);
2090 	}
2091 	printf("\n");
2092 	);
2093 
2094   new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
2095   if (new->version < 2) {
2096     new->fieldpointers[0] = '\0';
2097   } else {
2098     offset += sizeof(int)*FREAD_UINTS(new->fieldpointers,new->nfields+1,fp);
2099   }
2100   stringlen = new->fieldpointers[new->nfields];
2101   if (stringlen == 0) {
2102     new->fieldstrings = (char *) NULL;
2103   } else {
2104     new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
2105     offset += sizeof(char)*FREAD_CHARS(new->fieldstrings,stringlen,fp);
2106   }
2107   debug(
2108 	printf("fieldstrings:\n");
2109 	for (s = 0; s < stringlen; s++) {
2110 	  printf("%c",new->fieldstrings[s]);
2111 	}
2112 	printf("\n");
2113 	);
2114 
2115   if (new->valuep == true) {
2116     debug1(printf("Starting read of valueorder offset/length\n"));
2117     new->valueorder_offset = offset;
2118     new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
2119     /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
2120     move_relative(fp,new->valueorder_length);
2121     offset += new->valueorder_length;
2122 
2123     debug1(printf("Starting read of value offset/length\n"));
2124     new->value_offset = offset;
2125     new->value_length = (size_t) (new->total_nintervals*sizeof(double));
2126     /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
2127     move_relative(fp,new->value_length);
2128     offset += new->value_length;
2129   }
2130 
2131   debug1(printf("Starting read of labelorder offset/length\n"));
2132   new->labelorder_offset = offset;
2133   new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
2134   /* fprintf(stderr,"Doing a move_relative for labelorder_length %zu\n",new->labelorder_length); */
2135   move_relative(fp,new->labelorder_length);
2136   offset += new->labelorder_length;
2137 
2138   debug1(printf("Starting read of labelpointer offset/length\n"));
2139   new->labelpointers_offset = offset;
2140 #ifdef HAVE_64_BIT
2141   if (new->label_pointers_8p == true) {
2142     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2143     move_relative(fp,new->total_nintervals * sizeof(UINT8));
2144     FREAD_UINT8(&length8,fp);
2145     new->label_length = (size_t) length8;
2146   } else {
2147     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2148     /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
2149     move_relative(fp,new->total_nintervals * sizeof(UINT4));
2150     FREAD_UINT(&length,fp);
2151     new->label_length = (size_t) length;
2152   }
2153 #else
2154   new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2155   /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
2156   move_relative(fp,new->total_nintervals * sizeof(UINT4));
2157   FREAD_UINT(&length,fp);
2158   new->label_length = (size_t) length;
2159 #endif
2160   offset += new->labelpointers_length;
2161 
2162   debug1(printf("Starting read of label offset/length\n"));
2163   new->label_offset = offset;
2164   /* new->label_length computed above */
2165   /* fprintf(stderr,"Doing a move_relative for label_length %zu\n",new->label_length); */
2166   move_relative(fp,new->label_length);
2167   offset += new->label_length;
2168 
2169   debug1(printf("Starting read of annotpointers offset/length\n"));
2170   new->annotpointers_offset = offset;
2171 #ifdef HAVE_64_BIT
2172   if (new->annot_pointers_8p == true) {
2173     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2174   } else {
2175     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2176   }
2177 #else
2178   new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2179 #endif
2180   offset += new->annotpointers_length;
2181 
2182   new->annot_offset = offset;
2183 
2184 #ifdef BAD_32BIT
2185   /* This fails if length > 4 GB */
2186   move_relative(fp,new->total_nintervals * sizeof(unsigned int));
2187   FREAD_UINT(&length,fp);
2188   new->annot_length = (size_t) length;
2189   fprintf(stderr,"Incorrect length: %u\n",length);
2190 #else
2191   new->annot_length = filesize - new->annot_offset;
2192   /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
2193 #endif
2194 
2195 #if 0
2196   /* To do this check, we need to get stringlen for annotation similarly to that for labels */
2197   last_offset = offset + sizeof(char)*stringlen;
2198   if (last_offset != filesize) {
2199     fprintf(stderr,"Problem with last_offset (%zu) not equal to filesize = (%zu)\n",
2200 	    last_offset,filesize);
2201     exit(9);
2202   }
2203 #endif
2204 
2205   return;
2206 }
2207 
2208 static void
read_words_debug(size_t offset,size_t filesize,FILE * fp,T new)2209 read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
2210   size_t stringlen, s;
2211 #ifdef HAVE_64_BIT
2212   UINT8 length8;
2213 #endif
2214   UINT4 length;
2215   int i;
2216 #if 0
2217   size_t last_offset;
2218 #endif
2219 
2220   new->typepointers = (unsigned int *) CALLOC(new->ntypes+1,sizeof(unsigned int));
2221   offset += sizeof(int)*FREAD_UINTS(new->typepointers,new->ntypes+1,fp);
2222   printf("typepointers:");
2223   for (i = 0; i < new->ntypes+1; i++) {
2224     printf(" %u",new->typepointers[i]);
2225   }
2226   printf("\n");
2227 
2228   stringlen = new->typepointers[new->ntypes];
2229   if (stringlen == 0) {
2230     new->typestrings = (char *) NULL;
2231   } else {
2232     new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
2233     offset += sizeof(char)*FREAD_CHARS(new->typestrings,stringlen,fp);
2234   }
2235   printf("typestrings:\n");
2236   for (s = 0; s < stringlen; s++) {
2237     printf("%c",new->typestrings[s]);
2238   }
2239   printf("\n");
2240 
2241   new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
2242   if (new->version < 2) {
2243     new->fieldpointers[0] = '\0';
2244   } else {
2245     offset += sizeof(int)*FREAD_UINTS(new->fieldpointers,new->nfields+1,fp);
2246   }
2247   stringlen = new->fieldpointers[new->nfields];
2248   if (stringlen == 0) {
2249     new->fieldstrings = (char *) NULL;
2250   } else {
2251     new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
2252     offset += sizeof(char)*FREAD_CHARS(new->fieldstrings,stringlen,fp);
2253   }
2254   printf("fieldstrings:\n");
2255   for (s = 0; s < stringlen; s++) {
2256     printf("%c",new->fieldstrings[s]);
2257   }
2258   printf("\n");
2259 
2260   if (new->valuep == true) {
2261     debug1(printf("Starting read of valueorder offset/length\n"));
2262     new->valueorder_offset = offset;
2263     new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
2264     /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
2265     move_relative(fp,new->valueorder_length);
2266     offset += new->valueorder_length;
2267 
2268     debug1(printf("Starting read of value offset/length\n"));
2269     new->value_offset = offset;
2270     new->value_length = (size_t) (new->total_nintervals*sizeof(double));
2271     /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
2272     move_relative(fp,new->value_length);
2273     offset += new->value_length;
2274   }
2275 
2276   debug1(printf("Starting read of labelorder offset/length\n"));
2277   new->labelorder_offset = offset;
2278   new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
2279   move_relative(fp,new->labelorder_length);
2280   offset += new->labelorder_length;
2281 
2282   debug1(printf("Starting read of labelpointers offset/length\n"));
2283   new->labelpointers_offset = offset;
2284 #ifdef HAVE_64_BIT
2285   if (new->label_pointers_8p == true) {
2286     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2287     move_relative(fp,new->total_nintervals * sizeof(UINT8));
2288     FREAD_UINT8(&length8,fp);
2289     new->label_length = (size_t) length8;
2290   } else {
2291     new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2292     move_relative(fp,new->total_nintervals * sizeof(UINT4));
2293     FREAD_UINT(&length,fp);
2294     new->label_length = (size_t) length;
2295   }
2296 #else
2297   new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2298   move_relative(fp,new->total_nintervals * sizeof(UINT4));
2299   FREAD_UINT(&length,fp);
2300   new->label_length = (size_t) length;
2301 #endif
2302   offset += new->labelpointers_length;
2303 
2304   fprintf(stderr,"label_length: %zu\n",new->label_length);
2305   debug1(printf("Starting read of label offset/length\n"));
2306   new->label_offset = offset;
2307   /* new->label_length computed above */
2308   move_relative(fp,new->label_length);
2309   offset += new->label_length;
2310 
2311   debug1(printf("Starting read of annotpointers offset/length\n"));
2312   new->annotpointers_offset = offset;
2313 #ifdef HAVE_64_BIT
2314   if (new->annot_pointers_8p == true) {
2315     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2316   } else {
2317     new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2318   }
2319 #else
2320   new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2321 #endif
2322   offset += new->annotpointers_length;
2323 
2324   new->annot_offset = offset;
2325 
2326 #ifdef BAD_32BIT
2327   /* This fails if length > 4 GB */
2328   move_relative(fp,new->total_nintervals * sizeof(unsigned int));
2329   FREAD_UINT(&length,fp);
2330   new->annot_length = (size_t) length;
2331   fprintf(stderr,"Incorrect length: %u\n",length);
2332 #else
2333   new->annot_length = filesize - new->annot_offset;
2334   fprintf(stderr,"annot_length: %zu\n",new->annot_length);
2335 #endif
2336 
2337 #if 0
2338   /* To do this check, we need to get stringlen for annotation similarly to that for labels */
2339   last_offset = offset + sizeof(char)*stringlen;
2340   if (last_offset != filesize) {
2341     fprintf(stderr,"Problem with last_offset (%zu) not equal to filesize = (%zu)\n",
2342 	    last_offset,filesize);
2343     exit(9);
2344   }
2345 #endif
2346 
2347   return;
2348 }
2349 
/* This function only assigns pointers.  Subsequent accesses to
   memory, other than char *, still need to be read correctly
   by bigendian machines */
/* Previously allowed read/write access, but we can assume read-only access */
#ifdef HAVE_MMAP
/* Opens FILENAME read-only (the descriptor is stored in new->fd and is
   left open) and memory-maps each section whose offset and length were
   previously recorded in NEW: valueorder and values (only when
   new->valuep is true), labelorder, labelpointers, labels, annotpointers
   and annotations.  For each section the raw mapping is kept in the
   corresponding *_mmap field, the typed pointer is set `remainder' bytes
   into that mapping, and the stored length is enlarged by `remainder'.

   readonlyp must be true (asserted).

   Returns true if every mapping succeeded.  Otherwise prints a message to
   stderr and returns false; the typed pointer of each failed section is
   left NULL.  Exits with status 9 if the file cannot be opened.

   NOTE(review): Access_mmap_offset is assumed to return NULL when a
   mapping fails -- confirm against access.c.  Failure is detected on the
   pointer it returns; testing &mapping[remainder] instead would miss a
   failed mapping whenever remainder is nonzero. */
static bool
mmap_annotations (char *filename, T new, bool readonlyp) {
  int remainder;
  bool mapped_p = true;

  assert(readonlyp == true);

  if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
    fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
    exit(9);
  }

  if (new->valuep == true) {
    new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
						       /*randomp*/true);
    debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
    if (new->valueorder_mmap == NULL) {
      new->valueorder = (int *) NULL;
      mapped_p = false;
    } else {
      new->valueorder = (int *) &(new->valueorder_mmap[remainder]);
    }
    new->valueorder_length += (size_t) remainder;

    new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
						  /*randomp*/true);
    debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
    if (new->value_mmap == NULL) {
      new->values = (double *) NULL;
      mapped_p = false;
    } else {
      new->values = (double *) &(new->value_mmap[remainder]);
    }
    new->value_length += (size_t) remainder;
  }

  new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
						     /*randomp*/true);
  debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
  if (new->labelorder_mmap == NULL) {
    new->labelorder = (int *) NULL;
    mapped_p = false;
  } else {
    new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
  }
  new->labelorder_length += (size_t) remainder;

  new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
							/*randomp*/true);
  debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
  /* Exactly one of the 4-byte / 8-byte pointer arrays is set; the other stays NULL */
#ifdef HAVE_64_BIT
  new->labelpointers8 = (UINT8 *) NULL;
#endif
  new->labelpointers = (UINT4 *) NULL;
  if (new->labelpointers_mmap == NULL) {
    mapped_p = false;
#ifdef HAVE_64_BIT
  } else if (new->label_pointers_8p == true) {
    new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
#endif
  } else {
    new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
  }
  new->labelpointers_length += (size_t) remainder;

  new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
						/*randomp*/true);
  debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
  if (new->label_mmap == NULL) {
    new->labels = (char *) NULL;
    mapped_p = false;
  } else {
    new->labels = (char *) &(new->label_mmap[remainder]);
  }
  new->label_length += (size_t) remainder;

  new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
							/*randomp*/true);
  debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
#ifdef HAVE_64_BIT
  new->annotpointers8 = (UINT8 *) NULL;
#endif
  new->annotpointers = (UINT4 *) NULL;
  if (new->annotpointers_mmap == NULL) {
    mapped_p = false;
#ifdef HAVE_64_BIT
  } else if (new->annot_pointers_8p == true) {
    new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
#endif
  } else {
    new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
  }
  new->annotpointers_length += (size_t) remainder;

  new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
						/*randomp*/true);
  debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
  if (new->annot_mmap == NULL) {
    new->annotations = (char *) NULL;
    mapped_p = false;
  } else {
    new->annotations = (char *) &(new->annot_mmap[remainder]);
  }
  new->annot_length += (size_t) remainder;

  if (mapped_p == false) {
    fprintf(stderr,"Memory mapping failed in reading IIT file %s.  Using slow file IO instead.\n",filename);
    return false;
  }

  return true;
}
#endif
2472 
2473 
2474 /* Used if access is FILEIO.  Subsequent accesses by bigendian
2475    machines to anything but (char *) will still need to convert. */
2476 static void
read_annotations(T new)2477 read_annotations (T new) {
2478 
2479   if (new->valuep == true) {
2480     file_move_absolute(new->fd,new->valueorder_offset,sizeof(int),/*n*/0);
2481     new->valueorder = (int *) CALLOC(new->total_nintervals,sizeof(int));
2482     read(new->fd,new->valueorder,new->total_nintervals*sizeof(int));
2483 
2484     file_move_absolute(new->fd,new->value_offset,sizeof(char),/*n*/0);
2485     new->values = (double *) CALLOC(new->value_length,sizeof(char));
2486     read(new->fd,new->values,new->value_length*sizeof(char));
2487   }
2488 
2489   file_move_absolute(new->fd,new->labelorder_offset,sizeof(int),/*n*/0);
2490   new->labelorder = (int *) CALLOC(new->total_nintervals,sizeof(int));
2491   read(new->fd,new->labelorder,new->total_nintervals*sizeof(int));
2492 
2493 #ifdef HAVE_64_BIT
2494   if (new->label_pointers_8p == true) {
2495     file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT8),/*n*/0);
2496     new->labelpointers8 = (UINT8 *) CALLOC(new->total_nintervals+1,sizeof(UINT8));
2497     read(new->fd,new->labelpointers8,(new->total_nintervals+1)*sizeof(UINT8));
2498     new->labelpointers = (UINT4 *) NULL;
2499   } else {
2500     file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT4),/*n*/0);
2501     new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2502     read(new->fd,new->labelpointers,(new->total_nintervals+1)*sizeof(UINT4));
2503     new->labelpointers8 = (UINT8 *) NULL;
2504   }
2505 #else
2506   file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT4),/*n*/0);
2507   new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2508   read(new->fd,new->labelpointers,(new->total_nintervals+1)*sizeof(UINT4));
2509 #endif
2510 
2511   file_move_absolute(new->fd,new->label_offset,sizeof(char),/*n*/0);
2512   new->labels = (char *) CALLOC(new->label_length,sizeof(char));
2513   read(new->fd,new->labels,new->label_length*sizeof(char));
2514 
2515 #ifdef HAVE_64_BIT
2516   if (new->annot_pointers_8p == true) {
2517     file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT8),/*n*/0);
2518     new->annotpointers8 = (UINT8 *) CALLOC(new->total_nintervals+1,sizeof(UINT8));
2519     read(new->fd,new->annotpointers8,(new->total_nintervals+1)*sizeof(UINT8));
2520     new->annotpointers = (UINT4 *) NULL;
2521   } else {
2522     file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT4),/*n*/0);
2523     new->annotpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2524     read(new->fd,new->annotpointers,(new->total_nintervals+1)*sizeof(UINT4));
2525     new->annotpointers8 = (UINT8 *) NULL;
2526   }
2527 #else
2528   file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT4),/*n*/0);
2529   new->annotpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2530   read(new->fd,new->annotpointers,(new->total_nintervals+1)*sizeof(UINT4));
2531 #endif
2532 
2533   file_move_absolute(new->fd,new->annot_offset,sizeof(char),/*n*/0);
2534   new->annotations = (char *) CALLOC(new->annot_length,sizeof(char));
2535   read(new->fd,new->annotations,new->annot_length*sizeof(char));
2536 
2537   return;
2538 }
2539 
2540 
2541 int
IIT_read_divint(char * filename,char * divstring,bool add_iit_p)2542 IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
2543   char *newfile = NULL;
2544   FILE *fp;
2545   int version;
2546   size_t offset, skipsize;
2547   size_t filesize;
2548   int total_nintervals, ntypes, nfields, divsort;
2549   int label_pointer_size, annot_pointer_size;
2550 
2551   int i, ndivs;
2552   UINT4 *divpointers, stringlen, start;
2553   char *divstrings;
2554 
2555   if (add_iit_p == true) {
2556     newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
2557     sprintf(newfile,"%s.iit",filename);
2558     if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
2559       filename = newfile;
2560     } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
2561       /* fprintf(stderr,"Cannot open IIT file %s or %s\n",filename,newfile); */
2562       FREE(newfile);
2563       return -1;
2564     }
2565   } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
2566     /* fprintf(stderr,"Cannot open IIT file %s\n",filename); */
2567     return -1;
2568   }
2569 
2570   filesize = Access_filesize(filename);
2571   offset = 0U;
2572 
2573   if (FREAD_INT(&total_nintervals,fp) < 1) {
2574     fprintf(stderr,"IIT file %s appears to be empty\n",filename);
2575     fclose(fp);
2576     return -1;
2577   } else if ((offset += sizeof(int)) > filesize) {
2578     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu).  Did you generate it using iit_store?\n",
2579 	    filename,offset,filesize);
2580     return -1;
2581   }
2582 
2583   if (total_nintervals > 0) {
2584     version = 1;
2585 
2586   } else {
2587     /* New format to indicate version > 1 */
2588     FREAD_INT(&version,fp);
2589     if (version > IIT_LATEST_VERSION_NOVALUES && version > IIT_LATEST_VERSION_VALUES) {
2590       fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
2591 	      version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
2592       return -1;
2593     } else if ((offset += sizeof(int)) > filesize) {
2594       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu).  Did you generate it using iit_store?\n",
2595 	      filename,offset,filesize);
2596       return -1;
2597     }
2598 
2599     if (version < 5) {
2600     } else {
2601       /* Read new variables indicating sizes of label and annot pointers */
2602       if (FREAD_INT(&label_pointer_size,fp) < 1) {
2603 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2604 	return -1;
2605       } else if ((offset += sizeof(int)) > filesize) {
2606 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
2607 		filename,offset,filesize);
2608 	return -1;
2609       }
2610 
2611       if (FREAD_INT(&annot_pointer_size,fp) < 1) {
2612 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2613 	return -1;
2614       } else if ((offset += sizeof(int)) > filesize) {
2615 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
2616 		filename,offset,filesize);
2617 	return -1;
2618       }
2619 
2620       if (label_pointer_size == 4) {
2621       } else if (label_pointer_size == 8) {
2622       } else {
2623 	fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
2624 		filename,label_pointer_size);
2625 	return -1;
2626       }
2627 
2628       if (annot_pointer_size == 4) {
2629       } else if (annot_pointer_size == 8) {
2630       } else {
2631 	fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
2632 		filename,annot_pointer_size);
2633 	return -1;
2634       }
2635     }
2636 
2637     /* Re-read total_nintervals */
2638     if (FREAD_INT(&total_nintervals,fp) < 1) {
2639       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2640       return -1;
2641     } else if ((offset += sizeof(int)) > filesize) {
2642       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
2643 	      filename,offset,filesize);
2644       return -1;
2645     }
2646   }
2647 
2648   debug(printf("version: %d\n",version));
2649   debug(printf("total_nintervals: %d\n",total_nintervals));
2650 
2651 
2652   if (FREAD_INT(&ntypes,fp) < 1) {
2653     fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2654     return -1;
2655   } else if (ntypes < 0) {
2656     fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
2657     return -1;
2658   } else if ((offset += sizeof(int)) > filesize) {
2659     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu).  Did you generate it using iit_store?\n",
2660 	    filename,offset,filesize);
2661     return -1;
2662   }
2663   debug(printf("ntypes: %d\n",ntypes));
2664 
2665 
2666   if (version < 2) {
2667     nfields = 0;
2668   } else {
2669     if (FREAD_INT(&nfields,fp) < 1) {
2670       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2671       return -1;
2672     } else if (nfields < 0) {
2673       fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
2674       return -1;
2675     } else if ((offset += sizeof(int)) > filesize) {
2676       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu).  Did you generate it using iit_store?\n",
2677 	      filename,offset,filesize);
2678       return -1;
2679     }
2680   }
2681   debug(printf("nfields: %d\n",nfields));
2682 
2683 
2684   if (version <= 2) {
2685     return -1;
2686 
2687   } else {
2688 
2689     if (FREAD_INT(&ndivs,fp) < 1) {
2690       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2691       return -1;
2692     } else if (ndivs < 0) {
2693       fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
2694       return -1;
2695     } else if ((offset += sizeof(int)) > filesize) {
2696       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu).  Did you generate it using iit_store?\n",
2697 	      filename,offset,filesize);
2698       return -1;
2699     }
2700     debug(printf("ndivs: %d\n",ndivs));
2701 
2702     /* Skip nintervals */
2703     offset += skipsize = sizeof(int)*ndivs;
2704     move_relative(fp,skipsize);
2705 
2706     /* Skip cum_nintervals */
2707     offset += skipsize = sizeof(int)*(ndivs+1);
2708     move_relative(fp,skipsize);
2709 
2710     /* Skip nnodes */
2711     offset += skipsize = sizeof(int)*ndivs;
2712     move_relative(fp,skipsize);
2713 
2714     /* Skip cum_nnodes */
2715     offset += skipsize = sizeof(int)*(ndivs+1);
2716     move_relative(fp,skipsize);
2717 
2718     if (FREAD_INT(&divsort,fp) < 1) {
2719       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2720       return -1;
2721     } else if (divsort < 0) {
2722       fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
2723       return -1;
2724     } else if ((offset += sizeof(int)) > filesize) {
2725       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu).  Did you generate it using iit_store?\n",
2726 	      filename,offset,filesize);
2727       return -1;
2728     }
2729     debug(printf("divsort: %d\n",divsort));
2730 
2731     divpointers = (UINT4 *) CALLOC(ndivs+1,sizeof(UINT4));
2732     offset += sizeof(int)*FREAD_UINTS(divpointers,ndivs+1,fp);
2733     debug(
2734 	  printf("divpointers:");
2735 	  for (i = 0; i < ndivs+1; i++) {
2736 	    printf(" %u",divpointers[i]);
2737 	  }
2738 	  printf("\n");
2739 	  );
2740 
2741     stringlen = divpointers[ndivs];
2742     if (stringlen == 0) {
2743       fprintf(stderr,"Problem with divstring stringlen being 0\n");
2744       exit(9);
2745     } else {
2746       divstrings = (char *) CALLOC(stringlen,sizeof(char));
2747     }
2748     offset += sizeof(char)*FREAD_CHARS(divstrings,stringlen,fp);
2749     debug(
2750 	  printf("divstrings:\n");
2751 	  for (s = 0; s < stringlen; s++) {
2752 	    if (divstrings[s] == '\0') {
2753 	      printf("\n");
2754 	    } else {
2755 	      printf("%c",divstrings[s]);
2756 	    }
2757 	  }
2758 	  printf("(end of divstrings)\n");
2759 	  );
2760 
2761     i = 0;
2762     while (i < ndivs) {
2763       start = divpointers[i];
2764       if (!strcmp(divstring,&(divstrings[start]))) {
2765 	fclose(fp);
2766 	FREE(divstrings);
2767 	FREE(divpointers);
2768 	if (newfile != NULL) {
2769 	  FREE(newfile);
2770 	}
2771 	return i;
2772       }
2773       i++;
2774     }
2775 
2776     fclose(fp);
2777     FREE(divstrings);
2778     FREE(divpointers);
2779     if (newfile != NULL) {
2780       FREE(newfile);
2781     }
2782     return -1;
2783   }
2784 }
2785 
2786 
2787 
2788 T
IIT_load(char * memory,char * name)2789 IIT_load (char *memory, char *name) {
2790   T new;
2791   off_t stringlen;
2792   int divno;
2793   int label_pointer_size, annot_pointer_size;
2794 #ifdef DEBUG
2795   int i;
2796   Interval_T interval;
2797 #endif
2798 
2799   new = (T) MALLOC(sizeof(*new));
2800 
2801   if (name == NULL) {
2802     new->name = NULL;
2803   } else {
2804     new->name = (char *) CALLOC(strlen(name)+1,sizeof(char));
2805     strcpy(new->name,name);
2806   }
2807 
2808   new->total_nintervals = * (int *) memory;  memory += sizeof(int);
2809 
2810   if (new->total_nintervals != 0) {
2811     /* Need to use Univ_IIT_read instead */
2812     fprintf(stderr,"Unexpected error in IIT_load.  First int is %d.  Using IIT_read code on a version 1 IIT\n",
2813 	    new->total_nintervals);
2814     abort();
2815 
2816   } else {
2817     /* New format to indicate version > 1 */
2818     new->version = * (int *) memory;  memory += sizeof(int);
2819     if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
2820       fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
2821 	      new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
2822       return NULL;
2823     }
2824 
2825     if (new->version == IIT_LATEST_VERSION_VALUES) {
2826       /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
2827       new->valuep = true;
2828     } else {
2829       new->valuep = false;
2830     }
2831 
2832     if (new->version <= 3) {
2833       new->label_pointers_8p = false;
2834       new->annot_pointers_8p = false;
2835     } else if (new->version == 4) {
2836       new->label_pointers_8p = true;
2837       new->annot_pointers_8p = true;
2838     } else {
2839       /* Read new variables indicating sizes of label and annot pointers */
2840       label_pointer_size = * (int *) memory;  memory += sizeof(int);
2841       annot_pointer_size = * (int *) memory;  memory += sizeof(int);
2842 
2843       if (label_pointer_size == 4) {
2844 	new->label_pointers_8p = false;
2845       } else if (label_pointer_size == 8) {
2846 	new->label_pointers_8p = true;
2847       } else {
2848 	fprintf(stderr,"IIT file has a problem with label_pointer_size being %d, expecting 4 or 8\n",
2849 		label_pointer_size);
2850       }
2851 
2852       if (annot_pointer_size == 4) {
2853 	new->annot_pointers_8p = false;
2854       } else if (annot_pointer_size == 8) {
2855 	new->annot_pointers_8p = true;
2856       } else {
2857 	fprintf(stderr,"IIT file has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
2858 		annot_pointer_size);
2859       }
2860     }
2861 
2862     /* Re-read total_nintervals */
2863     new->total_nintervals = * (int *) memory;  memory += sizeof(int);
2864   }
2865 
2866   debug(printf("version: %d\n",new->version));
2867   debug(printf("total_nintervals: %d\n",new->total_nintervals));
2868 
2869 
2870   new->ntypes = * (int *) memory;  memory += sizeof(int);
2871   if (new->ntypes < 0) {
2872     fprintf(stderr,"IIT file appears to have a negative number of types\n");
2873     return NULL;
2874   }
2875   debug(printf("ntypes: %d\n",new->ntypes));
2876 
2877 
2878   if (new->version < 2) {
2879     new->nfields = 0;
2880   } else {
2881     new->nfields = * (int *) memory;  memory += sizeof(int);
2882     if (new->nfields < 0) {
2883       fprintf(stderr,"IIT file appears to have a negative number of fields\n");
2884       return NULL;
2885     }
2886   }
2887   debug(printf("nfields: %d\n",new->nfields));
2888 
2889 
2890   if (new->version <= 2) {
2891     /* Might not be supported */
2892     new->ndivs = 1;
2893 
2894     new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
2895     new->nintervals[0] = new->total_nintervals;
2896     new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
2897     new->cum_nintervals[0] = 0;
2898     new->cum_nintervals[1] = new->total_nintervals;
2899 
2900     new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
2901     new->nnodes[0] = * (int *) memory;  memory += sizeof(int);
2902     if (new->nnodes[0] < 0) {
2903       fprintf(stderr,"IIT file appears to have a negative number of nodes\n");
2904       return NULL;
2905     }
2906     new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
2907     new->cum_nnodes[0] = 0;
2908     new->cum_nnodes[1] = new->nnodes[0];
2909 
2910     new->divsort = NO_SORT;
2911 
2912     new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
2913     new->divpointers[0] = 0;
2914     new->divpointers[1] = 1;
2915 
2916     new->divstrings = (char *) CALLOC(1,sizeof(char));
2917     new->divstrings[0] = '\0';
2918 
2919   } else {
2920 
2921     new->ndivs = * (int *) memory;  memory += sizeof(int);
2922     if (new->ndivs < 0) {
2923       fprintf(stderr,"IIT file appears to have a negative number of divs\n");
2924       return NULL;
2925     }
2926     debug(printf("ndivs: %d\n",new->ndivs));
2927 
2928     new->nintervals = (int *) memory;
2929     memory += new->ndivs * sizeof(int);
2930     debug(
2931 	  printf("nintervals:");
2932 	  for (i = 0; i < new->ndivs; i++) {
2933 	    printf(" %d",new->nintervals[i]);
2934 	  }
2935 	  printf("\n");
2936 	  );
2937 
2938     new->cum_nintervals = (int *) memory;
2939     memory += (new->ndivs+1) * sizeof(int);
2940     debug(
2941 	  printf("cum_nintervals:");
2942 	  for (i = 0; i <= new->ndivs; i++) {
2943 	    printf(" %d",new->cum_nintervals[i]);
2944 	  }
2945 	  printf("\n");
2946 	  );
2947 
2948     new->nnodes = (int *) memory;
2949     memory += new->ndivs * sizeof(int);
2950     debug(
2951 	  printf("nnodes:");
2952 	  for (i = 0; i < new->ndivs; i++) {
2953 	    printf(" %d",new->nnodes[i]);
2954 	  }
2955 	  printf("\n");
2956 	  );
2957 
2958     new->cum_nnodes = (int *) memory;
2959     memory += (new->ndivs+1) * sizeof(int);
2960     debug(
2961 	  printf("cum_nnodes:");
2962 	  for (i = 0; i <= new->ndivs; i++) {
2963 	    printf(" %d",new->cum_nnodes[i]);
2964 	  }
2965 	  printf("\n");
2966 	  );
2967 
2968     new->divsort = * (int *) memory;  memory += sizeof(int);
2969     if (new->divsort < 0) {
2970       fprintf(stderr,"IIT file appears to have a negative value for divsort\n");
2971       return NULL;
2972     }
2973     debug(printf("divsort: %d\n",new->divsort));
2974 
2975     new->divpointers = (UINT4 *) memory;
2976     memory += (new->ndivs+1) * sizeof(int);;
2977     debug(
2978 	  printf("divpointers:");
2979 	  for (i = 0; i < new->ndivs+1; i++) {
2980 	    printf(" %u",new->divpointers[i]);
2981 	  }
2982 	  printf("\n");
2983 	  );
2984 
2985     /* Note: To keep ints aligned, would be better to make stringlen a
2986        multiple of 4, and put a terminating '\0' as needed */
2987     stringlen = new->divpointers[new->ndivs];
2988     if (stringlen == 0) {
2989       new->divstrings = (char *) NULL;
2990     } else {
2991       new->divstrings = (char *) memory;
2992       memory += stringlen * sizeof(char);
2993     }
2994     debug(
2995 	  printf("divstrings:\n");
2996 	  for (s = 0; s < stringlen; s++) {
2997 	    if (new->divstrings[s] == '\0') {
2998 	      printf("\n");
2999 	    } else {
3000 	      printf("%c",new->divstrings[s]);
3001 	    }
3002 	  }
3003 	  printf("(end of divstrings)\n");
3004 	  );
3005   }
3006 
3007   new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3008   new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3009   new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3010   new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3011   new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3012 
3013   if (new->version == 1) {
3014     abort();
3015   }
3016 
3017   new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3018 
3019   /* Load all divs */
3020   debug(printf("Loading all divs\n"));
3021   for (divno = 0; divno < new->ndivs; divno++) {
3022     debug(fprintf(stderr,"Starting load of div\n"));
3023     memory = load_tree(memory,new,divno);
3024     debug(fprintf(stderr,"Ending read of div\n"));
3025   }
3026 
3027   for (divno = 0; divno < new->ndivs; divno++) {
3028     memory = load_intervals(memory,new,divno);
3029   }
3030 
3031   /* memory = */ load_words(memory,new);
3032   new->access = LOADED;
3033 
3034   return new;
3035 }
3036 
3037 
3038 
3039 T
IIT_read(char * filename,char * name,bool readonlyp,Divread_T divread,char * divstring,bool add_iit_p)3040 IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *divstring,
3041 	  bool add_iit_p) {
3042   T new;
3043   FILE *fp;
3044   char *newfile = NULL;
3045   size_t offset = 0, stringlen;
3046   size_t filesize;
3047   int skip_nintervals, desired_divno, divno;
3048   int label_pointer_size, annot_pointer_size;
3049 #ifdef DEBUG
3050   int i;
3051   Interval_T interval;
3052 #endif
3053 
3054   if (add_iit_p == false) {
3055     if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3056       return NULL;
3057     }
3058 
3059   } else {
3060     /* Try adding .iit first */
3061     newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
3062     sprintf(newfile,"%s.iit",filename);
3063     if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
3064       filename = newfile;
3065     } else {
3066       FREE(newfile);
3067       if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3068 	return NULL;
3069       }
3070     }
3071   }
3072 
3073 
3074   new = (T) MALLOC(sizeof(*new));
3075 
3076   filesize = Access_filesize(filename);
3077 
3078   if (name == NULL) {
3079     new->name = NULL;
3080   } else {
3081     new->name = (char *) CALLOC(strlen(name)+1,sizeof(char));
3082     strcpy(new->name,name);
3083   }
3084 
3085   if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3086     fprintf(stderr,"IIT file %s appears to be empty\n",filename);
3087     fclose(fp);
3088     return NULL;
3089   } else if ((offset += sizeof(int)) > filesize) {
3090     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu).  Did you generate it using iit_store?\n",
3091 	    filename,offset,filesize);
3092     return NULL;
3093   }
3094 
3095   if (new->total_nintervals != 0) {
3096     /* Need to use Univ_IIT_read instead */
3097     fprintf(stderr,"Unexpected error in IIT_read of %s.  First int is %d.  Using IIT_read code on a version 1 IIT\n",
3098 	    filename,new->total_nintervals);
3099     abort();
3100 
3101   } else {
3102     /* New format to indicate version > 1 */
3103     FREAD_INT(&new->version,fp);
3104     if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
3105       fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
3106 	      new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
3107       return NULL;
3108     } else if ((offset += sizeof(int)) > filesize) {
3109       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu).  Did you generate it using iit_store?\n",
3110 	      filename,offset,filesize);
3111       return NULL;
3112     }
3113 
3114     if (new->version == IIT_LATEST_VERSION_VALUES) {
3115       /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
3116       new->valuep = true;
3117     } else {
3118       new->valuep = false;
3119     }
3120 
3121     if (new->version <= 3) {
3122       new->label_pointers_8p = false;
3123       new->annot_pointers_8p = false;
3124     } else if (new->version == 4) {
3125       new->label_pointers_8p = true;
3126       new->annot_pointers_8p = true;
3127     } else {
3128       /* Read new variables indicating sizes of label and annot pointers */
3129       if (FREAD_INT(&label_pointer_size,fp) < 1) {
3130 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3131 	return NULL;
3132       } else if ((offset += sizeof(int)) > filesize) {
3133 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3134 		filename,offset,filesize);
3135 	return NULL;
3136       }
3137 
3138       if (FREAD_INT(&annot_pointer_size,fp) < 1) {
3139 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3140 	return NULL;
3141       } else if ((offset += sizeof(int)) > filesize) {
3142 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3143 		filename,offset,filesize);
3144 	return NULL;
3145       }
3146 
3147       if (label_pointer_size == 4) {
3148 	new->label_pointers_8p = false;
3149       } else if (label_pointer_size == 8) {
3150 	new->label_pointers_8p = true;
3151       } else {
3152 	fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
3153 		filename,label_pointer_size);
3154       }
3155 
3156       if (annot_pointer_size == 4) {
3157 	new->annot_pointers_8p = false;
3158       } else if (annot_pointer_size == 8) {
3159 	new->annot_pointers_8p = true;
3160       } else {
3161 	fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
3162 		filename,annot_pointer_size);
3163       }
3164     }
3165 
3166     /* Re-read total_nintervals */
3167     if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3168       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3169       return NULL;
3170     } else if ((offset += sizeof(int)) > filesize) {
3171       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3172 	      filename,offset,filesize);
3173       return NULL;
3174     }
3175   }
3176 
3177   debug(printf("version: %d\n",new->version));
3178   debug(printf("total_nintervals: %d\n",new->total_nintervals));
3179 
3180 
3181   if (FREAD_INT(&new->ntypes,fp) < 1) {
3182     fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3183     return NULL;
3184   } else if (new->ntypes < 0) {
3185     fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
3186     return NULL;
3187   } else if ((offset += sizeof(int)) > filesize) {
3188     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu).  Did you generate it using iit_store?\n",
3189 	    filename,offset,filesize);
3190     return NULL;
3191   }
3192   debug(printf("ntypes: %d\n",new->ntypes));
3193 
3194 
3195   if (new->version < 2) {
3196     new->nfields = 0;
3197   } else {
3198     if (FREAD_INT(&new->nfields,fp) < 1) {
3199       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3200       return NULL;
3201     } else if (new->nfields < 0) {
3202       fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
3203       return NULL;
3204     } else if ((offset += sizeof(int)) > filesize) {
3205       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu).  Did you generate it using iit_store?\n",
3206 	      filename,offset,filesize);
3207       return NULL;
3208     }
3209   }
3210   debug(printf("nfields: %d\n",new->nfields));
3211 
3212 
3213   if (new->version <= 2) {
3214     new->ndivs = 1;
3215 
3216     new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3217     new->nintervals[0] = new->total_nintervals;
3218     new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3219     new->cum_nintervals[0] = 0;
3220     new->cum_nintervals[1] = new->total_nintervals;
3221 
3222     new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3223     if (FREAD_INT(&(new->nnodes[0]),fp) < 1) {
3224       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3225       return NULL;
3226     } else if (new->nnodes[0] < 0) {
3227       fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
3228       return NULL;
3229     } else if ((offset += sizeof(int)) > filesize) {
3230       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %zu, filesize %zu).  Did you generate it using iit_store?\n",
3231 	      filename,offset,filesize);
3232       return NULL;
3233     }
3234     new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3235     new->cum_nnodes[0] = 0;
3236     new->cum_nnodes[1] = new->nnodes[0];
3237 
3238     new->divsort = NO_SORT;
3239 
3240     new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3241     new->divpointers[0] = 0;
3242     new->divpointers[1] = 1;
3243 
3244     new->divstrings = (char *) CALLOC(1,sizeof(char));
3245     new->divstrings[0] = '\0';
3246 
3247   } else {
3248 
3249     if (FREAD_INT(&new->ndivs,fp) < 1) {
3250       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3251       return NULL;
3252     } else if (new->ndivs < 0) {
3253       fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
3254       return NULL;
3255     } else if ((offset += sizeof(int)) > filesize) {
3256       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu).  Did you generate it using iit_store?\n",
3257 	      filename,offset,filesize);
3258       return NULL;
3259     }
3260     debug(printf("ndivs: %d\n",new->ndivs));
3261 
3262     new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3263     offset += sizeof(int)*FREAD_INTS(new->nintervals,new->ndivs,fp);
3264     debug(
3265 	  printf("nintervals:");
3266 	  for (i = 0; i < new->ndivs; i++) {
3267 	    printf(" %d",new->nintervals[i]);
3268 	  }
3269 	  printf("\n");
3270 	  );
3271 
3272     new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3273     offset += sizeof(int)*FREAD_INTS(new->cum_nintervals,new->ndivs+1,fp);
3274     debug(
3275 	  printf("cum_nintervals:");
3276 	  for (i = 0; i <= new->ndivs; i++) {
3277 	    printf(" %d",new->cum_nintervals[i]);
3278 	  }
3279 	  printf("\n");
3280 	  );
3281 
3282     new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3283     offset += sizeof(int)*FREAD_INTS(new->nnodes,new->ndivs,fp);
3284     debug(
3285 	  printf("nnodes:");
3286 	  for (i = 0; i < new->ndivs; i++) {
3287 	    printf(" %d",new->nnodes[i]);
3288 	  }
3289 	  printf("\n");
3290 	  );
3291 
3292     new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3293     offset += sizeof(int)*FREAD_INTS(new->cum_nnodes,new->ndivs+1,fp);
3294     debug(
3295 	  printf("cum_nnodes:");
3296 	  for (i = 0; i <= new->ndivs; i++) {
3297 	    printf(" %d",new->cum_nnodes[i]);
3298 	  }
3299 	  printf("\n");
3300 	  );
3301 
3302     if (FREAD_INT(&new->divsort,fp) < 1) {
3303       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3304       return NULL;
3305     } else if (new->divsort < 0) {
3306       fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
3307       return NULL;
3308     } else if ((offset += sizeof(int)) > filesize) {
3309       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu).  Did you generate it using iit_store?\n",
3310 	      filename,offset,filesize);
3311       return NULL;
3312     }
3313     debug(printf("divsort: %d\n",new->divsort));
3314 
3315     new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3316     offset += sizeof(int)*FREAD_UINTS(new->divpointers,new->ndivs+1,fp);
3317     debug(
3318 	  printf("divpointers:");
3319 	  for (i = 0; i < new->ndivs+1; i++) {
3320 	    printf(" %u",new->divpointers[i]);
3321 	  }
3322 	  printf("\n");
3323 	  );
3324 
3325     stringlen = new->divpointers[new->ndivs];
3326     if (stringlen == 0) {
3327       new->divstrings = (char *) NULL;
3328     } else {
3329       new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
3330       offset += sizeof(char)*FREAD_CHARS(new->divstrings,stringlen,fp);
3331     }
3332     debug(
3333 	  printf("divstrings:\n");
3334 	  for (s = 0; s < stringlen; s++) {
3335 	    if (new->divstrings[s] == '\0') {
3336 	      printf("\n");
3337 	    } else {
3338 	      printf("%c",new->divstrings[s]);
3339 	    }
3340 	  }
3341 	  printf("(end of divstrings)\n");
3342 	  );
3343   }
3344 
3345   new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3346   new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3347   new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3348   new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3349   new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3350 
3351   if (new->version == 1) {
3352     fprintf(stderr,"Not expecting version 1\n");
3353     abort();
3354   }
3355 
3356   new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3357 
3358   if (divread == READ_ALL) {
3359     /* Read all divs */
3360     debug(printf("Reading all divs\n"));
3361     for (divno = 0; divno < new->ndivs; divno++) {
3362       debug(fprintf(stderr,"Starting read of div\n"));
3363       offset = read_tree(offset,filesize,fp,filename,new,divno);
3364       debug(fprintf(stderr,"Ending read of div\n"));
3365     }
3366 
3367     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3368     offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3369     for (divno = 1; divno < new->ndivs; divno++) {
3370       new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3371       offset = read_intervals(offset,filesize,fp,filename,new,divno);
3372     }
3373 
3374   } else if (divread == READ_NONE) {
3375     debug(printf("Reading no divs\n"));
3376     offset = skip_trees(offset,filesize,fp,filename,new->ndivs,
3377 			new->cum_nintervals[new->ndivs],new->cum_nnodes[new->ndivs]);
3378 
3379     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3380     offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3381     for (divno = 1; divno < new->ndivs; divno++) {
3382       new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3383       offset = read_intervals(offset,filesize,fp,filename,new,divno);
3384     }
3385 
3386   } else if (divread == READ_ONE) {
3387     debug(printf("Reading only div %s\n",divstring));
3388     if ((desired_divno = IIT_divint(new,divstring)) < 0) {
3389       fprintf(stderr,"Cannot find div %s in IIT_read.  Ignoring div.\n",divstring);
3390       desired_divno = 0;
3391     }
3392     offset = skip_trees(offset,filesize,fp,filename,desired_divno,
3393 			new->cum_nintervals[desired_divno],new->cum_nnodes[desired_divno]);
3394     debug1(fprintf(stderr,"Starting read of div\n"));
3395     offset = read_tree(offset,filesize,fp,filename,new,desired_divno);
3396     debug1(fprintf(stderr,"Ending read of div\n"));
3397     offset = skip_trees(offset,filesize,fp,filename,new->ndivs - (desired_divno + 1),
3398 			new->cum_nintervals[new->ndivs] - new->cum_nintervals[desired_divno+1],
3399 			new->cum_nnodes[new->ndivs] - new->cum_nnodes[desired_divno+1]);
3400 
3401     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3402     offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,0,desired_divno-1);
3403     debug1(fprintf(stderr,"Starting read of intervals\n"));
3404     new->intervals[desired_divno] = &(new->intervals[0][skip_nintervals]);
3405     offset = read_intervals(offset,filesize,fp,filename,new,desired_divno);
3406     debug1(fprintf(stderr,"Ending read of intervals\n"));
3407     offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,desired_divno+1,new->ndivs-1);
3408 
3409     debug(
3410 	  /*
3411 	    printf("sigmas[%d]:\n",desired_divno);
3412 	    for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3413 	    interval = &(new->intervals[desired_divno][new->sigmas[desired_divno][i]]);
3414 	    printf("%d %u..%u\n",new->sigmas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3415 	    }
3416 	    printf("\n");
3417 	  */
3418 
3419 	  printf("alphas[%d]:\n",desired_divno);
3420 	  for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3421 	    interval = &(new->intervals[desired_divno][new->alphas[desired_divno][i]]);
3422 	    printf("%d %u..%u\n",new->alphas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3423 	  }
3424 	  printf("\n");
3425 	  );
3426 
3427 
3428   } else {
3429     abort();
3430   }
3431 
3432   read_words(offset,filesize,fp,new);
3433   fclose(fp);
3434 
3435 #ifndef HAVE_MMAP
3436   debug1(printf("No mmap available.  Reading annotations\n"));
3437   new->access = FILEIO;
3438   new->fd = Access_fileio(filename);
3439   read_annotations(new);
3440   close(new->fd);
3441   /* pthread_mutex_init(&new->read_mutex,NULL); */
3442 #else
3443   debug1(printf("mmap available.  Setting up pointers to annotations\n"));
3444   new->access = MMAPPED;
3445   if (mmap_annotations(filename,new,readonlyp) == false) {
3446     debug1(printf("  Failed.  Reading annotations\n"));
3447     new->access = FILEIO;
3448     new->fd = Access_fileio(filename);
3449     read_annotations(new);
3450     close(new->fd);
3451     /* pthread_mutex_init(&new->read_mutex,NULL); */
3452   }
3453 #endif
3454 
3455   if (newfile != NULL) {
3456     FREE(newfile);
3457   }
3458 
3459   return new;
3460 }
3461 
3462 
3463 void
IIT_debug(char * filename)3464 IIT_debug (char *filename) {
3465   T new;
3466   FILE *fp;
3467   char *newfile = NULL;
3468   size_t stringlen, s;
3469   size_t offset = 0, filesize;
3470   int skip_nintervals, desired_divno, divno, i;
3471   int label_pointer_size, annot_pointer_size;
3472   Divread_T divread = READ_ALL;
3473   char *divstring = NULL;
3474   bool add_iit_p = false;
3475 #ifdef DEBUG
3476   Interval_T interval;
3477 #endif
3478 
3479   if (add_iit_p == true) {
3480     newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
3481     sprintf(newfile,"%s.iit",filename);
3482     if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
3483       filename = newfile;
3484     } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3485       /* fprintf(stderr,"Cannot open IIT file %s or %s\n",filename,newfile); */
3486       FREE(newfile);
3487       return;
3488     }
3489   } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3490     /* fprintf(stderr,"Cannot open IIT file %s\n",filename); */
3491     return;
3492   }
3493 
3494   new = (T) MALLOC(sizeof(*new));
3495 
3496   filesize = Access_filesize(filename);
3497 
3498   new->name = NULL;
3499 
3500   if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3501     fprintf(stderr,"IIT file %s appears to be empty\n",filename);
3502     fclose(fp);
3503     return;
3504   } else if ((offset += sizeof(int)) > filesize) {
3505     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu).  Did you generate it using iit_store?\n",
3506 	    filename,offset,filesize);
3507     return;
3508   }
3509 
3510   if (new->total_nintervals > 0) {
3511     new->version = 1;
3512     new->valuep = false;
3513 
3514   } else {
3515     /* New format to indicate version > 1 */
3516     FREAD_INT(&new->version,fp);
3517     if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
3518       fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
3519 	      new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
3520       return;
3521     } else if ((offset += sizeof(int)) > filesize) {
3522       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu).  Did you generate it using iit_store?\n",
3523 	      filename,offset,filesize);
3524       return;
3525     }
3526 
3527     if (new->version == IIT_LATEST_VERSION_VALUES) {
3528       /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
3529       new->valuep = true;
3530     } else {
3531       new->valuep = false;
3532     }
3533 
3534     if (new->version <= 3) {
3535       new->label_pointers_8p = false;
3536       new->annot_pointers_8p = false;
3537     } else if (new->version == 4) {
3538       new->label_pointers_8p = true;
3539       new->annot_pointers_8p = true;
3540     } else {
3541       /* Read new variables indicating sizes of label and annot pointers */
3542       if (FREAD_INT(&label_pointer_size,fp) < 1) {
3543 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3544 	return;
3545       } else if ((offset += sizeof(int)) > filesize) {
3546 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3547 		filename,offset,filesize);
3548 	return;
3549       }
3550 
3551       if (FREAD_INT(&annot_pointer_size,fp) < 1) {
3552 	fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3553 	return;
3554       } else if ((offset += sizeof(int)) > filesize) {
3555 	fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3556 		filename,offset,filesize);
3557 	return;
3558       }
3559 
3560       if (label_pointer_size == 4) {
3561 	new->label_pointers_8p = false;
3562       } else if (label_pointer_size == 8) {
3563 	new->label_pointers_8p = true;
3564       } else {
3565 	fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
3566 		filename,label_pointer_size);
3567       }
3568 
3569       if (annot_pointer_size == 4) {
3570 	new->annot_pointers_8p = false;
3571       } else if (annot_pointer_size == 8) {
3572 	new->annot_pointers_8p = true;
3573       } else {
3574 	fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
3575 		filename,annot_pointer_size);
3576       }
3577     }
3578 
3579     /* Re-read total_nintervals */
3580     if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3581       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3582       return;
3583     } else if ((offset += sizeof(int)) > filesize) {
3584       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu).  Did you generate it using iit_store?\n",
3585 	      filename,offset,filesize);
3586       return;
3587     }
3588   }
3589   if (new->total_nintervals < 0) {
3590     fprintf(stderr,"IIT file %s appears to have a negative number of intervals\n",filename);
3591     return;
3592   }
3593 
3594   printf("version: %d\n",new->version);
3595   printf("total_nintervals: %d\n",new->total_nintervals);
3596 
3597   if (new->version >= 5) {
3598     printf("label_pointer_size: %d\n",label_pointer_size);
3599     printf("annot_pointer_size: %d\n",annot_pointer_size);
3600   }
3601 
3602 
3603   if (FREAD_INT(&new->ntypes,fp) < 1) {
3604     fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3605     return;
3606   } else if (new->ntypes < 0) {
3607     fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
3608     return;
3609   } else if ((offset += sizeof(int)) > filesize) {
3610     fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu).  Did you generate it using iit_store?\n",
3611 	    filename,offset,filesize);
3612     return;
3613   }
3614   printf("ntypes: %d\n",new->ntypes);
3615 
3616 
3617   if (new->version < 2) {
3618     new->nfields = 0;
3619   } else {
3620     if (FREAD_INT(&new->nfields,fp) < 1) {
3621       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3622       return;
3623     } else if (new->nfields < 0) {
3624       fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
3625       return;
3626     } else if ((offset += sizeof(int)) > filesize) {
3627       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu).  Did you generate it using iit_store?\n",
3628 	      filename,offset,filesize);
3629       return;
3630     }
3631   }
3632   printf("nfields: %d\n",new->nfields);
3633 
3634 
3635   if (new->version <= 2) {
3636     new->ndivs = 1;
3637 
3638     new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3639     new->nintervals[0] = new->total_nintervals;
3640     new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3641     new->cum_nintervals[0] = 0;
3642     new->cum_nintervals[1] = new->total_nintervals;
3643 
3644     new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3645     if (FREAD_INT(&(new->nnodes[0]),fp) < 1) {
3646       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3647       return;
3648     } else if (new->nnodes[0] < 0) {
3649       fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
3650       return;
3651     } else if ((offset += sizeof(int)) > filesize) {
3652       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %zu, filesize %zu).  Did you generate it using iit_store?\n",
3653 	      filename,offset,filesize);
3654       return;
3655     }
3656     new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3657     new->cum_nnodes[0] = 0;
3658     new->cum_nnodes[1] = new->nnodes[0];
3659 
3660     new->divsort = NO_SORT;
3661 
3662     new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3663     new->divpointers[0] = 0;
3664     new->divpointers[1] = 1;
3665 
3666     new->divstrings = (char *) CALLOC(1,sizeof(char));
3667     new->divstrings[0] = '\0';
3668 
3669   } else {
3670 
3671     if (FREAD_INT(&new->ndivs,fp) < 1) {
3672       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3673       return;
3674     } else if (new->ndivs < 0) {
3675       fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
3676       return;
3677     } else if ((offset += sizeof(int)) > filesize) {
3678       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu).  Did you generate it using iit_store?\n",
3679 	      filename,offset,filesize);
3680       return;
3681     }
3682     printf("ndivs: %d\n",new->ndivs);
3683 
3684     new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3685     offset += sizeof(int)*FREAD_INTS(new->nintervals,new->ndivs,fp);
3686     printf("nintervals:");
3687     for (i = 0; i < new->ndivs; i++) {
3688       printf(" %d",new->nintervals[i]);
3689     }
3690     printf("\n");
3691 
3692     new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3693     offset += sizeof(int)*FREAD_INTS(new->cum_nintervals,new->ndivs+1,fp);
3694     printf("cum_nintervals:");
3695     for (i = 0; i <= new->ndivs; i++) {
3696       printf(" %d",new->cum_nintervals[i]);
3697     }
3698     printf("\n");
3699 
3700     new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3701     offset += sizeof(int)*FREAD_INTS(new->nnodes,new->ndivs,fp);
3702     printf("nnodes:");
3703     for (i = 0; i < new->ndivs; i++) {
3704       printf(" %d",new->nnodes[i]);
3705     }
3706     printf("\n");
3707 
3708     new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3709     offset += sizeof(int)*FREAD_INTS(new->cum_nnodes,new->ndivs+1,fp);
3710     printf("cum_nnodes:");
3711     for (i = 0; i <= new->ndivs; i++) {
3712       printf(" %d",new->cum_nnodes[i]);
3713     }
3714     printf("\n");
3715 
3716     if (FREAD_INT(&new->divsort,fp) < 1) {
3717       fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3718       return;
3719     } else if (new->divsort < 0) {
3720       fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
3721       return;
3722     } else if ((offset += sizeof(int)) > filesize) {
3723       fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu).  Did you generate it using iit_store?\n",
3724 	      filename,offset,filesize);
3725       return;
3726     }
3727     printf("divsort: %d\n",new->divsort);
3728 
3729     new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3730     offset += sizeof(int)*FREAD_UINTS(new->divpointers,new->ndivs+1,fp);
3731     printf("divpointers:");
3732     for (i = 0; i < new->ndivs+1; i++) {
3733       printf(" %u",new->divpointers[i]);
3734     }
3735     printf("\n");
3736 
3737     stringlen = new->divpointers[new->ndivs];
3738     if (stringlen == 0) {
3739       new->divstrings = (char *) NULL;
3740     } else {
3741       new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
3742       offset += sizeof(char)*FREAD_CHARS(new->divstrings,stringlen,fp);
3743     }
3744     printf("divstrings:\n");
3745     for (s = 0; s < stringlen; s++) {
3746       if (new->divstrings[s] == '\0') {
3747 	printf("\n");
3748       } else {
3749 	printf("%c",new->divstrings[s]);
3750       }
3751     }
3752   }
3753 
3754   new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3755   new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3756   new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3757   new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3758   new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3759   new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3760 
3761   if (divread == READ_ALL) {
3762     /* Read all divs */
3763     debug(printf("Reading all divs\n"));
3764     for (divno = 0; divno < new->ndivs; divno++) {
3765       debug(printf("Div %d tree\n",divno));
3766       offset = read_tree(offset,filesize,fp,filename,new,divno);
3767     }
3768 
3769     debug(printf("Div 0 intervals\n"));
3770     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3771     offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3772     for (divno = 1; divno < new->ndivs; divno++) {
3773       debug(printf("Div %d intervals\n",divno));
3774       new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3775       offset = read_intervals(offset,filesize,fp,filename,new,divno);
3776     }
3777 
3778   } else if (divread == READ_NONE) {
3779     debug(printf("Reading no divs\n"));
3780     offset = skip_trees(offset,filesize,fp,filename,new->ndivs,
3781 			new->cum_nintervals[new->ndivs],new->cum_nnodes[new->ndivs]);
3782 
3783     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3784     offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3785     for (divno = 1; divno < new->ndivs; divno++) {
3786       new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3787       offset = read_intervals(offset,filesize,fp,filename,new,divno);
3788     }
3789 
3790   } else if (divread == READ_ONE) {
3791     debug(printf("Reading only div %s\n",divstring));
3792     if ((desired_divno = IIT_divint(new,divstring)) < 0) {
3793       fprintf(stderr,"Cannot find div %s in IIT_read.  Ignoring div.\n",divstring);
3794       desired_divno = 0;
3795     }
3796     offset = skip_trees(offset,filesize,fp,filename,desired_divno,
3797 			new->cum_nintervals[desired_divno],new->cum_nnodes[desired_divno]);
3798     debug1(fprintf(stderr,"Starting read of div\n"));
3799     offset = read_tree(offset,filesize,fp,filename,new,desired_divno);
3800     debug1(fprintf(stderr,"Ending read of div\n"));
3801     offset = skip_trees(offset,filesize,fp,filename,new->ndivs - (desired_divno + 1),
3802 			new->cum_nintervals[new->ndivs] - new->cum_nintervals[desired_divno+1],
3803 			new->cum_nnodes[new->ndivs] - new->cum_nnodes[desired_divno+1]);
3804 
3805     new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3806     offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,0,desired_divno-1);
3807     debug1(fprintf(stderr,"Starting read of intervals\n"));
3808     new->intervals[desired_divno] = &(new->intervals[0][skip_nintervals]);
3809     offset = read_intervals(offset,filesize,fp,filename,new,desired_divno);
3810     debug1(fprintf(stderr,"Ending read of intervals\n"));
3811     offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,desired_divno+1,new->ndivs-1);
3812 
3813     debug(
3814 	  /*
3815 	  printf("sigmas[%d]:\n",desired_divno);
3816 	  for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3817 	    interval = &(new->intervals[desired_divno][new->sigmas[desired_divno][i]]);
3818 	    printf("%d %u..%u\n",new->sigmas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3819 	  }
3820 	  printf("\n");
3821 	  */
3822 
3823 	  printf("alphas[%d]:\n",desired_divno);
3824 	  for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3825 	    interval = &(new->intervals[desired_divno][new->alphas[desired_divno][i]]);
3826 	    printf("%d %u..%u\n",new->alphas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3827 	  }
3828 	  printf("\n");
3829 	  );
3830 
3831 
3832   } else {
3833     abort();
3834   }
3835 
3836   read_words_debug(offset,filesize,fp,new);
3837   fclose(fp);
3838 
3839 #ifndef HAVE_MMAP
3840   debug1(printf("No mmap available.  Reading annotations\n"));
3841   new->access = FILEIO;
3842   new->fd = Access_fileio(filename);
3843   read_annotations(new);
3844   close(new->fd);
3845   /* pthread_mutex_init(&new->read_mutex,NULL); */
3846 #else
3847   debug1(printf("mmap available.  Setting up pointers to annotations\n"));
3848   new->access = MMAPPED;
3849   if (mmap_annotations(filename,new,/*readonlyp*/true) == false) {
3850     debug1(printf("  Failed.  Reading annotations\n"));
3851     new->access = FILEIO;
3852     new->fd = Access_fileio(filename);
3853     read_annotations(new);
3854     close(new->fd);
3855     /* pthread_mutex_init(&new->read_mutex,NULL); */
3856   }
3857 #endif
3858 
3859   IIT_free(&new);
3860 
3861   if (newfile != NULL) {
3862     FREE(newfile);
3863   }
3864 
3865   return;
3866 }
3867 
3868 
3869 /************************************************************************/
3870 
3871 static void
fnode_query_aux(int * min,int * max,T this,int divno,int nodeindex,Chrpos_T x)3872 fnode_query_aux (int *min, int *max, T this, int divno, int nodeindex, Chrpos_T x) {
3873   int lambda;
3874   FNode_T node;
3875 
3876   if (nodeindex == -1) {
3877     return;
3878   }
3879 
3880   node = &(this->nodes[divno][nodeindex]);
3881   if (x == node->value) {
3882     debug(printf("%uD:\n",node->value));
3883     if (node->a < *min) {
3884       *min = node->a;
3885     }
3886     if (node->b > *max) {
3887       *max = node->b;
3888     }
3889     return;
3890   } else if (x < node->value) {
3891     fnode_query_aux(&(*min),&(*max),this,divno,node->leftindex,x);
3892     debug(printf("%uL:\n",node->value));
3893     if (node->a < *min) {
3894       *min = node->a;
3895     }
3896     for (lambda = node->a; lambda <= node->b; lambda++) {
3897       debug(printf("Looking at lambda %d, segment %d\n",
3898 		   lambda,this->sigmas[divno][lambda]));
3899       if (Interval_is_contained(x,this->intervals[divno],this->sigmas[divno][lambda]) == true) {
3900 	if (lambda > *max) {
3901 	  *max = lambda;
3902 	}
3903       } else {
3904 	return;
3905       }
3906     }
3907     return;
3908   } else {
3909     /* (node->value < x) */
3910     fnode_query_aux(&(*min),&(*max),this,divno,node->rightindex,x);
3911     debug(printf("%uR:\n", node->value));
3912     if (node->b > *max) {
3913       *max = node->b;
3914     }
3915     for (lambda = node->b; lambda >= node->a; lambda--) {
3916       debug(printf("Looking at lambda %d, segment %d\n",
3917 		   lambda,this->omegas[divno][lambda]));
3918       if (Interval_is_contained(x,this->intervals[divno],this->omegas[divno][lambda]) == true) {
3919 	if (lambda < *min) {
3920 	  *min = lambda;
3921 	}
3922       } else {
3923 	return;
3924       }
3925     }
3926     return;
3927   }
3928 }
3929 
3930 /************************************************************************/
3931 
3932 int *
IIT_find(int * nmatches,T this,char * label)3933 IIT_find (int *nmatches, T this, char *label) {
3934   int *matches = NULL, j;
3935   int low, middle, high, recno;
3936   bool foundp = false;
3937   int cmp;
3938 
3939   low = 0;
3940   high = this->total_nintervals;
3941   *nmatches = 0;
3942 
3943   while (!foundp && low < high) {
3944     middle = (low+high)/2;
3945 
3946 #ifdef WORDS_BIGENDIAN
3947 #ifdef HAVE_64_BIT
3948     if (this->label_pointers_8p == true) {
3949       cmp = strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[middle])])]));
3950     } else {
3951       cmp = strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[middle])])]));
3952     }
3953 #else
3954     cmp = strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[middle])])]));
3955 #endif
3956 #else
3957 #ifdef HAVE_64_BIT
3958     if (this->label_pointers_8p == true) {
3959       cmp = strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[middle]]]));
3960     } else {
3961       cmp = strcmp(label,&(this->labels[this->labelpointers[this->labelorder[middle]]]));
3962     }
3963 #else
3964     cmp = strcmp(label,&(this->labels[this->labelpointers[this->labelorder[middle]]]));
3965 #endif
3966 #endif
3967 
3968     if (cmp < 0) {
3969       high = middle;
3970     } else if (cmp > 0) {
3971       low = middle + 1;
3972     } else {
3973       foundp = true;
3974     }
3975   }
3976 
3977   if (foundp == true) {
3978     low = middle;
3979 #ifdef WORDS_BIGENDIAN
3980 #ifdef HAVE_64_BIT
3981     if (this->label_pointers_8p == true) {
3982       while (low-1 >= 0 &&
3983 	     !strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3984 	low--;
3985       }
3986     } else {
3987       while (low-1 >= 0 &&
3988 	     !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3989 	low--;
3990       }
3991     }
3992 #else
3993     while (low-1 >= 0 &&
3994 	   !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3995       low--;
3996     }
3997 #endif
3998 #else
3999 #ifdef HAVE_64_BIT
4000     if (this->label_pointers_8p == true) {
4001       while (low-1 >= 0 &&
4002 	     !strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[low-1]]]))) {
4003 	low--;
4004       }
4005     } else {
4006       while (low-1 >= 0 &&
4007 	     !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[low-1]]]))) {
4008 	low--;
4009       }
4010     }
4011 #else
4012     while (low-1 >= 0 &&
4013 	   !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[low-1]]]))) {
4014       low--;
4015     }
4016 #endif
4017 #endif
4018 
4019     high = middle;
4020 #ifdef WORDS_BIGENDIAN
4021 #ifdef HAVE_64_BIT
4022     if (this->label_pointers_8p == true) {
4023       while (high+1 < this->total_nintervals &&
4024 	     !strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4025 	high++;
4026       }
4027     } else {
4028       while (high+1 < this->total_nintervals &&
4029 	     !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4030 	high++;
4031       }
4032     }
4033 #else
4034     while (high+1 < this->total_nintervals &&
4035 	   !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4036       high++;
4037     }
4038 #endif
4039 #else
4040 #ifdef HAVE_64_BIT
4041     if (this->label_pointers_8p == true) {
4042       while (high+1 < this->total_nintervals &&
4043 	     !strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[high+1]]]))) {
4044 	high++;
4045       }
4046     } else {
4047       while (high+1 < this->total_nintervals &&
4048 	     !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[high+1]]]))) {
4049 	high++;
4050       }
4051     }
4052 #else
4053     while (high+1 < this->total_nintervals &&
4054 	   !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[high+1]]]))) {
4055       high++;
4056     }
4057 #endif
4058 #endif
4059 
4060 
4061     *nmatches = high - low + 1;
4062     if (*nmatches > 0) {
4063       matches = (int *) CALLOC(*nmatches,sizeof(int));
4064       j = 0;
4065       for (recno = low; recno <= high; recno++) {
4066 #ifdef WORDS_BIGENDIAN
4067 #if 0
4068 	printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->labelorder[recno]));
4069 #endif
4070 	matches[j++] = Bigendian_convert_int(this->labelorder[recno])+1;
4071 
4072 #else
4073 #if 0
4074 	printf("Pushing %d:%d\n",recno,this->labelorder[recno]);
4075 #endif
4076 	matches[j++] = this->labelorder[recno]+1;
4077 #endif
4078       }
4079     }
4080   }
4081 
4082   return matches;
4083 }
4084 
4085 /* Slow.  Used before binary search method above. */
4086 int
IIT_find_linear(T this,char * label)4087 IIT_find_linear (T this, char *label) {
4088   int i;
4089   char *p;
4090 
4091   for (i = 0; i < this->total_nintervals; i++) {
4092 #ifdef WORDS_BIGENDIAN
4093 #ifdef HAVE_64_BIT
4094     if (this->label_pointers_8p == true) {
4095       p = &(this->labels[Bigendian_convert_uint8(this->labelpointers8[i])]);
4096     } else {
4097       p = &(this->labels[Bigendian_convert_uint(this->labelpointers[i])]);
4098     }
4099 #else
4100     p = &(this->labels[Bigendian_convert_uint(this->labelpointers[i])]);
4101 #endif
4102 #else
4103 #ifdef HAVE_64_BIT
4104     if (this->label_pointers_8p == true) {
4105       p = &(this->labels[this->labelpointers8[i]]);
4106     } else {
4107       p = &(this->labels[this->labelpointers[i]]);
4108     }
4109 #else
4110     p = &(this->labels[this->labelpointers[i]]);
4111 #endif
4112 #endif
4113     while (isspace((int) *p)) {
4114       p++;
4115     }
4116     if (!strcmp(label,p)) {
4117       return i + 1;
4118     }
4119   }
4120 
4121   return -1;
4122 }
4123 
4124 int
IIT_find_one(T this,char * label)4125 IIT_find_one (T this, char *label) {
4126   int index;
4127   int *matches, nmatches;
4128 
4129   matches = IIT_find(&nmatches,this,label);
4130   if (nmatches == 0) {
4131     /*
4132     fprintf(stderr,"Expected one match for %s, but got 0\n",
4133 	    label);
4134     */
4135     index = -1;
4136   } else {
4137     if (nmatches > 1) {
4138       fprintf(stderr,"Expected one match for %s, but got %d\n",
4139 	      label,nmatches);
4140     }
4141     index = matches[0];
4142     FREE(matches);
4143   }
4144 
4145   return index;
4146 }
4147 
4148 
4149 /************************************************************************/
4150 
4151 
/* qsort comparator for int values in ascending order.  Returns -1, 0,
   or +1 when *a is less than, equal to, or greater than *b. */
static int
int_compare (const void *a, const void *b) {
  const int lhs = *(const int *) a;
  const int rhs = *(const int *) b;

  /* Difference of two 0/1 comparison results yields -1, 0, or +1 */
  return (lhs > rhs) - (lhs < rhs);
}
4165 
4166 
/* qsort comparator for unsigned int values in ascending order.
   Returns -1, 0, or +1 when *a is less than, equal to, or greater
   than *b. */
static int
uint_compare_ascending (const void *a, const void *b) {
  const unsigned int lhs = *(const unsigned int *) a;
  const unsigned int rhs = *(const unsigned int *) b;

  return (lhs > rhs) - (lhs < rhs);
}
4180 
4181 
/* qsort comparator for unsigned int values in descending order.
   Returns -1 when *a is greater than *b, +1 when it is smaller, and 0
   when the two are equal. */
static int
uint_compare_descending (const void *a, const void *b) {
  const unsigned int lhs = *(const unsigned int *) a;
  const unsigned int rhs = *(const unsigned int *) b;

  return (lhs < rhs) - (lhs > rhs);
}
4195 
4196 
4197 Chrpos_T *
IIT_get_highs_for_low(int * nuniq,T this,int divno,Chrpos_T x)4198 IIT_get_highs_for_low (int *nuniq, T this, int divno, Chrpos_T x) {
4199   Chrpos_T *uniq = NULL, *coords = NULL, prev;
4200   int neval, ncoords, i;
4201   int match, lambda, min1, max1 = 0;
4202   struct Interval_T interval;
4203 
4204   if (divno < 0) {
4205     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4206     *nuniq = 0;
4207     return (Chrpos_T *) NULL;
4208   }
4209   min1 = this->nintervals[divno] + 1;
4210 
4211   debug(printf("Entering IIT_get_highs_for_low with divno %d and query %u\n",divno,x));
4212   fnode_query_aux(&min1,&max1,this,divno,0,x);
4213   debug(printf("min1=%d max1=%d\n",min1,max1));
4214 
4215   if (max1 < min1) {
4216     *nuniq = 0;
4217     return (Chrpos_T *) NULL;
4218   } else {
4219     neval = (max1 - min1 + 1) + (max1 - min1 + 1);
4220     coords = (Chrpos_T *) CALLOC(neval,sizeof(Chrpos_T));
4221     ncoords = 0;
4222 
4223     for (lambda = min1; lambda <= max1; lambda++) {
4224       match = this->sigmas[divno][lambda];
4225       /* Have to subtract 1 because intervals array is zero-based */
4226       interval = this->intervals[divno][match - 1];
4227       if (interval.low == x) {
4228 	coords[ncoords++] = interval.high;
4229       }
4230 
4231       match = this->omegas[divno][lambda];
4232       /* Have to subtract 1 because intervals array is zero-based */
4233       interval = this->intervals[divno][match - 1];
4234       if (interval.low == x) {
4235 	coords[ncoords++] = interval.high;
4236       }
4237     }
4238 
4239     if (ncoords == 0) {
4240       *nuniq = 0;
4241       FREE(coords);
4242       return (Chrpos_T *) NULL;
4243 
4244     } else {
4245       /* Eliminate duplicates */
4246       qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_ascending);
4247 
4248       uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
4249       *nuniq = 0;
4250       prev = 0;
4251       for (i = 0; i < ncoords; i++) {
4252 	if (coords[i] != prev) {
4253 	  uniq[(*nuniq)++] = coords[i];
4254 	  prev = coords[i];
4255 	}
4256       }
4257 
4258       FREE(coords);
4259       return uniq;
4260     }
4261   }
4262 }
4263 
4264 
4265 Chrpos_T *
IIT_get_lows_for_high(int * nuniq,T this,int divno,Chrpos_T x)4266 IIT_get_lows_for_high (int *nuniq, T this, int divno, Chrpos_T x) {
4267   Chrpos_T *uniq = NULL, *coords = NULL, prev;
4268   int neval, ncoords, i;
4269   int match, lambda, min1, max1 = 0;
4270   struct Interval_T interval;
4271 
4272   if (divno < 0) {
4273     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4274     *nuniq = 0;
4275     return (Chrpos_T *) NULL;
4276   }
4277   min1 = this->nintervals[divno] + 1;
4278 
4279   debug(printf("Entering IIT_get_lows_for_high with divno %d and query %u\n",divno,x));
4280   fnode_query_aux(&min1,&max1,this,divno,0,x);
4281   debug(printf("min1=%d max1=%d\n",min1,max1));
4282 
4283   if (max1 < min1) {
4284     *nuniq = 0;
4285     return (Chrpos_T *) NULL;
4286   } else {
4287     neval = (max1 - min1 + 1) + (max1 - min1 + 1);
4288     coords = (Chrpos_T *) CALLOC(neval,sizeof(Chrpos_T));
4289     ncoords = 0;
4290 
4291     for (lambda = min1; lambda <= max1; lambda++) {
4292       match = this->sigmas[divno][lambda];
4293       /* Have to subtract 1 because intervals array is zero-based */
4294       interval = this->intervals[divno][match - 1];
4295       if (interval.high == x) {
4296 	coords[ncoords++] = interval.low;
4297       }
4298 
4299       match = this->omegas[divno][lambda];
4300       /* Have to subtract 1 because intervals array is zero-based */
4301       interval = this->intervals[divno][match - 1];
4302       if (interval.high == x) {
4303 	coords[ncoords++] = interval.low;
4304       }
4305     }
4306 
4307     if (ncoords == 0) {
4308       *nuniq = 0;
4309       FREE(coords);
4310       return (Chrpos_T *) NULL;
4311 
4312     } else {
4313       /* Eliminate duplicates */
4314       qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_descending);
4315 
4316       uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
4317       *nuniq = 0;
4318       prev = 0;
4319       for (i = 0; i < ncoords; i++) {
4320 	if (coords[i] != prev) {
4321 	  uniq[(*nuniq)++] = coords[i];
4322 	  prev = coords[i];
4323 	}
4324       }
4325 
4326       FREE(coords);
4327       return uniq;
4328     }
4329   }
4330 }
4331 
4332 
4333 bool
IIT_low_exists_signed_p(T this,int divno,Chrpos_T x,int sign)4334 IIT_low_exists_signed_p (T this, int divno, Chrpos_T x, int sign) {
4335   int match, lambda, min1, max1 = 0;
4336   struct Interval_T interval;
4337 
4338   if (divno < 0) {
4339     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4340     return false;
4341   }
4342   min1 = this->nintervals[divno] + 1;
4343 
4344   debug(printf("Entering IIT_get_highs_for_low with divno %d and query %u\n",divno,x));
4345   fnode_query_aux(&min1,&max1,this,divno,0,x);
4346   debug(printf("min1=%d max1=%d\n",min1,max1));
4347 
4348   if (max1 < min1) {
4349     return false;
4350   } else {
4351     for (lambda = min1; lambda <= max1; lambda++) {
4352       match = this->sigmas[divno][lambda];
4353       /* Have to subtract 1 because intervals array is zero-based */
4354       interval = this->intervals[divno][match - 1];
4355       if (interval.low == x && (sign == 0 || interval.sign == sign)) {
4356 	return true;
4357       }
4358 
4359       match = this->omegas[divno][lambda];
4360       /* Have to subtract 1 because intervals array is zero-based */
4361       interval = this->intervals[divno][match - 1];
4362       if (interval.low == x && (sign == 0 || interval.sign == sign)) {
4363 	return true;
4364       }
4365     }
4366 
4367     return false;
4368   }
4369 }
4370 
4371 bool
IIT_high_exists_signed_p(T this,int divno,Chrpos_T x,int sign)4372 IIT_high_exists_signed_p (T this, int divno, Chrpos_T x, int sign) {
4373   int match, lambda, min1, max1 = 0;
4374   struct Interval_T interval;
4375 
4376   if (divno < 0) {
4377     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4378     return false;
4379   }
4380   min1 = this->nintervals[divno] + 1;
4381 
4382   debug(printf("Entering IIT_get_lows_for_high with divno %d and query %u\n",divno,x));
4383   fnode_query_aux(&min1,&max1,this,divno,0,x);
4384   debug(printf("min1=%d max1=%d\n",min1,max1));
4385 
4386   if (max1 < min1) {
4387     return false;
4388   } else {
4389     for (lambda = min1; lambda <= max1; lambda++) {
4390       match = this->sigmas[divno][lambda];
4391       /* Have to subtract 1 because intervals array is zero-based */
4392       interval = this->intervals[divno][match - 1];
4393       if (interval.high == x && (sign == 0 || interval.sign == sign)) {
4394 	return true;
4395       }
4396 
4397       match = this->omegas[divno][lambda];
4398       /* Have to subtract 1 because intervals array is zero-based */
4399       interval = this->intervals[divno][match - 1];
4400       if (interval.high == x && (sign == 0 || interval.sign == sign)) {
4401 	return true;
4402       }
4403     }
4404 
4405     return false;
4406   }
4407 }
4408 
4409 
4410 int *
IIT_get_lows_signed(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4411 IIT_get_lows_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4412   int *uniq = NULL, *matches, matchstart, neval, nfound, i;
4413   int match, lambda, prev;
4414   int min1, max1 = 0, min2, max2 = 0;
4415   struct Interval_T interval;
4416 
4417   if (divno < 0) {
4418     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4419     *nmatches = 0;
4420     return (int *) NULL;
4421   } else {
4422     min1 = min2 = this->nintervals[divno] + 1;
4423   }
4424 
4425   debug(printf("Entering IIT_low_signed_p with divno %d and query %u..%u\n",divno,x,y));
4426   fnode_query_aux(&min1,&max1,this,divno,0,x);
4427   fnode_query_aux(&min2,&max2,this,divno,0,y);
4428   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4429 
4430   *nmatches = 0;
4431   if (max2 >= min1) {
4432     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4433     matches = (int *) CALLOC(neval,sizeof(int));
4434 
4435     nfound = 0;
4436     for (lambda = min1; lambda <= max2; lambda++) {
4437       match = this->sigmas[divno][lambda];
4438       /* Have to subtract 1 because intervals array is zero-based */
4439       interval = this->intervals[divno][match - 1];
4440       if (interval.low >= x && interval.low <= y && (sign == 0 || interval.sign == sign)) {
4441 	matches[nfound++] = match;
4442       }
4443 
4444       match = this->omegas[divno][lambda];
4445       /* Have to subtract 1 because intervals array is zero-based */
4446       interval = this->intervals[divno][match - 1];
4447       if (interval.low >= x && interval.low <= y && (sign == 0 || interval.sign == sign)) {
4448 	matches[nfound++] = match;
4449       }
4450     }
4451 
4452     if (nfound == 0) {
4453       FREE(matches);
4454       return (int *) NULL;
4455     } else {
4456       /* Eliminate duplicates */
4457       uniq = (int *) CALLOC(nfound,sizeof(int));
4458       qsort(matches,nfound,sizeof(int),int_compare);
4459       prev = 0;
4460       debug(printf("unique segments in lambda %d to %d:",min1,max2));
4461       for (i = 0; i < nfound; i++) {
4462 	if (matches[i] != prev) {
4463 	  debug(printf(" %d",matches[i]));
4464 	  uniq[(*nmatches)++] = matches[i];
4465 	  prev = matches[i];
4466 	}
4467       }
4468       debug(printf("\n"));
4469       FREE(matches);
4470 
4471       /* No need to check for interval overlap */
4472     }
4473   }
4474 
4475   matchstart = this->cum_nintervals[divno];
4476   for (i = 0; i < *nmatches; i++) {
4477     uniq[i] += matchstart;
4478   }
4479 
4480   return uniq;
4481 }
4482 
4483 
4484 int *
IIT_get_highs_signed(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4485 IIT_get_highs_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4486   int *uniq = NULL, *matches, matchstart, neval, nfound, i;
4487   int match, lambda, prev;
4488   int min1, max1 = 0, min2, max2 = 0;
4489   struct Interval_T interval;
4490 
4491   if (divno < 0) {
4492     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4493     *nmatches = 0;
4494     return (int *) NULL;
4495   } else {
4496     min1 = min2 = this->nintervals[divno] + 1;
4497   }
4498 
4499   debug(printf("Entering IIT_low_signed_p with divno %d and query %u..%u\n",divno,x,y));
4500   fnode_query_aux(&min1,&max1,this,divno,0,x);
4501   fnode_query_aux(&min2,&max2,this,divno,0,y);
4502   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4503 
4504   *nmatches = 0;
4505   if (max2 >= min1) {
4506     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4507     matches = (int *) CALLOC(neval,sizeof(int));
4508 
4509     nfound = 0;
4510     for (lambda = min1; lambda <= max2; lambda++) {
4511       match = this->sigmas[divno][lambda];
4512       /* Have to subtract 1 because intervals array is zero-based */
4513       interval = this->intervals[divno][match - 1];
4514       if (interval.high >= x && interval.high <= y && (sign == 0 || interval.sign == sign)) {
4515 	matches[nfound++] = match;
4516       }
4517 
4518       match = this->omegas[divno][lambda];
4519       /* Have to subtract 1 because intervals array is zero-based */
4520       interval = this->intervals[divno][match - 1];
4521       if (interval.high >= x && interval.high <= y && (sign == 0 || interval.sign == sign)) {
4522 	matches[nfound++] = match;
4523       }
4524     }
4525 
4526     if (nfound == 0) {
4527       FREE(matches);
4528       return (int *) NULL;
4529     } else {
4530       /* Eliminate duplicates */
4531       uniq = (int *) CALLOC(nfound,sizeof(int));
4532       qsort(matches,nfound,sizeof(int),int_compare);
4533       prev = 0;
4534       debug(printf("unique segments in lambda %d to %d:",min1,max2));
4535       for (i = 0; i < nfound; i++) {
4536 	if (matches[i] != prev) {
4537 	  debug(printf(" %d",matches[i]));
4538 	  uniq[(*nmatches)++] = matches[i];
4539 	  prev = matches[i];
4540 	}
4541       }
4542       debug(printf("\n"));
4543       FREE(matches);
4544 
4545       /* No need to check for interval overlap */
4546     }
4547   }
4548 
4549   matchstart = this->cum_nintervals[divno];
4550   for (i = 0; i < *nmatches; i++) {
4551     uniq[i] += matchstart;
4552   }
4553 
4554   return uniq;
4555 }
4556 
4557 
4558 
4559 int *
IIT_get(int * nmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,bool sortp)4560 IIT_get (int *nmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, bool sortp) {
4561   int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4562   int lambda, prev;
4563   int divno;
4564   int min1, max1 = 0, min2, max2 = 0;
4565   int nintervals;
4566 
4567   divno = IIT_divint(this,divstring);
4568 
4569 #if 1
4570   /* Usually don't need to check, unless crossing between iits,
4571      because divstring comes from same iit */
4572   if (divno < 0) {
4573     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4574     *nmatches = 0;
4575     return (int *) NULL;
4576   }
4577 #endif
4578 
4579   if ((nintervals = this->nintervals[divno]) == 0) {
4580     *nmatches = 0;
4581     return (int *) NULL;
4582   } else {
4583     min1 = min2 = nintervals + 1;
4584   }
4585 
4586   debug(printf("Entering IIT_get with query %u %u\n",x,y));
4587   fnode_query_aux(&min1,&max1,this,divno,0,x);
4588   fnode_query_aux(&min2,&max2,this,divno,0,y);
4589   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4590 
4591   *nmatches = 0;
4592   if (max2 >= min1) {
4593     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4594     matches = (int *) CALLOC(neval,sizeof(int));
4595     uniq = (int *) CALLOC(neval,sizeof(int));
4596 
4597     i = 0;
4598     for (lambda = min1; lambda <= max2; lambda++) {
4599       matches[i++] = this->sigmas[divno][lambda];
4600       matches[i++] = this->omegas[divno][lambda];
4601     }
4602 
4603     /* Eliminate duplicates */
4604     qsort(matches,neval,sizeof(int),int_compare);
4605     nuniq = 0;
4606     prev = 0;
4607     debug(printf("unique segments in lambda %d to %d:",min1,max2));
4608     for (i = 0; i < neval; i++) {
4609       if (matches[i] != prev) {
4610 	debug(printf(" %d",matches[i]));
4611 	uniq[nuniq++] = matches[i];
4612 	prev = matches[i];
4613       }
4614     }
4615     debug(printf("\n"));
4616 
4617     for (i = 0; i < nuniq; i++) {
4618       if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4619 	matches[(*nmatches)++] = uniq[i];
4620 	debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4621 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4622 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4623       } else {
4624 	debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4625 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4626 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4627       }
4628     }
4629 
4630     FREE(uniq);
4631   }
4632 
4633   /* Convert to universal indices */
4634   matchstart = this->cum_nintervals[divno];
4635   for (i = 0; i < *nmatches; i++) {
4636     matches[i] += matchstart;
4637   }
4638 
4639   if (sortp == false) {
4640     return matches;
4641 #if 0
4642   } else if (this->version <= 2) {
4643     sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4644     FREE(matches);
4645     return sorted;
4646 #endif
4647   } else {
4648     sorted = sort_matches_by_position(this,matches,*nmatches);
4649     FREE(matches);
4650     return sorted;
4651   }
4652 }
4653 
4654 
4655 int *
IIT_get_signed(int * nmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int sign,bool sortp)4656 IIT_get_signed (int *nmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int sign, bool sortp) {
4657   int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4658   int lambda, prev;
4659   int divno;
4660   int min1, max1 = 0, min2, max2 = 0;
4661   int nintervals;
4662   int index;
4663 
4664   divno = IIT_divint(this,divstring);
4665 
4666 #if 1
4667   /* Usually don't need to check, unless crossing between iits,
4668      because divstring comes from same iit */
4669   if (divno < 0) {
4670     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4671     *nmatches = 0;
4672     return (int *) NULL;
4673   }
4674 #endif
4675 
4676   if ((nintervals = this->nintervals[divno]) == 0) {
4677     *nmatches = 0;
4678     return (int *) NULL;
4679   } else {
4680     min1 = min2 = nintervals + 1;
4681   }
4682 
4683   debug(printf("Entering IIT_get with query %u %u\n",x,y));
4684   fnode_query_aux(&min1,&max1,this,divno,0,x);
4685   fnode_query_aux(&min2,&max2,this,divno,0,y);
4686   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4687 
4688   *nmatches = 0;
4689   if (max2 >= min1) {
4690     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4691     matches = (int *) CALLOC(neval,sizeof(int));
4692     uniq = (int *) CALLOC(neval,sizeof(int));
4693 
4694     i = 0;
4695     for (lambda = min1; lambda <= max2; lambda++) {
4696       index = this->sigmas[divno][lambda];
4697       if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
4698 	matches[i++] = index;
4699       }
4700       index = this->omegas[divno][lambda];
4701       if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
4702 	matches[i++] = index;
4703       }
4704     }
4705 
4706     /* Eliminate duplicates */
4707     qsort(matches,neval,sizeof(int),int_compare);
4708     nuniq = 0;
4709     prev = 0;
4710     debug(printf("unique segments in lambda %d to %d:",min1,max2));
4711     for (i = 0; i < neval; i++) {
4712       if (matches[i] != prev) {
4713 	debug(printf(" %d",matches[i]));
4714 	uniq[nuniq++] = matches[i];
4715 	prev = matches[i];
4716       }
4717     }
4718     debug(printf("\n"));
4719 
4720     for (i = 0; i < nuniq; i++) {
4721       if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4722 	matches[(*nmatches)++] = uniq[i];
4723 	debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4724 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4725 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4726       } else {
4727 	debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4728 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4729 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4730       }
4731     }
4732 
4733     FREE(uniq);
4734   }
4735 
4736   /* Convert to universal indices */
4737   matchstart = this->cum_nintervals[divno];
4738   for (i = 0; i < *nmatches; i++) {
4739     matches[i] += matchstart;
4740   }
4741 
4742   if (sortp == false) {
4743     return matches;
4744 #if 0
4745   } else if (this->version <= 2) {
4746     sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4747     FREE(matches);
4748     return sorted;
4749 #endif
4750   } else {
4751     sorted = sort_matches_by_position(this,matches,*nmatches);
4752     FREE(matches);
4753     return sorted;
4754   }
4755 }
4756 
4757 
4758 bool
IIT_exists_with_divno(T this,int divno,Chrpos_T x,Chrpos_T y)4759 IIT_exists_with_divno (T this, int divno, Chrpos_T x, Chrpos_T y) {
4760   int match;
4761   int lambda;
4762   int min1, max1 = 0, min2, max2 = 0;
4763 
4764   if (divno < 0) {
4765     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4766     return false;
4767   }
4768   min1 = min2 = this->nintervals[divno] + 1;
4769 
4770   debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
4771   fnode_query_aux(&min1,&max1,this,divno,0,x);
4772   fnode_query_aux(&min2,&max2,this,divno,0,y);
4773   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4774 
4775   for (lambda = min1; lambda <= max2; lambda++) {
4776     match = this->sigmas[divno][lambda];
4777     if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
4778       return true;
4779     }
4780     match = this->omegas[divno][lambda];
4781     if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
4782       return true;
4783     }
4784   }
4785 
4786   return false;
4787 }
4788 
4789 
4790 bool
IIT_exists_with_divno_signed(T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4791 IIT_exists_with_divno_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4792   int match;
4793   int lambda;
4794   int min1, max1 = 0, min2, max2 = 0;
4795   Interval_T interval;
4796 
4797   if (divno < 0) {
4798     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4799     return false;
4800   }
4801   min1 = min2 = this->nintervals[divno] + 1;
4802 
4803   debug(printf("Entering IIT_exists_with_divno_signed with divno %d and query %u %u\n",divno,x,y));
4804   fnode_query_aux(&min1,&max1,this,divno,0,x);
4805   fnode_query_aux(&min2,&max2,this,divno,0,y);
4806   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4807 
4808   for (lambda = min1; lambda <= max2; lambda++) {
4809     match = this->sigmas[divno][lambda];
4810     interval = &(this->intervals[divno][match - 1]);
4811     if (Interval_low(interval) == x && Interval_high(interval) == y &&
4812 	(sign == 0 || Interval_sign(interval) == sign)) {
4813       return true;
4814     }
4815 
4816     match = this->omegas[divno][lambda];
4817     interval = &(this->intervals[divno][match - 1]);
4818     if (Interval_low(interval) == x && Interval_high(interval) == y &&
4819 	(sign == 0 || Interval_sign(interval) == sign)) {
4820       return true;
4821     }
4822   }
4823 
4824   return false;
4825 }
4826 
4827 
4828 bool
IIT_exists_with_divno_typed_signed(T this,int divno,Chrpos_T x,Chrpos_T y,int type,int sign)4829 IIT_exists_with_divno_typed_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int type, int sign) {
4830   int match;
4831   int lambda;
4832   int min1, max1 = 0, min2, max2 = 0;
4833   Interval_T interval;
4834 
4835   if (divno < 0) {
4836     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4837     return false;
4838   }
4839   min1 = min2 = this->nintervals[divno] + 1;
4840 
4841   debug(printf("Entering IIT_exists_with_divno_typed_signed with divno %d and query %u %u\n",divno,x,y));
4842   fnode_query_aux(&min1,&max1,this,divno,0,x);
4843   fnode_query_aux(&min2,&max2,this,divno,0,y);
4844   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4845 
4846   for (lambda = min1; lambda <= max2; lambda++) {
4847     match = this->sigmas[divno][lambda];
4848     interval = &(this->intervals[divno][match - 1]);
4849     if (Interval_low(interval) == x && Interval_high(interval) == y &&
4850 	Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4851       return true;
4852     }
4853 
4854     match = this->omegas[divno][lambda];
4855     interval = &(this->intervals[divno][match - 1]);
4856     if (Interval_low(interval) == x && Interval_high(interval) == y &&
4857 	Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4858       return true;
4859     }
4860   }
4861 
4862   return false;
4863 }
4864 
4865 
/* Disabled (#if 0) alternative definition of
   IIT_exists_with_divno_typed_signed.  Instead of requiring the interval
   endpoints to equal x and y exactly, this variant accepts any interval
   for which Interval_overlap_p(x,y,...) is true, provided its type equals
   TYPE and its sign equals SIGN (SIGN 0 accepts any sign).  Nothing
   between the #if 0 and the #endif is compiled. */
4866 #if 0
4867 bool
4868 IIT_exists_with_divno_typed_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int type, int sign) {
4869   int match;
4870   int lambda;
4871   int min1, max1 = 0, min2, max2 = 0;
4872   Interval_T interval;
4873 
4874   if (divno < 0) {
4875     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4876     return false;
4877   }
4878   min1 = min2 = this->nintervals[divno] + 1;
4879 
4880   debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
4881   fnode_query_aux(&min1,&max1,this,divno,0,x);
4882   fnode_query_aux(&min2,&max2,this,divno,0,y);
4883   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4884 
4885   for (lambda = min1; lambda <= max2; lambda++) {
4886     match = this->sigmas[divno][lambda];
4887     interval = &(this->intervals[divno][match - 1]);
4888     if (Interval_overlap_p(x,y,this->intervals[divno],match) == true &&
4889 	Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4890       return true;
4891     }
4892     match = this->omegas[divno][lambda];
4893     interval = &(this->intervals[divno][match - 1]);
4894     if (Interval_overlap_p(x,y,this->intervals[divno],match) == true &&
4895 	Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4896       return true;
4897     }
4898   }
4899 
4900   return false;
4901 }
4902 #endif
4903 
4904 
4905 
4906 int *
IIT_get_with_divno(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,bool sortp)4907 IIT_get_with_divno (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, bool sortp) {
4908   int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4909   int lambda, prev;
4910   int min1, max1 = 0, min2, max2 = 0;
4911 
4912   if (divno < 0) {
4913     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4914     *nmatches = 0;
4915     return (int *) NULL;
4916   }
4917   min1 = min2 = this->nintervals[divno] + 1;
4918 
4919   debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
4920   fnode_query_aux(&min1,&max1,this,divno,0,x);
4921   fnode_query_aux(&min2,&max2,this,divno,0,y);
4922   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
4923 
4924   *nmatches = 0;
4925   if (max2 >= min1) {
4926     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4927     matches = (int *) CALLOC(neval,sizeof(int));
4928     uniq = (int *) CALLOC(neval,sizeof(int));
4929 
4930     i = 0;
4931     for (lambda = min1; lambda <= max2; lambda++) {
4932       matches[i++] = this->sigmas[divno][lambda];
4933       matches[i++] = this->omegas[divno][lambda];
4934     }
4935 
4936     /* Eliminate duplicates */
4937     qsort(matches,neval,sizeof(int),int_compare);
4938     nuniq = 0;
4939     prev = 0;
4940     debug(printf("unique segments in lambda %d to %d:",min1,max2));
4941     for (i = 0; i < neval; i++) {
4942       if (matches[i] != prev) {
4943 	debug(printf(" %d",matches[i]));
4944 	uniq[nuniq++] = matches[i];
4945 	prev = matches[i];
4946       }
4947     }
4948     debug(printf("\n"));
4949 
4950     for (i = 0; i < nuniq; i++) {
4951       if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4952 	matches[(*nmatches)++] = uniq[i];
4953 	debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4954 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4955 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4956       } else {
4957 	debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4958 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
4959 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4960       }
4961     }
4962 
4963     FREE(uniq);
4964   }
4965 
4966   /* Convert to universal indices */
4967   matchstart = this->cum_nintervals[divno];
4968   for (i = 0; i < *nmatches; i++) {
4969     matches[i] += matchstart;
4970   }
4971 
4972   if (sortp == false) {
4973     return matches;
4974 #if 0
4975   } else if (this->version <= 2) {
4976     sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4977     FREE(matches);
4978     return sorted;
4979 #endif
4980   } else {
4981     sorted = sort_matches_by_position(this,matches,*nmatches);
4982     FREE(matches);
4983     return sorted;
4984   }
4985 }
4986 
4987 
4988 
4989 int *
IIT_get_signed_with_divno(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,bool sortp,int sign)4990 IIT_get_signed_with_divno (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, bool sortp,
4991 			   int sign) {
4992   int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4993   int lambda, prev;
4994   int min1, max1 = 0, min2, max2 = 0;
4995   int index;
4996 
4997   if (divno < 0) {
4998     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4999     *nmatches = 0;
5000     return (int *) NULL;
5001   }
5002   min1 = min2 = this->nintervals[divno] + 1;
5003 
5004   debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
5005   fnode_query_aux(&min1,&max1,this,divno,0,x);
5006   fnode_query_aux(&min2,&max2,this,divno,0,y);
5007   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
5008 
5009   *nmatches = 0;
5010   if (max2 >= min1) {
5011     neval = (max2 - min1 + 1) + (max2 - min1 + 1);
5012     matches = (int *) CALLOC(neval,sizeof(int));
5013     uniq = (int *) CALLOC(neval,sizeof(int));
5014 
5015     i = 0;
5016     for (lambda = min1; lambda <= max2; lambda++) {
5017       index = this->sigmas[divno][lambda];
5018       if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
5019 	matches[i++] = index;
5020       }
5021       index = this->omegas[divno][lambda];
5022       if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
5023 	matches[i++] = index;
5024       }
5025     }
5026 
5027     /* Eliminate duplicates */
5028     qsort(matches,neval,sizeof(int),int_compare);
5029     nuniq = 0;
5030     prev = 0;
5031     debug(printf("unique segments in lambda %d to %d:",min1,max2));
5032     for (i = 0; i < neval; i++) {
5033       if (matches[i] != prev) {
5034 	debug(printf(" %d",matches[i]));
5035 	uniq[nuniq++] = matches[i];
5036 	prev = matches[i];
5037       }
5038     }
5039     debug(printf("\n"));
5040 
5041     for (i = 0; i < nuniq; i++) {
5042       if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
5043 	matches[(*nmatches)++] = uniq[i];
5044 	debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
5045 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
5046 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
5047       } else {
5048 	debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
5049 		     Interval_low(&(this->intervals[divno][uniq[i]-1])),
5050 		     Interval_high(&(this->intervals[divno][uniq[i]-1]))));
5051       }
5052     }
5053 
5054     FREE(uniq);
5055   }
5056 
5057   /* Convert to universal indices */
5058   matchstart = this->cum_nintervals[divno];
5059   for (i = 0; i < *nmatches; i++) {
5060     matches[i] += matchstart;
5061   }
5062 
5063   if (sortp == false) {
5064     return matches;
5065 #if 0
5066   } else if (this->version <= 2) {
5067     sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
5068     FREE(matches);
5069     return sorted;
5070 #endif
5071   } else {
5072     sorted = sort_matches_by_position(this,matches,*nmatches);
5073     FREE(matches);
5074     return sorted;
5075   }
5076 }
5077 
5078 
5079 static int
coord_search_low(T this,int divno,Chrpos_T x)5080 coord_search_low (T this, int divno, Chrpos_T x) {
5081   int low, middle, high;
5082   bool foundp = false;
5083   Chrpos_T middlevalue;
5084   int index;
5085 
5086   low = 1;			/* not 0, because alphas[divno][0] not used */
5087   high = this->nintervals[divno];
5088 
5089   debug3(printf("low = %d, high = %d\n",low,high));
5090   while (!foundp && low < high) {
5091     middle = (low+high)/2;
5092     index = this->alphas[divno][middle];
5093     middlevalue = Interval_low(&(this->intervals[divno][index-1]));
5094 
5095     debug3(printf("  compare x %u with middlevalue %u (for interval %d)\n",x,middlevalue,this->alphas[divno][middle]-1));
5096     if (x < middlevalue) {
5097       high = middle;
5098     } else if (x > middlevalue) {
5099       low = middle + 1;
5100     } else {
5101       foundp = true;
5102     }
5103     debug3(printf("low = %d, high = %d, middle = %d\n",low,high,middle));
5104   }
5105 
5106   if (foundp == true) {
5107     debug3(printf("found\n"));
5108     return middle;
5109   } else {
5110     debug3(printf("not found\n"));
5111     return low;
5112   }
5113 }
5114 
5115 static int
coord_search_high(T this,int divno,Chrpos_T x)5116 coord_search_high (T this, int divno, Chrpos_T x) {
5117   int low, middle, high;
5118   bool foundp = false;
5119   Chrpos_T middlevalue;
5120   int index;
5121 
5122   low = 1;			/* not 0, because betas[divno][0] not used */
5123   high = this->nintervals[divno];
5124 
5125   while (!foundp && low < high) {
5126     middle = (low+high)/2;
5127     index = this->betas[divno][middle];
5128     middlevalue = Interval_high(&(this->intervals[divno][index-1]));
5129 
5130     if (x < middlevalue) {
5131       high = middle;
5132     } else if (x > middlevalue) {
5133       low = middle + 1;
5134     } else {
5135       foundp = true;
5136     }
5137   }
5138 
5139   if (foundp == true) {
5140     return middle;
5141   } else {
5142     return high;
5143   }
5144 }
5145 
5146 
5147 /* Specialized version of IIT_get_flanking, for 1 right flank */
5148 /* Returns a relative index, requiring use of IIT_interval_for_divno */
5149 int
IIT_get_next(T this,int divno,Chrpos_T y)5150 IIT_get_next (T this, int divno, Chrpos_T y) {
5151   int lambda;
5152   Interval_T interval;
5153 
5154 #if 0
5155   for (lambda = 1; lambda <= this->nintervals[divno]; lambda++) {
5156     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5157     printf("lambda %d %d: %u..%u\n",
5158 	   lambda,this->alphas[divno][lambda],Interval_low(interval),Interval_high(interval));
5159   }
5160   printf("\n");
5161 #endif
5162 
5163 
5164   /* Look at alphas for right flank */
5165   lambda = coord_search_low(this,divno,y);
5166   debug2(printf("coord_search_low lambda = %d\n",lambda));
5167 
5168   while (lambda <= this->nintervals[divno]) {
5169     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5170     debug2(printf("Looking at %u..%u\n",Interval_low(interval),Interval_high(interval)));
5171     if (Interval_low(interval) <= y) {
5172       debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5173       lambda++;
5174     } else {
5175       debug2(printf("Returning %d\n\n",this->alphas[divno][lambda]));
5176       return this->alphas[divno][lambda];
5177     }
5178   }
5179 
5180   debug2(printf("Returning -1\n\n"));
5181   return -1;
5182 }
5183 
5184 
5185 void
IIT_get_flanking(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int sign)5186 IIT_get_flanking (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5187 		  T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int sign) {
5188   int lambda, matchstart, i;
5189   Interval_T interval;
5190   bool stopp;
5191   int divno;
5192 
5193   divno = IIT_divint(this,divstring);
5194 
5195   debug2(printf("Entering IIT_get_flanking with divno %d, query %u %u, nflanking = %d, sign %d\n",divno,x,y,nflanking,sign));
5196 
5197   if (this->alphas[divno] == NULL) {
5198 #if 0
5199     compute_flanking(this);
5200 #else
5201     fprintf(stderr,"Flanking hits not supported on version %d of iit files.  Please use iit_update to update your file\n",
5202 	    this->version);
5203     exit(9);
5204 #endif
5205   }
5206 
5207   /* Look at alphas for right flank */
5208   lambda = coord_search_low(this,divno,y);
5209   debug2(printf("coord_search_low lambda = %d\n",lambda));
5210 
5211   *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5212   *nrightflanks = 0;
5213   stopp = false;
5214   while (lambda <= this->nintervals[divno] && stopp == false) {
5215     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5216     if (Interval_low(interval) <= y) {
5217       debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5218       lambda++;
5219     } else if (sign != 0 && Interval_sign(interval) != sign) {
5220       debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5221       lambda++;
5222     } else {
5223       (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5224       debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5225       if (*nrightflanks < nflanking) {
5226 	debug2(printf("Advancing because need more\n"));
5227 	lambda++;
5228       } else {
5229 	stopp = true;
5230       }
5231     }
5232   }
5233 
5234   /* Look at betas for left flank */
5235   lambda = coord_search_high(this,divno,x);
5236 
5237   *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5238   *nleftflanks = 0;
5239   stopp = false;
5240   while (lambda >= 1 && stopp == false) {
5241     interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5242     if (Interval_high(interval) >= x) {
5243       lambda--;
5244     } else if (sign != 0 && Interval_sign(interval) != sign) {
5245       lambda--;
5246     } else {
5247       (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5248       if (*nleftflanks < nflanking) {
5249 	lambda--;
5250       } else {
5251 	stopp = true;
5252       }
5253     }
5254   }
5255 
5256   /* Convert to universal indices */
5257   matchstart = this->cum_nintervals[divno];
5258   for (i = 0; i < *nrightflanks; i++) {
5259     (*rightflanks)[i] += matchstart;
5260   }
5261   for (i = 0; i < *nleftflanks; i++) {
5262     (*leftflanks)[i] += matchstart;
5263   }
5264 
5265   return;
5266 }
5267 
5268 void
IIT_get_flanking_with_divno(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,int divno,Chrpos_T x,Chrpos_T y,int nflanking,int sign)5269 IIT_get_flanking_with_divno (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5270 			     T this, int divno, Chrpos_T x, Chrpos_T y, int nflanking, int sign) {
5271   int lambda, matchstart, i;
5272   Interval_T interval;
5273   bool stopp;
5274 
5275   debug2(printf("Entering IIT_get_flanking_with_divno with divno %d, query %u %u, nflanking = %d, sign %d\n",divno,x,y,nflanking,sign));
5276 
5277   if (this->alphas[divno] == NULL) {
5278 #if 0
5279     compute_flanking(this);
5280 #else
5281     fprintf(stderr,"Flanking hits not supported on version %d of iit files.  Please use iit_update to update your file\n",
5282 	    this->version);
5283     exit(9);
5284 #endif
5285   }
5286 
5287   /* Look at alphas for right flank */
5288   lambda = coord_search_low(this,divno,y);
5289   debug2(printf("coord_search_low lambda = %d\n",lambda));
5290 
5291   *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5292   *nrightflanks = 0;
5293   stopp = false;
5294   while (lambda <= this->nintervals[divno] && stopp == false) {
5295     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5296     if (Interval_low(interval) <= y) {
5297       debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5298       lambda++;
5299     } else if (sign != 0 && Interval_sign(interval) != sign) {
5300       debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5301       lambda++;
5302     } else {
5303       (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5304       debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5305       if (*nrightflanks < nflanking) {
5306 	debug2(printf("Advancing because need more\n"));
5307 	lambda++;
5308       } else {
5309 	stopp = true;
5310       }
5311     }
5312   }
5313 
5314   /* Look at betas for left flank */
5315   lambda = coord_search_high(this,divno,x);
5316 
5317   *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5318   *nleftflanks = 0;
5319   stopp = false;
5320   while (lambda >= 1 && stopp == false) {
5321     interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5322     if (Interval_high(interval) >= x) {
5323       lambda--;
5324     } else if (sign != 0 && Interval_sign(interval) != sign) {
5325       lambda--;
5326     } else {
5327       (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5328       if (*nleftflanks < nflanking) {
5329 	lambda--;
5330       } else {
5331 	stopp = true;
5332       }
5333     }
5334   }
5335 
5336   /* Convert to universal indices */
5337   matchstart = this->cum_nintervals[divno];
5338   for (i = 0; i < *nrightflanks; i++) {
5339     (*rightflanks)[i] += matchstart;
5340   }
5341   for (i = 0; i < *nleftflanks; i++) {
5342     (*leftflanks)[i] += matchstart;
5343   }
5344 
5345   return;
5346 }
5347 
5348 void
IIT_get_flanking_typed(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int type,int sign)5349 IIT_get_flanking_typed (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5350 			T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int type,
5351 			int sign) {
5352   int lambda, matchstart, i;
5353   Interval_T interval;
5354   bool stopp;
5355   int divno;
5356 
5357   divno = IIT_divint(this,divstring);
5358 
5359   debug2(printf("Entering IIT_get_flanking_typed with query %u %u => divno is %d\n",x,y,divno));
5360 
5361   if (this->alphas[divno] == NULL) {
5362 #if 0
5363     IIT_compute_flanking(this);
5364 #else
5365     fprintf(stderr,"Flanking hits not supported on version %d of iit files.  Please use iit_update to update your file\n",
5366 	    this->version);
5367     exit(9);
5368 #endif
5369   }
5370 
5371   /* Look at alphas for right flank */
5372   lambda = coord_search_low(this,divno,y);
5373   debug2(printf("coord_search_low yields lambda %d\n",lambda));
5374 
5375   *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5376   *nrightflanks = 0;
5377   stopp = false;
5378   while (lambda <= this->nintervals[divno] && stopp == false) {
5379     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5380     if (sign != 0 && Interval_sign(interval) != sign) {
5381       debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5382       lambda++;
5383     } else if (Interval_low(interval) <= y) {
5384       debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5385       lambda++;
5386     } else if (Interval_type(interval) != type) {
5387       debug2(printf("Advancing because interval_type %d != %d\n",Interval_type(interval),type));
5388       lambda++;
5389     } else {
5390       (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5391       debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5392       if (*nrightflanks < nflanking) {
5393 	debug2(printf("Advancing because need more\n"));
5394 	lambda++;
5395       } else {
5396 	stopp = true;
5397       }
5398     }
5399   }
5400 
5401   /* Look at betas for left flank */
5402   lambda = coord_search_high(this,divno,x);
5403 
5404   *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5405   *nleftflanks = 0;
5406   stopp = false;
5407   while (lambda >= 1 && stopp == false) {
5408     interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5409     if (sign != 0 && Interval_sign(interval) != sign) {
5410       lambda--;
5411     } else if (Interval_high(interval) >= x) {
5412       lambda--;
5413     } else if (Interval_type(interval) != type) {
5414       lambda--;
5415     } else {
5416       (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5417       if (*nleftflanks < nflanking) {
5418 	lambda--;
5419       } else {
5420 	stopp = true;
5421       }
5422     }
5423   }
5424 
5425   /* Convert to universal indices */
5426   matchstart = this->cum_nintervals[divno];
5427   for (i = 0; i < *nrightflanks; i++) {
5428     (*rightflanks)[i] += matchstart;
5429   }
5430   for (i = 0; i < *nleftflanks; i++) {
5431     (*leftflanks)[i] += matchstart;
5432   }
5433 
5434   return;
5435 }
5436 
5437 void
IIT_get_flanking_multiple_typed(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int * types,int ntypes)5438 IIT_get_flanking_multiple_typed (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5439 				 T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int *types, int ntypes) {
5440   int k, i;
5441   int lambda, matchstart;
5442   Interval_T interval;
5443   bool stopp;
5444   int divno;
5445 
5446   divno = IIT_divint(this,divstring);
5447 
5448   debug(printf("Entering IIT_get_flanking_multiple_typed with query %u %u\n",x,y));
5449 
5450   if (this->alphas[divno] == NULL) {
5451 #if 0
5452     IIT_compute_flanking(this);
5453 #else
5454     fprintf(stderr,"Flanking hits not supported on version %d of iit files.  Please use iit_update to update your file\n",
5455 	    this->version);
5456     exit(9);
5457 #endif
5458   }
5459 
5460   /* Look at alphas for right flank */
5461   lambda = coord_search_low(this,divno,y);
5462 
5463   *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5464   *nrightflanks = 0;
5465   stopp = false;
5466   while (lambda <= this->nintervals[divno] && stopp == false) {
5467     interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5468     if (Interval_low(interval) <= y) {
5469       lambda++;
5470     } else {
5471       k = 0;
5472       while (k < ntypes && Interval_type(interval) != types[k]) {
5473 	k++;
5474       }
5475       if (k >= ntypes) {
5476 	lambda++;
5477       } else {
5478 	(*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5479 	if (*nrightflanks < nflanking) {
5480 	  lambda++;
5481 	} else {
5482 	  stopp = true;
5483 	}
5484       }
5485     }
5486   }
5487 
5488 
5489   /* Look at betas for left flank */
5490   lambda = coord_search_high(this,divno,x);
5491 
5492   *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5493   *nleftflanks = 0;
5494   stopp = false;
5495   while (lambda >= 1 && stopp == false) {
5496     interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5497     if (Interval_high(interval) >= x) {
5498       lambda--;
5499     } else {
5500       k = 0;
5501       while (k < ntypes && Interval_type(interval) != types[k]) {
5502 	k++;
5503       }
5504       if (k >= ntypes) {
5505 	lambda--;
5506       } else {
5507 	(*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5508 	if (*nleftflanks < nflanking) {
5509 	  lambda--;
5510 	} else {
5511 	  stopp = true;
5512 	}
5513       }
5514     }
5515   }
5516 
5517   /* Convert to universal indices */
5518   matchstart = this->cum_nintervals[divno];
5519   for (i = 0; i < *nrightflanks; i++) {
5520     (*rightflanks)[i] += matchstart;
5521   }
5522   for (i = 0; i < *nleftflanks; i++) {
5523     (*leftflanks)[i] += matchstart;
5524   }
5525 
5526   return;
5527 }
5528 
5529 
5530 static const Except_T iit_error = { "IIT problem" };
5531 
5532 int
IIT_get_one(T this,char * divstring,Chrpos_T x,Chrpos_T y)5533 IIT_get_one (T this, char *divstring, Chrpos_T x, Chrpos_T y) {
5534   int lambda;
5535   int min1, max1 = 0, min2, max2 = 0;
5536   int divno;
5537   bool stopp;
5538   Interval_T interval;
5539 
5540   divno = IIT_divint(this,divstring);
5541   min1 = min2 = this->nintervals[divno] + 1;
5542 
5543   debug(printf("Entering IIT_get_one with query %u %u\n",x,y));
5544   fnode_query_aux(&min1,&max1,this,divno,0,x);
5545   fnode_query_aux(&min2,&max2,this,divno,0,y);
5546   debug(printf("min1=%d max1=%d  min2=%d max2=%d\n",min1,max1,min2,max2));
5547 
5548   if (max2 >= min1) {
5549     for (lambda = min1; lambda <= max2; lambda++) {
5550       if (Interval_overlap_p(x,y,this->intervals[divno],this->sigmas[divno][lambda]) == true) {
5551 	return this->sigmas[divno][lambda];
5552       }
5553     }
5554     for (lambda = min1; lambda <= max2; lambda++) {
5555       if (Interval_overlap_p(x,y,this->intervals[divno],this->omegas[divno][lambda]) == true) {
5556 	return this->omegas[divno][lambda];
5557       }
5558     }
5559   }
5560 
5561   /* fprintf(stderr,"Expected one match for %u--%u, but got none\n",x,y); */
5562   /* If we miss (e.g., for circular chromosome), then report the chromosome below */
5563   /* Look at betas or omegas for left flank */
5564   lambda = min1 - 1;
5565   stopp = false;
5566   while (lambda >= 1 && stopp == false) {
5567     interval = &(this->intervals[divno][this->omegas[divno][lambda]-1]);
5568     if (Interval_high(interval) >= x) {
5569       lambda--;
5570     } else {
5571       return this->omegas[divno][lambda];
5572     }
5573   }
5574 
5575   return this->omegas[divno][/*lambda*/1];
5576 }
5577 
5578 /* Generally called where intervals don't overlap, like chromosomes,
5579    and where x == y. */
5580 /*
5581 int
5582 IIT_get_one_safe (T this, Chrpos_T x, Chrpos_T y) {
5583   int index;
5584   int *matches, nmatches;
5585 
5586   matches = IIT_get(&nmatches,this,x,y,sortp);
5587   if (nmatches != 1) {
5588     fprintf(stderr,"Expected one match for %u--%u, but got %d\n",
5589 	    x,y,nmatches);
5590     abort();
5591   }
5592   index = matches[0];
5593   FREE(matches);
5594   return index;
5595 }
5596 */
5597 
5598 int *
IIT_get_typed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type,bool sortp)5599 IIT_get_typed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int type, bool sortp) {
5600   int *sorted;
5601   int index;
5602   /* int divno; */
5603   int *typematches = NULL, *matches, nmatches, i, j;
5604   Interval_T interval;
5605 
5606   *ntypematches = 0;
5607   matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5608   for (i = 0; i < nmatches; i++) {
5609     index = matches[i];
5610     interval = &(this->intervals[0][index-1]);
5611     if (Interval_type(interval) == type) {
5612       (*ntypematches)++;
5613     }
5614   }
5615 
5616   if (*ntypematches > 0) {
5617     typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5618     j = 0;
5619     for (i = 0; i < nmatches; i++) {
5620       index = matches[i];
5621       interval = &(this->intervals[0][index-1]);
5622       if (Interval_type(interval) == type) {
5623 	typematches[j++] = index;
5624       }
5625     }
5626   }
5627 
5628   if (matches != NULL) {
5629     FREE(matches);
5630   }
5631 
5632   if (sortp == false) {
5633     return typematches;
5634 #if 0
5635   } else if (this->version <= 2) {
5636     sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5637     FREE(typematches);
5638     return sorted;
5639 #endif
5640   } else {
5641     /* divno = IIT_divint(this,divstring); */
5642     sorted = sort_matches_by_position(this,typematches,*ntypematches);
5643     FREE(typematches);
5644     return sorted;
5645   }
5646 }
5647 
5648 int *
IIT_get_typed_with_divno(int * ntypematches,T this,int divno,Chrpos_T x,Chrpos_T y,int type,bool sortp)5649 IIT_get_typed_with_divno (int *ntypematches, T this, int divno, Chrpos_T x, Chrpos_T y,
5650 			  int type, bool sortp) {
5651   int *sorted;
5652   int index;
5653   int *typematches = NULL, *matches, nmatches, i, j;
5654   Interval_T interval;
5655 
5656   if (divno < 0) {
5657     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
5658     *ntypematches = 0;
5659     return (int *) NULL;
5660   }
5661 
5662   *ntypematches = 0;
5663   matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
5664   for (i = 0; i < nmatches; i++) {
5665     index = matches[i];
5666     interval = &(this->intervals[0][index-1]);
5667     if (Interval_type(interval) == type) {
5668       (*ntypematches)++;
5669     }
5670   }
5671 
5672   if (*ntypematches > 0) {
5673     typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5674     j = 0;
5675     for (i = 0; i < nmatches; i++) {
5676       index = matches[i];
5677       interval = &(this->intervals[0][index-1]);
5678       if (Interval_type(interval) == type) {
5679 	typematches[j++] = index;
5680       }
5681     }
5682   }
5683 
5684   if (matches != NULL) {
5685     FREE(matches);
5686   }
5687 
5688   if (sortp == false) {
5689     return typematches;
5690 #if 0
5691   } else if (this->version <= 2) {
5692     sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5693     FREE(typematches);
5694     return sorted;
5695 #endif
5696   } else {
5697     sorted = sort_matches_by_position(this,typematches,*ntypematches);
5698     FREE(typematches);
5699     return sorted;
5700   }
5701 }
5702 
5703 
5704 int *
IIT_get_typed_signed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type,int sign,bool sortp)5705 IIT_get_typed_signed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y,
5706 		      int type, int sign, bool sortp) {
5707   int *sorted;
5708   int index;
5709   /* int divno; */
5710   int *typematches = NULL, *matches, nmatches, i, j;
5711   Interval_T interval;
5712 
5713   *ntypematches = 0;
5714   matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5715   for (i = 0; i < nmatches; i++) {
5716     index = matches[i];
5717     interval = &(this->intervals[0][index-1]);
5718     if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5719       (*ntypematches)++;
5720     }
5721   }
5722 
5723   if (*ntypematches > 0) {
5724     typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5725     j = 0;
5726     for (i = 0; i < nmatches; i++) {
5727       index = matches[i];
5728       interval = &(this->intervals[0][index-1]);
5729       if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5730 	typematches[j++] = index;
5731       }
5732     }
5733   }
5734 
5735   if (matches != NULL) {
5736     FREE(matches);
5737   }
5738 
5739   if (sortp == false) {
5740     return typematches;
5741 #if 0
5742   } else if (this->version <= 2) {
5743     sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5744     FREE(typematches);
5745     return sorted;
5746 #endif
5747   } else {
5748     /* divno = IIT_divint(this,divstring); */
5749     sorted = sort_matches_by_position(this,typematches,*ntypematches);
5750     FREE(typematches);
5751     return sorted;
5752   }
5753 }
5754 
5755 
5756 int *
IIT_get_typed_signed_with_divno(int * ntypematches,T this,int divno,Chrpos_T x,Chrpos_T y,int type,int sign,bool sortp)5757 IIT_get_typed_signed_with_divno (int *ntypematches, T this, int divno, Chrpos_T x, Chrpos_T y,
5758 				 int type, int sign, bool sortp) {
5759   int *sorted;
5760   int index;
5761   int *typematches = NULL, *matches, nmatches, i, j;
5762   Interval_T interval;
5763 
5764   if (divno < 0) {
5765     /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
5766     *ntypematches = 0;
5767     return (int *) NULL;
5768   }
5769 
5770   *ntypematches = 0;
5771   matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
5772   for (i = 0; i < nmatches; i++) {
5773     index = matches[i];
5774     interval = &(this->intervals[0][index-1]);
5775     if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5776       (*ntypematches)++;
5777     }
5778   }
5779 
5780   if (*ntypematches > 0) {
5781     typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5782     j = 0;
5783     for (i = 0; i < nmatches; i++) {
5784       index = matches[i];
5785       interval = &(this->intervals[0][index-1]);
5786       if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5787 	typematches[j++] = index;
5788       }
5789     }
5790   }
5791 
5792   if (matches != NULL) {
5793     FREE(matches);
5794   }
5795 
5796   if (sortp == false) {
5797     return typematches;
5798 #if 0
5799   } else if (this->version <= 2) {
5800     sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5801     FREE(typematches);
5802     return sorted;
5803 #endif
5804   } else {
5805     sorted = sort_matches_by_position(this,typematches,*ntypematches);
5806     FREE(typematches);
5807     return sorted;
5808   }
5809 }
5810 
5811 
5812 int *
IIT_get_multiple_typed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int * types,int ntypes,bool sortp)5813 IIT_get_multiple_typed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y,
5814 			int *types, int ntypes, bool sortp) {
5815   int *sorted;
5816   int index;
5817   /* int divno; */
5818   int *typematches = NULL, *matches, nmatches, i, j, k;
5819   Interval_T interval;
5820 
5821   *ntypematches = 0;
5822   matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5823   for (i = 0; i < nmatches; i++) {
5824     index = matches[i];
5825     interval = &(this->intervals[0][index-1]);
5826     k = 0;
5827     while (k < ntypes && Interval_type(interval) != types[k]) {
5828       k++;
5829     }
5830     if (k < ntypes) {
5831       (*ntypematches)++;
5832     }
5833   }
5834 
5835   if (*ntypematches > 0) {
5836     typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5837     j = 0;
5838     for (i = 0; i < nmatches; i++) {
5839       index = matches[i];
5840       interval = &(this->intervals[0][index-1]);
5841       k = 0;
5842       while (k < ntypes && Interval_type(interval) != types[k]) {
5843 	k++;
5844       }
5845       if (k < ntypes) {
5846 	typematches[j++] = index;
5847       }
5848     }
5849   }
5850 
5851   if (matches != NULL) {
5852     FREE(matches);
5853   }
5854 
5855   if (sortp == false || this->version >= 3) {
5856     return typematches;
5857 #if 0
5858   } else if (this->version <= 2) {
5859     sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/true);
5860     FREE(typematches);
5861     return sorted;
5862 #endif
5863   } else {
5864     /* divno = IIT_divint(this,divstring); */
5865     sorted = sort_matches_by_position(this,typematches,*ntypematches);
5866     FREE(typematches);
5867     return sorted;
5868   }
5869 }
5870 
5871 int
IIT_get_exact(T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5872 IIT_get_exact (T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5873   int index;
5874   int *matches, nmatches, i;
5875   Interval_T interval;
5876 
5877   matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5878   for (i = 0; i < nmatches; i++) {
5879     index = matches[i];
5880     interval = &(this->intervals[0][index-1]);
5881     if (Interval_low(interval) == x && Interval_high(interval) == y &&
5882 	Interval_type(interval) == type) {
5883       FREE(matches);
5884       return index;
5885     }
5886   }
5887 
5888   FREE(matches);
5889   return -1;
5890 }
5891 
5892 bool
IIT_exact_p(T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5893 IIT_exact_p (T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5894   int index;
5895   int *matches, nmatches, i;
5896   Interval_T interval;
5897 
5898   if (x == y) {
5899     matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5900     for (i = 0; i < nmatches; i++) {
5901       index = matches[i];
5902       interval = &(this->intervals[0][index-1]);
5903       if (Interval_low(interval) == x && Interval_high(interval) == y &&
5904 	  Interval_sign(interval) == 0 && Interval_type(interval) == type) {
5905 	FREE(matches);
5906 	return true;
5907       }
5908     }
5909 
5910   } else if (x < y) {
5911     matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5912     for (i = 0; i < nmatches; i++) {
5913       index = matches[i];
5914       interval = &(this->intervals[0][index-1]);
5915       if (Interval_low(interval) == x && Interval_high(interval) == y &&
5916 	  Interval_sign(interval) > 0 && Interval_type(interval) == type) {
5917 	FREE(matches);
5918 	return true;
5919       }
5920     }
5921 
5922   } else {
5923     matches = IIT_get(&nmatches,this,divstring,y,x,/*sortp*/false);
5924     for (i = 0; i < nmatches; i++) {
5925       index = matches[i];
5926       interval = &(this->intervals[0][index-1]);
5927       if (Interval_low(interval) == x && Interval_high(interval) == y &&
5928 	  Interval_sign(interval) < 0 && Interval_type(interval) == type) {
5929 	FREE(matches);
5930 	return true;
5931       }
5932     }
5933   }
5934 
5935   FREE(matches);
5936   return false;
5937 }
5938 
5939 
5940 int *
IIT_get_exact_multiple(int * nexactmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5941 IIT_get_exact_multiple (int *nexactmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5942   int *exactmatches;
5943   int index;
5944   int *matches, nmatches, i, j;
5945   Interval_T interval;
5946 
5947   *nexactmatches = 0;
5948   matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5949   for (i = 0; i < nmatches; i++) {
5950     index = matches[i];
5951     interval = &(this->intervals[0][index-1]);
5952     if (Interval_low(interval) == x && Interval_high(interval) == y &&
5953 	Interval_type(interval) == type) {
5954       (*nexactmatches)++;
5955     }
5956   }
5957 
5958   if (*nexactmatches == 0) {
5959     FREE(matches);
5960     return (int *) NULL;
5961   } else {
5962     exactmatches = (int *) CALLOC(*nexactmatches,sizeof(int));
5963     j = 0;
5964     for (i = 0; i < nmatches; i++) {
5965       index = matches[i];
5966       interval = &(this->intervals[0][index-1]);
5967       if (Interval_low(interval) == x && Interval_high(interval) == y &&
5968 	  Interval_type(interval) == type) {
5969 	exactmatches[j++] = index;
5970       }
5971     }
5972     FREE(matches);
5973     return exactmatches;
5974   }
5975 }
5976 
5977 
5978 #if 0
5979 /* Previously called by print_splicesite_labels in pair.c */
5980 int *
5981 IIT_get_exact_multiple_with_divno (int *nexactmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int type) {
5982   int *exactmatches;
5983   int index;
5984   int *matches, nmatches, i, j;
5985   Interval_T interval;
5986 
5987   *nexactmatches = 0;
5988   matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
5989   for (i = 0; i < nmatches; i++) {
5990     index = matches[i];
5991     interval = &(this->intervals[0][index-1]);
5992     if (Interval_low(interval) == x && Interval_high(interval) == y &&
5993 	Interval_type(interval) == type) {
5994       (*nexactmatches)++;
5995     }
5996   }
5997 
5998   if (*nexactmatches == 0) {
5999     FREE(matches);
6000     return (int *) NULL;
6001   } else {
6002     exactmatches = (int *) CALLOC(*nexactmatches,sizeof(int));
6003     j = 0;
6004     for (i = 0; i < nmatches; i++) {
6005       index = matches[i];
6006       interval = &(this->intervals[0][index-1]);
6007       if (Interval_low(interval) == x && Interval_high(interval) == y &&
6008 	  Interval_type(interval) == type) {
6009 	exactmatches[j++] = index;
6010       }
6011     }
6012     FREE(matches);
6013     return exactmatches;
6014   }
6015 }
6016 #endif
6017 
6018 
6019 /************************************************************************/
6020 
6021 /* Modified from IIT_find */
6022 int *
IIT_get_values_between(int * nmatches,T this,double lowval,double highval)6023 IIT_get_values_between (int *nmatches, T this, double lowval, double highval) {
6024   int *matches = NULL, j;
6025   double val;
6026   int start, end;
6027   int low, middle, high, recno;
6028   bool foundp;
6029 
6030   debug(printf("Entering IIT_get_values_between with %f to %f\n",lowval,highval));
6031 
6032   /* Find start */
6033   foundp = false;
6034   low = 0;
6035   high = this->total_nintervals;
6036 
6037 #ifdef DEBUG
6038 #ifndef WORDS_BIGENDIAN
6039   for (middle = low; middle < high; middle++) {
6040     printf("%d:%d:%f\n",middle,this->valueorder[middle],
6041 	   this->values[this->valueorder[middle]]);
6042   }
6043   printf("\n");
6044 #endif
6045 #endif
6046 
6047   while (!foundp && low < high) {
6048     middle = (low+high)/2;
6049 
6050 #ifdef DEBUG
6051 #ifndef WORDS_BIGENDIAN
6052     printf("low %d middle %d:%d:%f high %d\n",
6053 	   low,middle,this->valueorder[middle],
6054 	   this->values[this->valueorder[middle]],high);
6055 #endif
6056 #endif
6057 
6058 #ifdef WORDS_BIGENDIAN
6059     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6060 #else
6061     val = this->values[this->valueorder[middle]];
6062 #endif
6063 
6064     if (val > lowval) {
6065       high = middle;
6066       debug(printf("Decreasing high to %d\n",high));
6067     } else if (val < lowval) {
6068       low = middle + 1;
6069       debug(printf("Increasing low to %d\n",low));
6070     } else {
6071       foundp = true;
6072     }
6073   }
6074 
6075   if (foundp == true) {
6076     start = middle;
6077     debug(printf("start is middle = %d\n\n",start));
6078 
6079 #ifdef WORDS_BIGENDIAN
6080     while (start-1 >= 0 &&
6081 	   lowval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start-1])])) {
6082       start--;
6083     }
6084 #else
6085     while (start-1 >= 0 &&
6086 	   lowval == this->values[this->valueorder[start-1]]) {
6087       start--;
6088       debug(printf("Regressing start to %d\n",start));
6089     }
6090 #endif
6091 
6092   } else if ((start = low) >= this->total_nintervals) {
6093     *nmatches = 0;
6094     return (int *) NULL;
6095 
6096   } else {
6097     debug(printf("start is low = %d\n\n",start));
6098 #ifdef WORDS_BIGENDIAN
6099     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start])]);
6100 #else
6101     val = this->values[this->valueorder[start]];
6102 #endif
6103     debug(printf("Final value for low bound = %f\n",val));
6104     if (val < lowval) {
6105       *nmatches = 0;
6106       return (int *) NULL;
6107     }
6108   }
6109 
6110 
6111   /* Find end */
6112   foundp = false;
6113   low = 0;
6114   high = this->total_nintervals;
6115   while (!foundp && low < high) {
6116     middle = (low+high)/2;
6117 
6118 #ifdef DEBUG
6119 #ifndef WORDS_BIGENDIAN
6120     printf("low %d middle %d:%d:%f high %d\n",
6121 	   low,middle,this->valueorder[middle],
6122 	   this->values[this->valueorder[middle]],high);
6123 #endif
6124 #endif
6125 
6126 #ifdef WORDS_BIGENDIAN
6127     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6128 #else
6129     val = this->values[this->valueorder[middle]];
6130 #endif
6131 
6132     if (val > highval) {
6133       high = middle;
6134       debug(printf("Decreasing high to %d\n",high));
6135     } else if (val < highval) {
6136       low = middle + 1;
6137       debug(printf("Increasing low to %d\n",low));
6138     } else {
6139       foundp = true;
6140     }
6141   }
6142 
6143   if (foundp == true) {
6144     end = middle;
6145     debug(printf("end is middle = %d\n\n",end));
6146 
6147 #ifdef WORDS_BIGENDIAN
6148     while (end+1 < this->total_nintervals &&
6149 	   highval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end+1])])) {
6150       end++;
6151     }
6152 #else
6153     while (end+1 < this->total_nintervals &&
6154 	   highval == this->values[this->valueorder[end+1]]) {
6155       end++;
6156       debug(printf("Advancing end to %d\n",end));
6157     }
6158 #endif
6159 
6160   } else if ((end = high - 1) < 0) {
6161     *nmatches = 0;
6162     return (int *) NULL;
6163 
6164   } else {
6165     debug(printf("end is high - 1 = %d\n\n",end));
6166 
6167 #ifdef WORDS_BIGENDIAN
6168     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end])]);
6169 #else
6170     val = this->values[this->valueorder[end]];
6171 #endif
6172     debug(printf("Final value for high bound = %f\n",val));
6173 
6174     if (val > highval) {
6175       *nmatches = 0;
6176       return (int *) NULL;
6177     }
6178   }
6179 
6180   *nmatches = end - start + 1;
6181   if (*nmatches <= 0) {
6182     *nmatches = 0;
6183     return (int *) NULL;
6184   } else {
6185     matches = (int *) CALLOC(*nmatches,sizeof(int));
6186     j = 0;
6187     for (recno = start; recno <= end; recno++) {
6188 #ifdef WORDS_BIGENDIAN
6189 #ifdef DEBUG
6190       printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6191 #endif
6192       matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6193 
6194 #else
6195 #ifdef DEBUG
6196       printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6197 #endif
6198       matches[j++] = this->valueorder[recno]+1;
6199 #endif
6200     }
6201 
6202     return matches;
6203   }
6204 }
6205 
6206 
6207 int *
IIT_get_values_below(int * nmatches,T this,double highval)6208 IIT_get_values_below (int *nmatches, T this, double highval) {
6209   int *matches = NULL, j;
6210   double val;
6211   int start = 0, end;
6212   int low, middle, high, recno;
6213   bool foundp;
6214 
6215   debug(printf("Entering IIT_get_values_below with %f\n",highval));
6216 
6217   /* Find end */
6218   foundp = false;
6219   low = 0;
6220   high = this->total_nintervals;
6221   while (!foundp && low < high) {
6222     middle = (low+high)/2;
6223 
6224 #ifdef DEBUG
6225 #ifndef WORDS_BIGENDIAN
6226     printf("low %d middle %d:%d:%f high %d\n",
6227 	   low,middle,this->valueorder[middle],
6228 	   this->values[this->valueorder[middle]],high);
6229 #endif
6230 #endif
6231 
6232 #ifdef WORDS_BIGENDIAN
6233     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6234 #else
6235     val = this->values[this->valueorder[middle]];
6236 #endif
6237 
6238     if (val > highval) {
6239       high = middle;
6240       debug(printf("Decreasing high to %d\n",high));
6241     } else if (val < highval) {
6242       low = middle + 1;
6243       debug(printf("Increasing low to %d\n",low));
6244     } else {
6245       foundp = true;
6246     }
6247   }
6248 
6249   if (foundp == true) {
6250     end = middle;
6251     debug(printf("end is middle = %d\n\n",end));
6252 
6253 #ifdef WORDS_BIGENDIAN
6254     while (end+1 < this->total_nintervals &&
6255 	   highval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end+1])])) {
6256       end++;
6257     }
6258 #else
6259     while (end+1 < this->total_nintervals &&
6260 	   highval == this->values[this->valueorder[end+1]]) {
6261       end++;
6262       debug(printf("Advancing end to %d\n",end));
6263     }
6264 #endif
6265 
6266   } else if ((end = high - 1) < 0) {
6267     *nmatches = 0;
6268     return (int *) NULL;
6269 
6270   } else {
6271     debug(printf("end is high - 1 = %d\n\n",end));
6272 
6273 #ifdef WORDS_BIGENDIAN
6274     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end])]);
6275 #else
6276     val = this->values[this->valueorder[end]];
6277 #endif
6278     debug(printf("Final value for high bound = %f\n",val));
6279 
6280     if (val > highval) {
6281       *nmatches = 0;
6282       return (int *) NULL;
6283     }
6284   }
6285 
6286 
6287   *nmatches = end - start + 1;
6288   if (*nmatches <= 0) {
6289     *matches = 0;
6290     return (int *) NULL;
6291   } else {
6292     matches = (int *) CALLOC(*nmatches,sizeof(int));
6293     j = 0;
6294     for (recno = start; recno <= end; recno++) {
6295 #ifdef WORDS_BIGENDIAN
6296 #ifdef DEBUG
6297       printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6298 #endif
6299       matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6300 
6301 #else
6302 #ifdef DEBUG
6303       printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6304 #endif
6305       matches[j++] = this->valueorder[recno]+1;
6306 #endif
6307     }
6308 
6309     return matches;
6310   }
6311 }
6312 
6313 
6314 int *
IIT_get_values_above(int * nmatches,T this,double lowval)6315 IIT_get_values_above (int *nmatches, T this, double lowval) {
6316   int *matches = NULL, j;
6317   double val;
6318   int start, end = this->total_nintervals - 1;
6319   int low, middle, high, recno;
6320   bool foundp;
6321 
6322   debug(printf("Entering IIT_get_values_above with %f\n",lowval));
6323 
6324   /* Find start */
6325   foundp = false;
6326   low = 0;
6327   high = this->total_nintervals;
6328 
6329   while (!foundp && low < high) {
6330     middle = (low+high)/2;
6331 
6332 #ifdef DEBUG
6333 #ifndef WORDS_BIGENDIAN
6334     printf("low %d middle %d:%d:%f high %d\n",
6335 	   low,middle,this->valueorder[middle],
6336 	   this->values[this->valueorder[middle]],high);
6337 #endif
6338 #endif
6339 
6340 #ifdef WORDS_BIGENDIAN
6341     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6342 #else
6343     val = this->values[this->valueorder[middle]];
6344 #endif
6345 
6346     if (val > lowval) {
6347       high = middle;
6348       debug(printf("Decreasing high to %d\n",high));
6349     } else if (val < lowval) {
6350       low = middle + 1;
6351       debug(printf("Increasing low to %d\n",low));
6352     } else {
6353       foundp = true;
6354     }
6355   }
6356 
6357   if (foundp == true) {
6358     start = middle;
6359     debug(printf("start is middle = %d\n\n",start));
6360 
6361 #ifdef WORDS_BIGENDIAN
6362     while (start-1 >= 0 &&
6363 	   lowval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start-1])])) {
6364       start--;
6365     }
6366 #else
6367     while (start-1 >= 0 &&
6368 	   lowval == this->values[this->valueorder[start-1]]) {
6369       start--;
6370       debug(printf("Regressing start to %d\n",start));
6371     }
6372 #endif
6373 
6374   } else if ((start = low) >= this->total_nintervals) {
6375     *nmatches = 0;
6376     return (int *) NULL;
6377 
6378   } else {
6379     debug(printf("start is low = %d\n\n",start));
6380 #ifdef WORDS_BIGENDIAN
6381     val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start])]);
6382 #else
6383     val = this->values[this->valueorder[start]];
6384 #endif
6385     debug(printf("Final value for low bound = %f\n",val));
6386     if (val < lowval) {
6387       *nmatches = 0;
6388       return (int *) NULL;
6389     }
6390   }
6391 
6392 
6393   *nmatches = end - start + 1;
6394   if (*nmatches <= 0) {
6395     *matches = 0;
6396     return (int *) NULL;
6397   } else {
6398     matches = (int *) CALLOC(*nmatches,sizeof(int));
6399     j = 0;
6400     for (recno = start; recno <= end; recno++) {
6401 #ifdef WORDS_BIGENDIAN
6402 #ifdef DEBUG
6403       printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6404 #endif
6405       matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6406 
6407 #else
6408 #ifdef DEBUG
6409       printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6410 #endif
6411       matches[j++] = this->valueorder[recno]+1;
6412 #endif
6413     }
6414 
6415     return matches;
6416   }
6417 }
6418 
6419 
6420 
6421 /************************************************************************/
6422 
#if 0
/* Need to work on */
/* Retrieves intervals from an IIT where type > 0.  Used by gmapindex to
   construct altstrain_iit.  Here, the iit is a contig_iit.  */
/* This whole definition is compiled out by the enclosing #if 0.  For
   each interval whose Interval_type is positive it pushes a copy of the
   interval, its label, and a segment length parsed from the annotation
   with strtoul; the three lists are passed through their reverse
   functions before being handed back.
   NOTE(review): in the (version > 1) branch, annotation is read without
   having been assigned in that iteration, and in the '-' case the parsed
   seglength is never pushed onto *seglength_list -- confirm the intended
   behavior before re-enabling this code. */
List_T
IIT_intervallist_typed (List_T *labellist, Uintlist_T *seglength_list, T this) {
  List_T intervallist = NULL;
  Interval_T interval;
  char *label, *annotation, *restofheader, firstchar;
  bool allocp;
  int i;
  Chrpos_T seglength;

  *labellist = NULL;
  *seglength_list = NULL;
  for (i = 0; i < this->nintervals; i++) {
    interval = &(this->intervals[i]);
    if (Interval_type(interval) > 0) {
      intervallist = List_push(intervallist,Interval_copy(interval));
      /* Labels are looked up with a 1-based index */
      label = IIT_label(this,i+1,&allocp);
      *labellist = List_push(*labellist,label);

      if (this->version <= 1) {
	/* Annotation may be negative to indicate contig is reverse complement */
	annotation = IIT_annotation(&restofheader,this,i+1,&allocp);
	firstchar = annotation[0];
	if (firstchar == '-') {
	  /* Skip the leading '-' so that strtoul sees only the digits */
	  seglength = (Chrpos_T) strtoul(&(annotation[1]),NULL,10);
	} else {
	  seglength = (Chrpos_T) strtoul(annotation,NULL,10);
	  *seglength_list = Uintlist_push(*seglength_list,seglength);
	}
	if (allocp == true) {
	  FREE(restofheader);
	}
      } else {
	seglength = (Chrpos_T) strtoul(annotation,NULL,10);
	*seglength_list = Uintlist_push(*seglength_list,seglength);
      }
    }
  }
  *labellist = List_reverse(*labellist);
  *seglength_list = Uintlist_reverse(*seglength_list);
  return List_reverse(intervallist);
}
#endif
6469 
6470 List_T
IIT_typelist(T this)6471 IIT_typelist (T this) {
6472   List_T typelist = NULL;
6473   int i;
6474   char *typestring, *copy;
6475 
6476   for (i = 0; i < this->ntypes; i++) {
6477     typestring = IIT_typestring(this,i);
6478     copy = (char *) CALLOC(strlen(typestring)+1,sizeof(char));
6479     strcpy(copy,typestring);
6480     typelist = List_push(typelist,copy);
6481   }
6482   return List_reverse(typelist);
6483 }
6484 
6485 
6486 /************************************************************************/
6487 
6488 /* Assume 0-based index */
6489 static void
print_header(Filestring_T fp,T this,int recno,char * chr,bool relativep,Chrpos_T left,bool print_comment_p)6490 print_header (Filestring_T fp, T this, int recno, char *chr,
6491 	      bool relativep, Chrpos_T left, bool print_comment_p) {
6492   char *string, *restofheader, *p;
6493   Interval_T interval;
6494   bool allocp;
6495 #if 0
6496   int typeint;
6497 #endif
6498 
6499   string = IIT_label(this,recno+1,&allocp);
6500 
6501   FPRINTF(fp,"\t%s",this->name);
6502 
6503   interval = &(this->intervals[0][recno]);
6504   if (relativep == true) {
6505     if (Interval_sign(interval) >= 0) {
6506       FPRINTF(fp,"\t%u..%u",Interval_low(interval)-left,Interval_high(interval)-left);
6507     } else {
6508       FPRINTF(fp,"\t%u..%u",Interval_high(interval)-left,Interval_low(interval)-left);
6509     }
6510   } else {
6511     if (Interval_sign(interval) >= 0) {
6512       FPRINTF(fp,"\t%s:%u..%u",chr,Interval_low(interval),Interval_high(interval));
6513     } else {
6514       FPRINTF(fp,"\t%s:%u..%u",chr,Interval_high(interval),Interval_low(interval));
6515     }
6516   }
6517 
6518 #if 0
6519   if (map_bothstrands_p == true) {
6520     if ((typeint = Interval_type(interval)) <= 0) {
6521       FPRINTF(fp,"\t\t%s",string);
6522     } else {
6523       FPRINTF(fp,"\t%s\t%s",IIT_typestring(this,typeint),string);
6524     }
6525   } else {
6526 #endif
6527     FPRINTF(fp,"\t");
6528     p = string;
6529     while (*p != '\0' && *p != '\n') {
6530       PUTC(*p,fp);
6531       p++;
6532     }
6533 
6534 #if 0
6535   }
6536 #endif
6537 
6538   if (allocp == true) {
6539     FREE(string);
6540   }
6541 
6542   if (print_comment_p == true) {
6543     p = IIT_annotation(&restofheader,this,recno+1,&allocp);
6544     FPRINTF(fp,"\t");
6545     while (*p != '\0' && *p != '\n') {
6546       PUTC(*p,fp);
6547       p++;
6548     }
6549 
6550     if (allocp == true) {
6551       FREE(restofheader);
6552     }
6553   }
6554 
6555   FPRINTF(fp,"\n");
6556 
6557   return;
6558 }
6559 
6560 
6561 void
IIT_print_header(Filestring_T fp,T this,int * matches,int nmatches,char * chr,bool reversep,bool relativep,Chrpos_T left,bool print_comment_p)6562 IIT_print_header (Filestring_T fp, T this, int *matches, int nmatches,
6563 		  char *chr, bool reversep, bool relativep, Chrpos_T left,
6564 		  bool print_comment_p) {
6565   int recno, i;
6566 
6567   if (reversep == true) {
6568     for (i = nmatches-1; i >= 0; i--) {
6569       recno = matches[i] - 1;	/* Convert to 0-based */
6570       print_header(fp,this,recno,chr,relativep,left,print_comment_p);
6571     }
6572   } else {
6573     for (i = 0; i < nmatches; i++) {
6574       recno = matches[i] - 1;	/* Convert to 0-based */
6575       print_header(fp,this,recno,chr,relativep,left,print_comment_p);
6576     }
6577   }
6578 
6579   return;
6580 }
6581 
6582 
6583 Intlist_T
IIT_gene_exons_plus(int * chrnum,Uintlist_T * exonstarts,T genes_iit,int * genes_chrnum_crosstable,int index)6584 IIT_gene_exons_plus (int *chrnum, Uintlist_T *exonstarts, T genes_iit,
6585 		     int *genes_chrnum_crosstable, int index) {
6586   Intlist_T exonlengths = (Intlist_T) NULL;
6587   char *restofheader, *p;
6588   Chrpos_T exonstart, exonend;
6589   int divint;
6590   bool allocp;
6591 
6592 
6593   divint = IIT_divint_from_index(genes_iit,index);
6594   *chrnum = genes_chrnum_crosstable[divint];
6595   /* printf("index %d => divint %d => chrnum %d\n",index,divint,*chrnum); */
6596 
6597   *exonstarts = (Uintlist_T) NULL;
6598 
6599   p = IIT_annotation(&restofheader,genes_iit,index,&allocp);
6600 
6601   /* Skip header */
6602   while (*p != '\0' && *p != '\n') {
6603     p++;
6604   }
6605   if (*p == '\n') p++;
6606 
6607   while (*p != '\0') {
6608     if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6609       fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6610       abort();
6611     } else {
6612       exonlengths = Intlist_push(exonlengths,exonend - exonstart + 1);
6613       *exonstarts = Uintlist_push(*exonstarts,exonstart);
6614 
6615       /* Advance to next exon */
6616       while (*p != '\0' && *p != '\n') p++;
6617       if (*p == '\n') p++;
6618     }
6619   }
6620 
6621   if (allocp) {
6622     FREE(restofheader);
6623   }
6624 
6625   *exonstarts = Uintlist_reverse(*exonstarts);
6626   return Intlist_reverse(exonlengths);
6627 }
6628 
6629 
6630 Intlist_T
IIT_gene_exons_minus(int * chrnum,Uintlist_T * exonstarts,T genes_iit,int * genes_chrnum_crosstable,int index)6631 IIT_gene_exons_minus (int *chrnum, Uintlist_T *exonstarts, T genes_iit,
6632 		      int *genes_chrnum_crosstable, int index) {
6633   Intlist_T exonlengths = (Intlist_T) NULL;
6634   char *restofheader, *p;
6635   Chrpos_T exonstart, exonend;
6636   int divint;
6637   bool allocp;
6638 
6639 
6640   divint = IIT_divint_from_index(genes_iit,index);
6641   *chrnum = genes_chrnum_crosstable[divint];
6642   /* printf("index %d => divint %d => chrnum %d\n",index,divint,*chrnum); */
6643 
6644   *exonstarts = (Uintlist_T) NULL;
6645 
6646   p = IIT_annotation(&restofheader,genes_iit,index,&allocp);
6647 
6648   /* Skip header */
6649   while (*p != '\0' && *p != '\n') {
6650     p++;
6651   }
6652   if (*p == '\n') p++;
6653 
6654   while (*p != '\0') {
6655     if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6656       fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6657       abort();
6658     } else {
6659       exonlengths = Intlist_push(exonlengths,exonstart - exonend + 1);
6660       *exonstarts = Uintlist_push(*exonstarts,exonstart);
6661 
6662       /* Advance to next exon */
6663       while (*p != '\0' && *p != '\n') p++;
6664       if (*p == '\n') p++;
6665     }
6666   }
6667 
6668   if (allocp) {
6669     FREE(restofheader);
6670   }
6671 
6672   *exonstarts = Uintlist_reverse(*exonstarts);
6673   return Intlist_reverse(exonlengths);
6674 }
6675 
6676 
6677 int
IIT_gene_exons_array(int * transcript_genestrand,int ** exonbounds,unsigned int ** exonstarts,T alignment_iit,int alignment_index)6678 IIT_gene_exons_array (int *transcript_genestrand, int **exonbounds, unsigned int **exonstarts,
6679 		      T alignment_iit, int alignment_index) {
6680   int nexons;
6681   char *restofheader, *p;
6682   int exonbound = 0;
6683   Intlist_T exonbounds_list = NULL;
6684   Uintlist_T exonstarts_list = NULL;
6685   Chrpos_T exonstart, exonend;
6686   bool allocp;
6687 
6688   *transcript_genestrand = IIT_interval_sign(alignment_iit,alignment_index);
6689   p = IIT_annotation(&restofheader,alignment_iit,alignment_index,&allocp);
6690 
6691   /* Skip header */
6692   while (*p != '\0' && *p != '\n') {
6693     p++;
6694   }
6695   if (*p == '\n') p++;
6696 
6697   if (*transcript_genestrand > 0) {
6698     while (*p != '\0') {
6699       if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6700 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6701 	abort();
6702       } else {
6703 	exonbound += exonend - exonstart + 1;
6704 	exonbounds_list = Intlist_push(exonbounds_list,exonbound);
6705 	exonstarts_list = Uintlist_push(exonstarts_list,exonstart);
6706 
6707 	/* Advance to next exon */
6708 	while (*p != '\0' && *p != '\n') p++;
6709 	if (*p == '\n') p++;
6710       }
6711     }
6712   } else {
6713     while (*p != '\0') {
6714       if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6715 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6716 	abort();
6717       } else {
6718 	exonbound += exonstart - exonend + 1;
6719 	exonbounds_list = Intlist_push(exonbounds_list,exonbound);
6720 	exonstarts_list = Uintlist_push(exonstarts_list,exonstart);
6721 
6722 	/* Advance to next exon */
6723 	while (*p != '\0' && *p != '\n') p++;
6724 	if (*p == '\n') p++;
6725       }
6726     }
6727   }
6728 
6729   if (allocp) {
6730     FREE(restofheader);
6731   }
6732 
6733   exonbounds_list = Intlist_reverse(exonbounds_list);
6734   *exonbounds = Intlist_to_array(&nexons,exonbounds_list);
6735   Intlist_free(&exonbounds_list);
6736 
6737   exonstarts_list = Uintlist_reverse(exonstarts_list);
6738   *exonstarts = Uintlist_to_array(&nexons,exonstarts_list);
6739   Uintlist_free(&exonstarts_list);
6740 
6741   return nexons;
6742 }
6743 
6744 
6745 Overlap_T
IIT_gene_overlap(T map_iit,int divno,Chrpos_T x,Chrpos_T y,bool favor_multiexon_p)6746 IIT_gene_overlap (T map_iit, int divno, Chrpos_T x, Chrpos_T y, bool favor_multiexon_p) {
6747   int *matches, index;
6748   int nmatches, i;
6749   Chrpos_T exonstart, exonend;
6750   int observed_genestrand;
6751   char *annot, *restofheader, *p;
6752   bool allocp = false;
6753   bool multiexon_p;
6754   bool foundp = false;
6755 
6756   matches = IIT_get_with_divno(&nmatches,map_iit,divno,x,y,/*sortp*/false);
6757 
6758   for (i = 0; i < nmatches; i++) {
6759     index = matches[i];
6760     observed_genestrand = IIT_interval_sign(map_iit,index);
6761 #if 0
6762     if (observed_genestrand > 0 && desired_genestrand < 0) {
6763       /* Inconsistent */
6764     } else if (observed_genestrand < 0 && desired_genestrand > 0) {
6765       /* Inconsistent */
6766     } else {
6767 #endif
6768       annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
6769 
6770       /* Skip header */
6771       p = annot;
6772       while (*p != '\0' && *p != '\n') {
6773 	p++;
6774       }
6775       if (*p == '\n') p++;
6776 
6777       if (observed_genestrand > 0) {
6778 	multiexon_p = false;
6779 	while (*p != '\0') {
6780 	  if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6781 	    fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6782 	    abort();
6783 	  } else {
6784 	    /* Advance to next exon */
6785 	    while (*p != '\0' && *p != '\n') p++;
6786 	    if (*p == '\n') p++;
6787 	    if (*p != '\0') {
6788 	      multiexon_p = true;
6789 	    }
6790 
6791 	    if (exonend < x) {
6792 	      /* No overlap */
6793 	    } else if (exonstart > y) {
6794 	      /* No overlap */
6795 	    } else if (favor_multiexon_p == true) {
6796 	      if (multiexon_p == true) {
6797 		FREE(matches);
6798 		if (allocp) FREE(annot);
6799 		return KNOWN_GENE_MULTIEXON;
6800 	      } else {
6801 		/* Keep searching for a multi-exon gene */
6802 		foundp = true;
6803 	      }
6804 	    } else {
6805 	      FREE(matches);
6806 	      if (allocp) FREE(annot);
6807 	      return KNOWN_GENE;
6808 	    }
6809 	  }
6810 
6811 	}
6812 
6813       } else {
6814 	multiexon_p = false;
6815 	while (*p != '\0') {
6816 	  if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6817 	    fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6818 	    abort();
6819 	  } else {
6820 	    /* Advance to next exon */
6821 	    while (*p != '\0' && *p != '\n') p++;
6822 	    if (*p == '\n') p++;
6823 	    if (*p != '\0') {
6824 	      multiexon_p = true;
6825 	    }
6826 
6827 	    if (exonstart < x) {
6828 	      /* No overlap */
6829 	    } else if (exonend > y) {
6830 	      /* No overlap */
6831 	    } else if (favor_multiexon_p == true) {
6832 	      if (multiexon_p == true) {
6833 		FREE(matches);
6834 		if (allocp) FREE(annot);
6835 		return KNOWN_GENE_MULTIEXON;
6836 	      } else {
6837 		/* Keep searching for a multi-exon gene */
6838 		foundp = true;
6839 	      }
6840 	    } else {
6841 	      FREE(matches);
6842 	      if (allocp) FREE(annot);
6843 	      return KNOWN_GENE;
6844 	    }
6845 	  }
6846 	}
6847       }
6848 #if 0
6849     }
6850 #endif
6851   }
6852 
6853   FREE(matches);
6854   if (allocp) FREE(annot);
6855   if (foundp == true) {
6856     return KNOWN_GENE;
6857   } else {
6858     return NO_KNOWN_GENE;
6859   }
6860 }
6861 
6862 
6863 Chrpos_T
IIT_genestruct_chrpos(char * strand,char ** divstring,char ** gene,T map_iit,char * transcript,int querypos)6864 IIT_genestruct_chrpos (char *strand, char **divstring, char **gene,
6865 		       T map_iit, char *transcript, int querypos) {
6866   Interval_T interval0;
6867   int index0;
6868   Chrpos_T exonstart0, exonend0, exonlength;
6869   char *annot, *restofheader, *p;
6870   bool allocp = false;
6871 
6872 
6873   if ((index0 = IIT_find_one(map_iit,transcript)) < 0) {
6874     fprintf(stderr,"Could not find transcript %s in genes map\n",transcript);
6875     return (Chrpos_T) 0;
6876   } else {
6877     *divstring = IIT_divstring_from_index(map_iit,index0);
6878     interval0 = &(map_iit->intervals[0][index0-1]);
6879     annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
6880   }
6881 
6882 
6883   /* Get gene from header */
6884   p = annot;
6885   while (*p != '\0' && *p != '\n' && *p != ' ') {
6886     p++;
6887   }
6888   *gene = (char *) MALLOC((p - annot + 1)*sizeof(char));
6889   strncpy(*gene,annot,p - annot);
6890   (*gene)[p - annot] = '\0';
6891 
6892   while (*p != '\0' && *p != '\n') {
6893     p++;
6894   }
6895   if (*p == '\n') p++;
6896 
6897 
6898   if (Interval_sign(interval0) > 0) {
6899     *strand = '+';
6900     while (*p != '\0') {
6901       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
6902 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6903 	abort();
6904       } else {
6905 	exonlength = exonend0 - exonstart0 + 1;
6906 	if (exonlength < (Chrpos_T) querypos) {
6907 	  querypos -= exonlength;
6908 	} else {
6909 	  if (allocp) {
6910 	    FREE(restofheader);
6911 	  }
6912 	  return exonstart0 + querypos - 1; /* Because both exonstart0 and querypos are 1-based */
6913 	}
6914       }
6915 
6916       /* Advance to the next exon */
6917       while (*p != '\0' && *p != '\n') p++;
6918       if (*p == '\n') p++;
6919     }
6920 
6921   } else {
6922     *strand = '-';
6923     while (*p != '\0') {
6924       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
6925 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6926 	abort();
6927       } else {
6928 	exonlength = exonstart0 - exonend0 + 1;
6929 	if (exonlength < (Chrpos_T) querypos) {
6930 	  querypos -= exonlength;
6931 	} else {
6932 	  if (allocp) {
6933 	    FREE(restofheader);
6934 	  }
6935 	  return exonstart0 - querypos + 1; /* Because both exonstart and querypos are 1-based */
6936 	}
6937       }
6938 
6939       /* Advance to the next exon */
6940       while (*p != '\0' && *p != '\n') p++;
6941       if (*p == '\n') p++;
6942     }
6943   }
6944 
6945   if (allocp) {
6946     FREE(restofheader);
6947   }
6948 
6949   fprintf(stderr,"querypos is too long\n");
6950   return (Chrpos_T) 0;
6951 }
6952 
6953 
6954 bool
IIT_gene_overlapp(T map_iit,int index,Chrpos_T x,Chrpos_T y)6955 IIT_gene_overlapp (T map_iit, int index, Chrpos_T x, Chrpos_T y) {
6956   Chrpos_T exonstart, exonend;
6957   int observed_genestrand;
6958   char *annot, *restofheader, *p;
6959   bool allocp = false;
6960 
6961   observed_genestrand = IIT_interval_sign(map_iit,index);
6962   annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
6963 
6964   /* Skip header */
6965   p = annot;
6966   while (*p != '\0' && *p != '\n') {
6967     p++;
6968   }
6969   if (*p == '\n') p++;
6970 
6971   if (observed_genestrand > 0) {
6972     while (*p != '\0') {
6973       if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6974 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6975 	abort();
6976       } else {
6977 	/* Advance to next exon */
6978 	while (*p != '\0' && *p != '\n') p++;
6979 	if (*p == '\n') p++;
6980 
6981 	if (exonend < x) {
6982 	  /* No overlap */
6983 	} else if (exonstart > y) {
6984 	  /* No overlap */
6985 	} else {
6986 	  if (allocp) FREE(annot);
6987 	  return true;
6988 	}
6989       }
6990 
6991     }
6992 
6993   } else {
6994     while (*p != '\0') {
6995       if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6996 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6997 	abort();
6998       } else {
6999 	/* Advance to next exon */
7000 	while (*p != '\0' && *p != '\n') p++;
7001 	if (*p == '\n') p++;
7002 
7003 	if (exonstart < x) {
7004 	  /* No overlap */
7005 	} else if (exonend > y) {
7006 	  /* No overlap */
7007 	} else {
7008 	  if (allocp) FREE(annot);
7009 	  return true;
7010 	}
7011       }
7012     }
7013   }
7014 
7015   if (allocp) FREE(annot);
7016   return false;
7017 }
7018 
7019 
7020 /* Can handle only genes with the same direction as the given gene */
7021 Intlist_T
IIT_unique_positions(T map_iit,int index0,int divno)7022 IIT_unique_positions (T map_iit, int index0, int divno) {
7023   Intlist_T uniques = (Intlist_T) NULL;
7024   int nunique;
7025   Interval_T interval0;
7026   int *matches, index;
7027   int nmatches, i;
7028   Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
7029   char *annot, *restofheader, *p, *q;
7030   char **pointers;
7031   int npointers, ptri;
7032   bool allocp = false;
7033   bool uniquep;
7034 
7035 
7036   interval0 = &(map_iit->intervals[0][index0-1]);
7037   matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
7038 				      /*sortp*/false,Interval_sign(interval0));
7039   if (nmatches == 0) {
7040     /* No overlapping genes found */
7041     pointers = (char **) NULL;
7042     npointers = 0;
7043   } else {
7044     pointers = (char **) MALLOC(nmatches * sizeof(char *));
7045     npointers = 0;
7046     for (i = 0; i < nmatches; i++) {
7047       index = matches[i];
7048       if (index != index0) {
7049 	annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7050 
7051 	/* Skip header */
7052 	p = annot;
7053 	while (*p != '\0' && *p != '\n') {
7054 	  p++;
7055 	}
7056 	if (*p == '\n') p++;
7057 
7058 	pointers[npointers++] = p;
7059       }
7060     }
7061     FREE(matches);
7062   }
7063 
7064   annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7065   /* Skip header */
7066   p = annot;
7067   while (*p != '\0' && *p != '\n') {
7068     p++;
7069   }
7070   if (*p == '\n') p++;
7071 
7072   nunique = -1;
7073   if (Interval_sign(interval0) > 0) {
7074     while (*p != '\0') {
7075       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7076 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7077 	abort();
7078       } else {
7079 	if (nunique >= 0) {
7080 	  uniques = Intlist_push(uniques,nunique);
7081 	}
7082 	nunique = 0;
7083 
7084 	for (pos = exonstart0; pos <= exonend0; pos++) {
7085 	  uniquep = true;
7086 	  for (ptri = 0; ptri < npointers; ptri++) {
7087 	    q = pointers[ptri];
7088 	    if (*q == '\0') {
7089 	      /* Skip */
7090 	      exonstart = exonend = -1U;
7091 	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7092 	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7093 	      abort();
7094 	    }
7095 
7096 	    /* Advance to appropriate exon if necessary */
7097 	    while (pos > exonend) {
7098 	      while (*q != '\0' && *q != '\n') q++;
7099 	      if (*q == '\n') q++;
7100 
7101 	      if (*q == '\0') {
7102 		exonstart = exonend = -1U;
7103 	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7104 		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7105 		abort();
7106 	      }
7107 	    }
7108 
7109 	    if (pos >= exonstart && pos <= exonend) {
7110 	      uniquep = false;
7111 	    }
7112 
7113 	    pointers[ptri] = q;
7114 	  }
7115 	  if (uniquep == true) {
7116 	    nunique += 1;
7117 	  }
7118 	}
7119 
7120 	/* Advance to the next exon */
7121 	while (*p != '\0' && *p != '\n') p++;
7122 	if (*p == '\n') p++;
7123       }
7124     }
7125 
7126   } else {
7127     while (*p != '\0') {
7128       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7129 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7130 	abort();
7131       } else {
7132 	if (nunique >= 0) {
7133 	  uniques = Intlist_push(uniques,nunique);
7134 	}
7135 	nunique = 0;
7136 
7137 	for (pos = exonstart0; pos >= exonend0; --pos) {
7138 	  uniquep = true;
7139 	  for (ptri = 0; ptri < npointers; ptri++) {
7140 	    q = pointers[ptri];
7141 	    if (*q == '\0') {
7142 	      /* Skip */
7143 	      exonstart = exonend = 0;
7144 	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7145 	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7146 	      abort();
7147 	    }
7148 
7149 	    /* Advance to appropriate exon if necessary */
7150 	    while (pos < exonend) {
7151 	      while (*q != '\0' && *q != '\n') q++;
7152 	      if (*q == '\n') q++;
7153 
7154 	      if (*q == '\0') {
7155 		exonstart = exonend = 0;
7156 	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7157 		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7158 		abort();
7159 	      }
7160 	    }
7161 
7162 	    if (pos <= exonstart && pos >= exonend) {
7163 	      uniquep = false;
7164 	    }
7165 
7166 	    pointers[ptri] = q;
7167 	  }
7168 	  if (uniquep == true) {
7169 	    nunique += 1;
7170 	  }
7171 	}
7172 
7173 	/* Advance to the next exon */
7174 	while (*p != '\0' && *p != '\n') p++;
7175 	if (*p == '\n') p++;
7176       }
7177     }
7178   }
7179 
7180 
7181   if (nunique >= 0) {
7182     uniques = Intlist_push(uniques,nunique);
7183   }
7184 
7185   FREE(pointers);
7186   return Intlist_reverse(uniques);
7187 }
7188 
7189 
7190 /* Needed for a second round of gene expression assignment */
7191 Intlist_T
IIT_unique_positions_given_others(T map_iit,int index0,int * matches,int nmatches)7192 IIT_unique_positions_given_others (T map_iit, int index0, int *matches, int nmatches) {
7193   Intlist_T uniques = (Intlist_T) NULL;
7194   int nunique;
7195   Interval_T interval0;
7196   int index;
7197   int i;
7198   Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
7199   char *annot, *restofheader, *p, *q;
7200   char **pointers;
7201   int npointers, ptri;
7202   bool allocp = false;
7203   bool uniquep;
7204 
7205 
7206   interval0 = &(map_iit->intervals[0][index0-1]);
7207 
7208   pointers = MALLOC(nmatches * sizeof(char *));
7209   npointers = 0;
7210   for (i = 0; i < nmatches; i++) {
7211     index = matches[i];
7212     if (index != index0) {
7213       annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7214 
7215       /* Skip header */
7216       p = annot;
7217       while (*p != '\0' && *p != '\n') {
7218 	p++;
7219       }
7220       if (*p == '\n') p++;
7221 
7222       pointers[npointers++] = p;
7223     }
7224   }
7225   /* FREE(matches); */
7226 
7227   annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7228   /* Skip header */
7229   p = annot;
7230   while (*p != '\0' && *p != '\n') {
7231     p++;
7232   }
7233   if (*p == '\n') p++;
7234 
7235   nunique = -1;
7236   if (Interval_sign(interval0) > 0) {
7237     while (*p != '\0') {
7238       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7239 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7240 	abort();
7241       } else {
7242 	if (nunique >= 0) {
7243 	  uniques = Intlist_push(uniques,nunique);
7244 	}
7245 	nunique = 0;
7246 
7247 	for (pos = exonstart0; pos <= exonend0; pos++) {
7248 	  uniquep = true;
7249 	  for (ptri = 0; ptri < npointers; ptri++) {
7250 	    q = pointers[ptri];
7251 	    if (*q == '\0') {
7252 	      /* Skip */
7253 	      exonstart = exonend = -1U;
7254 	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7255 	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7256 	      abort();
7257 	    }
7258 
7259 	    /* Advance to appropriate exon if necessary */
7260 	    while (pos > exonend) {
7261 	      while (*q != '\0' && *q != '\n') q++;
7262 	      if (*q == '\n') q++;
7263 
7264 	      if (*q == '\0') {
7265 		exonstart = exonend = -1U;
7266 	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7267 		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7268 		abort();
7269 	      }
7270 	    }
7271 
7272 	    if (pos >= exonstart && pos <= exonend) {
7273 	      uniquep = false;
7274 	    }
7275 
7276 	    pointers[ptri] = q;
7277 	  }
7278 	  if (uniquep == true) {
7279 	    nunique += 1;
7280 	  }
7281 	}
7282 
7283 	/* Advance to the next exon */
7284 	while (*p != '\0' && *p != '\n') p++;
7285 	if (*p == '\n') p++;
7286       }
7287     }
7288 
7289   } else {
7290     while (*p != '\0') {
7291       if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7292 	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7293 	abort();
7294       } else {
7295 	if (nunique >= 0) {
7296 	  uniques = Intlist_push(uniques,nunique);
7297 	}
7298 	nunique = 0;
7299 
7300 	for (pos = exonstart0; pos >= exonend0; --pos) {
7301 	  uniquep = true;
7302 	  for (ptri = 0; ptri < npointers; ptri++) {
7303 	    q = pointers[ptri];
7304 	    if (*q == '\0') {
7305 	      /* Skip */
7306 	      exonstart = exonend = 0;
7307 	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7308 	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7309 	      abort();
7310 	    }
7311 
7312 	    /* Advance to appropriate exon if necessary */
7313 	    while (pos < exonend) {
7314 	      while (*q != '\0' && *q != '\n') q++;
7315 	      if (*q == '\n') q++;
7316 
7317 	      if (*q == '\0') {
7318 		exonstart = exonend = 0;
7319 	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7320 		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7321 		abort();
7322 	      }
7323 	    }
7324 
7325 	    if (pos <= exonstart && pos >= exonend) {
7326 	      uniquep = false;
7327 	    }
7328 
7329 	    pointers[ptri] = q;
7330 	  }
7331 	  if (uniquep == true) {
7332 	    nunique += 1;
7333 	  }
7334 	}
7335 
7336 	/* Advance to the next exon */
7337 	while (*p != '\0' && *p != '\n') p++;
7338 	if (*p == '\n') p++;
7339       }
7340     }
7341   }
7342 
7343 
7344   if (nunique >= 0) {
7345     uniques = Intlist_push(uniques,nunique);
7346   }
7347 
7348   FREE(pointers);
7349   return Intlist_reverse(uniques);
7350 }
7351 
7352 
7353 /* Can handle only genes with the same direction as the given gene */
7354 /* Values or either 1 (unique) or 0 (not unique) */
7355 Intlist_T
IIT_unique_splicep(T map_iit,int index0,int divno)7356 IIT_unique_splicep (T map_iit, int index0, int divno) {
7357   Intlist_T uniques = (Intlist_T) NULL;
7358   Interval_T interval0;
7359   int *matches, index;
7360   int nmatches, i;
7361   Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
7362     exonstart, intronstart, intronend, exonend;
7363   char *annot, *restofheader, *p, *q;
7364   char **pointers;
7365   int npointers, ptri;
7366   bool allocp = false;
7367   bool uniquep, firstp;
7368 
7369 
7370   interval0 = &(map_iit->intervals[0][index0-1]);
7371   matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
7372 				      /*sortp*/false,Interval_sign(interval0));
7373   if (nmatches == 0) {
7374     /* No overlapping genes found */
7375     pointers = (char **) NULL;
7376     npointers = 0;
7377   } else {
7378     pointers = (char **) MALLOC(nmatches * sizeof(char *));
7379     npointers = 0;
7380     for (i = 0; i < nmatches; i++) {
7381       index = matches[i];
7382       if (index != index0) {
7383 	annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7384 
7385 	/* Skip header */
7386 	p = annot;
7387 	while (*p != '\0' && *p != '\n') {
7388 	  p++;
7389 	}
7390 	if (*p == '\n') p++;
7391 
7392 	pointers[npointers++] = p;
7393       }
7394     }
7395     FREE(matches);
7396   }
7397 
7398   annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7399   /* Skip header */
7400   p = annot;
7401   while (*p != '\0' && *p != '\n') {
7402     p++;
7403   }
7404   if (*p == '\n') p++;
7405 
7406   firstp = true;
7407   if (Interval_sign(interval0) > 0) {
7408     while (*p != '\0') {
7409       if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7410 	/* Passed last intron */
7411 	while (*p != '\0') p++;
7412       } else {
7413 	if (firstp == false) {
7414 	  uniques = Intlist_push(uniques,(int) uniquep);
7415 	}
7416 	firstp = false;
7417 
7418 	uniquep = true;
7419 	for (ptri = 0; ptri < npointers; ptri++) {
7420 	  q = pointers[ptri];
7421 	  if (*q == '\0') {
7422 	    /* Skip */
7423 	    intronstart = intronend = -1U;
7424 	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7425 	    /* Passed last intron */
7426 	    intronstart = intronend = 0;
7427 	    while (*q != '\0') q++;
7428 	  }
7429 
7430 	  /* Advance to appropriate exon if necessary */
7431 	  while (intronstart0 > intronstart) {
7432 	    while (*q != '\0' && *q != '\n') q++;
7433 	    if (*q == '\n') q++;
7434 
7435 	    if (*q == '\0') {
7436 	      intronstart = intronend = -1U;
7437 	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7438 	      intronstart = intronend = 0;
7439 	      while (*q != '\0') q++;
7440 	    }
7441 	  }
7442 
7443 	  if (intronstart == intronstart0 && intronend == intronend0) {
7444 	    uniquep = false;
7445 	  }
7446 
7447 	  pointers[ptri] = q;
7448 	}
7449       }
7450 
7451       /* Advance to the next exon */
7452       while (*p != '\0' && *p != '\n') p++;
7453       if (*p == '\n') p++;
7454     }
7455 
7456   } else {
7457     while (*p != '\0') {
7458       if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7459 	/* Passed last intron */
7460 	while (*p != '\0') p++;
7461       } else {
7462 	if (firstp == false) {
7463 	  uniques = Intlist_push(uniques,(int) uniquep);
7464 	}
7465 	firstp = false;
7466 
7467 	uniquep = true;
7468 	for (ptri = 0; ptri < npointers; ptri++) {
7469 	  q = pointers[ptri];
7470 	  if (*q == '\0') {
7471 	    /* Skip */
7472 	    intronstart = intronend = 0;
7473 	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7474 	    /* Passed last intron */
7475 	    intronstart = intronend = 0;
7476 	    while (*q != '\0') q++;
7477 	  }
7478 
7479 	  /* Advance to appropriate exon if necessary */
7480 	  while (intronstart0 < intronstart) {
7481 	    while (*q != '\0' && *q != '\n') q++;
7482 	    if (*q == '\n') q++;
7483 
7484 	    if (*q == '\0') {
7485 	      intronstart = intronend = 0;
7486 	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7487 	      intronstart = intronend = 0;
7488 	      while (*q != '\0') q++;
7489 	    }
7490 	  }
7491 
7492 	  if (intronstart == intronstart0 && intronend == intronend0) {
7493 	    uniquep = false;
7494 	  }
7495 
7496 	  pointers[ptri] = q;
7497 	}
7498       }
7499 
7500       /* Advance to the next exon */
7501       while (*p != '\0' && *p != '\n') p++;
7502       if (*p == '\n') p++;
7503     }
7504   }
7505 
7506   if (firstp == false) {
7507     uniques = Intlist_push(uniques,(int) uniquep);
7508   }
7509 
7510   FREE(pointers);
7511   return Intlist_reverse(uniques);
7512 }
7513 
7514 
7515 /* Can handle only genes with the same direction as the given gene */
7516 /* Values or either 1 (unique) or 0 (not unique) */
7517 Intlist_T
IIT_unique_splicep_given_others(T map_iit,int index0,int * matches,int nmatches)7518 IIT_unique_splicep_given_others (T map_iit, int index0, int *matches, int nmatches) {
7519   Intlist_T uniques = (Intlist_T) NULL;
7520   Interval_T interval0;
7521   int index;
7522   int i;
7523   Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
7524     exonstart, intronstart, intronend, exonend;
7525   char *annot, *restofheader, *p, *q;
7526   char **pointers;
7527   int npointers, ptri;
7528   bool allocp = false;
7529   bool uniquep, firstp;
7530 
7531 
7532   interval0 = &(map_iit->intervals[0][index0-1]);
7533 
7534   pointers = MALLOC(nmatches * sizeof(char *));
7535   npointers = 0;
7536   for (i = 0; i < nmatches; i++) {
7537     index = matches[i];
7538     if (index != index0) {
7539       annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7540 
7541       /* Skip header */
7542       p = annot;
7543       while (*p != '\0' && *p != '\n') {
7544 	p++;
7545       }
7546       if (*p == '\n') p++;
7547 
7548       pointers[npointers++] = p;
7549     }
7550   }
7551   /* FREE(matches); */
7552 
7553   annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7554   /* Skip header */
7555   p = annot;
7556   while (*p != '\0' && *p != '\n') {
7557     p++;
7558   }
7559   if (*p == '\n') p++;
7560 
7561   firstp = true;
7562   if (Interval_sign(interval0) > 0) {
7563     while (*p != '\0') {
7564       if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7565 	/* Passed last intron */
7566 	while (*p != '\0') p++;
7567       } else {
7568 	if (firstp == false) {
7569 	  uniques = Intlist_push(uniques,(int) uniquep);
7570 	}
7571 	firstp = false;
7572 
7573 	uniquep = true;
7574 	for (ptri = 0; ptri < npointers; ptri++) {
7575 	  q = pointers[ptri];
7576 	  if (*q == '\0') {
7577 	    /* Skip */
7578 	    intronstart = intronend = -1U;
7579 	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7580 	    /* Passed last intron */
7581 	    intronstart = intronend = 0;
7582 	    while (*q != '\0') q++;
7583 	  }
7584 
7585 	  /* Advance to appropriate exon if necessary */
7586 	  while (intronstart0 > intronstart) {
7587 	    while (*q != '\0' && *q != '\n') q++;
7588 	    if (*q == '\n') q++;
7589 
7590 	    if (*q == '\0') {
7591 	      intronstart = intronend = -1U;
7592 	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7593 	      intronstart = intronend = 0;
7594 	      while (*q != '\0') q++;
7595 	    }
7596 	  }
7597 
7598 	  if (intronstart == intronstart0 && intronend == intronend0) {
7599 	    uniquep = false;
7600 	  }
7601 
7602 	  pointers[ptri] = q;
7603 	}
7604       }
7605 
7606       /* Advance to the next exon */
7607       while (*p != '\0' && *p != '\n') p++;
7608       if (*p == '\n') p++;
7609     }
7610 
7611   } else {
7612     while (*p != '\0') {
7613       if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7614 	/* Passed last intron */
7615 	while (*p != '\0') p++;
7616       } else {
7617 	if (firstp == false) {
7618 	  uniques = Intlist_push(uniques,(int) uniquep);
7619 	}
7620 	firstp = false;
7621 
7622 	uniquep = true;
7623 	for (ptri = 0; ptri < npointers; ptri++) {
7624 	  q = pointers[ptri];
7625 	  if (*q == '\0') {
7626 	    /* Skip */
7627 	    intronstart = intronend = 0;
7628 	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7629 	    /* Passed last intron */
7630 	    intronstart = intronend = 0;
7631 	    while (*q != '\0') q++;
7632 	  }
7633 
7634 	  /* Advance to appropriate exon if necessary */
7635 	  while (intronstart0 < intronstart) {
7636 	    while (*q != '\0' && *q != '\n') q++;
7637 	    if (*q == '\n') q++;
7638 
7639 	    if (*q == '\0') {
7640 	      intronstart = intronend = 0;
7641 	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7642 	      intronstart = intronend = 0;
7643 	      while (*q != '\0') q++;
7644 	    }
7645 	  }
7646 
7647 	  if (intronstart == intronstart0 && intronend == intronend0) {
7648 	    uniquep = false;
7649 	  }
7650 
7651 	  pointers[ptri] = q;
7652 	}
7653       }
7654 
7655       /* Advance to the next exon */
7656       while (*p != '\0' && *p != '\n') p++;
7657       if (*p == '\n') p++;
7658     }
7659   }
7660 
7661   if (firstp == false) {
7662     uniques = Intlist_push(uniques,(int) uniquep);
7663   }
7664 
7665   FREE(pointers);
7666   return Intlist_reverse(uniques);
7667 }
7668