1 static char rcsid[] = "$Id: iit-read.c 222390 2020-04-10 12:44:01Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "iit-read.h"
7 #include "iitdef.h"
8
9 #ifdef WORDS_BIGENDIAN
10 #include "bigendian.h"
11 #else
12 #include "littleendian.h"
13 #endif
14
15 #include <stdlib.h> /* For qsort */
16 #include <string.h> /* For memset */
17 #include <strings.h>
18 #include <ctype.h> /* For isspace */
19 #ifdef HAVE_UNISTD_H
20 #include <unistd.h> /* For mmap on Linux */
21 #endif
22 #ifdef HAVE_SYS_TYPES_H
23 #include <sys/types.h> /* For open, fstat, and mmap */
24 #endif
25 /* Not sure why this was included
26 #include <sys/param.h>
27 */
28 #ifdef HAVE_FCNTL_H
29 #include <fcntl.h> /* For open */
30 #endif
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h> /* For open and fstat */
33 #endif
34 #include <sys/mman.h> /* For mmap and madvise */
35 #include <math.h> /* For qsort */
36 #include <errno.h> /* For perror */
37 #include "assert.h"
38 #include "except.h"
39 #include "mem.h"
40 #include "access.h"
41 #include "fopen.h"
42
43 /* Note: if sizeof(int) or sizeof(unsigned int) are not 4, then the below code is faulty */
44
45
46 /* Integer interval tree. */
47
48 /*
49 * n intervals;
50 * specified by their indices e[1..n]
51 * and endpoint-access function:
52 * low (e[i])
53 * high (e[i])
54 * is_contained (x, e[i])
55 * eg:
56 * interval e[i] ... "[" low (e[i]) "," high (e[i]) ")"
57 * is_contained (x, e[i]) ... ( (low (e[i]) <= x
58 * and (x < high (e[i]))
59 */
60
61 /*--------------------------------------------------------------------------*/
62
63 #ifdef DEBUG
64 #define debug(x) x
65 #else
66 #define debug(x)
67 #endif
68
69 /* Timing */
70 #ifdef DEBUG1
71 #define debug1(x) x
72 #else
73 #define debug1(x)
74 #endif
75
76 /* Flanking */
77 #ifdef DEBUG2
78 #define debug2(x) x
79 #else
80 #define debug2(x)
81 #endif
82
83 /* Binary search */
84 #ifdef DEBUG3
85 #define debug3(x) x
86 #else
87 #define debug3(x)
88 #endif
89
90
91
92 #define T IIT_T
93
94 static void
file_move_absolute(int fd,size_t offset,size_t objsize,Chrpos_T n)95 file_move_absolute (int fd, size_t offset, size_t objsize, Chrpos_T n) {
96 off_t position = offset + n*objsize;
97
98 if (lseek(fd,position,SEEK_SET) < 0) {
99 perror("Error in gmap, file_move_label");
100 exit(9);
101 }
102 return;
103 }
104
105
/* Opens an IIT file and inspects its first integer.

   filename:   path of the file to open.
   add_iit_p:  when true, "<filename>.iit" is tried first, falling back
               to filename itself.

   Returns false if no file could be opened, if the leading integer could
   not be read (a message is printed to stderr), or if that integer is 0
   (need to use Univ_IIT_read instead).  Returns true otherwise.  The
   file is always closed and the temporary name freed before returning. */
bool
IIT_universalp (char *filename, bool add_iit_p) {
  char *newfile = NULL;
  FILE *fp;
  int total_nintervals;
  bool result;

  if (add_iit_p == true) {
    newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
    sprintf(newfile,"%s.iit",filename);

    fp = FOPEN_READ_BINARY(newfile);
    if (fp != NULL) {
      /* Report errors against the name that was actually opened */
      filename = newfile;
    } else {
      fp = FOPEN_READ_BINARY(filename);
      if (fp == NULL) {
        /* fprintf(stderr,"Cannot open IIT file %s or %s\n",filename,newfile); */
        FREE(newfile);
        return false;
      }
    }

  } else {
    fp = FOPEN_READ_BINARY(filename);
    if (fp == NULL) {
      /* fprintf(stderr,"Cannot open IIT file %s\n",filename); */
      return false;
    }
  }

  if (FREAD_INT(&total_nintervals,fp) < 1) {
    fprintf(stderr,"IIT file %s appears to be empty\n",filename);
    result = false;
  } else if (total_nintervals == 0) {
    /* Need to use Univ_IIT_read instead */
    result = false;
  } else {
    result = true;
  }

  fclose(fp);
  if (add_iit_p == true) {
    FREE(newfile);
  }
  return result;
}
149
150
151 bool
IIT_valuep(T this)152 IIT_valuep (T this) {
153 return this->valuep;
154 }
155
156
157 char *
IIT_name(T this)158 IIT_name (T this) {
159 return this->name;
160 }
161
162 int
IIT_version(T this)163 IIT_version (T this) {
164 return this->version;
165 }
166
167 int
IIT_total_nintervals(T this)168 IIT_total_nintervals (T this) {
169 return this->total_nintervals;
170 }
171
172 int
IIT_nintervals(T this,int divno)173 IIT_nintervals (T this, int divno) {
174 return this->nintervals[divno];
175 }
176
177
178 int
IIT_ntypes(T this)179 IIT_ntypes (T this) {
180 return this->ntypes;
181 }
182
183 int
IIT_nfields(T this)184 IIT_nfields (T this) {
185 return this->nfields;
186 }
187
188
189 Chrpos_T
IIT_length(T this,int index)190 IIT_length (T this, int index) {
191 Interval_T interval;
192
193 interval = &(this->intervals[0][index-1]);
194 return Interval_length(interval);
195 }
196
197
198 Chrpos_T
IIT_divlength(T this,char * divstring)199 IIT_divlength (T this, char *divstring) {
200 Chrpos_T max = 0U;
201 Interval_T interval;
202 int divno, i;
203
204 divno = IIT_divint(this,divstring);
205 for (i = 0; i < this->nintervals[divno]; i++) {
206 interval = &(this->intervals[divno][i]);
207 if (Interval_high(interval) > max) {
208 max = Interval_high(interval);
209 }
210 }
211 /* Convert from zero-based coordinate */
212 return max+1U;
213 }
214
215
216 /* Assumes intervals are stored using universal coordinates */
217 Chrpos_T
IIT_totallength(T this)218 IIT_totallength (T this) {
219 Chrpos_T max = 0U;
220 Interval_T interval;
221 int divno, i;
222
223 for (divno = 0; divno < this->ndivs; divno++) {
224 for (i = 0; i < this->nintervals[divno]; i++) {
225 interval = &(this->intervals[divno][i]);
226 if (Interval_high(interval) > max) {
227 max = Interval_high(interval);
228 }
229 }
230 }
231 /* Convert from zero-based coordinate */
232 return max+1U;
233 }
234
235
236 Interval_T
IIT_interval(T this,int index)237 IIT_interval (T this, int index) {
238 assert(index <= this->total_nintervals);
239 return &(this->intervals[0][index-1]); /* Convert to 0-based */
240 }
241
242 /* Need to use for search on alphas (IIT_get_next and probably IIT_get_flanking) */
243 Interval_T
IIT_interval_for_divno(T this,int divno,int index)244 IIT_interval_for_divno (T this, int divno, int index) {
245 assert(index <= this->nintervals[divno]);
246 return &(this->intervals[divno][index-1]); /* Convert to 0-based */
247 }
248
249
250 Chrpos_T
IIT_interval_low(T this,int index)251 IIT_interval_low (T this, int index) {
252 Interval_T interval;
253
254 assert(index <= this->total_nintervals);
255 interval = &(this->intervals[0][index-1]);
256 return Interval_low(interval);
257 }
258
259 Chrpos_T
IIT_interval_high(T this,int index)260 IIT_interval_high (T this, int index) {
261 Interval_T interval;
262
263 assert(index <= this->total_nintervals);
264 interval = &(this->intervals[0][index-1]);
265 return Interval_high(interval);
266 }
267
268 Chrpos_T
IIT_interval_length(T this,int index)269 IIT_interval_length (T this, int index) {
270 Interval_T interval;
271
272 assert(index <= this->total_nintervals);
273 interval = &(this->intervals[0][index-1]);
274 return Interval_length(interval);
275 }
276
277 int
IIT_interval_type(T this,int index)278 IIT_interval_type (T this, int index) {
279 Interval_T interval;
280
281 assert(index <= this->total_nintervals);
282 interval = &(this->intervals[0][index-1]);
283 return Interval_type(interval);
284 }
285
286
287 int
IIT_interval_sign(T this,int index)288 IIT_interval_sign (T this, int index) {
289 Interval_T interval;
290
291 assert(index <= this->total_nintervals);
292 interval = &(this->intervals[0][index-1]);
293 return Interval_sign(interval);
294 }
295
296
297 /* chrhigh is one past the highest position in the chromosome */
298 void
IIT_interval_bounds(Chrpos_T * low,Chrpos_T * high,Chrpos_T * length,T this,int index,int circular_typeint)299 IIT_interval_bounds (Chrpos_T *low, Chrpos_T *high, Chrpos_T *length, T this,
300 int index, int circular_typeint) {
301 Interval_T interval;
302
303 assert(index > 0);
304 assert(index <= this->total_nintervals);
305
306 interval = &(this->intervals[0][index-1]);
307 *low = Interval_low(interval);
308 *length = Interval_length(interval);
309 if (Interval_type(interval) == circular_typeint) {
310 *high = Interval_high(interval) + 1 + (*length);
311 } else {
312 *high = Interval_high(interval) + 1;
313 }
314 return;
315 }
316
317 int
IIT_index(T this,int divno,int i)318 IIT_index (T this, int divno, int i) {
319 return this->cum_nintervals[divno] + i + 1; /* 1-based */
320 }
321
322
323
324 /* Note: ndivs includes div "0", so callers should iterate through at i < ndivs */
325 int
IIT_ndivs(T this)326 IIT_ndivs (T this) {
327 return this->ndivs;
328 }
329
330 /* The iit file has a '\0' after each string, so functions know where
331 it ends */
332 char *
IIT_divstring(T this,int divno)333 IIT_divstring (T this, int divno) {
334 UINT4 start;
335
336 start = this->divpointers[divno];
337 return &(this->divstrings[start]);
338 }
339
340 int
IIT_divint(T this,char * divstring)341 IIT_divint (T this, char *divstring) {
342 int i = 0; /* Actually divstring for divno 0 is NULL */
343 UINT4 start;
344
345 if (divstring == NULL) {
346 return 0;
347 } else if (divstring[0] == '\0') {
348 return 0;
349 } else {
350 while (i < this->ndivs) {
351 start = this->divpointers[i];
352 if (!strcmp(divstring,&(this->divstrings[start]))) {
353 return i;
354 }
355 i++;
356 }
357
358 return -1;
359 }
360 }
361
362 char *
IIT_divstring_from_index(T this,int index)363 IIT_divstring_from_index (T this, int index) {
364 int divno = 1;
365 UINT4 start;
366
367 while (divno <= this->ndivs) {
368 /* Checked on existing iit file to confirm we need >= and not > */
369 if (this->cum_nintervals[divno] >= index) {
370 start = this->divpointers[divno-1];
371 return &(this->divstrings[start]);
372 }
373 divno++;
374 }
375
376 return (char *) NULL;
377 }
378
379 static int
IIT_divint_from_index(T this,int index)380 IIT_divint_from_index (T this, int index) {
381 int divno = 1;
382
383 while (divno <= this->ndivs) {
384 /* Checked on existing iit file to confirm we need >= and not > */
385 if (this->cum_nintervals[divno] >= index) {
386 return divno-1;
387 }
388 divno++;
389 }
390
391 return -1;
392 }
393
394
395 /* The iit file has a '\0' after each string, so functions know where
396 it ends */
397 char *
IIT_typestring(T this,int type)398 IIT_typestring (T this, int type) {
399 UINT4 start;
400
401 start = this->typepointers[type];
402 return &(this->typestrings[start]);
403 }
404
405 int
IIT_typeint(T this,char * typestring)406 IIT_typeint (T this, char *typestring) {
407 int i = 0;
408 UINT4 start;
409
410 while (i < this->ntypes) {
411 start = this->typepointers[i];
412 if (!strcmp(typestring,&(this->typestrings[start]))) {
413 return i;
414 }
415 i++;
416 }
417
418 return -1;
419 }
420
421 char *
IIT_fieldstring(T this,int fieldint)422 IIT_fieldstring (T this, int fieldint) {
423 UINT4 start;
424
425 start = this->fieldpointers[fieldint];
426 return &(this->fieldstrings[start]);
427 }
428
429 int
IIT_fieldint(T this,char * fieldstring)430 IIT_fieldint (T this, char *fieldstring) {
431 int i = 0;
432 UINT4 start;
433
434 while (i < this->nfields) {
435 start = this->fieldpointers[i];
436 if (!strcmp(fieldstring,&(this->fieldstrings[start]))) {
437 return i;
438 }
439 i++;
440 }
441
442 return -1;
443 }
444
445
446 char *
IIT_label(T this,int index,bool * allocp)447 IIT_label (T this, int index, bool *allocp) {
448 int recno;
449 #ifdef HAVE_64_BIT
450 UINT8 start;
451 #else
452 UINT4 start;
453 #endif
454
455 recno = index - 1; /* Convert to 0-based */
456
457 #ifdef WORDS_BIGENDIAN
458 #ifdef HAVE_64_BIT
459 if (this->label_pointers_8p == true) {
460 start = Bigendian_convert_uint8(this->labelpointers8[recno]);
461 } else {
462 start = (UINT8) Bigendian_convert_uint(this->labelpointers[recno]);
463 }
464 #else
465 start = Bigendian_convert_uint(this->labelpointers[recno]);
466 #endif
467 #else
468 #ifdef HAVE_64_BIT
469 if (this->label_pointers_8p == true) {
470 start = this->labelpointers8[recno];
471 } else {
472 start = (UINT8) this->labelpointers[recno];
473 }
474 #else
475 start = this->labelpointers[recno];
476 #endif
477 #endif
478 *allocp = false;
479 return &(this->labels[start]);
480 }
481
482
483 static char EMPTY_STRING[1] = {'\0'};
484
485 /* The iit file has a '\0' after each string, so functions know where
486 it ends */
487 /* Note: annotation itself is never allocated */
488 char *
IIT_annotation(char ** restofheader,T this,int index,bool * alloc_header_p)489 IIT_annotation (char **restofheader, T this, int index, bool *alloc_header_p) {
490 int recno;
491 char *annotation, *p;
492 int len;
493 #ifdef HAVE_64_BIT
494 UINT8 start;
495 #else
496 UINT4 start;
497 #endif
498
499
500 recno = index - 1; /* Convert to 0-based */
501 #ifdef WORDS_BIGENDIAN
502 #ifdef HAVE_64_BIT
503 if (this->annot_pointers_8p == true) {
504 start = Bigendian_convert_uint8(this->annotpointers8[recno]);
505 } else {
506 start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
507 }
508 #else
509 start = Bigendian_convert_uint(this->annotpointers[recno]);
510 #endif
511 #else
512 #ifdef HAVE_64_BIT
513 if (this->annot_pointers_8p == true) {
514 start = this->annotpointers8[recno];
515 } else {
516 start = (UINT8) this->annotpointers[recno];
517 }
518 #else
519 start = this->annotpointers[recno];
520 #endif
521 #endif
522
523 if (this->version <= 4) {
524 *restofheader = EMPTY_STRING;
525
526 *alloc_header_p = false;
527 return &(this->annotations[start]);
528 } else {
529 /* Versions 5 and higher include rest of header with
530 annotation. Don't return initial '\n', unless annotation is empty */
531 annotation = &(this->annotations[start]);
532 if (annotation[0] == '\0') {
533 *restofheader = annotation; /* Both are empty strings */
534
535 *alloc_header_p = false;
536 return annotation;
537
538 } else if (annotation[0] == '\n') {
539 *restofheader = EMPTY_STRING;
540
541 *alloc_header_p = false;
542 return &(annotation[1]);
543
544 } else {
545 p = annotation;
546 while (*p != '\0' && *p != '\n') p++;
547 len = (p - annotation)/sizeof(char);
548 *restofheader = (char *) MALLOC((1+len+1)*sizeof(char));
549 *restofheader[0] = ' ';
550 strncpy(&((*restofheader)[1]),annotation,len);
551 (*restofheader)[1+len] = '\0';
552
553 if (*p == '\n') p++;
554
555 *alloc_header_p = true;
556 return p;
557 }
558 }
559 }
560
561 /* The iit file has a '\0' after each string, so functions know where
562 it ends */
563 char
IIT_annotation_firstchar(T this,int index)564 IIT_annotation_firstchar (T this, int index) {
565 int recno;
566 #ifdef HAVE_64_BIT
567 UINT8 start;
568 #else
569 UINT4 start;
570 #endif
571
572 recno = index - 1; /* Convert to 0-based */
573
574 #ifdef WORDS_BIGENDIAN
575 #ifdef HAVE_64_BIT
576 if (this->annot_pointers_8p == true) {
577 start = Bigendian_convert_uint8(this->annotpointers8[recno]);
578 } else {
579 start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
580 }
581 #else
582 start = Bigendian_convert_uint(this->annotpointers[recno]);
583 #endif
584 #else
585 #ifdef HAVE_64_BIT
586 if (this->annot_pointers_8p == true) {
587 start = this->annotpointers8[recno];
588 } else {
589 start = (UINT8) this->annotpointers[recno];
590 }
591 #else
592 start = this->annotpointers[recno];
593 #endif
594 #endif
595
596 return this->annotations[start];
597 }
598
599 #ifdef HAVE_64_BIT
600 UINT8
601 #else
602 UINT4
603 #endif
IIT_annotation_strlen(T this,int index)604 IIT_annotation_strlen (T this, int index) {
605 int recno;
606 #ifdef HAVE_64_BIT
607 UINT8 start, end;
608 #else
609 UINT4 start, end;
610 #endif
611
612 recno = index - 1; /* Convert to 0-based */
613
614 #ifdef WORDS_BIGENDIAN
615 #ifdef HAVE_64_BIT
616 if (this->annot_pointers_8p == true) {
617 start = Bigendian_convert_uint8(this->annotpointers8[recno]);
618 end = Bigendian_convert_uint8(this->annotpointers8[recno+1]);
619 } else {
620 start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
621 end = (UINT8) Bigendian_convert_uint(this->annotpointers[recno+1]);
622 }
623 #else
624 start = Bigendian_convert_uint(this->annotpointers[recno]);
625 end = Bigendian_convert_uint(this->annotpointers[recno+1]);
626 #endif
627 #else
628 #ifdef HAVE_64_BIT
629 if (this->annot_pointers_8p == true) {
630 start = this->annotpointers8[recno];
631 end = this->annotpointers8[recno+1];
632 } else {
633 start = (UINT8) this->annotpointers[recno];
634 end = (UINT8) this->annotpointers[recno+1];
635 }
636 #else
637 start = this->annotpointers[recno];
638 end = this->annotpointers[recno+1];
639 #endif
640 #endif
641
642 /*
643 if (strlen(&(this->annotations[start])) != (end - start - 1)) {
644 printf("Problem with %s: %d != %u\n",
645 &(this->labels[this->labelpointers[recno]]),strlen(&(this->annotations[start])),end-start-1);
646 abort();
647 } else {
648 printf("Okay %s: %d == %u\n",
649 &(this->labels[this->labelpointers[recno]]),strlen(&(this->annotations[start])),end-start-1);
650 }
651 */
652
653 return (end - start - 1); /* Subtract terminal '\0' */
654 }
655
656 /* Always allocated */
657 char *
IIT_fieldvalue(T this,int index,int fieldint)658 IIT_fieldvalue (T this, int index, int fieldint) {
659 char *fieldvalue, *annotation, *p, *q;
660 int recno, fieldno = 0, fieldlen;
661 #ifdef HAVE_64_BIT
662 UINT8 start;
663 #else
664 UINT4 start;
665 #endif
666 bool allocp;
667
668 recno = index - 1; /* Convert to 0-based */
669 #ifdef WORDS_BIGENDIAN
670 #ifdef HAVE_64_BIT
671 if (this->annot_pointers_8p == true) {
672 start = Bigendian_convert_uint8(this->annotpointers8[recno]);
673 } else {
674 start = (UINT8) Bigendian_convert_uint(this->annotpointers[recno]);
675 }
676 #else
677 start = Bigendian_convert_uint(this->annotpointers[recno]);
678 #endif
679 #else
680 #ifdef HAVE_64_BIT
681 if (this->annot_pointers_8p == true) {
682 start = this->annotpointers8[recno];
683 } else {
684 start = (UINT8) this->annotpointers[recno];
685 }
686 #else
687 start = this->annotpointers[recno];
688 #endif
689 #endif
690 annotation = &(this->annotations[start]);
691 allocp = false;
692
693 p = annotation;
694
695 /* Starting with version 5, annotation should have '\n' from the header line. */
696 while (*p != '\0' && *p != '\n') p++;
697 if (*p == '\n') p++;
698
699 while (*p != '\0' && fieldno < fieldint) {
700 if (*p == '\n') {
701 fieldno++;
702 }
703 p++;
704 }
705
706 if (*p == '\0') {
707 fieldvalue = (char *) CALLOC(1,sizeof(char));
708 fieldvalue[0] = '\0';
709 } else {
710 q = p;
711 while (*q != '\0' && *q != '\n') {
712 q++;
713 }
714 fieldlen = (q - p)/sizeof(char);
715 fieldvalue = (char *) MALLOC((fieldlen+1)*sizeof(char));
716 strncpy(fieldvalue,p,fieldlen);
717 fieldvalue[fieldlen] = '\0';
718 }
719
720 if (allocp == true) {
721 FREE(annotation);
722 }
723
724 return fieldvalue;
725 }
726
727
728 void
IIT_dump_divstrings(FILE * fp,T this)729 IIT_dump_divstrings (FILE *fp, T this) {
730 int divno;
731 UINT4 start;
732
733 /* Start with 1, because first divno has no name */
734 for (divno = 1; divno < this->ndivs; divno++) {
735 start = this->divpointers[divno];
736 fprintf(fp,"%s ",&(this->divstrings[start]));
737 }
738 fprintf(fp,"\n");
739
740 return;
741 }
742
743
744 void
IIT_dump_typestrings(FILE * fp,T this)745 IIT_dump_typestrings (FILE *fp, T this) {
746 int type;
747 UINT4 start;
748
749 for (type = 0; type < this->ntypes; type++) {
750 start = this->typepointers[type];
751 fprintf(fp,"%d\t%s\n",type,&(this->typestrings[start]));
752 }
753 return;
754 }
755
756 void
IIT_dump_fieldstrings(FILE * fp,T this)757 IIT_dump_fieldstrings (FILE *fp, T this) {
758 int field;
759 UINT4 start;
760
761 for (field = 0; field < this->nfields; field++) {
762 start = this->fieldpointers[field];
763 fprintf(fp,"%d\t%s\n",field,&(this->fieldstrings[start]));
764 }
765 return;
766 }
767
768 void
IIT_dump_labels(FILE * fp,T this)769 IIT_dump_labels (FILE *fp, T this) {
770 int i;
771 #ifdef HAVE_64_BIT
772 UINT8 start;
773 #else
774 UINT4 start;
775 #endif
776 char *label;
777
778 for (i = 0; i < this->total_nintervals; i++) {
779 #ifdef WORDS_BIGENDIAN
780 #ifdef HAVE_64_BIT
781 if (this->label_pointers_8p == true) {
782 start = Bigendian_convert_uint8(this->labelpointers8[i]);
783 } else {
784 start = (UINT8) Bigendian_convert_uint(this->labelpointers[i]);
785 }
786 #else
787 start = Bigendian_convert_uint(this->labelpointers[i]);
788 #endif
789 #else
790 #ifdef HAVE_64_BIT
791 if (this->label_pointers_8p == true) {
792 start = this->labelpointers8[i];
793 } else {
794 start = (UINT8) this->labelpointers[i];
795 }
796 #else
797 start = this->labelpointers[i];
798 #endif
799 #endif
800 label = &(this->labels[start]);
801 fprintf(fp,"%s ",label);
802 }
803 fprintf(fp,"\n");
804 return;
805 }
806
807
808 void
IIT_dump(T this,bool sortp)809 IIT_dump (T this, bool sortp) {
810 int divno, i;
811 Interval_T interval;
812 char *divstring;
813 char *labelptr, *annotptr, c;
814 int *matches, nmatches, index;
815 char *label, *annotation, *restofheader;
816 bool allocp;
817
818 if (sortp == false) {
819 labelptr = this->labels;
820 annotptr = this->annotations;
821 }
822
823 for (divno = 0; divno < this->ndivs; divno++) {
824 divstring = IIT_divstring(this,divno);
825
826 if (sortp == true) {
827 if (this->nintervals[divno] > 0) {
828 matches = IIT_get(&nmatches,this,divstring,/*x*/0,/*y*/-1U,/*sortp*/true);
829 for (i = 0; i < nmatches; i++) {
830 index = matches[i];
831 label = IIT_label(this,index,&allocp);
832 printf(">%s",label);
833 if (allocp == true) {
834 FREE(label);
835 }
836
837 interval = IIT_interval(this,index);
838 if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
839 /* No interval */
840 printf("\n");
841 annotation = IIT_annotation(&restofheader,this,index,&allocp);
842 printf("%s",annotation);
843 if (allocp == true) {
844 FREE(restofheader);
845 }
846
847 } else {
848 if (divno > 0) {
849 /* zeroth divno has empty string */
850 printf(" %s:",divstring);
851 }
852
853 if (Interval_sign(interval) < 0) {
854 printf("%u..%u",Interval_high(interval),Interval_low(interval));
855 } else {
856 printf("%u..%u",Interval_low(interval),Interval_high(interval));
857 }
858 if (Interval_type(interval) > 0) {
859 printf(" %s",IIT_typestring(this,Interval_type(interval)));
860 }
861
862 annotation = IIT_annotation(&restofheader,this,index,&allocp);
863 printf("%s\n",restofheader);
864 printf("%s",annotation);
865 if (allocp == true) {
866 FREE(restofheader);
867 }
868 }
869 }
870
871 FREE(matches);
872 }
873
874 } else {
875 for (i = 0; i < this->nintervals[divno]; i++) {
876 printf(">");
877 while ((c = *labelptr++) != '\0') {
878 printf("%c",c);
879 }
880 printf(" ");
881
882 interval = &(this->intervals[divno][i]);
883 if (divno <= 0) {
884 /* zeroth divno has empty string */
885 } else if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
886 /* Ignore divstring */
887 } else {
888 printf("%s:",divstring);
889 }
890
891 if (Interval_low(interval) == 0 && Interval_high(interval) == 0) {
892 /* Ignore interval and type */
893 } else {
894 if (Interval_sign(interval) < 0) {
895 printf("%u..%u",Interval_high(interval),Interval_low(interval));
896 } else {
897 printf("%u..%u",Interval_low(interval),Interval_high(interval));
898 }
899 if (Interval_type(interval) > 0) {
900 printf(" %s",IIT_typestring(this,Interval_type(interval)));
901 }
902 }
903
904 if (this->version <= 4) {
905 printf("\n");
906 while ((c = *annotptr++) != '\0') {
907 printf("%c",c);
908 }
909 } else {
910 /* Versions 5 and higher include rest of header with
911 annotation. Don't print initial '\n', unless annotation is empty */
912 if (*annotptr == '\0') {
913 printf("\n");
914 annotptr++;
915 } else if (*annotptr == '\n') {
916 /* No rest of header */
917 while ((c = *annotptr++) != '\0') {
918 printf("%c",c);
919 }
920 } else {
921 printf(" ");
922 while ((c = *annotptr++) != '\0') {
923 printf("%c",c);
924 }
925 }
926 }
927 }
928 }
929 }
930
931 return;
932 }
933
934
935 /* For chromosome.iit file, which is stored in version 1 */
936 void
IIT_dump_simple(T this)937 IIT_dump_simple (T this) {
938 int index = 0, i;
939 Interval_T interval;
940 Chrpos_T startpos, endpos;
941 char *label;
942 bool allocp;
943
944 for (i = 0; i < this->nintervals[0]; i++) {
945 interval = &(this->intervals[0][i]);
946 label = IIT_label(this,index+1,&allocp);
947 printf("%s\t",label);
948 if (allocp == true) {
949 FREE(label);
950 }
951 startpos = Interval_low(interval);
952 endpos = startpos + Interval_length(interval) - 1U;
953
954 printf("%u..%u\t",startpos+1U,endpos+1U);
955
956 printf("%u",Interval_length(interval));
957 if (Interval_type(interval) > 0) {
958 printf("\t%s",IIT_typestring(this,Interval_type(interval)));
959 }
960 printf("\n");
961
962 index++;
963 }
964
965 return;
966 }
967
968
/* NOTE: everything between the following #if 0 and its #endif is excluded
   from compilation.  IIT_dump_formatted prints, for every interval of
   every div, a tab-separated line: label, coordinate range (1-based,
   prefixed by "div:" for divno > 0), length, and the type name when the
   type is > 0.  When directionalp is true the range is printed end..start
   for reverse entries: for version <= 1 that is decided by a leading '-'
   in the annotation, otherwise by a negative Interval_sign(). */
969 #if 0
970 /* For higher version files, which are divided into divs */
971 void
972 IIT_dump_formatted (T this, bool directionalp) {
973 int divno, index = 0, i;
974 Interval_T interval;
975 Chrpos_T startpos, endpos;
976 char *label, *divstring, firstchar;
977 bool allocp;
978
979 for (divno = 0; divno < this->ndivs; divno++) {
980 divstring = IIT_divstring(this,divno);
981 for (i = 0; i < this->nintervals[divno]; i++) {
982 interval = &(this->intervals[divno][i]);
983 label = IIT_label(this,index+1,&allocp);
984 printf("%s\t",label);
985 if (allocp == true) {
986 FREE(label);
987 }
988 startpos = Interval_low(interval);
989 endpos = startpos + Interval_length(interval) - 1U;
990
991 if (divno > 0) {
992 printf("%s:",divstring);
993 }
994 if (directionalp == false) {
995 printf("%u..%u\t",startpos+1U,endpos+1U);
996 } else if (this->version <= 1) {
997 firstchar = IIT_annotation_firstchar(this,index+1);
998 if (firstchar == '-') {
999 printf("%u..%u\t",endpos+1U,startpos+1U);
1000 } else {
1001 printf("%u..%u\t",startpos+1U,endpos+1U);
1002 }
1003 } else {
1004 if (Interval_sign(interval) < 0) {
1005 printf("%u..%u\t",endpos+1U,startpos+1U);
1006 } else {
1007 printf("%u..%u\t",startpos+1U,endpos+1U);
1008 }
1009 }
1010
1011 printf("%u",Interval_length(interval));
1012 if (Interval_type(interval) > 0) {
1013 printf("\t%s",IIT_typestring(this,Interval_type(interval)));
1014 }
1015 printf("\n");
1016
1017 index++;
1018 }
1019 }
1020
1021 return;
1022 }
1023 #endif
1024
1025
/* NOTE: everything between the following #if 0 and its #endif is excluded
   from compilation.
     uint_cmp                 qsort comparator ordering unsigned ints
                              ascending.
     IIT_transitions          collects the low and high of every interval,
                              sorts each list, and merges them into one
                              ascending array of edges; (*signs)[k] is +1
                              for a start and -1 for an end (starts win
                              ties).  Returns NULL with *nedges = 0 when
                              there are no intervals.
     IIT_transitions_subset   same, restricted to the 1-based indices in
                              indices[0..nindices-1].
   NOTE(review): this code indexes this->intervals[i] with a single
   subscript and IIT_transitions uses this->nintervals as a scalar loop
   bound, whereas the compiled code in this file indexes both by divno
   first.  It would have to be adapted before being re-enabled (cf. the
   original "Need to work on" remark below). */
1026 #if 0
1027 static int
1028 uint_cmp (const void *x, const void *y) {
1029 unsigned int a = * (unsigned int *) x;
1030 unsigned int b = * (unsigned int *) y;
1031
1032 if (a < b) {
1033 return -1;
1034 } else if (a > b) {
1035 return +1;
1036 } else {
1037 return 0;
1038 }
1039 }
1040
1041 /* Need to work on */
1042 UINT4 *
1043 IIT_transitions (int **signs, int *nedges, T this) {
1044 UINT4 *edges, *starts, *ends;
1045 int nintervals, i, j, k;
1046 Interval_T interval;
1047 Uintlist_T startlist = NULL, endlist = NULL;
1048
1049 for (i = 0; i < this->nintervals; i++) {
1050 interval = &(this->intervals[i]);
1051 startlist = Uintlist_push(startlist,Interval_low(interval));
1052 endlist = Uintlist_push(endlist,Interval_high(interval));
1053 }
1054
1055 if (Uintlist_length(startlist) == 0) {
1056 edges = (unsigned int *) NULL;
1057 *signs = (int *) NULL;
1058 *nedges = 0;
1059 } else {
1060 starts = Uintlist_to_array(&nintervals,startlist);
1061 ends = Uintlist_to_array(&nintervals,endlist);
1062 qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
1063 qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);
1064
1065 *nedges = nintervals+nintervals;
1066 *signs = (int *) CALLOC(*nedges,sizeof(int));
1067 edges = (unsigned int *) CALLOC(*nedges,sizeof(unsigned int));
1068 i = j = k = 0;
1069 while (i < nintervals && j < nintervals) {
1070 if (starts[i] <= ends[j]) {
1071 (*signs)[k] = +1;
1072 edges[k++] = starts[i++];
1073 } else {
1074 (*signs)[k] = -1;
1075 edges[k++] = ends[j++];
1076 }
1077 }
1078 while (i < nintervals) {
1079 (*signs)[k] = +1;
1080 edges[k++] = starts[i++];
1081 }
1082 while (j < nintervals) {
1083 (*signs)[k] = -1;
1084 edges[k++] = ends[j++];
1085 }
1086
1087 FREE(ends);
1088 FREE(starts);
1089 }
1090
1091 Uintlist_free(&endlist);
1092 Uintlist_free(&startlist);
1093
1094 return edges;
1095 }
1096
1097 UINT4 *
1098 IIT_transitions_subset (int **signs, int *nedges, T this, int *indices, int nindices) {
1099 UINT4 *edges, *starts, *ends;
1100 int nintervals, i, j, k;
1101 Interval_T interval;
1102 Uintlist_T startlist = NULL, endlist = NULL;
1103
1104 for (k = 0; k < nindices; k++) {
1105 i = indices[k] - 1;
1106 interval = &(this->intervals[i]);
1107 startlist = Uintlist_push(startlist,Interval_low(interval));
1108 endlist = Uintlist_push(endlist,Interval_high(interval));
1109 }
1110
1111 if (Uintlist_length(startlist) == 0) {
1112 edges = (unsigned int *) NULL;
1113 *signs = (int *) NULL;
1114 *nedges = 0;
1115 } else {
1116 starts = Uintlist_to_array(&nintervals,startlist);
1117 ends = Uintlist_to_array(&nintervals,endlist);
1118 qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
1119 qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);
1120
1121 *nedges = nintervals+nintervals;
1122 *signs = (int *) CALLOC(*nedges,sizeof(int));
1123 edges = (unsigned int *) CALLOC(*nedges,sizeof(unsigned int));
1124 i = j = k = 0;
1125 while (i < nintervals && j < nintervals) {
1126 if (starts[i] <= ends[j]) {
1127 (*signs)[k] = +1;
1128 edges[k++] = starts[i++];
1129 } else {
1130 (*signs)[k] = -1;
1131 edges[k++] = ends[j++];
1132 }
1133 }
1134 while (i < nintervals) {
1135 (*signs)[k] = +1;
1136 edges[k++] = starts[i++];
1137 }
1138 while (j < nintervals) {
1139 (*signs)[k] = -1;
1140 edges[k++] = ends[j++];
1141 }
1142
1143 FREE(ends);
1144 FREE(starts);
1145 }
1146
1147 Uintlist_free(&endlist);
1148 Uintlist_free(&startlist);
1149
1150 return edges;
1151 }
1152 #endif
1153
1154
1155 /* For IIT versions <= 2. Previously sorted by Chrom_compare, but now
1156 we assume that chromosomes are represented by divs, which are
1157 pre-sorted by iit_store. */
/* NOTE: everything between the following #if 0 and its #endif is excluded
   from compilation.
     string_compare        qsort comparator built on strcmp.
     sort_matches_by_type  buckets the matched intervals by type, visits
                           the types in numeric order (or in the order of
                           their sorted names when alphabetizep is true),
                           sorts each bucket with Interval_cmp, and fills
                           the returned array with the indices reported by
                           IIT_get_exact_multiple for each interval.
                           Returns NULL when nmatches is 0; otherwise the
                           caller owns the returned array.
   NOTE(review): string_compare casts its arguments to char *, but it is
   passed to qsort over an array of char * elements, so it receives
   pointers to those elements -- confirm before re-enabling.
   NOTE(review): in the inner while loop, i only advances when
   IIT_get_exact_multiple returns non-NULL; a NULL result would loop
   forever. */
1158 #if 0
1159 static int
1160 string_compare (const void *x, const void *y) {
1161 char *a = (char *) x;
1162 char *b = (char *) y;
1163
1164 return strcmp(a,b);
1165 }
1166
1167 static int *
1168 sort_matches_by_type (T this, int *matches, int nmatches, bool alphabetizep) {
1169 int *sorted;
1170 int type, index, i, j, k = 0, t;
1171 List_T *intervallists;
1172 Interval_T *intervals, interval;
1173 int *matches1, nmatches1, nintervals;
1174 char *typestring;
1175 char **strings;
1176
1177 if (nmatches == 0) {
1178 return (int *) NULL;
1179 } else {
1180 sorted = (int *) CALLOC(nmatches,sizeof(int));
1181 }
1182
1183 intervallists = (List_T *) CALLOC(this->ntypes,sizeof(List_T));
1184 for (i = 0; i < nmatches; i++) {
1185 index = matches[i];
1186 interval = &(this->intervals[0][index-1]);
1187 type = Interval_type(interval);
1188 intervallists[type] = List_push(intervallists[type],(void *) interval);
1189 }
1190
1191 if (alphabetizep == true) {
1192 strings = (char **) CALLOC(this->ntypes,sizeof(char *));
1193
1194 for (type = 0; type < this->ntypes; type++) {
1195 typestring = IIT_typestring(this,type);
1196 strings[type] = (char *) CALLOC(strlen(typestring)+1,sizeof(char));
1197 strcpy(strings[type],typestring);
1198 }
1199 qsort(strings,this->ntypes,sizeof(char *),string_compare);
1200 }
1201
1202 for (t = 0; t < this->ntypes; t++) {
1203 if (alphabetizep == false) {
1204 type = t;
1205 typestring = IIT_typestring(this,type);
1206 } else {
1207 typestring = strings[t];
1208 type = IIT_typeint(this,typestring);
1209 }
1210
1211 if ((nintervals = List_length(intervallists[type])) > 0) {
1212 intervals = (Interval_T *) List_to_array(intervallists[type],/*end*/NULL);
1213 qsort(intervals,nintervals,sizeof(Interval_T),Interval_cmp);
1214
1215 i = 0;
1216 while (i < nintervals) {
1217 interval = intervals[i];
1218 matches1 = IIT_get_exact_multiple(&nmatches1,this,/*divstring*/NULL,Interval_low(interval),Interval_high(interval),type);
1219 if (matches1 != NULL) {
1220 for (j = 0; j < nmatches1; j++) {
1221 sorted[k++] = matches1[j];
1222 }
1223 i += nmatches1;
1224 FREE(matches1);
1225 }
1226 }
1227
1228 FREE(intervals);
1229 List_free(&(intervallists[type]));
1230 }
1231
1232 }
1233
1234 if (alphabetizep == true) {
1235 for (t = 0; t < this->ntypes; t++) {
1236 FREE(strings[t]);
1237 }
1238 FREE(strings);
1239 }
1240
1241 FREE(intervallists);
1242 return sorted;
1243 }
1244 #endif
1245
1246
1247 /* For IIT versions >= 3. Assumes that matches are all in the same
1248 div */
1249 static int *
sort_matches_by_position(T this,int * matches,int nmatches)1250 sort_matches_by_position (T this, int *matches, int nmatches) {
1251 int *sorted, index, i;
1252 struct Interval_windex_T *intervals;
1253
1254 if (nmatches == 0) {
1255 return (int *) NULL;
1256 } else {
1257 intervals = (struct Interval_windex_T *) CALLOC(nmatches,sizeof(struct Interval_windex_T));
1258 for (i = 0; i < nmatches; i++) {
1259 index = intervals[i].index = matches[i];
1260 intervals[i].interval = &(this->intervals[0][index-1]); /* Ignore divno here, because we have offset index */
1261 }
1262 qsort(intervals,nmatches,sizeof(struct Interval_windex_T),Interval_windex_cmp);
1263
1264 sorted = (int *) CALLOC(nmatches,sizeof(int));
1265 for (i = 0; i < nmatches; i++) {
1266 sorted[i] = intervals[i].index;
1267 }
1268
1269 FREE(intervals);
1270 return sorted;
1271 }
1272 }
1273
1274
1275
1276
#if 0
/* Need to work on.  Still disabled.
 *
 * For each type, prints one line per distinct interval start or end
 * coordinate (in ascending order, starts before ends on ties):
 *   typestring <TAB> coordinate <TAB> "start"|"end" <TAB> nmatches <TAB> label,label,...
 * where the matches are those returned by IIT_get_typed for that single
 * coordinate.  If alphabetizep is true, types are visited in the order
 * given by Chrom_compare; otherwise in type-number order.
 *
 * NOTE(review): the IIT_get_typed call below has not been checked
 * against its current prototype (other lookups in this file take a
 * divstring argument) -- confirm before re-enabling. */
void
IIT_dump_counts (T this, bool alphabetizep) {
  int type, index, i, j, k, t;
  Interval_T interval;
  Uintlist_T *startlists, *endlists;
  int *matches, nmatches, nintervals;
  unsigned int *starts, *ends, edge;
  char *typestring, *label;
  const char *edgetype;
  bool allocp;
  Chrom_T *chroms;

  /* Bucket the low and high coordinates of every interval by type.
     Intervals for all divs are addressed through intervals[0], as in
     sort_matches_by_position. */
  startlists = (Uintlist_T *) CALLOC(this->ntypes,sizeof(Uintlist_T));
  endlists = (Uintlist_T *) CALLOC(this->ntypes,sizeof(Uintlist_T));
  for (i = 0; i < this->total_nintervals; i++) {
    interval = &(this->intervals[0][i]);
    type = Interval_type(interval);
    startlists[type] = Uintlist_push(startlists[type],Interval_low(interval));
    endlists[type] = Uintlist_push(endlists[type],Interval_high(interval));
  }

  if (alphabetizep == true) {
    chroms = (Chrom_T *) CALLOC(this->ntypes,sizeof(Chrom_T));

    for (type = 0; type < this->ntypes; type++) {
      typestring = IIT_typestring(this,type);
      chroms[type] = Chrom_from_string(typestring,/*mitochondrial_string*/NULL,/*order*/0U,/*circularp*/false,
                                       /*alt_scaffold_start*/0,/*alt_scaffold_end*/0);
    }
    qsort(chroms,this->ntypes,sizeof(Chrom_T),Chrom_compare);
  }

  for (t = 0; t < this->ntypes; t++) {
    if (alphabetizep == false) {
      type = t;
      typestring = IIT_typestring(this,type);
    } else {
      typestring = Chrom_string(chroms[t]); /* Not allocated; do not free */
      type = IIT_typeint(this,typestring);
    }

    if (Uintlist_length(startlists[type]) > 0) {
      starts = Uintlist_to_array(&nintervals,startlists[type]);
      ends = Uintlist_to_array(&nintervals,endlists[type]);
      qsort(starts,nintervals,sizeof(unsigned int),uint_cmp);
      qsort(ends,nintervals,sizeof(unsigned int),uint_cmp);

      /* Merge the two sorted arrays, visiting each distinct coordinate
         once per array */
      i = j = 0;
      while (i < nintervals || j < nintervals) {
        if (i < nintervals && (j >= nintervals || starts[i] <= ends[j])) {
          edge = starts[i++];
          edgetype = "start";
          while (i < nintervals && starts[i] == edge) {
            i++;
          }
        } else {
          edge = ends[j++];
          edgetype = "end";
          while (j < nintervals && ends[j] == edge) {
            j++;
          }
        }

        matches = IIT_get_typed(&nmatches,this,edge,edge,type,/*sortp*/false);
        printf("%s\t%u\t%s\t%d",typestring,edge,edgetype,nmatches);

        if (matches != NULL) {
          for (k = 0; k < nmatches; k++) {
            index = matches[k];
            label = IIT_label(this,index,&allocp);
            /* Tab before the first label, commas between the rest */
            printf("%c%s",(k == 0) ? '\t' : ',',label);
            if (allocp == true) {
              FREE(label);
            }
          }
          FREE(matches);
        }
        /* Always terminate the line, even when there are no matches */
        printf("\n");
      }

      Uintlist_free(&(endlists[type]));
      Uintlist_free(&(startlists[type]));
      FREE(ends);
      FREE(starts);
    }

  }

  if (alphabetizep == true) {
    for (t = 0; t < this->ntypes; t++) {
      Chrom_free(&(chroms[t]));
    }
    FREE(chroms);
  }

  FREE(endlists);
  FREE(startlists);

  return;
}
#endif
1403
1404
1405 /************************************************************************
1406 * For file format, see iit-write.c
1407 ************************************************************************/
1408
1409 void
IIT_free(T * old)1410 IIT_free (T *old) {
1411 int divno;
1412
1413 if (*old != NULL) {
1414 if ((*old)->name != NULL) {
1415 FREE((*old)->name);
1416 }
1417
1418 if ((*old)->access == LOADED) {
1419 /* No need to munmap or free words */
1420
1421 } else if ((*old)->access == MMAPPED) {
1422 #ifdef HAVE_MMAP
1423 munmap((void *) (*old)->annot_mmap,(*old)->annot_length);
1424 munmap((void *) (*old)->annotpointers_mmap,(*old)->annotpointers_length);
1425 munmap((void *) (*old)->label_mmap,(*old)->label_length);
1426 munmap((void *) (*old)->labelpointers_mmap,(*old)->labelpointers_length);
1427 munmap((void *) (*old)->labelorder_mmap,(*old)->labelorder_length);
1428 if ((*old)->valuep == true) {
1429 munmap((void *) (*old)->value_mmap,(*old)->value_length);
1430 munmap((void *) (*old)->valueorder_mmap,(*old)->valueorder_length);
1431 }
1432 #endif
1433 close((*old)->fd);
1434
1435 } else if ((*old)->access == FILEIO) {
1436 FREE((*old)->annotations);
1437 #ifdef HAVE_64_BIT
1438 if ((*old)->annot_pointers_8p == true) {
1439 FREE((*old)->annotpointers8);
1440 } else {
1441 FREE((*old)->annotpointers);
1442 }
1443 #else
1444 FREE((*old)->annotpointers);
1445 #endif
1446 FREE((*old)->labels);
1447 #ifdef HAVE_64_BIT
1448 if ((*old)->label_pointers_8p == true) {
1449 FREE((*old)->labelpointers8);
1450 } else {
1451 FREE((*old)->labelpointers);
1452 }
1453 #else
1454 FREE((*old)->labelpointers);
1455 #endif
1456 FREE((*old)->labelorder);
1457 /* close((*old)->fd); -- closed in read_annotations */
1458
1459 if ((*old)->valuep == true) {
1460 FREE((*old)->values);
1461 FREE((*old)->valueorder);
1462 }
1463
1464 } else if ((*old)->access == ALLOCATED_PRIVATE) {
1465 /* Nothing to close. IIT must have been created by IIT_new. */
1466
1467 } else if ((*old)->access == ALLOCATED_SHARED) {
1468 /* Nothing to close. IIT must have been created by IIT_new. */
1469
1470 } else {
1471 abort();
1472 }
1473
1474 if ((*old)->access == LOADED) {
1475 FREE((*old)->intervals);
1476 FREE((*old)->nodes);
1477 FREE((*old)->omegas);
1478 FREE((*old)->sigmas);
1479 if ((*old)->alphas != NULL) {
1480 FREE((*old)->betas);
1481 FREE((*old)->alphas);
1482 }
1483
1484 } else {
1485 if ((*old)->fieldstrings != NULL) {
1486 FREE((*old)->fieldstrings);
1487 }
1488 FREE((*old)->fieldpointers);
1489 FREE((*old)->typestrings);
1490 FREE((*old)->typepointers);
1491
1492 FREE((*old)->intervals[0]);
1493 FREE((*old)->intervals);
1494
1495 for (divno = 0; divno < (*old)->ndivs; divno++) {
1496 /* Note: we are depending on Mem_free() to check that these are non-NULL */
1497 FREE((*old)->nodes[divno]);
1498 FREE((*old)->omegas[divno]);
1499 FREE((*old)->sigmas[divno]);
1500 if ((*old)->alphas != NULL) {
1501 FREE((*old)->betas[divno]);
1502 FREE((*old)->alphas[divno]);
1503 }
1504 }
1505
1506 FREE((*old)->nodes);
1507 FREE((*old)->omegas);
1508 FREE((*old)->sigmas);
1509 if ((*old)->alphas != NULL) {
1510 FREE((*old)->betas);
1511 FREE((*old)->alphas);
1512 }
1513
1514 FREE((*old)->divstrings);
1515 FREE((*old)->divpointers);
1516 FREE((*old)->cum_nnodes);
1517 FREE((*old)->nnodes);
1518 FREE((*old)->cum_nintervals);
1519 FREE((*old)->nintervals);
1520 }
1521
1522 FREE(*old);
1523
1524 }
1525
1526 return;
1527 }
1528
1529
1530
/* Moves the position of fp by offset bytes relative to its current
   position, using fseeko when available and fseek otherwise.  Prints a
   message and aborts if the seek fails. */
static void
move_relative (FILE *fp, off_t offset) {
  int status;

#ifdef HAVE_FSEEKO
  status = fseeko(fp,offset,SEEK_CUR);
#else
  status = fseek(fp,(long) offset,SEEK_CUR);
#endif

  if (status < 0) {
    fprintf(stderr,"Error in move_relative, seek\n");
    abort();
  }

  return;
}
1548
1549
1550 static size_t
skip_trees(size_t offset,size_t filesize,FILE * fp,char * filename,int skip_ndivs,int skip_nintervals,int skip_nnodes)1551 skip_trees (size_t offset, size_t filesize, FILE *fp, char *filename,
1552 int skip_ndivs, int skip_nintervals, int skip_nnodes) {
1553
1554 size_t skipsize;
1555
1556 /* 4 is for alphas, betas, sigmas, and omegas */
1557 skipsize = (skip_nintervals + skip_ndivs) * 4 * sizeof(int);
1558 skipsize += skip_nnodes * sizeof(struct FNode_T);
1559
1560 if ((offset += skipsize) > filesize) {
1561 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_trees %zu, filesize %zu). Did you generate it using iit_store?\n",
1562 filename,offset,filesize);
1563 exit(9);
1564 } else {
1565 move_relative(fp,skipsize);
1566 }
1567
1568 return offset;
1569 }
1570
1571
1572
1573 static char *
load_tree(char * memory,T new,int divno)1574 load_tree (char *memory, T new, int divno) {
1575 #ifdef DEBUG
1576 int i;
1577 #endif
1578
1579 if (new->version < 2) {
1580 #if 0
1581 /* Computing only if needed */
1582 compute_flanking(new);
1583 #else
1584 new->alphas[divno] = new->betas[divno] = (int *) NULL;
1585 #endif
1586
1587 } else {
1588 new->alphas[divno] = (int *) memory;
1589 memory += (new->nintervals[divno]+1) * sizeof(int);
1590
1591 new->betas[divno] = (int *) memory;
1592 memory += (new->nintervals[divno]+1) * sizeof(int);
1593 }
1594
1595 new->sigmas[divno] = (int *) memory;
1596 memory += (new->nintervals[divno]+1) * sizeof(int);
1597
1598 new->omegas[divno] = (int *) memory;
1599 memory += (new->nintervals[divno]+1) * sizeof(int);
1600
1601 if (new->nnodes[divno] == 0) {
1602 new->nodes[divno] = (struct FNode_T *) NULL;
1603 } else {
1604 #ifdef WORDS_BIGENDIAN
1605 /* Not supported */
1606 abort();
1607 #if 0
1608 new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
1609 for (i = 0; i < new->nnodes[divno]; i++) {
1610 Bigendian_fread_uint(&(new->nodes[divno][i].value),fp);
1611 Bigendian_fread_int(&(new->nodes[divno][i].a),fp);
1612 Bigendian_fread_int(&(new->nodes[divno][i].b),fp);
1613 Bigendian_fread_int(&(new->nodes[divno][i].leftindex),fp);
1614 Bigendian_fread_int(&(new->nodes[divno][i].rightindex),fp);
1615 }
1616 #endif
1617
1618 #else
1619 if (sizeof(struct FNode_T) == sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int)) {
1620 new->nodes[divno] = (struct FNode_T *) memory;
1621 memory += new->nnodes[divno] * sizeof(struct FNode_T);
1622 } else {
1623 /* Not supported */
1624 abort();
1625 #if 0
1626 for (i = 0; i < new->nnodes[divno]; i++) {
1627 fread(&(new->nodes[divno][i].value),sizeof(unsigned int),1,fp);
1628 fread(&(new->nodes[divno][i].a),sizeof(int),1,fp);
1629 fread(&(new->nodes[divno][i].b),sizeof(int),1,fp);
1630 fread(&(new->nodes[divno][i].leftindex),sizeof(int),1,fp);
1631 fread(&(new->nodes[divno][i].rightindex),sizeof(int),1,fp);
1632 }
1633 #endif
1634 }
1635 #endif
1636
1637 debug(
1638 for (i = 0; i < new->nnodes[divno]; i++) {
1639 printf("Read node %d %d %d\n",new->nodes[divno][i].value,new->nodes[divno][i].a,new->nodes[divno][i].b);
1640 }
1641 );
1642 }
1643 debug(printf("\n"));
1644
1645 return memory;
1646 }
1647
1648
1649
1650 static size_t
read_tree(size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divno)1651 read_tree (size_t offset, size_t filesize, FILE *fp, char *filename, T new, int divno) {
1652 size_t items_read;
1653 int i;
1654
1655 if (new->version < 2) {
1656 #if 0
1657 /* Computing only if needed */
1658 compute_flanking(new);
1659 #else
1660 new->alphas[divno] = new->betas[divno] = (int *) NULL;
1661 #endif
1662
1663 } else {
1664 if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1665 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after alphas %zu, filesize %zu). Did you generate it using iit_store?\n",
1666 filename,offset,filesize);
1667 exit(9);
1668 } else {
1669 new->alphas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1670 if ((items_read = FREAD_INTS(new->alphas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1671 fprintf(stderr,"IIT file %s appears to be truncated. items_read = %zu\n",
1672 filename,items_read);
1673 exit(9);
1674 }
1675 }
1676
1677 if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1678 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after betas %zu, filesize %zu). Did you generate it using iit_store?\n",
1679 filename,offset,filesize);
1680 exit(9);
1681 } else {
1682 new->betas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1683 if ((items_read = FREAD_INTS(new->betas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1684 fprintf(stderr,"IIT file %s appears to be truncated. items_read = %zu\n",filename,items_read);
1685 exit(9);
1686 }
1687 #if 0
1688 debug(
1689 printf("betas[%d]:",divno);
1690 for (i = 0; i < new->nintervals[divno]+1; i++) {
1691 printf(" %d",new->betas[divno][i]);
1692 }
1693 printf("\n");
1694 );
1695 #endif
1696 }
1697 }
1698
1699 if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1700 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after sigmas %zu, filesize %zu). Did you generate it using iit_store?\n",
1701 filename,offset,filesize);
1702 exit(9);
1703 } else {
1704 new->sigmas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1705 if ((items_read = FREAD_INTS(new->sigmas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1706 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
1707 exit(9);
1708 }
1709 #if 0
1710 debug(
1711 printf("sigmas[%d]:",divno);
1712 for (i = 0; i < new->nintervals[divno]+1; i++) {
1713 printf(" %d",new->sigmas[divno][i]);
1714 }
1715 printf("\n");
1716 );
1717 #endif
1718 }
1719
1720 if ((offset += sizeof(int)*(new->nintervals[divno]+1)) > filesize) {
1721 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after omegas %zu, filesize %zu). Did you generate it using iit_store?\n",
1722 filename,offset,filesize);
1723 exit(9);
1724 } else {
1725 new->omegas[divno] = (int *) CALLOC(new->nintervals[divno]+1,sizeof(int));
1726 if ((items_read = FREAD_INTS(new->omegas[divno],new->nintervals[divno]+1,fp)) != (unsigned int) new->nintervals[divno] + 1) {
1727 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
1728 exit(9);
1729 }
1730 #if 0
1731 debug(
1732 printf("omegas[%d]:",divno);
1733 for (i = 0; i < new->nintervals[divno]+1; i++) {
1734 printf(" %d",new->omegas[divno][i]);
1735 }
1736 printf("\n");
1737 );
1738 #endif
1739 }
1740
1741 debug(printf("nnodes[%d]: %d\n",divno,new->nnodes[divno]));
1742 if (new->nnodes[divno] == 0) {
1743 new->nodes[divno] = (struct FNode_T *) NULL;
1744 } else {
1745 new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
1746 #ifdef WORDS_BIGENDIAN
1747 for (i = 0; i < new->nnodes[divno]; i++) {
1748 Bigendian_fread_uint(&(new->nodes[divno][i].value),fp);
1749 Bigendian_fread_int(&(new->nodes[divno][i].a),fp);
1750 Bigendian_fread_int(&(new->nodes[divno][i].b),fp);
1751 Bigendian_fread_int(&(new->nodes[divno][i].leftindex),fp);
1752 Bigendian_fread_int(&(new->nodes[divno][i].rightindex),fp);
1753 }
1754 offset += (sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes[divno];
1755 #else
1756 if (sizeof(struct FNode_T) == sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int)) {
1757 offset += sizeof(struct FNode_T)*fread(new->nodes[divno],sizeof(struct FNode_T),new->nnodes[divno],fp);
1758 } else {
1759 for (i = 0; i < new->nnodes[divno]; i++) {
1760 fread(&(new->nodes[divno][i].value),sizeof(unsigned int),1,fp);
1761 fread(&(new->nodes[divno][i].a),sizeof(int),1,fp);
1762 fread(&(new->nodes[divno][i].b),sizeof(int),1,fp);
1763 fread(&(new->nodes[divno][i].leftindex),sizeof(int),1,fp);
1764 fread(&(new->nodes[divno][i].rightindex),sizeof(int),1,fp);
1765 }
1766 offset += (sizeof(unsigned int)+sizeof(int)+sizeof(int)+sizeof(int)+sizeof(int))*new->nnodes[divno];
1767 }
1768 #endif
1769 if (offset > filesize) {
1770 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nodes %zu, filesize %zu). Did you generate it using iit_store?\n",
1771 filename,offset,filesize);
1772 exit(9);
1773 }
1774
1775 #if 1
1776 debug(
1777 for (i = 0; i < new->nnodes[divno]; i++) {
1778 printf("Read node %d %d %d\n",new->nodes[divno][i].value,new->nodes[divno][i].a,new->nodes[divno][i].b);
1779 }
1780 );
1781 #endif
1782
1783 }
1784 debug(printf("\n"));
1785
1786 return offset;
1787 }
1788
1789
1790 static size_t
skip_intervals(int * skip_nintervals,size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divstart,int divend)1791 skip_intervals (int *skip_nintervals, size_t offset, size_t filesize, FILE *fp, char *filename, T new,
1792 int divstart, int divend) {
1793 int divno;
1794 size_t skipsize = 0;
1795
1796 *skip_nintervals = 0;
1797 for (divno = divstart; divno <= divend; divno++) {
1798 *skip_nintervals += new->nintervals[divno];
1799 }
1800 if (new->version >= 2) {
1801 skipsize += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*(*skip_nintervals);
1802 } else {
1803 skipsize += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*(*skip_nintervals);
1804 }
1805
1806 if ((offset += skipsize) > filesize) {
1807 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after skip_intervals %zu, filesize %zu). Did you generate it using iit_store?\n",
1808 filename,offset,filesize);
1809 exit(9);
1810 } else {
1811 move_relative(fp,skipsize);
1812 }
1813
1814 return offset;
1815 }
1816
1817
1818 static char *
load_intervals(char * memory,T new,int divno)1819 load_intervals (char *memory, T new, int divno) {
1820
1821 #ifdef WORDS_BIGENDIAN
1822 /* Not supported */
1823 abort();
1824 #if 0
1825 for (i = 0; i < new->nintervals[divno]; i++) {
1826 Bigendian_fread_uint(&(new->intervals[divno][i].low),fp);
1827 Bigendian_fread_uint(&(new->intervals[divno][i].high),fp);
1828 if (new->version >= 2) {
1829 Bigendian_fread_int(&(new->intervals[divno][i].sign),fp);
1830 } else {
1831 new->intervals[divno][i].sign = +1;
1832 }
1833 Bigendian_fread_int(&(new->intervals[divno][i].type),fp);
1834 }
1835 if (new->version >= 2) {
1836 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1837 } else {
1838 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1839 }
1840 #endif
1841
1842 #else
1843 if (new->version >= 2 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int)) {
1844 new->intervals[divno] = (struct Interval_T *) memory;
1845 memory += new->nintervals[divno] * sizeof(struct Interval_T);
1846
1847 } else if (new->version <= 1 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)) {
1848 new->intervals[divno] = (struct Interval_T *) memory;
1849 memory += new->nintervals[divno] * sizeof(struct Interval_T);
1850
1851 } else {
1852 /* Not supported */
1853 abort();
1854
1855 }
1856 #endif
1857
1858 return memory;
1859 }
1860
1861
1862 static size_t
read_intervals(size_t offset,size_t filesize,FILE * fp,char * filename,T new,int divno)1863 read_intervals (size_t offset, size_t filesize, FILE *fp, char *filename, T new, int divno) {
1864 int i;
1865
1866 #ifdef WORDS_BIGENDIAN
1867 for (i = 0; i < new->nintervals[divno]; i++) {
1868 Bigendian_fread_uint(&(new->intervals[divno][i].low),fp);
1869 Bigendian_fread_uint(&(new->intervals[divno][i].high),fp);
1870 if (new->version >= 2) {
1871 Bigendian_fread_int(&(new->intervals[divno][i].sign),fp);
1872 } else {
1873 new->intervals[divno][i].sign = +1;
1874 }
1875 Bigendian_fread_int(&(new->intervals[divno][i].type),fp);
1876 }
1877 if (new->version >= 2) {
1878 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1879 } else {
1880 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1881 }
1882 #else
1883 if (new->version >= 2 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int)) {
1884 offset += sizeof(struct Interval_T)*fread(new->intervals[divno],sizeof(struct Interval_T),new->nintervals[divno],fp);
1885 } else if (new->version <= 1 && sizeof(struct Interval_T) == sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)) {
1886 offset += sizeof(struct Interval_T)*fread(new->intervals[divno],sizeof(struct Interval_T),new->nintervals[divno],fp);
1887 } else {
1888 for (i = 0; i < new->nintervals[divno]; i++) {
1889 fread(&(new->intervals[divno][i].low),sizeof(unsigned int),1,fp);
1890 fread(&(new->intervals[divno][i].high),sizeof(unsigned int),1,fp);
1891 if (new->version >= 2) {
1892 fread(&(new->intervals[divno][i].sign),sizeof(int),1,fp);
1893 } else {
1894 new->intervals[divno][i].sign = +1;
1895 }
1896 fread(&(new->intervals[divno][i].type),sizeof(int),1,fp);
1897 }
1898 if (new->version >= 2) {
1899 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int)+sizeof(int))*new->nintervals[divno];
1900 } else {
1901 offset += (sizeof(unsigned int)+sizeof(unsigned int)+sizeof(int))*new->nintervals[divno];
1902 }
1903 }
1904 #endif
1905 if (offset > filesize) {
1906 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after intervals %zu, filesize %zu). Did you generate it using iit_store?\n",
1907 filename,offset,filesize);
1908 exit(9);
1909 }
1910
1911 return offset;
1912 }
1913
1914
1915 static char *
load_words(char * memory,T new)1916 load_words (char *memory, T new) {
1917 off_t stringlen;
1918 #ifdef DEBUG
1919 int i;
1920 #endif
1921
1922 new->typepointers = (unsigned int *) memory;
1923 memory += (new->ntypes+1) * sizeof(unsigned int);
1924 debug(
1925 printf("typepointers:");
1926 for (i = 0; i < new->ntypes+1; i++) {
1927 printf(" %u",new->typepointers[i]);
1928 }
1929 printf("\n");
1930 );
1931
1932 /* Note: To keep ints aligned, would be better to make stringlen a
1933 multiple of 4, and put a terminating '\0' as needed */
1934 stringlen = new->typepointers[new->ntypes];
1935 if (stringlen == 0) {
1936 new->typestrings = (char *) NULL;
1937 } else {
1938 new->typestrings = (char *) memory;
1939 memory += stringlen * sizeof(char);
1940 }
1941 debug(
1942 printf("typestrings:\n");
1943 for (s = 0; s < stringlen; s++) {
1944 printf("%c",new->typestrings[s]);
1945 }
1946 printf("\n");
1947 );
1948
1949 if (new->version < 2) {
1950 new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
1951 new->fieldpointers[0] = '\0';
1952 } else {
1953 new->fieldpointers = (unsigned int *) memory;
1954 memory += (new->nfields+1) * sizeof(unsigned int);
1955 }
1956
1957 /* Note: To keep ints aligned, would be better to make stringlen a
1958 multiple of 4, and put a terminating '\0' as needed */
1959 stringlen = new->fieldpointers[new->nfields];
1960 if (stringlen == 0) {
1961 new->fieldstrings = (char *) NULL;
1962 } else {
1963 new->fieldstrings = (char *) memory;
1964 memory += stringlen * sizeof(char);
1965 }
1966 debug(
1967 printf("fieldstrings:\n");
1968 for (s = 0; s < stringlen; s++) {
1969 printf("%c",new->fieldstrings[s]);
1970 }
1971 printf("\n");
1972 );
1973
1974 if (new->valuep == true) {
1975 debug(printf("Starting load of valueorder offset/length\n"));
1976 /* new->valueorder_offset = offset; -- Needed only for mmap_annotations */
1977 new->valueorder = (int *) memory;
1978 new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
1979 memory += new->valueorder_length;
1980
1981 debug1(printf("Starting read of value offset/length\n"));
1982 /* new->value_offset = offset; -- Needed only for mmap_annotations */
1983 new->values = (double *) memory;
1984 new->value_length = (size_t) (new->total_nintervals*sizeof(double));
1985 memory += new->value_length;
1986 }
1987
1988 debug(printf("Starting load of labelorder at %p\n",memory));
1989 /* new->labelorder_offset = offset; -- Needed only for mmap_annotations */
1990 new->labelorder = (int *) memory;
1991 new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
1992 memory += new->labelorder_length;
1993 debug(
1994 printf("labelorder:\n");
1995 for (i = 0; i < new->total_nintervals; i++) {
1996 printf("%d ",new->labelorder[i]);
1997 }
1998 printf("\n");
1999 );
2000
2001 debug(printf("Starting load of labelpointer offset/length\n"));
2002 /* new->labelpointers_offset = offset; -- Needed only for mmap_annotations */
2003 #ifdef HAVE_64_BIT
2004 if (new->label_pointers_8p == true) {
2005 new->labelpointers8 = (UINT8 *) memory;
2006 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2007 memory += new->total_nintervals * sizeof(UINT8);
2008 new->label_length = (size_t) * (UINT8 *) memory;
2009 memory += sizeof(UINT8);
2010 } else {
2011 new->labelpointers = (UINT4 *) memory;
2012 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2013 memory += new->total_nintervals * sizeof(UINT4);
2014 new->label_length = (size_t) * (UINT4 *) memory;
2015 memory += sizeof(UINT4);
2016 }
2017 #else
2018 new->labelpointers = (UINT4 *) memory;
2019 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2020 memory += new->total_nintervals * sizeof(UINT4);
2021 new->label_length = (size_t) * (UINT4 *) memory;
2022 memory += sizeof(UINT4);
2023 #endif
2024
2025 debug(printf("Starting load of label offset/length\n"));
2026 /* new->label_offset = offset; -- Needed only for mmap_annotations */
2027 new->labels = (char *) memory;
2028 /* new->label_length computed above */
2029 memory += new->label_length;
2030
2031 debug(printf("Starting load of annotpointers offset/length\n"));
2032 /* new->annotpointers_offset = offset; -- Needed only for mmap_annotations */
2033 #ifdef HAVE_64_BIT
2034 if (new->annot_pointers_8p == true) {
2035 new->annotpointers8 = (UINT8 *) memory;
2036 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2037 } else {
2038 new->annotpointers = (UINT4 *) memory;
2039 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2040 }
2041 #else
2042 new->annotpointers = (UINT4 *) memory;
2043 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2044 #endif
2045 memory += new->annotpointers_length;
2046
2047 debug(printf("Starting load of annotations at %p\n",memory));
2048 /* new->annot_offset = offset; -- Needed only for mmap_annotations */
2049 new->annotations = (char *) memory;
2050 /* new->annot_length = filesize - new->annot_offset; -- Needed only for mmap_annotations or read_words */
2051 /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
2052
2053 return memory;
2054 }
2055
2056
2057
2058 static void
read_words(size_t offset,size_t filesize,FILE * fp,T new)2059 read_words (size_t offset, size_t filesize, FILE *fp, T new) {
2060 size_t stringlen;
2061 #ifdef HAVE_64_BIT
2062 UINT8 length8;
2063 #endif
2064 UINT4 length;
2065 #ifdef DEBUG
2066 int i;
2067 #endif
2068
2069 new->typepointers = (unsigned int *) CALLOC(new->ntypes+1,sizeof(unsigned int));
2070 offset += sizeof(int)*FREAD_UINTS(new->typepointers,new->ntypes+1,fp);
2071 debug(
2072 printf("typepointers:");
2073 for (i = 0; i < new->ntypes+1; i++) {
2074 printf(" %u",new->typepointers[i]);
2075 }
2076 printf("\n");
2077 );
2078
2079 stringlen = new->typepointers[new->ntypes];
2080 if (stringlen == 0) {
2081 new->typestrings = (char *) NULL;
2082 } else {
2083 new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
2084 offset += sizeof(char)*FREAD_CHARS(new->typestrings,stringlen,fp);
2085 }
2086 debug(
2087 printf("typestrings:\n");
2088 for (s = 0; s < stringlen; s++) {
2089 printf("%c",new->typestrings[s]);
2090 }
2091 printf("\n");
2092 );
2093
2094 new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
2095 if (new->version < 2) {
2096 new->fieldpointers[0] = '\0';
2097 } else {
2098 offset += sizeof(int)*FREAD_UINTS(new->fieldpointers,new->nfields+1,fp);
2099 }
2100 stringlen = new->fieldpointers[new->nfields];
2101 if (stringlen == 0) {
2102 new->fieldstrings = (char *) NULL;
2103 } else {
2104 new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
2105 offset += sizeof(char)*FREAD_CHARS(new->fieldstrings,stringlen,fp);
2106 }
2107 debug(
2108 printf("fieldstrings:\n");
2109 for (s = 0; s < stringlen; s++) {
2110 printf("%c",new->fieldstrings[s]);
2111 }
2112 printf("\n");
2113 );
2114
2115 if (new->valuep == true) {
2116 debug1(printf("Starting read of valueorder offset/length\n"));
2117 new->valueorder_offset = offset;
2118 new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
2119 /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
2120 move_relative(fp,new->valueorder_length);
2121 offset += new->valueorder_length;
2122
2123 debug1(printf("Starting read of value offset/length\n"));
2124 new->value_offset = offset;
2125 new->value_length = (size_t) (new->total_nintervals*sizeof(double));
2126 /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
2127 move_relative(fp,new->value_length);
2128 offset += new->value_length;
2129 }
2130
2131 debug1(printf("Starting read of labelorder offset/length\n"));
2132 new->labelorder_offset = offset;
2133 new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
2134 /* fprintf(stderr,"Doing a move_relative for labelorder_length %zu\n",new->labelorder_length); */
2135 move_relative(fp,new->labelorder_length);
2136 offset += new->labelorder_length;
2137
2138 debug1(printf("Starting read of labelpointer offset/length\n"));
2139 new->labelpointers_offset = offset;
2140 #ifdef HAVE_64_BIT
2141 if (new->label_pointers_8p == true) {
2142 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2143 move_relative(fp,new->total_nintervals * sizeof(UINT8));
2144 FREAD_UINT8(&length8,fp);
2145 new->label_length = (size_t) length8;
2146 } else {
2147 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2148 /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
2149 move_relative(fp,new->total_nintervals * sizeof(UINT4));
2150 FREAD_UINT(&length,fp);
2151 new->label_length = (size_t) length;
2152 }
2153 #else
2154 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2155 /* fprintf(stderr,"Doing a move_relative for labelpointer %zu\n",new->total_nintervals * sizeof(UINT4)); */
2156 move_relative(fp,new->total_nintervals * sizeof(UINT4));
2157 FREAD_UINT(&length,fp);
2158 new->label_length = (size_t) length;
2159 #endif
2160 offset += new->labelpointers_length;
2161
2162 debug1(printf("Starting read of label offset/length\n"));
2163 new->label_offset = offset;
2164 /* new->label_length computed above */
2165 /* fprintf(stderr,"Doing a move_relative for label_length %zu\n",new->label_length); */
2166 move_relative(fp,new->label_length);
2167 offset += new->label_length;
2168
2169 debug1(printf("Starting read of annotpointers offset/length\n"));
2170 new->annotpointers_offset = offset;
2171 #ifdef HAVE_64_BIT
2172 if (new->annot_pointers_8p == true) {
2173 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2174 } else {
2175 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2176 }
2177 #else
2178 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2179 #endif
2180 offset += new->annotpointers_length;
2181
2182 new->annot_offset = offset;
2183
2184 #ifdef BAD_32BIT
2185 /* This fails if length > 4 GB */
2186 move_relative(fp,new->total_nintervals * sizeof(unsigned int));
2187 FREAD_UINT(&length,fp);
2188 new->annot_length = (size_t) length;
2189 fprintf(stderr,"Incorrect length: %u\n",length);
2190 #else
2191 new->annot_length = filesize - new->annot_offset;
2192 /* fprintf(stderr,"annot_length: %zu\n",new->annot_length); */
2193 #endif
2194
2195 #if 0
2196 /* To do this check, we need to get stringlen for annotation similarly to that for labels */
2197 last_offset = offset + sizeof(char)*stringlen;
2198 if (last_offset != filesize) {
2199 fprintf(stderr,"Problem with last_offset (%zu) not equal to filesize = (%zu)\n",
2200 last_offset,filesize);
2201 exit(9);
2202 }
2203 #endif
2204
2205 return;
2206 }
2207
2208 static void
read_words_debug(size_t offset,size_t filesize,FILE * fp,T new)2209 read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
2210 size_t stringlen, s;
2211 #ifdef HAVE_64_BIT
2212 UINT8 length8;
2213 #endif
2214 UINT4 length;
2215 int i;
2216 #if 0
2217 size_t last_offset;
2218 #endif
2219
2220 new->typepointers = (unsigned int *) CALLOC(new->ntypes+1,sizeof(unsigned int));
2221 offset += sizeof(int)*FREAD_UINTS(new->typepointers,new->ntypes+1,fp);
2222 printf("typepointers:");
2223 for (i = 0; i < new->ntypes+1; i++) {
2224 printf(" %u",new->typepointers[i]);
2225 }
2226 printf("\n");
2227
2228 stringlen = new->typepointers[new->ntypes];
2229 if (stringlen == 0) {
2230 new->typestrings = (char *) NULL;
2231 } else {
2232 new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
2233 offset += sizeof(char)*FREAD_CHARS(new->typestrings,stringlen,fp);
2234 }
2235 printf("typestrings:\n");
2236 for (s = 0; s < stringlen; s++) {
2237 printf("%c",new->typestrings[s]);
2238 }
2239 printf("\n");
2240
2241 new->fieldpointers = (unsigned int *) CALLOC(new->nfields+1,sizeof(unsigned int));
2242 if (new->version < 2) {
2243 new->fieldpointers[0] = '\0';
2244 } else {
2245 offset += sizeof(int)*FREAD_UINTS(new->fieldpointers,new->nfields+1,fp);
2246 }
2247 stringlen = new->fieldpointers[new->nfields];
2248 if (stringlen == 0) {
2249 new->fieldstrings = (char *) NULL;
2250 } else {
2251 new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
2252 offset += sizeof(char)*FREAD_CHARS(new->fieldstrings,stringlen,fp);
2253 }
2254 printf("fieldstrings:\n");
2255 for (s = 0; s < stringlen; s++) {
2256 printf("%c",new->fieldstrings[s]);
2257 }
2258 printf("\n");
2259
2260 if (new->valuep == true) {
2261 debug1(printf("Starting read of valueorder offset/length\n"));
2262 new->valueorder_offset = offset;
2263 new->valueorder_length = (size_t) (new->total_nintervals*sizeof(int));
2264 /* fprintf(stderr,"Doing a move_relative for valueorder_length %zu\n",new->valueorder_length); */
2265 move_relative(fp,new->valueorder_length);
2266 offset += new->valueorder_length;
2267
2268 debug1(printf("Starting read of value offset/length\n"));
2269 new->value_offset = offset;
2270 new->value_length = (size_t) (new->total_nintervals*sizeof(double));
2271 /* fprintf(stderr,"Doing a move_relative for value_length %zu\n",new->value_length); */
2272 move_relative(fp,new->value_length);
2273 offset += new->value_length;
2274 }
2275
2276 debug1(printf("Starting read of labelorder offset/length\n"));
2277 new->labelorder_offset = offset;
2278 new->labelorder_length = (size_t) (new->total_nintervals*sizeof(int));
2279 move_relative(fp,new->labelorder_length);
2280 offset += new->labelorder_length;
2281
2282 debug1(printf("Starting read of labelpointers offset/length\n"));
2283 new->labelpointers_offset = offset;
2284 #ifdef HAVE_64_BIT
2285 if (new->label_pointers_8p == true) {
2286 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2287 move_relative(fp,new->total_nintervals * sizeof(UINT8));
2288 FREAD_UINT8(&length8,fp);
2289 new->label_length = (size_t) length8;
2290 } else {
2291 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2292 move_relative(fp,new->total_nintervals * sizeof(UINT4));
2293 FREAD_UINT(&length,fp);
2294 new->label_length = (size_t) length;
2295 }
2296 #else
2297 new->labelpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2298 move_relative(fp,new->total_nintervals * sizeof(UINT4));
2299 FREAD_UINT(&length,fp);
2300 new->label_length = (size_t) length;
2301 #endif
2302 offset += new->labelpointers_length;
2303
2304 fprintf(stderr,"label_length: %zu\n",new->label_length);
2305 debug1(printf("Starting read of label offset/length\n"));
2306 new->label_offset = offset;
2307 /* new->label_length computed above */
2308 move_relative(fp,new->label_length);
2309 offset += new->label_length;
2310
2311 debug1(printf("Starting read of annotpointers offset/length\n"));
2312 new->annotpointers_offset = offset;
2313 #ifdef HAVE_64_BIT
2314 if (new->annot_pointers_8p == true) {
2315 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT8));
2316 } else {
2317 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2318 }
2319 #else
2320 new->annotpointers_length = (size_t) ((new->total_nintervals+1)*sizeof(UINT4));
2321 #endif
2322 offset += new->annotpointers_length;
2323
2324 new->annot_offset = offset;
2325
2326 #ifdef BAD_32BIT
2327 /* This fails if length > 4 GB */
2328 move_relative(fp,new->total_nintervals * sizeof(unsigned int));
2329 FREAD_UINT(&length,fp);
2330 new->annot_length = (size_t) length;
2331 fprintf(stderr,"Incorrect length: %u\n",length);
2332 #else
2333 new->annot_length = filesize - new->annot_offset;
2334 fprintf(stderr,"annot_length: %zu\n",new->annot_length);
2335 #endif
2336
2337 #if 0
2338 /* To do this check, we need to get stringlen for annotation similarly to that for labels */
2339 last_offset = offset + sizeof(char)*stringlen;
2340 if (last_offset != filesize) {
2341 fprintf(stderr,"Problem with last_offset (%zu) not equal to filesize = (%zu)\n",
2342 last_offset,filesize);
2343 exit(9);
2344 }
2345 #endif
2346
2347 return;
2348 }
2349
/* Memory-maps the annotation-related sections of the IIT file
   FILENAME (valueorder and values when new->valuep is true, then
   labelorder, labelpointers, labels, annotpointers, and annotations),
   using the offsets and lengths already stored in NEW.

   This function only assigns pointers.  Subsequent accesses to
   memory, other than char *, still need to be read correctly
   by bigendian machines */
/* Previously allowed read/write access, but we can assume read-only access */
#ifdef HAVE_MMAP

/* Returns the address REMAINDER bytes into the mapping BASE, or NULL
   when BASE is NULL.  Keeps a failed mapping recognizable as NULL
   instead of turning it into a small non-NULL address. */
static char *
mmap_section_start (char *base, int remainder) {
  if (base == NULL) {
    return (char *) NULL;
  } else {
    return &(base[remainder]);
  }
}

/* Opens FILENAME read-only, storing the descriptor in new->fd (exits
   with status 9 if the open fails), and maps each section.  For every
   section, the *_mmap field holds the pointer returned by
   Access_mmap_offset, the typed field points REMAINDER bytes into it,
   and the *_length field is increased by REMAINDER.
   NOTE(review): REMAINDER is presumably the distance from the start of
   the mapping to the requested offset -- confirm against
   Access_mmap_offset.

   Returns true if every section was mapped.  Returns false, after
   printing a message, if any mapping is NULL; new->fd is left open in
   that case.  READONLYP must be true. */
static bool
mmap_annotations (char *filename, T new, bool readonlyp) {
  int remainder;
  bool failedp;

  assert(readonlyp == true);

  if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
    fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
    exit(9);
  }

  if (new->valuep == true) {
    new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
						       /*randomp*/true);
    debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
    new->valueorder = (int *) mmap_section_start(new->valueorder_mmap,remainder);
    new->valueorder_length += (size_t) remainder;

    new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
						  /*randomp*/true);
    debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
    new->values = (double *) mmap_section_start(new->value_mmap,remainder);
    new->value_length += (size_t) remainder;
  }

  new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
						     /*randomp*/true);
  debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
  new->labelorder = (int *) mmap_section_start(new->labelorder_mmap,remainder);
  new->labelorder_length += (size_t) remainder;

  new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
							/*randomp*/true);
  debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
#ifdef HAVE_64_BIT
  /* Exactly one of labelpointers8/labelpointers is used, per the pointer width */
  if (new->label_pointers_8p == true) {
    new->labelpointers8 = (UINT8 *) mmap_section_start(new->labelpointers_mmap,remainder);
    new->labelpointers = (UINT4 *) NULL;
  } else {
    new->labelpointers8 = (UINT8 *) NULL;
    new->labelpointers = (UINT4 *) mmap_section_start(new->labelpointers_mmap,remainder);
  }
#else
  new->labelpointers = (UINT4 *) mmap_section_start(new->labelpointers_mmap,remainder);
#endif
  new->labelpointers_length += (size_t) remainder;

  new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
						/*randomp*/true);
  debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
  new->labels = mmap_section_start(new->label_mmap,remainder);
  new->label_length += (size_t) remainder;

  new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
							/*randomp*/true);
  debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
#ifdef HAVE_64_BIT
  /* Exactly one of annotpointers8/annotpointers is used, per the pointer width */
  if (new->annot_pointers_8p == true) {
    new->annotpointers8 = (UINT8 *) mmap_section_start(new->annotpointers_mmap,remainder);
    new->annotpointers = (UINT4 *) NULL;
  } else {
    new->annotpointers8 = (UINT8 *) NULL;
    new->annotpointers = (UINT4 *) mmap_section_start(new->annotpointers_mmap,remainder);
  }
#else
  new->annotpointers = (UINT4 *) mmap_section_start(new->annotpointers_mmap,remainder);
#endif
  new->annotpointers_length += (size_t) remainder;

  new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
						/*randomp*/true);
  debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
  new->annotations = mmap_section_start(new->annot_mmap,remainder);
  new->annot_length += (size_t) remainder;


  /* Test the mapping bases themselves.  The offset-adjusted pointers
     cannot be used for this, because base + remainder is non-NULL
     whenever remainder is nonzero. */
  failedp = false;
  if (new->valuep == true &&
      (new->valueorder_mmap == NULL || new->value_mmap == NULL)) {
    failedp = true;
  } else if (new->labelorder_mmap == NULL || new->labelpointers_mmap == NULL ||
	     new->label_mmap == NULL) {
    failedp = true;
  } else if (new->annotpointers_mmap == NULL || new->annot_mmap == NULL) {
    failedp = true;
  }

  if (failedp == true) {
    /* NOTE(review): sections that did map are not unmapped here, and
       the *_length fields keep their remainder adjustment -- confirm
       that the file IO fallback tolerates this */
    fprintf(stderr,"Memory mapping failed in reading IIT file %s. Using slow file IO instead.\n",filename);
    return false;
  }

  return true;
}
#endif
2472
2473
2474 /* Used if access is FILEIO. Subsequent accesses by bigendian
2475 machines to anything but (char *) will still need to convert. */
2476 static void
read_annotations(T new)2477 read_annotations (T new) {
2478
2479 if (new->valuep == true) {
2480 file_move_absolute(new->fd,new->valueorder_offset,sizeof(int),/*n*/0);
2481 new->valueorder = (int *) CALLOC(new->total_nintervals,sizeof(int));
2482 read(new->fd,new->valueorder,new->total_nintervals*sizeof(int));
2483
2484 file_move_absolute(new->fd,new->value_offset,sizeof(char),/*n*/0);
2485 new->values = (double *) CALLOC(new->value_length,sizeof(char));
2486 read(new->fd,new->values,new->value_length*sizeof(char));
2487 }
2488
2489 file_move_absolute(new->fd,new->labelorder_offset,sizeof(int),/*n*/0);
2490 new->labelorder = (int *) CALLOC(new->total_nintervals,sizeof(int));
2491 read(new->fd,new->labelorder,new->total_nintervals*sizeof(int));
2492
2493 #ifdef HAVE_64_BIT
2494 if (new->label_pointers_8p == true) {
2495 file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT8),/*n*/0);
2496 new->labelpointers8 = (UINT8 *) CALLOC(new->total_nintervals+1,sizeof(UINT8));
2497 read(new->fd,new->labelpointers8,(new->total_nintervals+1)*sizeof(UINT8));
2498 new->labelpointers = (UINT4 *) NULL;
2499 } else {
2500 file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT4),/*n*/0);
2501 new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2502 read(new->fd,new->labelpointers,(new->total_nintervals+1)*sizeof(UINT4));
2503 new->labelpointers8 = (UINT8 *) NULL;
2504 }
2505 #else
2506 file_move_absolute(new->fd,new->labelpointers_offset,sizeof(UINT4),/*n*/0);
2507 new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2508 read(new->fd,new->labelpointers,(new->total_nintervals+1)*sizeof(UINT4));
2509 #endif
2510
2511 file_move_absolute(new->fd,new->label_offset,sizeof(char),/*n*/0);
2512 new->labels = (char *) CALLOC(new->label_length,sizeof(char));
2513 read(new->fd,new->labels,new->label_length*sizeof(char));
2514
2515 #ifdef HAVE_64_BIT
2516 if (new->annot_pointers_8p == true) {
2517 file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT8),/*n*/0);
2518 new->annotpointers8 = (UINT8 *) CALLOC(new->total_nintervals+1,sizeof(UINT8));
2519 read(new->fd,new->annotpointers8,(new->total_nintervals+1)*sizeof(UINT8));
2520 new->annotpointers = (UINT4 *) NULL;
2521 } else {
2522 file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT4),/*n*/0);
2523 new->annotpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2524 read(new->fd,new->annotpointers,(new->total_nintervals+1)*sizeof(UINT4));
2525 new->annotpointers8 = (UINT8 *) NULL;
2526 }
2527 #else
2528 file_move_absolute(new->fd,new->annotpointers_offset,sizeof(UINT4),/*n*/0);
2529 new->annotpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
2530 read(new->fd,new->annotpointers,(new->total_nintervals+1)*sizeof(UINT4));
2531 #endif
2532
2533 file_move_absolute(new->fd,new->annot_offset,sizeof(char),/*n*/0);
2534 new->annotations = (char *) CALLOC(new->annot_length,sizeof(char));
2535 read(new->fd,new->annotations,new->annot_length*sizeof(char));
2536
2537 return;
2538 }
2539
2540
2541 int
IIT_read_divint(char * filename,char * divstring,bool add_iit_p)2542 IIT_read_divint (char *filename, char *divstring, bool add_iit_p) {
2543 char *newfile = NULL;
2544 FILE *fp;
2545 int version;
2546 size_t offset, skipsize;
2547 size_t filesize;
2548 int total_nintervals, ntypes, nfields, divsort;
2549 int label_pointer_size, annot_pointer_size;
2550
2551 int i, ndivs;
2552 UINT4 *divpointers, stringlen, start;
2553 char *divstrings;
2554
2555 if (add_iit_p == true) {
2556 newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
2557 sprintf(newfile,"%s.iit",filename);
2558 if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
2559 filename = newfile;
2560 } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
2561 /* fprintf(stderr,"Cannot open IIT file %s or %s\n",filename,newfile); */
2562 FREE(newfile);
2563 return -1;
2564 }
2565 } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
2566 /* fprintf(stderr,"Cannot open IIT file %s\n",filename); */
2567 return -1;
2568 }
2569
2570 filesize = Access_filesize(filename);
2571 offset = 0U;
2572
2573 if (FREAD_INT(&total_nintervals,fp) < 1) {
2574 fprintf(stderr,"IIT file %s appears to be empty\n",filename);
2575 fclose(fp);
2576 return -1;
2577 } else if ((offset += sizeof(int)) > filesize) {
2578 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu). Did you generate it using iit_store?\n",
2579 filename,offset,filesize);
2580 return -1;
2581 }
2582
2583 if (total_nintervals > 0) {
2584 version = 1;
2585
2586 } else {
2587 /* New format to indicate version > 1 */
2588 FREAD_INT(&version,fp);
2589 if (version > IIT_LATEST_VERSION_NOVALUES && version > IIT_LATEST_VERSION_VALUES) {
2590 fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
2591 version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
2592 return -1;
2593 } else if ((offset += sizeof(int)) > filesize) {
2594 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu). Did you generate it using iit_store?\n",
2595 filename,offset,filesize);
2596 return -1;
2597 }
2598
2599 if (version < 5) {
2600 } else {
2601 /* Read new variables indicating sizes of label and annot pointers */
2602 if (FREAD_INT(&label_pointer_size,fp) < 1) {
2603 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2604 return -1;
2605 } else if ((offset += sizeof(int)) > filesize) {
2606 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
2607 filename,offset,filesize);
2608 return -1;
2609 }
2610
2611 if (FREAD_INT(&annot_pointer_size,fp) < 1) {
2612 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2613 return -1;
2614 } else if ((offset += sizeof(int)) > filesize) {
2615 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
2616 filename,offset,filesize);
2617 return -1;
2618 }
2619
2620 if (label_pointer_size == 4) {
2621 } else if (label_pointer_size == 8) {
2622 } else {
2623 fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
2624 filename,label_pointer_size);
2625 return -1;
2626 }
2627
2628 if (annot_pointer_size == 4) {
2629 } else if (annot_pointer_size == 8) {
2630 } else {
2631 fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
2632 filename,annot_pointer_size);
2633 return -1;
2634 }
2635 }
2636
2637 /* Re-read total_nintervals */
2638 if (FREAD_INT(&total_nintervals,fp) < 1) {
2639 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2640 return -1;
2641 } else if ((offset += sizeof(int)) > filesize) {
2642 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
2643 filename,offset,filesize);
2644 return -1;
2645 }
2646 }
2647
2648 debug(printf("version: %d\n",version));
2649 debug(printf("total_nintervals: %d\n",total_nintervals));
2650
2651
2652 if (FREAD_INT(&ntypes,fp) < 1) {
2653 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2654 return -1;
2655 } else if (ntypes < 0) {
2656 fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
2657 return -1;
2658 } else if ((offset += sizeof(int)) > filesize) {
2659 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu). Did you generate it using iit_store?\n",
2660 filename,offset,filesize);
2661 return -1;
2662 }
2663 debug(printf("ntypes: %d\n",ntypes));
2664
2665
2666 if (version < 2) {
2667 nfields = 0;
2668 } else {
2669 if (FREAD_INT(&nfields,fp) < 1) {
2670 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2671 return -1;
2672 } else if (nfields < 0) {
2673 fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
2674 return -1;
2675 } else if ((offset += sizeof(int)) > filesize) {
2676 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu). Did you generate it using iit_store?\n",
2677 filename,offset,filesize);
2678 return -1;
2679 }
2680 }
2681 debug(printf("nfields: %d\n",nfields));
2682
2683
2684 if (version <= 2) {
2685 return -1;
2686
2687 } else {
2688
2689 if (FREAD_INT(&ndivs,fp) < 1) {
2690 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2691 return -1;
2692 } else if (ndivs < 0) {
2693 fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
2694 return -1;
2695 } else if ((offset += sizeof(int)) > filesize) {
2696 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu). Did you generate it using iit_store?\n",
2697 filename,offset,filesize);
2698 return -1;
2699 }
2700 debug(printf("ndivs: %d\n",ndivs));
2701
2702 /* Skip nintervals */
2703 offset += skipsize = sizeof(int)*ndivs;
2704 move_relative(fp,skipsize);
2705
2706 /* Skip cum_nintervals */
2707 offset += skipsize = sizeof(int)*(ndivs+1);
2708 move_relative(fp,skipsize);
2709
2710 /* Skip nnodes */
2711 offset += skipsize = sizeof(int)*ndivs;
2712 move_relative(fp,skipsize);
2713
2714 /* Skip cum_nnodes */
2715 offset += skipsize = sizeof(int)*(ndivs+1);
2716 move_relative(fp,skipsize);
2717
2718 if (FREAD_INT(&divsort,fp) < 1) {
2719 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
2720 return -1;
2721 } else if (divsort < 0) {
2722 fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
2723 return -1;
2724 } else if ((offset += sizeof(int)) > filesize) {
2725 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu). Did you generate it using iit_store?\n",
2726 filename,offset,filesize);
2727 return -1;
2728 }
2729 debug(printf("divsort: %d\n",divsort));
2730
2731 divpointers = (UINT4 *) CALLOC(ndivs+1,sizeof(UINT4));
2732 offset += sizeof(int)*FREAD_UINTS(divpointers,ndivs+1,fp);
2733 debug(
2734 printf("divpointers:");
2735 for (i = 0; i < ndivs+1; i++) {
2736 printf(" %u",divpointers[i]);
2737 }
2738 printf("\n");
2739 );
2740
2741 stringlen = divpointers[ndivs];
2742 if (stringlen == 0) {
2743 fprintf(stderr,"Problem with divstring stringlen being 0\n");
2744 exit(9);
2745 } else {
2746 divstrings = (char *) CALLOC(stringlen,sizeof(char));
2747 }
2748 offset += sizeof(char)*FREAD_CHARS(divstrings,stringlen,fp);
2749 debug(
2750 printf("divstrings:\n");
2751 for (s = 0; s < stringlen; s++) {
2752 if (divstrings[s] == '\0') {
2753 printf("\n");
2754 } else {
2755 printf("%c",divstrings[s]);
2756 }
2757 }
2758 printf("(end of divstrings)\n");
2759 );
2760
2761 i = 0;
2762 while (i < ndivs) {
2763 start = divpointers[i];
2764 if (!strcmp(divstring,&(divstrings[start]))) {
2765 fclose(fp);
2766 FREE(divstrings);
2767 FREE(divpointers);
2768 if (newfile != NULL) {
2769 FREE(newfile);
2770 }
2771 return i;
2772 }
2773 i++;
2774 }
2775
2776 fclose(fp);
2777 FREE(divstrings);
2778 FREE(divpointers);
2779 if (newfile != NULL) {
2780 FREE(newfile);
2781 }
2782 return -1;
2783 }
2784 }
2785
2786
2787
2788 T
IIT_load(char * memory,char * name)2789 IIT_load (char *memory, char *name) {
2790 T new;
2791 off_t stringlen;
2792 int divno;
2793 int label_pointer_size, annot_pointer_size;
2794 #ifdef DEBUG
2795 int i;
2796 Interval_T interval;
2797 #endif
2798
2799 new = (T) MALLOC(sizeof(*new));
2800
2801 if (name == NULL) {
2802 new->name = NULL;
2803 } else {
2804 new->name = (char *) CALLOC(strlen(name)+1,sizeof(char));
2805 strcpy(new->name,name);
2806 }
2807
2808 new->total_nintervals = * (int *) memory; memory += sizeof(int);
2809
2810 if (new->total_nintervals != 0) {
2811 /* Need to use Univ_IIT_read instead */
2812 fprintf(stderr,"Unexpected error in IIT_load. First int is %d. Using IIT_read code on a version 1 IIT\n",
2813 new->total_nintervals);
2814 abort();
2815
2816 } else {
2817 /* New format to indicate version > 1 */
2818 new->version = * (int *) memory; memory += sizeof(int);
2819 if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
2820 fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
2821 new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
2822 return NULL;
2823 }
2824
2825 if (new->version == IIT_LATEST_VERSION_VALUES) {
2826 /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
2827 new->valuep = true;
2828 } else {
2829 new->valuep = false;
2830 }
2831
2832 if (new->version <= 3) {
2833 new->label_pointers_8p = false;
2834 new->annot_pointers_8p = false;
2835 } else if (new->version == 4) {
2836 new->label_pointers_8p = true;
2837 new->annot_pointers_8p = true;
2838 } else {
2839 /* Read new variables indicating sizes of label and annot pointers */
2840 label_pointer_size = * (int *) memory; memory += sizeof(int);
2841 annot_pointer_size = * (int *) memory; memory += sizeof(int);
2842
2843 if (label_pointer_size == 4) {
2844 new->label_pointers_8p = false;
2845 } else if (label_pointer_size == 8) {
2846 new->label_pointers_8p = true;
2847 } else {
2848 fprintf(stderr,"IIT file has a problem with label_pointer_size being %d, expecting 4 or 8\n",
2849 label_pointer_size);
2850 }
2851
2852 if (annot_pointer_size == 4) {
2853 new->annot_pointers_8p = false;
2854 } else if (annot_pointer_size == 8) {
2855 new->annot_pointers_8p = true;
2856 } else {
2857 fprintf(stderr,"IIT file has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
2858 annot_pointer_size);
2859 }
2860 }
2861
2862 /* Re-read total_nintervals */
2863 new->total_nintervals = * (int *) memory; memory += sizeof(int);
2864 }
2865
2866 debug(printf("version: %d\n",new->version));
2867 debug(printf("total_nintervals: %d\n",new->total_nintervals));
2868
2869
2870 new->ntypes = * (int *) memory; memory += sizeof(int);
2871 if (new->ntypes < 0) {
2872 fprintf(stderr,"IIT file appears to have a negative number of types\n");
2873 return NULL;
2874 }
2875 debug(printf("ntypes: %d\n",new->ntypes));
2876
2877
2878 if (new->version < 2) {
2879 new->nfields = 0;
2880 } else {
2881 new->nfields = * (int *) memory; memory += sizeof(int);
2882 if (new->nfields < 0) {
2883 fprintf(stderr,"IIT file appears to have a negative number of fields\n");
2884 return NULL;
2885 }
2886 }
2887 debug(printf("nfields: %d\n",new->nfields));
2888
2889
2890 if (new->version <= 2) {
2891 /* Might not be supported */
2892 new->ndivs = 1;
2893
2894 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
2895 new->nintervals[0] = new->total_nintervals;
2896 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
2897 new->cum_nintervals[0] = 0;
2898 new->cum_nintervals[1] = new->total_nintervals;
2899
2900 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
2901 new->nnodes[0] = * (int *) memory; memory += sizeof(int);
2902 if (new->nnodes[0] < 0) {
2903 fprintf(stderr,"IIT file appears to have a negative number of nodes\n");
2904 return NULL;
2905 }
2906 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
2907 new->cum_nnodes[0] = 0;
2908 new->cum_nnodes[1] = new->nnodes[0];
2909
2910 new->divsort = NO_SORT;
2911
2912 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
2913 new->divpointers[0] = 0;
2914 new->divpointers[1] = 1;
2915
2916 new->divstrings = (char *) CALLOC(1,sizeof(char));
2917 new->divstrings[0] = '\0';
2918
2919 } else {
2920
2921 new->ndivs = * (int *) memory; memory += sizeof(int);
2922 if (new->ndivs < 0) {
2923 fprintf(stderr,"IIT file appears to have a negative number of divs\n");
2924 return NULL;
2925 }
2926 debug(printf("ndivs: %d\n",new->ndivs));
2927
2928 new->nintervals = (int *) memory;
2929 memory += new->ndivs * sizeof(int);
2930 debug(
2931 printf("nintervals:");
2932 for (i = 0; i < new->ndivs; i++) {
2933 printf(" %d",new->nintervals[i]);
2934 }
2935 printf("\n");
2936 );
2937
2938 new->cum_nintervals = (int *) memory;
2939 memory += (new->ndivs+1) * sizeof(int);
2940 debug(
2941 printf("cum_nintervals:");
2942 for (i = 0; i <= new->ndivs; i++) {
2943 printf(" %d",new->cum_nintervals[i]);
2944 }
2945 printf("\n");
2946 );
2947
2948 new->nnodes = (int *) memory;
2949 memory += new->ndivs * sizeof(int);
2950 debug(
2951 printf("nnodes:");
2952 for (i = 0; i < new->ndivs; i++) {
2953 printf(" %d",new->nnodes[i]);
2954 }
2955 printf("\n");
2956 );
2957
2958 new->cum_nnodes = (int *) memory;
2959 memory += (new->ndivs+1) * sizeof(int);
2960 debug(
2961 printf("cum_nnodes:");
2962 for (i = 0; i <= new->ndivs; i++) {
2963 printf(" %d",new->cum_nnodes[i]);
2964 }
2965 printf("\n");
2966 );
2967
2968 new->divsort = * (int *) memory; memory += sizeof(int);
2969 if (new->divsort < 0) {
2970 fprintf(stderr,"IIT file appears to have a negative value for divsort\n");
2971 return NULL;
2972 }
2973 debug(printf("divsort: %d\n",new->divsort));
2974
2975 new->divpointers = (UINT4 *) memory;
2976 memory += (new->ndivs+1) * sizeof(int);;
2977 debug(
2978 printf("divpointers:");
2979 for (i = 0; i < new->ndivs+1; i++) {
2980 printf(" %u",new->divpointers[i]);
2981 }
2982 printf("\n");
2983 );
2984
2985 /* Note: To keep ints aligned, would be better to make stringlen a
2986 multiple of 4, and put a terminating '\0' as needed */
2987 stringlen = new->divpointers[new->ndivs];
2988 if (stringlen == 0) {
2989 new->divstrings = (char *) NULL;
2990 } else {
2991 new->divstrings = (char *) memory;
2992 memory += stringlen * sizeof(char);
2993 }
2994 debug(
2995 printf("divstrings:\n");
2996 for (s = 0; s < stringlen; s++) {
2997 if (new->divstrings[s] == '\0') {
2998 printf("\n");
2999 } else {
3000 printf("%c",new->divstrings[s]);
3001 }
3002 }
3003 printf("(end of divstrings)\n");
3004 );
3005 }
3006
3007 new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3008 new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3009 new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3010 new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3011 new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3012
3013 if (new->version == 1) {
3014 abort();
3015 }
3016
3017 new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3018
3019 /* Load all divs */
3020 debug(printf("Loading all divs\n"));
3021 for (divno = 0; divno < new->ndivs; divno++) {
3022 debug(fprintf(stderr,"Starting load of div\n"));
3023 memory = load_tree(memory,new,divno);
3024 debug(fprintf(stderr,"Ending read of div\n"));
3025 }
3026
3027 for (divno = 0; divno < new->ndivs; divno++) {
3028 memory = load_intervals(memory,new,divno);
3029 }
3030
3031 /* memory = */ load_words(memory,new);
3032 new->access = LOADED;
3033
3034 return new;
3035 }
3036
3037
3038
3039 T
IIT_read(char * filename,char * name,bool readonlyp,Divread_T divread,char * divstring,bool add_iit_p)3040 IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *divstring,
3041 bool add_iit_p) {
3042 T new;
3043 FILE *fp;
3044 char *newfile = NULL;
3045 size_t offset = 0, stringlen;
3046 size_t filesize;
3047 int skip_nintervals, desired_divno, divno;
3048 int label_pointer_size, annot_pointer_size;
3049 #ifdef DEBUG
3050 int i;
3051 Interval_T interval;
3052 #endif
3053
3054 if (add_iit_p == false) {
3055 if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3056 return NULL;
3057 }
3058
3059 } else {
3060 /* Try adding .iit first */
3061 newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
3062 sprintf(newfile,"%s.iit",filename);
3063 if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
3064 filename = newfile;
3065 } else {
3066 FREE(newfile);
3067 if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3068 return NULL;
3069 }
3070 }
3071 }
3072
3073
3074 new = (T) MALLOC(sizeof(*new));
3075
3076 filesize = Access_filesize(filename);
3077
3078 if (name == NULL) {
3079 new->name = NULL;
3080 } else {
3081 new->name = (char *) CALLOC(strlen(name)+1,sizeof(char));
3082 strcpy(new->name,name);
3083 }
3084
3085 if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3086 fprintf(stderr,"IIT file %s appears to be empty\n",filename);
3087 fclose(fp);
3088 return NULL;
3089 } else if ((offset += sizeof(int)) > filesize) {
3090 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu). Did you generate it using iit_store?\n",
3091 filename,offset,filesize);
3092 return NULL;
3093 }
3094
3095 if (new->total_nintervals != 0) {
3096 /* Need to use Univ_IIT_read instead */
3097 fprintf(stderr,"Unexpected error in IIT_read of %s. First int is %d. Using IIT_read code on a version 1 IIT\n",
3098 filename,new->total_nintervals);
3099 abort();
3100
3101 } else {
3102 /* New format to indicate version > 1 */
3103 FREAD_INT(&new->version,fp);
3104 if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
3105 fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
3106 new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
3107 return NULL;
3108 } else if ((offset += sizeof(int)) > filesize) {
3109 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu). Did you generate it using iit_store?\n",
3110 filename,offset,filesize);
3111 return NULL;
3112 }
3113
3114 if (new->version == IIT_LATEST_VERSION_VALUES) {
3115 /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
3116 new->valuep = true;
3117 } else {
3118 new->valuep = false;
3119 }
3120
3121 if (new->version <= 3) {
3122 new->label_pointers_8p = false;
3123 new->annot_pointers_8p = false;
3124 } else if (new->version == 4) {
3125 new->label_pointers_8p = true;
3126 new->annot_pointers_8p = true;
3127 } else {
3128 /* Read new variables indicating sizes of label and annot pointers */
3129 if (FREAD_INT(&label_pointer_size,fp) < 1) {
3130 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3131 return NULL;
3132 } else if ((offset += sizeof(int)) > filesize) {
3133 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3134 filename,offset,filesize);
3135 return NULL;
3136 }
3137
3138 if (FREAD_INT(&annot_pointer_size,fp) < 1) {
3139 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3140 return NULL;
3141 } else if ((offset += sizeof(int)) > filesize) {
3142 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3143 filename,offset,filesize);
3144 return NULL;
3145 }
3146
3147 if (label_pointer_size == 4) {
3148 new->label_pointers_8p = false;
3149 } else if (label_pointer_size == 8) {
3150 new->label_pointers_8p = true;
3151 } else {
3152 fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
3153 filename,label_pointer_size);
3154 }
3155
3156 if (annot_pointer_size == 4) {
3157 new->annot_pointers_8p = false;
3158 } else if (annot_pointer_size == 8) {
3159 new->annot_pointers_8p = true;
3160 } else {
3161 fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
3162 filename,annot_pointer_size);
3163 }
3164 }
3165
3166 /* Re-read total_nintervals */
3167 if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3168 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3169 return NULL;
3170 } else if ((offset += sizeof(int)) > filesize) {
3171 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3172 filename,offset,filesize);
3173 return NULL;
3174 }
3175 }
3176
3177 debug(printf("version: %d\n",new->version));
3178 debug(printf("total_nintervals: %d\n",new->total_nintervals));
3179
3180
3181 if (FREAD_INT(&new->ntypes,fp) < 1) {
3182 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3183 return NULL;
3184 } else if (new->ntypes < 0) {
3185 fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
3186 return NULL;
3187 } else if ((offset += sizeof(int)) > filesize) {
3188 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu). Did you generate it using iit_store?\n",
3189 filename,offset,filesize);
3190 return NULL;
3191 }
3192 debug(printf("ntypes: %d\n",new->ntypes));
3193
3194
3195 if (new->version < 2) {
3196 new->nfields = 0;
3197 } else {
3198 if (FREAD_INT(&new->nfields,fp) < 1) {
3199 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3200 return NULL;
3201 } else if (new->nfields < 0) {
3202 fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
3203 return NULL;
3204 } else if ((offset += sizeof(int)) > filesize) {
3205 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu). Did you generate it using iit_store?\n",
3206 filename,offset,filesize);
3207 return NULL;
3208 }
3209 }
3210 debug(printf("nfields: %d\n",new->nfields));
3211
3212
3213 if (new->version <= 2) {
3214 new->ndivs = 1;
3215
3216 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3217 new->nintervals[0] = new->total_nintervals;
3218 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3219 new->cum_nintervals[0] = 0;
3220 new->cum_nintervals[1] = new->total_nintervals;
3221
3222 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3223 if (FREAD_INT(&(new->nnodes[0]),fp) < 1) {
3224 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3225 return NULL;
3226 } else if (new->nnodes[0] < 0) {
3227 fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
3228 return NULL;
3229 } else if ((offset += sizeof(int)) > filesize) {
3230 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %zu, filesize %zu). Did you generate it using iit_store?\n",
3231 filename,offset,filesize);
3232 return NULL;
3233 }
3234 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3235 new->cum_nnodes[0] = 0;
3236 new->cum_nnodes[1] = new->nnodes[0];
3237
3238 new->divsort = NO_SORT;
3239
3240 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3241 new->divpointers[0] = 0;
3242 new->divpointers[1] = 1;
3243
3244 new->divstrings = (char *) CALLOC(1,sizeof(char));
3245 new->divstrings[0] = '\0';
3246
3247 } else {
3248
3249 if (FREAD_INT(&new->ndivs,fp) < 1) {
3250 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3251 return NULL;
3252 } else if (new->ndivs < 0) {
3253 fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
3254 return NULL;
3255 } else if ((offset += sizeof(int)) > filesize) {
3256 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu). Did you generate it using iit_store?\n",
3257 filename,offset,filesize);
3258 return NULL;
3259 }
3260 debug(printf("ndivs: %d\n",new->ndivs));
3261
3262 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3263 offset += sizeof(int)*FREAD_INTS(new->nintervals,new->ndivs,fp);
3264 debug(
3265 printf("nintervals:");
3266 for (i = 0; i < new->ndivs; i++) {
3267 printf(" %d",new->nintervals[i]);
3268 }
3269 printf("\n");
3270 );
3271
3272 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3273 offset += sizeof(int)*FREAD_INTS(new->cum_nintervals,new->ndivs+1,fp);
3274 debug(
3275 printf("cum_nintervals:");
3276 for (i = 0; i <= new->ndivs; i++) {
3277 printf(" %d",new->cum_nintervals[i]);
3278 }
3279 printf("\n");
3280 );
3281
3282 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3283 offset += sizeof(int)*FREAD_INTS(new->nnodes,new->ndivs,fp);
3284 debug(
3285 printf("nnodes:");
3286 for (i = 0; i < new->ndivs; i++) {
3287 printf(" %d",new->nnodes[i]);
3288 }
3289 printf("\n");
3290 );
3291
3292 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3293 offset += sizeof(int)*FREAD_INTS(new->cum_nnodes,new->ndivs+1,fp);
3294 debug(
3295 printf("cum_nnodes:");
3296 for (i = 0; i <= new->ndivs; i++) {
3297 printf(" %d",new->cum_nnodes[i]);
3298 }
3299 printf("\n");
3300 );
3301
3302 if (FREAD_INT(&new->divsort,fp) < 1) {
3303 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3304 return NULL;
3305 } else if (new->divsort < 0) {
3306 fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
3307 return NULL;
3308 } else if ((offset += sizeof(int)) > filesize) {
3309 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu). Did you generate it using iit_store?\n",
3310 filename,offset,filesize);
3311 return NULL;
3312 }
3313 debug(printf("divsort: %d\n",new->divsort));
3314
3315 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3316 offset += sizeof(int)*FREAD_UINTS(new->divpointers,new->ndivs+1,fp);
3317 debug(
3318 printf("divpointers:");
3319 for (i = 0; i < new->ndivs+1; i++) {
3320 printf(" %u",new->divpointers[i]);
3321 }
3322 printf("\n");
3323 );
3324
3325 stringlen = new->divpointers[new->ndivs];
3326 if (stringlen == 0) {
3327 new->divstrings = (char *) NULL;
3328 } else {
3329 new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
3330 offset += sizeof(char)*FREAD_CHARS(new->divstrings,stringlen,fp);
3331 }
3332 debug(
3333 printf("divstrings:\n");
3334 for (s = 0; s < stringlen; s++) {
3335 if (new->divstrings[s] == '\0') {
3336 printf("\n");
3337 } else {
3338 printf("%c",new->divstrings[s]);
3339 }
3340 }
3341 printf("(end of divstrings)\n");
3342 );
3343 }
3344
3345 new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3346 new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3347 new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3348 new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3349 new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3350
3351 if (new->version == 1) {
3352 fprintf(stderr,"Not expecting version 1\n");
3353 abort();
3354 }
3355
3356 new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3357
3358 if (divread == READ_ALL) {
3359 /* Read all divs */
3360 debug(printf("Reading all divs\n"));
3361 for (divno = 0; divno < new->ndivs; divno++) {
3362 debug(fprintf(stderr,"Starting read of div\n"));
3363 offset = read_tree(offset,filesize,fp,filename,new,divno);
3364 debug(fprintf(stderr,"Ending read of div\n"));
3365 }
3366
3367 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3368 offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3369 for (divno = 1; divno < new->ndivs; divno++) {
3370 new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3371 offset = read_intervals(offset,filesize,fp,filename,new,divno);
3372 }
3373
3374 } else if (divread == READ_NONE) {
3375 debug(printf("Reading no divs\n"));
3376 offset = skip_trees(offset,filesize,fp,filename,new->ndivs,
3377 new->cum_nintervals[new->ndivs],new->cum_nnodes[new->ndivs]);
3378
3379 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3380 offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3381 for (divno = 1; divno < new->ndivs; divno++) {
3382 new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3383 offset = read_intervals(offset,filesize,fp,filename,new,divno);
3384 }
3385
3386 } else if (divread == READ_ONE) {
3387 debug(printf("Reading only div %s\n",divstring));
3388 if ((desired_divno = IIT_divint(new,divstring)) < 0) {
3389 fprintf(stderr,"Cannot find div %s in IIT_read. Ignoring div.\n",divstring);
3390 desired_divno = 0;
3391 }
3392 offset = skip_trees(offset,filesize,fp,filename,desired_divno,
3393 new->cum_nintervals[desired_divno],new->cum_nnodes[desired_divno]);
3394 debug1(fprintf(stderr,"Starting read of div\n"));
3395 offset = read_tree(offset,filesize,fp,filename,new,desired_divno);
3396 debug1(fprintf(stderr,"Ending read of div\n"));
3397 offset = skip_trees(offset,filesize,fp,filename,new->ndivs - (desired_divno + 1),
3398 new->cum_nintervals[new->ndivs] - new->cum_nintervals[desired_divno+1],
3399 new->cum_nnodes[new->ndivs] - new->cum_nnodes[desired_divno+1]);
3400
3401 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3402 offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,0,desired_divno-1);
3403 debug1(fprintf(stderr,"Starting read of intervals\n"));
3404 new->intervals[desired_divno] = &(new->intervals[0][skip_nintervals]);
3405 offset = read_intervals(offset,filesize,fp,filename,new,desired_divno);
3406 debug1(fprintf(stderr,"Ending read of intervals\n"));
3407 offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,desired_divno+1,new->ndivs-1);
3408
3409 debug(
3410 /*
3411 printf("sigmas[%d]:\n",desired_divno);
3412 for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3413 interval = &(new->intervals[desired_divno][new->sigmas[desired_divno][i]]);
3414 printf("%d %u..%u\n",new->sigmas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3415 }
3416 printf("\n");
3417 */
3418
3419 printf("alphas[%d]:\n",desired_divno);
3420 for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3421 interval = &(new->intervals[desired_divno][new->alphas[desired_divno][i]]);
3422 printf("%d %u..%u\n",new->alphas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3423 }
3424 printf("\n");
3425 );
3426
3427
3428 } else {
3429 abort();
3430 }
3431
3432 read_words(offset,filesize,fp,new);
3433 fclose(fp);
3434
3435 #ifndef HAVE_MMAP
3436 debug1(printf("No mmap available. Reading annotations\n"));
3437 new->access = FILEIO;
3438 new->fd = Access_fileio(filename);
3439 read_annotations(new);
3440 close(new->fd);
3441 /* pthread_mutex_init(&new->read_mutex,NULL); */
3442 #else
3443 debug1(printf("mmap available. Setting up pointers to annotations\n"));
3444 new->access = MMAPPED;
3445 if (mmap_annotations(filename,new,readonlyp) == false) {
3446 debug1(printf(" Failed. Reading annotations\n"));
3447 new->access = FILEIO;
3448 new->fd = Access_fileio(filename);
3449 read_annotations(new);
3450 close(new->fd);
3451 /* pthread_mutex_init(&new->read_mutex,NULL); */
3452 }
3453 #endif
3454
3455 if (newfile != NULL) {
3456 FREE(newfile);
3457 }
3458
3459 return new;
3460 }
3461
3462
3463 void
IIT_debug(char * filename)3464 IIT_debug (char *filename) {
3465 T new;
3466 FILE *fp;
3467 char *newfile = NULL;
3468 size_t stringlen, s;
3469 size_t offset = 0, filesize;
3470 int skip_nintervals, desired_divno, divno, i;
3471 int label_pointer_size, annot_pointer_size;
3472 Divread_T divread = READ_ALL;
3473 char *divstring = NULL;
3474 bool add_iit_p = false;
3475 #ifdef DEBUG
3476 Interval_T interval;
3477 #endif
3478
3479 if (add_iit_p == true) {
3480 newfile = (char *) CALLOC(strlen(filename)+strlen(".iit")+1,sizeof(char));
3481 sprintf(newfile,"%s.iit",filename);
3482 if ((fp = FOPEN_READ_BINARY(newfile)) != NULL) {
3483 filename = newfile;
3484 } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3485 /* fprintf(stderr,"Cannot open IIT file %s or %s\n",filename,newfile); */
3486 FREE(newfile);
3487 return;
3488 }
3489 } else if ((fp = FOPEN_READ_BINARY(filename)) == NULL) {
3490 /* fprintf(stderr,"Cannot open IIT file %s\n",filename); */
3491 return;
3492 }
3493
3494 new = (T) MALLOC(sizeof(*new));
3495
3496 filesize = Access_filesize(filename);
3497
3498 new->name = NULL;
3499
3500 if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3501 fprintf(stderr,"IIT file %s appears to be empty\n",filename);
3502 fclose(fp);
3503 return;
3504 } else if ((offset += sizeof(int)) > filesize) {
3505 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after first byte %zu, filesize %zu). Did you generate it using iit_store?\n",
3506 filename,offset,filesize);
3507 return;
3508 }
3509
3510 if (new->total_nintervals > 0) {
3511 new->version = 1;
3512 new->valuep = false;
3513
3514 } else {
3515 /* New format to indicate version > 1 */
3516 FREAD_INT(&new->version,fp);
3517 if (new->version > IIT_LATEST_VERSION_NOVALUES && new->version > IIT_LATEST_VERSION_VALUES) {
3518 fprintf(stderr,"This file is version %d, but this software can only read up to versions %d and %d\n",
3519 new->version,IIT_LATEST_VERSION_NOVALUES,IIT_LATEST_VERSION_VALUES);
3520 return;
3521 } else if ((offset += sizeof(int)) > filesize) {
3522 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after version %zu, filesize %zu). Did you generate it using iit_store?\n",
3523 filename,offset,filesize);
3524 return;
3525 }
3526
3527 if (new->version == IIT_LATEST_VERSION_VALUES) {
3528 /* If IIT_LATEST_VERSION_VALUES increases, need to revise this code to handle version 6 */
3529 new->valuep = true;
3530 } else {
3531 new->valuep = false;
3532 }
3533
3534 if (new->version <= 3) {
3535 new->label_pointers_8p = false;
3536 new->annot_pointers_8p = false;
3537 } else if (new->version == 4) {
3538 new->label_pointers_8p = true;
3539 new->annot_pointers_8p = true;
3540 } else {
3541 /* Read new variables indicating sizes of label and annot pointers */
3542 if (FREAD_INT(&label_pointer_size,fp) < 1) {
3543 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3544 return;
3545 } else if ((offset += sizeof(int)) > filesize) {
3546 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3547 filename,offset,filesize);
3548 return;
3549 }
3550
3551 if (FREAD_INT(&annot_pointer_size,fp) < 1) {
3552 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3553 return;
3554 } else if ((offset += sizeof(int)) > filesize) {
3555 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3556 filename,offset,filesize);
3557 return;
3558 }
3559
3560 if (label_pointer_size == 4) {
3561 new->label_pointers_8p = false;
3562 } else if (label_pointer_size == 8) {
3563 new->label_pointers_8p = true;
3564 } else {
3565 fprintf(stderr,"IIT file %s has a problem with label_pointer_size being %d, expecting 4 or 8\n",
3566 filename,label_pointer_size);
3567 }
3568
3569 if (annot_pointer_size == 4) {
3570 new->annot_pointers_8p = false;
3571 } else if (annot_pointer_size == 8) {
3572 new->annot_pointers_8p = true;
3573 } else {
3574 fprintf(stderr,"IIT file %s has a problem with annot_pointer_size being %d, expecting 4 or 8\n",
3575 filename,annot_pointer_size);
3576 }
3577 }
3578
3579 /* Re-read total_nintervals */
3580 if (FREAD_INT(&new->total_nintervals,fp) < 1) {
3581 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3582 return;
3583 } else if ((offset += sizeof(int)) > filesize) {
3584 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nintervals %zu, filesize %zu). Did you generate it using iit_store?\n",
3585 filename,offset,filesize);
3586 return;
3587 }
3588 }
3589 if (new->total_nintervals < 0) {
3590 fprintf(stderr,"IIT file %s appears to have a negative number of intervals\n",filename);
3591 return;
3592 }
3593
3594 printf("version: %d\n",new->version);
3595 printf("total_nintervals: %d\n",new->total_nintervals);
3596
3597 if (new->version >= 5) {
3598 printf("label_pointer_size: %d\n",label_pointer_size);
3599 printf("annot_pointer_size: %d\n",annot_pointer_size);
3600 }
3601
3602
3603 if (FREAD_INT(&new->ntypes,fp) < 1) {
3604 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3605 return;
3606 } else if (new->ntypes < 0) {
3607 fprintf(stderr,"IIT file %s appears to have a negative number of types\n",filename);
3608 return;
3609 } else if ((offset += sizeof(int)) > filesize) {
3610 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ntypes %zu, filesize %zu). Did you generate it using iit_store?\n",
3611 filename,offset,filesize);
3612 return;
3613 }
3614 printf("ntypes: %d\n",new->ntypes);
3615
3616
3617 if (new->version < 2) {
3618 new->nfields = 0;
3619 } else {
3620 if (FREAD_INT(&new->nfields,fp) < 1) {
3621 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3622 return;
3623 } else if (new->nfields < 0) {
3624 fprintf(stderr,"IIT file %s appears to have a negative number of fields\n",filename);
3625 return;
3626 } else if ((offset += sizeof(int)) > filesize) {
3627 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nfields %zu, filesize %zu). Did you generate it using iit_store?\n",
3628 filename,offset,filesize);
3629 return;
3630 }
3631 }
3632 printf("nfields: %d\n",new->nfields);
3633
3634
3635 if (new->version <= 2) {
3636 new->ndivs = 1;
3637
3638 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3639 new->nintervals[0] = new->total_nintervals;
3640 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3641 new->cum_nintervals[0] = 0;
3642 new->cum_nintervals[1] = new->total_nintervals;
3643
3644 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3645 if (FREAD_INT(&(new->nnodes[0]),fp) < 1) {
3646 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3647 return;
3648 } else if (new->nnodes[0] < 0) {
3649 fprintf(stderr,"IIT file %s appears to have a negative number of nodes\n",filename);
3650 return;
3651 } else if ((offset += sizeof(int)) > filesize) {
3652 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after nnodes %zu, filesize %zu). Did you generate it using iit_store?\n",
3653 filename,offset,filesize);
3654 return;
3655 }
3656 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3657 new->cum_nnodes[0] = 0;
3658 new->cum_nnodes[1] = new->nnodes[0];
3659
3660 new->divsort = NO_SORT;
3661
3662 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3663 new->divpointers[0] = 0;
3664 new->divpointers[1] = 1;
3665
3666 new->divstrings = (char *) CALLOC(1,sizeof(char));
3667 new->divstrings[0] = '\0';
3668
3669 } else {
3670
3671 if (FREAD_INT(&new->ndivs,fp) < 1) {
3672 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3673 return;
3674 } else if (new->ndivs < 0) {
3675 fprintf(stderr,"IIT file %s appears to have a negative number of divs\n",filename);
3676 return;
3677 } else if ((offset += sizeof(int)) > filesize) {
3678 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after ndivs %zu, filesize %zu). Did you generate it using iit_store?\n",
3679 filename,offset,filesize);
3680 return;
3681 }
3682 printf("ndivs: %d\n",new->ndivs);
3683
3684 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
3685 offset += sizeof(int)*FREAD_INTS(new->nintervals,new->ndivs,fp);
3686 printf("nintervals:");
3687 for (i = 0; i < new->ndivs; i++) {
3688 printf(" %d",new->nintervals[i]);
3689 }
3690 printf("\n");
3691
3692 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
3693 offset += sizeof(int)*FREAD_INTS(new->cum_nintervals,new->ndivs+1,fp);
3694 printf("cum_nintervals:");
3695 for (i = 0; i <= new->ndivs; i++) {
3696 printf(" %d",new->cum_nintervals[i]);
3697 }
3698 printf("\n");
3699
3700 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
3701 offset += sizeof(int)*FREAD_INTS(new->nnodes,new->ndivs,fp);
3702 printf("nnodes:");
3703 for (i = 0; i < new->ndivs; i++) {
3704 printf(" %d",new->nnodes[i]);
3705 }
3706 printf("\n");
3707
3708 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
3709 offset += sizeof(int)*FREAD_INTS(new->cum_nnodes,new->ndivs+1,fp);
3710 printf("cum_nnodes:");
3711 for (i = 0; i <= new->ndivs; i++) {
3712 printf(" %d",new->cum_nnodes[i]);
3713 }
3714 printf("\n");
3715
3716 if (FREAD_INT(&new->divsort,fp) < 1) {
3717 fprintf(stderr,"IIT file %s appears to be truncated\n",filename);
3718 return;
3719 } else if (new->divsort < 0) {
3720 fprintf(stderr,"IIT file %s appears to have a negative value for divsort\n",filename);
3721 return;
3722 } else if ((offset += sizeof(int)) > filesize) {
3723 fprintf(stderr,"IIT file %s has an invalid binary format -- offset is too large (offset after divsort %zu, filesize %zu). Did you generate it using iit_store?\n",
3724 filename,offset,filesize);
3725 return;
3726 }
3727 printf("divsort: %d\n",new->divsort);
3728
3729 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
3730 offset += sizeof(int)*FREAD_UINTS(new->divpointers,new->ndivs+1,fp);
3731 printf("divpointers:");
3732 for (i = 0; i < new->ndivs+1; i++) {
3733 printf(" %u",new->divpointers[i]);
3734 }
3735 printf("\n");
3736
3737 stringlen = new->divpointers[new->ndivs];
3738 if (stringlen == 0) {
3739 new->divstrings = (char *) NULL;
3740 } else {
3741 new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
3742 offset += sizeof(char)*FREAD_CHARS(new->divstrings,stringlen,fp);
3743 }
3744 printf("divstrings:\n");
3745 for (s = 0; s < stringlen; s++) {
3746 if (new->divstrings[s] == '\0') {
3747 printf("\n");
3748 } else {
3749 printf("%c",new->divstrings[s]);
3750 }
3751 }
3752 }
3753
3754 new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
3755 new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
3756 new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
3757 new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
3758 new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
3759 new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
3760
3761 if (divread == READ_ALL) {
3762 /* Read all divs */
3763 debug(printf("Reading all divs\n"));
3764 for (divno = 0; divno < new->ndivs; divno++) {
3765 debug(printf("Div %d tree\n",divno));
3766 offset = read_tree(offset,filesize,fp,filename,new,divno);
3767 }
3768
3769 debug(printf("Div 0 intervals\n"));
3770 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3771 offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3772 for (divno = 1; divno < new->ndivs; divno++) {
3773 debug(printf("Div %d intervals\n",divno));
3774 new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3775 offset = read_intervals(offset,filesize,fp,filename,new,divno);
3776 }
3777
3778 } else if (divread == READ_NONE) {
3779 debug(printf("Reading no divs\n"));
3780 offset = skip_trees(offset,filesize,fp,filename,new->ndivs,
3781 new->cum_nintervals[new->ndivs],new->cum_nnodes[new->ndivs]);
3782
3783 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3784 offset = read_intervals(offset,filesize,fp,filename,new,/*divno*/0);
3785 for (divno = 1; divno < new->ndivs; divno++) {
3786 new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
3787 offset = read_intervals(offset,filesize,fp,filename,new,divno);
3788 }
3789
3790 } else if (divread == READ_ONE) {
3791 debug(printf("Reading only div %s\n",divstring));
3792 if ((desired_divno = IIT_divint(new,divstring)) < 0) {
3793 fprintf(stderr,"Cannot find div %s in IIT_read. Ignoring div.\n",divstring);
3794 desired_divno = 0;
3795 }
3796 offset = skip_trees(offset,filesize,fp,filename,desired_divno,
3797 new->cum_nintervals[desired_divno],new->cum_nnodes[desired_divno]);
3798 debug1(fprintf(stderr,"Starting read of div\n"));
3799 offset = read_tree(offset,filesize,fp,filename,new,desired_divno);
3800 debug1(fprintf(stderr,"Ending read of div\n"));
3801 offset = skip_trees(offset,filesize,fp,filename,new->ndivs - (desired_divno + 1),
3802 new->cum_nintervals[new->ndivs] - new->cum_nintervals[desired_divno+1],
3803 new->cum_nnodes[new->ndivs] - new->cum_nnodes[desired_divno+1]);
3804
3805 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
3806 offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,0,desired_divno-1);
3807 debug1(fprintf(stderr,"Starting read of intervals\n"));
3808 new->intervals[desired_divno] = &(new->intervals[0][skip_nintervals]);
3809 offset = read_intervals(offset,filesize,fp,filename,new,desired_divno);
3810 debug1(fprintf(stderr,"Ending read of intervals\n"));
3811 offset = skip_intervals(&skip_nintervals,offset,filesize,fp,filename,new,desired_divno+1,new->ndivs-1);
3812
3813 debug(
3814 /*
3815 printf("sigmas[%d]:\n",desired_divno);
3816 for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3817 interval = &(new->intervals[desired_divno][new->sigmas[desired_divno][i]]);
3818 printf("%d %u..%u\n",new->sigmas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3819 }
3820 printf("\n");
3821 */
3822
3823 printf("alphas[%d]:\n",desired_divno);
3824 for (i = 0; i < new->nintervals[desired_divno]+1; i++) {
3825 interval = &(new->intervals[desired_divno][new->alphas[desired_divno][i]]);
3826 printf("%d %u..%u\n",new->alphas[desired_divno][i],Interval_low(interval),Interval_high(interval));
3827 }
3828 printf("\n");
3829 );
3830
3831
3832 } else {
3833 abort();
3834 }
3835
3836 read_words_debug(offset,filesize,fp,new);
3837 fclose(fp);
3838
3839 #ifndef HAVE_MMAP
3840 debug1(printf("No mmap available. Reading annotations\n"));
3841 new->access = FILEIO;
3842 new->fd = Access_fileio(filename);
3843 read_annotations(new);
3844 close(new->fd);
3845 /* pthread_mutex_init(&new->read_mutex,NULL); */
3846 #else
3847 debug1(printf("mmap available. Setting up pointers to annotations\n"));
3848 new->access = MMAPPED;
3849 if (mmap_annotations(filename,new,/*readonlyp*/true) == false) {
3850 debug1(printf(" Failed. Reading annotations\n"));
3851 new->access = FILEIO;
3852 new->fd = Access_fileio(filename);
3853 read_annotations(new);
3854 close(new->fd);
3855 /* pthread_mutex_init(&new->read_mutex,NULL); */
3856 }
3857 #endif
3858
3859 IIT_free(&new);
3860
3861 if (newfile != NULL) {
3862 FREE(newfile);
3863 }
3864
3865 return;
3866 }
3867
3868
3869 /************************************************************************/
3870
3871 static void
fnode_query_aux(int * min,int * max,T this,int divno,int nodeindex,Chrpos_T x)3872 fnode_query_aux (int *min, int *max, T this, int divno, int nodeindex, Chrpos_T x) {
3873 int lambda;
3874 FNode_T node;
3875
3876 if (nodeindex == -1) {
3877 return;
3878 }
3879
3880 node = &(this->nodes[divno][nodeindex]);
3881 if (x == node->value) {
3882 debug(printf("%uD:\n",node->value));
3883 if (node->a < *min) {
3884 *min = node->a;
3885 }
3886 if (node->b > *max) {
3887 *max = node->b;
3888 }
3889 return;
3890 } else if (x < node->value) {
3891 fnode_query_aux(&(*min),&(*max),this,divno,node->leftindex,x);
3892 debug(printf("%uL:\n",node->value));
3893 if (node->a < *min) {
3894 *min = node->a;
3895 }
3896 for (lambda = node->a; lambda <= node->b; lambda++) {
3897 debug(printf("Looking at lambda %d, segment %d\n",
3898 lambda,this->sigmas[divno][lambda]));
3899 if (Interval_is_contained(x,this->intervals[divno],this->sigmas[divno][lambda]) == true) {
3900 if (lambda > *max) {
3901 *max = lambda;
3902 }
3903 } else {
3904 return;
3905 }
3906 }
3907 return;
3908 } else {
3909 /* (node->value < x) */
3910 fnode_query_aux(&(*min),&(*max),this,divno,node->rightindex,x);
3911 debug(printf("%uR:\n", node->value));
3912 if (node->b > *max) {
3913 *max = node->b;
3914 }
3915 for (lambda = node->b; lambda >= node->a; lambda--) {
3916 debug(printf("Looking at lambda %d, segment %d\n",
3917 lambda,this->omegas[divno][lambda]));
3918 if (Interval_is_contained(x,this->intervals[divno],this->omegas[divno][lambda]) == true) {
3919 if (lambda < *min) {
3920 *min = lambda;
3921 }
3922 } else {
3923 return;
3924 }
3925 }
3926 return;
3927 }
3928 }
3929
3930 /************************************************************************/
3931
3932 int *
IIT_find(int * nmatches,T this,char * label)3933 IIT_find (int *nmatches, T this, char *label) {
3934 int *matches = NULL, j;
3935 int low, middle, high, recno;
3936 bool foundp = false;
3937 int cmp;
3938
3939 low = 0;
3940 high = this->total_nintervals;
3941 *nmatches = 0;
3942
3943 while (!foundp && low < high) {
3944 middle = (low+high)/2;
3945
3946 #ifdef WORDS_BIGENDIAN
3947 #ifdef HAVE_64_BIT
3948 if (this->label_pointers_8p == true) {
3949 cmp = strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[middle])])]));
3950 } else {
3951 cmp = strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[middle])])]));
3952 }
3953 #else
3954 cmp = strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[middle])])]));
3955 #endif
3956 #else
3957 #ifdef HAVE_64_BIT
3958 if (this->label_pointers_8p == true) {
3959 cmp = strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[middle]]]));
3960 } else {
3961 cmp = strcmp(label,&(this->labels[this->labelpointers[this->labelorder[middle]]]));
3962 }
3963 #else
3964 cmp = strcmp(label,&(this->labels[this->labelpointers[this->labelorder[middle]]]));
3965 #endif
3966 #endif
3967
3968 if (cmp < 0) {
3969 high = middle;
3970 } else if (cmp > 0) {
3971 low = middle + 1;
3972 } else {
3973 foundp = true;
3974 }
3975 }
3976
3977 if (foundp == true) {
3978 low = middle;
3979 #ifdef WORDS_BIGENDIAN
3980 #ifdef HAVE_64_BIT
3981 if (this->label_pointers_8p == true) {
3982 while (low-1 >= 0 &&
3983 !strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3984 low--;
3985 }
3986 } else {
3987 while (low-1 >= 0 &&
3988 !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3989 low--;
3990 }
3991 }
3992 #else
3993 while (low-1 >= 0 &&
3994 !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[low-1])])]))) {
3995 low--;
3996 }
3997 #endif
3998 #else
3999 #ifdef HAVE_64_BIT
4000 if (this->label_pointers_8p == true) {
4001 while (low-1 >= 0 &&
4002 !strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[low-1]]]))) {
4003 low--;
4004 }
4005 } else {
4006 while (low-1 >= 0 &&
4007 !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[low-1]]]))) {
4008 low--;
4009 }
4010 }
4011 #else
4012 while (low-1 >= 0 &&
4013 !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[low-1]]]))) {
4014 low--;
4015 }
4016 #endif
4017 #endif
4018
4019 high = middle;
4020 #ifdef WORDS_BIGENDIAN
4021 #ifdef HAVE_64_BIT
4022 if (this->label_pointers_8p == true) {
4023 while (high+1 < this->total_nintervals &&
4024 !strcmp(label,&(this->labels[Bigendian_convert_uint8(this->labelpointers8[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4025 high++;
4026 }
4027 } else {
4028 while (high+1 < this->total_nintervals &&
4029 !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4030 high++;
4031 }
4032 }
4033 #else
4034 while (high+1 < this->total_nintervals &&
4035 !strcmp(label,&(this->labels[Bigendian_convert_uint(this->labelpointers[Bigendian_convert_int(this->labelorder[high+1])])]))) {
4036 high++;
4037 }
4038 #endif
4039 #else
4040 #ifdef HAVE_64_BIT
4041 if (this->label_pointers_8p == true) {
4042 while (high+1 < this->total_nintervals &&
4043 !strcmp(label,&(this->labels[this->labelpointers8[this->labelorder[high+1]]]))) {
4044 high++;
4045 }
4046 } else {
4047 while (high+1 < this->total_nintervals &&
4048 !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[high+1]]]))) {
4049 high++;
4050 }
4051 }
4052 #else
4053 while (high+1 < this->total_nintervals &&
4054 !strcmp(label,&(this->labels[this->labelpointers[this->labelorder[high+1]]]))) {
4055 high++;
4056 }
4057 #endif
4058 #endif
4059
4060
4061 *nmatches = high - low + 1;
4062 if (*nmatches > 0) {
4063 matches = (int *) CALLOC(*nmatches,sizeof(int));
4064 j = 0;
4065 for (recno = low; recno <= high; recno++) {
4066 #ifdef WORDS_BIGENDIAN
4067 #if 0
4068 printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->labelorder[recno]));
4069 #endif
4070 matches[j++] = Bigendian_convert_int(this->labelorder[recno])+1;
4071
4072 #else
4073 #if 0
4074 printf("Pushing %d:%d\n",recno,this->labelorder[recno]);
4075 #endif
4076 matches[j++] = this->labelorder[recno]+1;
4077 #endif
4078 }
4079 }
4080 }
4081
4082 return matches;
4083 }
4084
4085 /* Slow. Used before binary search method above. */
4086 int
IIT_find_linear(T this,char * label)4087 IIT_find_linear (T this, char *label) {
4088 int i;
4089 char *p;
4090
4091 for (i = 0; i < this->total_nintervals; i++) {
4092 #ifdef WORDS_BIGENDIAN
4093 #ifdef HAVE_64_BIT
4094 if (this->label_pointers_8p == true) {
4095 p = &(this->labels[Bigendian_convert_uint8(this->labelpointers8[i])]);
4096 } else {
4097 p = &(this->labels[Bigendian_convert_uint(this->labelpointers[i])]);
4098 }
4099 #else
4100 p = &(this->labels[Bigendian_convert_uint(this->labelpointers[i])]);
4101 #endif
4102 #else
4103 #ifdef HAVE_64_BIT
4104 if (this->label_pointers_8p == true) {
4105 p = &(this->labels[this->labelpointers8[i]]);
4106 } else {
4107 p = &(this->labels[this->labelpointers[i]]);
4108 }
4109 #else
4110 p = &(this->labels[this->labelpointers[i]]);
4111 #endif
4112 #endif
4113 while (isspace((int) *p)) {
4114 p++;
4115 }
4116 if (!strcmp(label,p)) {
4117 return i + 1;
4118 }
4119 }
4120
4121 return -1;
4122 }
4123
4124 int
IIT_find_one(T this,char * label)4125 IIT_find_one (T this, char *label) {
4126 int index;
4127 int *matches, nmatches;
4128
4129 matches = IIT_find(&nmatches,this,label);
4130 if (nmatches == 0) {
4131 /*
4132 fprintf(stderr,"Expected one match for %s, but got 0\n",
4133 label);
4134 */
4135 index = -1;
4136 } else {
4137 if (nmatches > 1) {
4138 fprintf(stderr,"Expected one match for %s, but got %d\n",
4139 label,nmatches);
4140 }
4141 index = matches[0];
4142 FREE(matches);
4143 }
4144
4145 return index;
4146 }
4147
4148
4149 /************************************************************************/
4150
4151
/* qsort comparator for int elements, ascending order.  Returns -1,
   0, or +1.  Uses comparisons rather than subtraction, so it cannot
   overflow. */
static int
int_compare (const void *a, const void *b) {
  const int lhs = *(const int *) a;
  const int rhs = *(const int *) b;

  return (lhs > rhs) - (lhs < rhs);
}
4165
4166
/* qsort comparator for unsigned int elements, smallest first.
   Returns -1, 0, or +1. */
static int
uint_compare_ascending (const void *a, const void *b) {
  const unsigned int lhs = *(const unsigned int *) a;
  const unsigned int rhs = *(const unsigned int *) b;

  return (lhs > rhs) - (lhs < rhs);
}
4180
4181
/* qsort comparator for unsigned int elements, largest first.
   Returns -1 when the first element is larger, +1 when it is
   smaller, and 0 when they are equal. */
static int
uint_compare_descending (const void *a, const void *b) {
  const unsigned int lhs = *(const unsigned int *) a;
  const unsigned int rhs = *(const unsigned int *) b;

  return (lhs < rhs) - (lhs > rhs);
}
4195
4196
4197 Chrpos_T *
IIT_get_highs_for_low(int * nuniq,T this,int divno,Chrpos_T x)4198 IIT_get_highs_for_low (int *nuniq, T this, int divno, Chrpos_T x) {
4199 Chrpos_T *uniq = NULL, *coords = NULL, prev;
4200 int neval, ncoords, i;
4201 int match, lambda, min1, max1 = 0;
4202 struct Interval_T interval;
4203
4204 if (divno < 0) {
4205 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4206 *nuniq = 0;
4207 return (Chrpos_T *) NULL;
4208 }
4209 min1 = this->nintervals[divno] + 1;
4210
4211 debug(printf("Entering IIT_get_highs_for_low with divno %d and query %u\n",divno,x));
4212 fnode_query_aux(&min1,&max1,this,divno,0,x);
4213 debug(printf("min1=%d max1=%d\n",min1,max1));
4214
4215 if (max1 < min1) {
4216 *nuniq = 0;
4217 return (Chrpos_T *) NULL;
4218 } else {
4219 neval = (max1 - min1 + 1) + (max1 - min1 + 1);
4220 coords = (Chrpos_T *) CALLOC(neval,sizeof(Chrpos_T));
4221 ncoords = 0;
4222
4223 for (lambda = min1; lambda <= max1; lambda++) {
4224 match = this->sigmas[divno][lambda];
4225 /* Have to subtract 1 because intervals array is zero-based */
4226 interval = this->intervals[divno][match - 1];
4227 if (interval.low == x) {
4228 coords[ncoords++] = interval.high;
4229 }
4230
4231 match = this->omegas[divno][lambda];
4232 /* Have to subtract 1 because intervals array is zero-based */
4233 interval = this->intervals[divno][match - 1];
4234 if (interval.low == x) {
4235 coords[ncoords++] = interval.high;
4236 }
4237 }
4238
4239 if (ncoords == 0) {
4240 *nuniq = 0;
4241 FREE(coords);
4242 return (Chrpos_T *) NULL;
4243
4244 } else {
4245 /* Eliminate duplicates */
4246 qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_ascending);
4247
4248 uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
4249 *nuniq = 0;
4250 prev = 0;
4251 for (i = 0; i < ncoords; i++) {
4252 if (coords[i] != prev) {
4253 uniq[(*nuniq)++] = coords[i];
4254 prev = coords[i];
4255 }
4256 }
4257
4258 FREE(coords);
4259 return uniq;
4260 }
4261 }
4262 }
4263
4264
4265 Chrpos_T *
IIT_get_lows_for_high(int * nuniq,T this,int divno,Chrpos_T x)4266 IIT_get_lows_for_high (int *nuniq, T this, int divno, Chrpos_T x) {
4267 Chrpos_T *uniq = NULL, *coords = NULL, prev;
4268 int neval, ncoords, i;
4269 int match, lambda, min1, max1 = 0;
4270 struct Interval_T interval;
4271
4272 if (divno < 0) {
4273 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4274 *nuniq = 0;
4275 return (Chrpos_T *) NULL;
4276 }
4277 min1 = this->nintervals[divno] + 1;
4278
4279 debug(printf("Entering IIT_get_lows_for_high with divno %d and query %u\n",divno,x));
4280 fnode_query_aux(&min1,&max1,this,divno,0,x);
4281 debug(printf("min1=%d max1=%d\n",min1,max1));
4282
4283 if (max1 < min1) {
4284 *nuniq = 0;
4285 return (Chrpos_T *) NULL;
4286 } else {
4287 neval = (max1 - min1 + 1) + (max1 - min1 + 1);
4288 coords = (Chrpos_T *) CALLOC(neval,sizeof(Chrpos_T));
4289 ncoords = 0;
4290
4291 for (lambda = min1; lambda <= max1; lambda++) {
4292 match = this->sigmas[divno][lambda];
4293 /* Have to subtract 1 because intervals array is zero-based */
4294 interval = this->intervals[divno][match - 1];
4295 if (interval.high == x) {
4296 coords[ncoords++] = interval.low;
4297 }
4298
4299 match = this->omegas[divno][lambda];
4300 /* Have to subtract 1 because intervals array is zero-based */
4301 interval = this->intervals[divno][match - 1];
4302 if (interval.high == x) {
4303 coords[ncoords++] = interval.low;
4304 }
4305 }
4306
4307 if (ncoords == 0) {
4308 *nuniq = 0;
4309 FREE(coords);
4310 return (Chrpos_T *) NULL;
4311
4312 } else {
4313 /* Eliminate duplicates */
4314 qsort(coords,ncoords,sizeof(Chrpos_T),uint_compare_descending);
4315
4316 uniq = (Chrpos_T *) CALLOC(ncoords,sizeof(Chrpos_T));
4317 *nuniq = 0;
4318 prev = 0;
4319 for (i = 0; i < ncoords; i++) {
4320 if (coords[i] != prev) {
4321 uniq[(*nuniq)++] = coords[i];
4322 prev = coords[i];
4323 }
4324 }
4325
4326 FREE(coords);
4327 return uniq;
4328 }
4329 }
4330 }
4331
4332
4333 bool
IIT_low_exists_signed_p(T this,int divno,Chrpos_T x,int sign)4334 IIT_low_exists_signed_p (T this, int divno, Chrpos_T x, int sign) {
4335 int match, lambda, min1, max1 = 0;
4336 struct Interval_T interval;
4337
4338 if (divno < 0) {
4339 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4340 return false;
4341 }
4342 min1 = this->nintervals[divno] + 1;
4343
4344 debug(printf("Entering IIT_get_highs_for_low with divno %d and query %u\n",divno,x));
4345 fnode_query_aux(&min1,&max1,this,divno,0,x);
4346 debug(printf("min1=%d max1=%d\n",min1,max1));
4347
4348 if (max1 < min1) {
4349 return false;
4350 } else {
4351 for (lambda = min1; lambda <= max1; lambda++) {
4352 match = this->sigmas[divno][lambda];
4353 /* Have to subtract 1 because intervals array is zero-based */
4354 interval = this->intervals[divno][match - 1];
4355 if (interval.low == x && (sign == 0 || interval.sign == sign)) {
4356 return true;
4357 }
4358
4359 match = this->omegas[divno][lambda];
4360 /* Have to subtract 1 because intervals array is zero-based */
4361 interval = this->intervals[divno][match - 1];
4362 if (interval.low == x && (sign == 0 || interval.sign == sign)) {
4363 return true;
4364 }
4365 }
4366
4367 return false;
4368 }
4369 }
4370
4371 bool
IIT_high_exists_signed_p(T this,int divno,Chrpos_T x,int sign)4372 IIT_high_exists_signed_p (T this, int divno, Chrpos_T x, int sign) {
4373 int match, lambda, min1, max1 = 0;
4374 struct Interval_T interval;
4375
4376 if (divno < 0) {
4377 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4378 return false;
4379 }
4380 min1 = this->nintervals[divno] + 1;
4381
4382 debug(printf("Entering IIT_get_lows_for_high with divno %d and query %u\n",divno,x));
4383 fnode_query_aux(&min1,&max1,this,divno,0,x);
4384 debug(printf("min1=%d max1=%d\n",min1,max1));
4385
4386 if (max1 < min1) {
4387 return false;
4388 } else {
4389 for (lambda = min1; lambda <= max1; lambda++) {
4390 match = this->sigmas[divno][lambda];
4391 /* Have to subtract 1 because intervals array is zero-based */
4392 interval = this->intervals[divno][match - 1];
4393 if (interval.high == x && (sign == 0 || interval.sign == sign)) {
4394 return true;
4395 }
4396
4397 match = this->omegas[divno][lambda];
4398 /* Have to subtract 1 because intervals array is zero-based */
4399 interval = this->intervals[divno][match - 1];
4400 if (interval.high == x && (sign == 0 || interval.sign == sign)) {
4401 return true;
4402 }
4403 }
4404
4405 return false;
4406 }
4407 }
4408
4409
4410 int *
IIT_get_lows_signed(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4411 IIT_get_lows_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4412 int *uniq = NULL, *matches, matchstart, neval, nfound, i;
4413 int match, lambda, prev;
4414 int min1, max1 = 0, min2, max2 = 0;
4415 struct Interval_T interval;
4416
4417 if (divno < 0) {
4418 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4419 *nmatches = 0;
4420 return (int *) NULL;
4421 } else {
4422 min1 = min2 = this->nintervals[divno] + 1;
4423 }
4424
4425 debug(printf("Entering IIT_low_signed_p with divno %d and query %u..%u\n",divno,x,y));
4426 fnode_query_aux(&min1,&max1,this,divno,0,x);
4427 fnode_query_aux(&min2,&max2,this,divno,0,y);
4428 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4429
4430 *nmatches = 0;
4431 if (max2 >= min1) {
4432 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4433 matches = (int *) CALLOC(neval,sizeof(int));
4434
4435 nfound = 0;
4436 for (lambda = min1; lambda <= max2; lambda++) {
4437 match = this->sigmas[divno][lambda];
4438 /* Have to subtract 1 because intervals array is zero-based */
4439 interval = this->intervals[divno][match - 1];
4440 if (interval.low >= x && interval.low <= y && (sign == 0 || interval.sign == sign)) {
4441 matches[nfound++] = match;
4442 }
4443
4444 match = this->omegas[divno][lambda];
4445 /* Have to subtract 1 because intervals array is zero-based */
4446 interval = this->intervals[divno][match - 1];
4447 if (interval.low >= x && interval.low <= y && (sign == 0 || interval.sign == sign)) {
4448 matches[nfound++] = match;
4449 }
4450 }
4451
4452 if (nfound == 0) {
4453 FREE(matches);
4454 return (int *) NULL;
4455 } else {
4456 /* Eliminate duplicates */
4457 uniq = (int *) CALLOC(nfound,sizeof(int));
4458 qsort(matches,nfound,sizeof(int),int_compare);
4459 prev = 0;
4460 debug(printf("unique segments in lambda %d to %d:",min1,max2));
4461 for (i = 0; i < nfound; i++) {
4462 if (matches[i] != prev) {
4463 debug(printf(" %d",matches[i]));
4464 uniq[(*nmatches)++] = matches[i];
4465 prev = matches[i];
4466 }
4467 }
4468 debug(printf("\n"));
4469 FREE(matches);
4470
4471 /* No need to check for interval overlap */
4472 }
4473 }
4474
4475 matchstart = this->cum_nintervals[divno];
4476 for (i = 0; i < *nmatches; i++) {
4477 uniq[i] += matchstart;
4478 }
4479
4480 return uniq;
4481 }
4482
4483
4484 int *
IIT_get_highs_signed(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4485 IIT_get_highs_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4486 int *uniq = NULL, *matches, matchstart, neval, nfound, i;
4487 int match, lambda, prev;
4488 int min1, max1 = 0, min2, max2 = 0;
4489 struct Interval_T interval;
4490
4491 if (divno < 0) {
4492 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4493 *nmatches = 0;
4494 return (int *) NULL;
4495 } else {
4496 min1 = min2 = this->nintervals[divno] + 1;
4497 }
4498
4499 debug(printf("Entering IIT_low_signed_p with divno %d and query %u..%u\n",divno,x,y));
4500 fnode_query_aux(&min1,&max1,this,divno,0,x);
4501 fnode_query_aux(&min2,&max2,this,divno,0,y);
4502 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4503
4504 *nmatches = 0;
4505 if (max2 >= min1) {
4506 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4507 matches = (int *) CALLOC(neval,sizeof(int));
4508
4509 nfound = 0;
4510 for (lambda = min1; lambda <= max2; lambda++) {
4511 match = this->sigmas[divno][lambda];
4512 /* Have to subtract 1 because intervals array is zero-based */
4513 interval = this->intervals[divno][match - 1];
4514 if (interval.high >= x && interval.high <= y && (sign == 0 || interval.sign == sign)) {
4515 matches[nfound++] = match;
4516 }
4517
4518 match = this->omegas[divno][lambda];
4519 /* Have to subtract 1 because intervals array is zero-based */
4520 interval = this->intervals[divno][match - 1];
4521 if (interval.high >= x && interval.high <= y && (sign == 0 || interval.sign == sign)) {
4522 matches[nfound++] = match;
4523 }
4524 }
4525
4526 if (nfound == 0) {
4527 FREE(matches);
4528 return (int *) NULL;
4529 } else {
4530 /* Eliminate duplicates */
4531 uniq = (int *) CALLOC(nfound,sizeof(int));
4532 qsort(matches,nfound,sizeof(int),int_compare);
4533 prev = 0;
4534 debug(printf("unique segments in lambda %d to %d:",min1,max2));
4535 for (i = 0; i < nfound; i++) {
4536 if (matches[i] != prev) {
4537 debug(printf(" %d",matches[i]));
4538 uniq[(*nmatches)++] = matches[i];
4539 prev = matches[i];
4540 }
4541 }
4542 debug(printf("\n"));
4543 FREE(matches);
4544
4545 /* No need to check for interval overlap */
4546 }
4547 }
4548
4549 matchstart = this->cum_nintervals[divno];
4550 for (i = 0; i < *nmatches; i++) {
4551 uniq[i] += matchstart;
4552 }
4553
4554 return uniq;
4555 }
4556
4557
4558
4559 int *
IIT_get(int * nmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,bool sortp)4560 IIT_get (int *nmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, bool sortp) {
4561 int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4562 int lambda, prev;
4563 int divno;
4564 int min1, max1 = 0, min2, max2 = 0;
4565 int nintervals;
4566
4567 divno = IIT_divint(this,divstring);
4568
4569 #if 1
4570 /* Usually don't need to check, unless crossing between iits,
4571 because divstring comes from same iit */
4572 if (divno < 0) {
4573 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4574 *nmatches = 0;
4575 return (int *) NULL;
4576 }
4577 #endif
4578
4579 if ((nintervals = this->nintervals[divno]) == 0) {
4580 *nmatches = 0;
4581 return (int *) NULL;
4582 } else {
4583 min1 = min2 = nintervals + 1;
4584 }
4585
4586 debug(printf("Entering IIT_get with query %u %u\n",x,y));
4587 fnode_query_aux(&min1,&max1,this,divno,0,x);
4588 fnode_query_aux(&min2,&max2,this,divno,0,y);
4589 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4590
4591 *nmatches = 0;
4592 if (max2 >= min1) {
4593 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4594 matches = (int *) CALLOC(neval,sizeof(int));
4595 uniq = (int *) CALLOC(neval,sizeof(int));
4596
4597 i = 0;
4598 for (lambda = min1; lambda <= max2; lambda++) {
4599 matches[i++] = this->sigmas[divno][lambda];
4600 matches[i++] = this->omegas[divno][lambda];
4601 }
4602
4603 /* Eliminate duplicates */
4604 qsort(matches,neval,sizeof(int),int_compare);
4605 nuniq = 0;
4606 prev = 0;
4607 debug(printf("unique segments in lambda %d to %d:",min1,max2));
4608 for (i = 0; i < neval; i++) {
4609 if (matches[i] != prev) {
4610 debug(printf(" %d",matches[i]));
4611 uniq[nuniq++] = matches[i];
4612 prev = matches[i];
4613 }
4614 }
4615 debug(printf("\n"));
4616
4617 for (i = 0; i < nuniq; i++) {
4618 if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4619 matches[(*nmatches)++] = uniq[i];
4620 debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4621 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4622 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4623 } else {
4624 debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4625 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4626 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4627 }
4628 }
4629
4630 FREE(uniq);
4631 }
4632
4633 /* Convert to universal indices */
4634 matchstart = this->cum_nintervals[divno];
4635 for (i = 0; i < *nmatches; i++) {
4636 matches[i] += matchstart;
4637 }
4638
4639 if (sortp == false) {
4640 return matches;
4641 #if 0
4642 } else if (this->version <= 2) {
4643 sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4644 FREE(matches);
4645 return sorted;
4646 #endif
4647 } else {
4648 sorted = sort_matches_by_position(this,matches,*nmatches);
4649 FREE(matches);
4650 return sorted;
4651 }
4652 }
4653
4654
4655 int *
IIT_get_signed(int * nmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int sign,bool sortp)4656 IIT_get_signed (int *nmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int sign, bool sortp) {
4657 int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4658 int lambda, prev;
4659 int divno;
4660 int min1, max1 = 0, min2, max2 = 0;
4661 int nintervals;
4662 int index;
4663
4664 divno = IIT_divint(this,divstring);
4665
4666 #if 1
4667 /* Usually don't need to check, unless crossing between iits,
4668 because divstring comes from same iit */
4669 if (divno < 0) {
4670 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4671 *nmatches = 0;
4672 return (int *) NULL;
4673 }
4674 #endif
4675
4676 if ((nintervals = this->nintervals[divno]) == 0) {
4677 *nmatches = 0;
4678 return (int *) NULL;
4679 } else {
4680 min1 = min2 = nintervals + 1;
4681 }
4682
4683 debug(printf("Entering IIT_get with query %u %u\n",x,y));
4684 fnode_query_aux(&min1,&max1,this,divno,0,x);
4685 fnode_query_aux(&min2,&max2,this,divno,0,y);
4686 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4687
4688 *nmatches = 0;
4689 if (max2 >= min1) {
4690 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4691 matches = (int *) CALLOC(neval,sizeof(int));
4692 uniq = (int *) CALLOC(neval,sizeof(int));
4693
4694 i = 0;
4695 for (lambda = min1; lambda <= max2; lambda++) {
4696 index = this->sigmas[divno][lambda];
4697 if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
4698 matches[i++] = index;
4699 }
4700 index = this->omegas[divno][lambda];
4701 if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
4702 matches[i++] = index;
4703 }
4704 }
4705
4706 /* Eliminate duplicates */
4707 qsort(matches,neval,sizeof(int),int_compare);
4708 nuniq = 0;
4709 prev = 0;
4710 debug(printf("unique segments in lambda %d to %d:",min1,max2));
4711 for (i = 0; i < neval; i++) {
4712 if (matches[i] != prev) {
4713 debug(printf(" %d",matches[i]));
4714 uniq[nuniq++] = matches[i];
4715 prev = matches[i];
4716 }
4717 }
4718 debug(printf("\n"));
4719
4720 for (i = 0; i < nuniq; i++) {
4721 if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4722 matches[(*nmatches)++] = uniq[i];
4723 debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4724 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4725 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4726 } else {
4727 debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4728 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4729 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4730 }
4731 }
4732
4733 FREE(uniq);
4734 }
4735
4736 /* Convert to universal indices */
4737 matchstart = this->cum_nintervals[divno];
4738 for (i = 0; i < *nmatches; i++) {
4739 matches[i] += matchstart;
4740 }
4741
4742 if (sortp == false) {
4743 return matches;
4744 #if 0
4745 } else if (this->version <= 2) {
4746 sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4747 FREE(matches);
4748 return sorted;
4749 #endif
4750 } else {
4751 sorted = sort_matches_by_position(this,matches,*nmatches);
4752 FREE(matches);
4753 return sorted;
4754 }
4755 }
4756
4757
4758 bool
IIT_exists_with_divno(T this,int divno,Chrpos_T x,Chrpos_T y)4759 IIT_exists_with_divno (T this, int divno, Chrpos_T x, Chrpos_T y) {
4760 int match;
4761 int lambda;
4762 int min1, max1 = 0, min2, max2 = 0;
4763
4764 if (divno < 0) {
4765 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4766 return false;
4767 }
4768 min1 = min2 = this->nintervals[divno] + 1;
4769
4770 debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
4771 fnode_query_aux(&min1,&max1,this,divno,0,x);
4772 fnode_query_aux(&min2,&max2,this,divno,0,y);
4773 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4774
4775 for (lambda = min1; lambda <= max2; lambda++) {
4776 match = this->sigmas[divno][lambda];
4777 if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
4778 return true;
4779 }
4780 match = this->omegas[divno][lambda];
4781 if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
4782 return true;
4783 }
4784 }
4785
4786 return false;
4787 }
4788
4789
4790 bool
IIT_exists_with_divno_signed(T this,int divno,Chrpos_T x,Chrpos_T y,int sign)4791 IIT_exists_with_divno_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int sign) {
4792 int match;
4793 int lambda;
4794 int min1, max1 = 0, min2, max2 = 0;
4795 Interval_T interval;
4796
4797 if (divno < 0) {
4798 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4799 return false;
4800 }
4801 min1 = min2 = this->nintervals[divno] + 1;
4802
4803 debug(printf("Entering IIT_exists_with_divno_signed with divno %d and query %u %u\n",divno,x,y));
4804 fnode_query_aux(&min1,&max1,this,divno,0,x);
4805 fnode_query_aux(&min2,&max2,this,divno,0,y);
4806 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4807
4808 for (lambda = min1; lambda <= max2; lambda++) {
4809 match = this->sigmas[divno][lambda];
4810 interval = &(this->intervals[divno][match - 1]);
4811 if (Interval_low(interval) == x && Interval_high(interval) == y &&
4812 (sign == 0 || Interval_sign(interval) == sign)) {
4813 return true;
4814 }
4815
4816 match = this->omegas[divno][lambda];
4817 interval = &(this->intervals[divno][match - 1]);
4818 if (Interval_low(interval) == x && Interval_high(interval) == y &&
4819 (sign == 0 || Interval_sign(interval) == sign)) {
4820 return true;
4821 }
4822 }
4823
4824 return false;
4825 }
4826
4827
4828 bool
IIT_exists_with_divno_typed_signed(T this,int divno,Chrpos_T x,Chrpos_T y,int type,int sign)4829 IIT_exists_with_divno_typed_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int type, int sign) {
4830 int match;
4831 int lambda;
4832 int min1, max1 = 0, min2, max2 = 0;
4833 Interval_T interval;
4834
4835 if (divno < 0) {
4836 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4837 return false;
4838 }
4839 min1 = min2 = this->nintervals[divno] + 1;
4840
4841 debug(printf("Entering IIT_exists_with_divno_typed_signed with divno %d and query %u %u\n",divno,x,y));
4842 fnode_query_aux(&min1,&max1,this,divno,0,x);
4843 fnode_query_aux(&min2,&max2,this,divno,0,y);
4844 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4845
4846 for (lambda = min1; lambda <= max2; lambda++) {
4847 match = this->sigmas[divno][lambda];
4848 interval = &(this->intervals[divno][match - 1]);
4849 if (Interval_low(interval) == x && Interval_high(interval) == y &&
4850 Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4851 return true;
4852 }
4853
4854 match = this->omegas[divno][lambda];
4855 interval = &(this->intervals[divno][match - 1]);
4856 if (Interval_low(interval) == x && Interval_high(interval) == y &&
4857 Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
4858 return true;
4859 }
4860 }
4861
4862 return false;
4863 }
4864
4865
#if 0
/* Disabled variant of IIT_exists_with_divno_typed_signed.  Instead
   of requiring the interval's endpoints to equal X and Y, it accepts
   any interval that Interval_overlap_p reports as overlapping X..Y,
   subject to the same type and sign conditions. */
bool
IIT_exists_with_divno_typed_signed (T this, int divno, Chrpos_T x, Chrpos_T y, int type, int sign) {
  Interval_T candidate;
  int match, lambda;
  int min1, max1 = 0, min2, max2 = 0;

  if (divno < 0) {
    /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
    return false;
  }
  min1 = min2 = this->nintervals[divno] + 1;

  debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
  fnode_query_aux(&min1,&max1,this,divno,0,x);
  fnode_query_aux(&min2,&max2,this,divno,0,y);
  debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));

  lambda = min1;
  while (lambda <= max2) {
    match = this->sigmas[divno][lambda];
    candidate = &(this->intervals[divno][match - 1]);
    if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
      if (Interval_type(candidate) == type && (sign == 0 || Interval_sign(candidate) == sign)) {
        return true;
      }
    }

    match = this->omegas[divno][lambda];
    candidate = &(this->intervals[divno][match - 1]);
    if (Interval_overlap_p(x,y,this->intervals[divno],match) == true) {
      if (Interval_type(candidate) == type && (sign == 0 || Interval_sign(candidate) == sign)) {
        return true;
      }
    }

    lambda++;
  }

  return false;
}
#endif
4903
4904
4905
4906 int *
IIT_get_with_divno(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,bool sortp)4907 IIT_get_with_divno (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, bool sortp) {
4908 int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4909 int lambda, prev;
4910 int min1, max1 = 0, min2, max2 = 0;
4911
4912 if (divno < 0) {
4913 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4914 *nmatches = 0;
4915 return (int *) NULL;
4916 }
4917 min1 = min2 = this->nintervals[divno] + 1;
4918
4919 debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
4920 fnode_query_aux(&min1,&max1,this,divno,0,x);
4921 fnode_query_aux(&min2,&max2,this,divno,0,y);
4922 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
4923
4924 *nmatches = 0;
4925 if (max2 >= min1) {
4926 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
4927 matches = (int *) CALLOC(neval,sizeof(int));
4928 uniq = (int *) CALLOC(neval,sizeof(int));
4929
4930 i = 0;
4931 for (lambda = min1; lambda <= max2; lambda++) {
4932 matches[i++] = this->sigmas[divno][lambda];
4933 matches[i++] = this->omegas[divno][lambda];
4934 }
4935
4936 /* Eliminate duplicates */
4937 qsort(matches,neval,sizeof(int),int_compare);
4938 nuniq = 0;
4939 prev = 0;
4940 debug(printf("unique segments in lambda %d to %d:",min1,max2));
4941 for (i = 0; i < neval; i++) {
4942 if (matches[i] != prev) {
4943 debug(printf(" %d",matches[i]));
4944 uniq[nuniq++] = matches[i];
4945 prev = matches[i];
4946 }
4947 }
4948 debug(printf("\n"));
4949
4950 for (i = 0; i < nuniq; i++) {
4951 if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
4952 matches[(*nmatches)++] = uniq[i];
4953 debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
4954 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4955 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4956 } else {
4957 debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
4958 Interval_low(&(this->intervals[divno][uniq[i]-1])),
4959 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
4960 }
4961 }
4962
4963 FREE(uniq);
4964 }
4965
4966 /* Convert to universal indices */
4967 matchstart = this->cum_nintervals[divno];
4968 for (i = 0; i < *nmatches; i++) {
4969 matches[i] += matchstart;
4970 }
4971
4972 if (sortp == false) {
4973 return matches;
4974 #if 0
4975 } else if (this->version <= 2) {
4976 sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
4977 FREE(matches);
4978 return sorted;
4979 #endif
4980 } else {
4981 sorted = sort_matches_by_position(this,matches,*nmatches);
4982 FREE(matches);
4983 return sorted;
4984 }
4985 }
4986
4987
4988
4989 int *
IIT_get_signed_with_divno(int * nmatches,T this,int divno,Chrpos_T x,Chrpos_T y,bool sortp,int sign)4990 IIT_get_signed_with_divno (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, bool sortp,
4991 int sign) {
4992 int *sorted, *matches = NULL, matchstart, *uniq, neval, nuniq, i;
4993 int lambda, prev;
4994 int min1, max1 = 0, min2, max2 = 0;
4995 int index;
4996
4997 if (divno < 0) {
4998 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
4999 *nmatches = 0;
5000 return (int *) NULL;
5001 }
5002 min1 = min2 = this->nintervals[divno] + 1;
5003
5004 debug(printf("Entering IIT_get_with_divno with divno %d and query %u %u\n",divno,x,y));
5005 fnode_query_aux(&min1,&max1,this,divno,0,x);
5006 fnode_query_aux(&min2,&max2,this,divno,0,y);
5007 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
5008
5009 *nmatches = 0;
5010 if (max2 >= min1) {
5011 neval = (max2 - min1 + 1) + (max2 - min1 + 1);
5012 matches = (int *) CALLOC(neval,sizeof(int));
5013 uniq = (int *) CALLOC(neval,sizeof(int));
5014
5015 i = 0;
5016 for (lambda = min1; lambda <= max2; lambda++) {
5017 index = this->sigmas[divno][lambda];
5018 if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
5019 matches[i++] = index;
5020 }
5021 index = this->omegas[divno][lambda];
5022 if (sign == 0 || Interval_sign(&(this->intervals[divno][index-1])) == sign) {
5023 matches[i++] = index;
5024 }
5025 }
5026
5027 /* Eliminate duplicates */
5028 qsort(matches,neval,sizeof(int),int_compare);
5029 nuniq = 0;
5030 prev = 0;
5031 debug(printf("unique segments in lambda %d to %d:",min1,max2));
5032 for (i = 0; i < neval; i++) {
5033 if (matches[i] != prev) {
5034 debug(printf(" %d",matches[i]));
5035 uniq[nuniq++] = matches[i];
5036 prev = matches[i];
5037 }
5038 }
5039 debug(printf("\n"));
5040
5041 for (i = 0; i < nuniq; i++) {
5042 if (Interval_overlap_p(x,y,this->intervals[divno],uniq[i]) == true) {
5043 matches[(*nmatches)++] = uniq[i];
5044 debug(printf("Pushing overlapping segment %d (%u..%u)\n",uniq[i],
5045 Interval_low(&(this->intervals[divno][uniq[i]-1])),
5046 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
5047 } else {
5048 debug(printf("Not pushing non-overlapping segment %d (%u..%u)\n",uniq[i],
5049 Interval_low(&(this->intervals[divno][uniq[i]-1])),
5050 Interval_high(&(this->intervals[divno][uniq[i]-1]))));
5051 }
5052 }
5053
5054 FREE(uniq);
5055 }
5056
5057 /* Convert to universal indices */
5058 matchstart = this->cum_nintervals[divno];
5059 for (i = 0; i < *nmatches; i++) {
5060 matches[i] += matchstart;
5061 }
5062
5063 if (sortp == false) {
5064 return matches;
5065 #if 0
5066 } else if (this->version <= 2) {
5067 sorted = sort_matches_by_type(this,matches,*nmatches,/*alphabetizep*/true);
5068 FREE(matches);
5069 return sorted;
5070 #endif
5071 } else {
5072 sorted = sort_matches_by_position(this,matches,*nmatches);
5073 FREE(matches);
5074 return sorted;
5075 }
5076 }
5077
5078
5079 static int
coord_search_low(T this,int divno,Chrpos_T x)5080 coord_search_low (T this, int divno, Chrpos_T x) {
5081 int low, middle, high;
5082 bool foundp = false;
5083 Chrpos_T middlevalue;
5084 int index;
5085
5086 low = 1; /* not 0, because alphas[divno][0] not used */
5087 high = this->nintervals[divno];
5088
5089 debug3(printf("low = %d, high = %d\n",low,high));
5090 while (!foundp && low < high) {
5091 middle = (low+high)/2;
5092 index = this->alphas[divno][middle];
5093 middlevalue = Interval_low(&(this->intervals[divno][index-1]));
5094
5095 debug3(printf(" compare x %u with middlevalue %u (for interval %d)\n",x,middlevalue,this->alphas[divno][middle]-1));
5096 if (x < middlevalue) {
5097 high = middle;
5098 } else if (x > middlevalue) {
5099 low = middle + 1;
5100 } else {
5101 foundp = true;
5102 }
5103 debug3(printf("low = %d, high = %d, middle = %d\n",low,high,middle));
5104 }
5105
5106 if (foundp == true) {
5107 debug3(printf("found\n"));
5108 return middle;
5109 } else {
5110 debug3(printf("not found\n"));
5111 return low;
5112 }
5113 }
5114
5115 static int
coord_search_high(T this,int divno,Chrpos_T x)5116 coord_search_high (T this, int divno, Chrpos_T x) {
5117 int low, middle, high;
5118 bool foundp = false;
5119 Chrpos_T middlevalue;
5120 int index;
5121
5122 low = 1; /* not 0, because betas[divno][0] not used */
5123 high = this->nintervals[divno];
5124
5125 while (!foundp && low < high) {
5126 middle = (low+high)/2;
5127 index = this->betas[divno][middle];
5128 middlevalue = Interval_high(&(this->intervals[divno][index-1]));
5129
5130 if (x < middlevalue) {
5131 high = middle;
5132 } else if (x > middlevalue) {
5133 low = middle + 1;
5134 } else {
5135 foundp = true;
5136 }
5137 }
5138
5139 if (foundp == true) {
5140 return middle;
5141 } else {
5142 return high;
5143 }
5144 }
5145
5146
5147 /* Specialized version of IIT_get_flanking, for 1 right flank */
5148 /* Returns a relative index, requiring use of IIT_interval_for_divno */
5149 int
IIT_get_next(T this,int divno,Chrpos_T y)5150 IIT_get_next (T this, int divno, Chrpos_T y) {
5151 int lambda;
5152 Interval_T interval;
5153
5154 #if 0
5155 for (lambda = 1; lambda <= this->nintervals[divno]; lambda++) {
5156 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5157 printf("lambda %d %d: %u..%u\n",
5158 lambda,this->alphas[divno][lambda],Interval_low(interval),Interval_high(interval));
5159 }
5160 printf("\n");
5161 #endif
5162
5163
5164 /* Look at alphas for right flank */
5165 lambda = coord_search_low(this,divno,y);
5166 debug2(printf("coord_search_low lambda = %d\n",lambda));
5167
5168 while (lambda <= this->nintervals[divno]) {
5169 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5170 debug2(printf("Looking at %u..%u\n",Interval_low(interval),Interval_high(interval)));
5171 if (Interval_low(interval) <= y) {
5172 debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5173 lambda++;
5174 } else {
5175 debug2(printf("Returning %d\n\n",this->alphas[divno][lambda]));
5176 return this->alphas[divno][lambda];
5177 }
5178 }
5179
5180 debug2(printf("Returning -1\n\n"));
5181 return -1;
5182 }
5183
5184
5185 void
IIT_get_flanking(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int sign)5186 IIT_get_flanking (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5187 T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int sign) {
5188 int lambda, matchstart, i;
5189 Interval_T interval;
5190 bool stopp;
5191 int divno;
5192
5193 divno = IIT_divint(this,divstring);
5194
5195 debug2(printf("Entering IIT_get_flanking with divno %d, query %u %u, nflanking = %d, sign %d\n",divno,x,y,nflanking,sign));
5196
5197 if (this->alphas[divno] == NULL) {
5198 #if 0
5199 compute_flanking(this);
5200 #else
5201 fprintf(stderr,"Flanking hits not supported on version %d of iit files. Please use iit_update to update your file\n",
5202 this->version);
5203 exit(9);
5204 #endif
5205 }
5206
5207 /* Look at alphas for right flank */
5208 lambda = coord_search_low(this,divno,y);
5209 debug2(printf("coord_search_low lambda = %d\n",lambda));
5210
5211 *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5212 *nrightflanks = 0;
5213 stopp = false;
5214 while (lambda <= this->nintervals[divno] && stopp == false) {
5215 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5216 if (Interval_low(interval) <= y) {
5217 debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5218 lambda++;
5219 } else if (sign != 0 && Interval_sign(interval) != sign) {
5220 debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5221 lambda++;
5222 } else {
5223 (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5224 debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5225 if (*nrightflanks < nflanking) {
5226 debug2(printf("Advancing because need more\n"));
5227 lambda++;
5228 } else {
5229 stopp = true;
5230 }
5231 }
5232 }
5233
5234 /* Look at betas for left flank */
5235 lambda = coord_search_high(this,divno,x);
5236
5237 *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5238 *nleftflanks = 0;
5239 stopp = false;
5240 while (lambda >= 1 && stopp == false) {
5241 interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5242 if (Interval_high(interval) >= x) {
5243 lambda--;
5244 } else if (sign != 0 && Interval_sign(interval) != sign) {
5245 lambda--;
5246 } else {
5247 (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5248 if (*nleftflanks < nflanking) {
5249 lambda--;
5250 } else {
5251 stopp = true;
5252 }
5253 }
5254 }
5255
5256 /* Convert to universal indices */
5257 matchstart = this->cum_nintervals[divno];
5258 for (i = 0; i < *nrightflanks; i++) {
5259 (*rightflanks)[i] += matchstart;
5260 }
5261 for (i = 0; i < *nleftflanks; i++) {
5262 (*leftflanks)[i] += matchstart;
5263 }
5264
5265 return;
5266 }
5267
5268 void
IIT_get_flanking_with_divno(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,int divno,Chrpos_T x,Chrpos_T y,int nflanking,int sign)5269 IIT_get_flanking_with_divno (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5270 T this, int divno, Chrpos_T x, Chrpos_T y, int nflanking, int sign) {
5271 int lambda, matchstart, i;
5272 Interval_T interval;
5273 bool stopp;
5274
5275 debug2(printf("Entering IIT_get_flanking_with_divno with divno %d, query %u %u, nflanking = %d, sign %d\n",divno,x,y,nflanking,sign));
5276
5277 if (this->alphas[divno] == NULL) {
5278 #if 0
5279 compute_flanking(this);
5280 #else
5281 fprintf(stderr,"Flanking hits not supported on version %d of iit files. Please use iit_update to update your file\n",
5282 this->version);
5283 exit(9);
5284 #endif
5285 }
5286
5287 /* Look at alphas for right flank */
5288 lambda = coord_search_low(this,divno,y);
5289 debug2(printf("coord_search_low lambda = %d\n",lambda));
5290
5291 *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5292 *nrightflanks = 0;
5293 stopp = false;
5294 while (lambda <= this->nintervals[divno] && stopp == false) {
5295 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5296 if (Interval_low(interval) <= y) {
5297 debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5298 lambda++;
5299 } else if (sign != 0 && Interval_sign(interval) != sign) {
5300 debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5301 lambda++;
5302 } else {
5303 (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5304 debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5305 if (*nrightflanks < nflanking) {
5306 debug2(printf("Advancing because need more\n"));
5307 lambda++;
5308 } else {
5309 stopp = true;
5310 }
5311 }
5312 }
5313
5314 /* Look at betas for left flank */
5315 lambda = coord_search_high(this,divno,x);
5316
5317 *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5318 *nleftflanks = 0;
5319 stopp = false;
5320 while (lambda >= 1 && stopp == false) {
5321 interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5322 if (Interval_high(interval) >= x) {
5323 lambda--;
5324 } else if (sign != 0 && Interval_sign(interval) != sign) {
5325 lambda--;
5326 } else {
5327 (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5328 if (*nleftflanks < nflanking) {
5329 lambda--;
5330 } else {
5331 stopp = true;
5332 }
5333 }
5334 }
5335
5336 /* Convert to universal indices */
5337 matchstart = this->cum_nintervals[divno];
5338 for (i = 0; i < *nrightflanks; i++) {
5339 (*rightflanks)[i] += matchstart;
5340 }
5341 for (i = 0; i < *nleftflanks; i++) {
5342 (*leftflanks)[i] += matchstart;
5343 }
5344
5345 return;
5346 }
5347
5348 void
IIT_get_flanking_typed(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int type,int sign)5349 IIT_get_flanking_typed (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5350 T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int type,
5351 int sign) {
5352 int lambda, matchstart, i;
5353 Interval_T interval;
5354 bool stopp;
5355 int divno;
5356
5357 divno = IIT_divint(this,divstring);
5358
5359 debug2(printf("Entering IIT_get_flanking_typed with query %u %u => divno is %d\n",x,y,divno));
5360
5361 if (this->alphas[divno] == NULL) {
5362 #if 0
5363 IIT_compute_flanking(this);
5364 #else
5365 fprintf(stderr,"Flanking hits not supported on version %d of iit files. Please use iit_update to update your file\n",
5366 this->version);
5367 exit(9);
5368 #endif
5369 }
5370
5371 /* Look at alphas for right flank */
5372 lambda = coord_search_low(this,divno,y);
5373 debug2(printf("coord_search_low yields lambda %d\n",lambda));
5374
5375 *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5376 *nrightflanks = 0;
5377 stopp = false;
5378 while (lambda <= this->nintervals[divno] && stopp == false) {
5379 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5380 if (sign != 0 && Interval_sign(interval) != sign) {
5381 debug2(printf("Advancing because sign != 0 && interval_sign %d != %d\n",Interval_sign(interval),sign));
5382 lambda++;
5383 } else if (Interval_low(interval) <= y) {
5384 debug2(printf("Advancing because interval_low %u <= %u\n",Interval_low(interval),y));
5385 lambda++;
5386 } else if (Interval_type(interval) != type) {
5387 debug2(printf("Advancing because interval_type %d != %d\n",Interval_type(interval),type));
5388 lambda++;
5389 } else {
5390 (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5391 debug2(printf("Storing right flank %d\n",this->alphas[divno][lambda]));
5392 if (*nrightflanks < nflanking) {
5393 debug2(printf("Advancing because need more\n"));
5394 lambda++;
5395 } else {
5396 stopp = true;
5397 }
5398 }
5399 }
5400
5401 /* Look at betas for left flank */
5402 lambda = coord_search_high(this,divno,x);
5403
5404 *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5405 *nleftflanks = 0;
5406 stopp = false;
5407 while (lambda >= 1 && stopp == false) {
5408 interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5409 if (sign != 0 && Interval_sign(interval) != sign) {
5410 lambda--;
5411 } else if (Interval_high(interval) >= x) {
5412 lambda--;
5413 } else if (Interval_type(interval) != type) {
5414 lambda--;
5415 } else {
5416 (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5417 if (*nleftflanks < nflanking) {
5418 lambda--;
5419 } else {
5420 stopp = true;
5421 }
5422 }
5423 }
5424
5425 /* Convert to universal indices */
5426 matchstart = this->cum_nintervals[divno];
5427 for (i = 0; i < *nrightflanks; i++) {
5428 (*rightflanks)[i] += matchstart;
5429 }
5430 for (i = 0; i < *nleftflanks; i++) {
5431 (*leftflanks)[i] += matchstart;
5432 }
5433
5434 return;
5435 }
5436
5437 void
IIT_get_flanking_multiple_typed(int ** leftflanks,int * nleftflanks,int ** rightflanks,int * nrightflanks,T this,char * divstring,Chrpos_T x,Chrpos_T y,int nflanking,int * types,int ntypes)5438 IIT_get_flanking_multiple_typed (int **leftflanks, int *nleftflanks, int **rightflanks, int *nrightflanks,
5439 T this, char *divstring, Chrpos_T x, Chrpos_T y, int nflanking, int *types, int ntypes) {
5440 int k, i;
5441 int lambda, matchstart;
5442 Interval_T interval;
5443 bool stopp;
5444 int divno;
5445
5446 divno = IIT_divint(this,divstring);
5447
5448 debug(printf("Entering IIT_get_flanking_multiple_typed with query %u %u\n",x,y));
5449
5450 if (this->alphas[divno] == NULL) {
5451 #if 0
5452 IIT_compute_flanking(this);
5453 #else
5454 fprintf(stderr,"Flanking hits not supported on version %d of iit files. Please use iit_update to update your file\n",
5455 this->version);
5456 exit(9);
5457 #endif
5458 }
5459
5460 /* Look at alphas for right flank */
5461 lambda = coord_search_low(this,divno,y);
5462
5463 *rightflanks = (int *) CALLOC(nflanking,sizeof(int));
5464 *nrightflanks = 0;
5465 stopp = false;
5466 while (lambda <= this->nintervals[divno] && stopp == false) {
5467 interval = &(this->intervals[divno][this->alphas[divno][lambda]-1]);
5468 if (Interval_low(interval) <= y) {
5469 lambda++;
5470 } else {
5471 k = 0;
5472 while (k < ntypes && Interval_type(interval) != types[k]) {
5473 k++;
5474 }
5475 if (k >= ntypes) {
5476 lambda++;
5477 } else {
5478 (*rightflanks)[(*nrightflanks)++] = this->alphas[divno][lambda];
5479 if (*nrightflanks < nflanking) {
5480 lambda++;
5481 } else {
5482 stopp = true;
5483 }
5484 }
5485 }
5486 }
5487
5488
5489 /* Look at betas for left flank */
5490 lambda = coord_search_high(this,divno,x);
5491
5492 *leftflanks = (int *) CALLOC(nflanking,sizeof(int));
5493 *nleftflanks = 0;
5494 stopp = false;
5495 while (lambda >= 1 && stopp == false) {
5496 interval = &(this->intervals[divno][this->betas[divno][lambda]-1]);
5497 if (Interval_high(interval) >= x) {
5498 lambda--;
5499 } else {
5500 k = 0;
5501 while (k < ntypes && Interval_type(interval) != types[k]) {
5502 k++;
5503 }
5504 if (k >= ntypes) {
5505 lambda--;
5506 } else {
5507 (*leftflanks)[(*nleftflanks)++] = this->betas[divno][lambda];
5508 if (*nleftflanks < nflanking) {
5509 lambda--;
5510 } else {
5511 stopp = true;
5512 }
5513 }
5514 }
5515 }
5516
5517 /* Convert to universal indices */
5518 matchstart = this->cum_nintervals[divno];
5519 for (i = 0; i < *nrightflanks; i++) {
5520 (*rightflanks)[i] += matchstart;
5521 }
5522 for (i = 0; i < *nleftflanks; i++) {
5523 (*leftflanks)[i] += matchstart;
5524 }
5525
5526 return;
5527 }
5528
5529
5530 static const Except_T iit_error = { "IIT problem" };
5531
5532 int
IIT_get_one(T this,char * divstring,Chrpos_T x,Chrpos_T y)5533 IIT_get_one (T this, char *divstring, Chrpos_T x, Chrpos_T y) {
5534 int lambda;
5535 int min1, max1 = 0, min2, max2 = 0;
5536 int divno;
5537 bool stopp;
5538 Interval_T interval;
5539
5540 divno = IIT_divint(this,divstring);
5541 min1 = min2 = this->nintervals[divno] + 1;
5542
5543 debug(printf("Entering IIT_get_one with query %u %u\n",x,y));
5544 fnode_query_aux(&min1,&max1,this,divno,0,x);
5545 fnode_query_aux(&min2,&max2,this,divno,0,y);
5546 debug(printf("min1=%d max1=%d min2=%d max2=%d\n",min1,max1,min2,max2));
5547
5548 if (max2 >= min1) {
5549 for (lambda = min1; lambda <= max2; lambda++) {
5550 if (Interval_overlap_p(x,y,this->intervals[divno],this->sigmas[divno][lambda]) == true) {
5551 return this->sigmas[divno][lambda];
5552 }
5553 }
5554 for (lambda = min1; lambda <= max2; lambda++) {
5555 if (Interval_overlap_p(x,y,this->intervals[divno],this->omegas[divno][lambda]) == true) {
5556 return this->omegas[divno][lambda];
5557 }
5558 }
5559 }
5560
5561 /* fprintf(stderr,"Expected one match for %u--%u, but got none\n",x,y); */
5562 /* If we miss (e.g., for circular chromosome), then report the chromosome below */
5563 /* Look at betas or omegas for left flank */
5564 lambda = min1 - 1;
5565 stopp = false;
5566 while (lambda >= 1 && stopp == false) {
5567 interval = &(this->intervals[divno][this->omegas[divno][lambda]-1]);
5568 if (Interval_high(interval) >= x) {
5569 lambda--;
5570 } else {
5571 return this->omegas[divno][lambda];
5572 }
5573 }
5574
5575 return this->omegas[divno][/*lambda*/1];
5576 }
5577
5578 /* Generally called where intervals don't overlap, like chromosomes,
5579 and where x == y. */
5580 /*
5581 int
5582 IIT_get_one_safe (T this, Chrpos_T x, Chrpos_T y) {
5583 int index;
5584 int *matches, nmatches;
5585
5586 matches = IIT_get(&nmatches,this,x,y,sortp);
5587 if (nmatches != 1) {
5588 fprintf(stderr,"Expected one match for %u--%u, but got %d\n",
5589 x,y,nmatches);
5590 abort();
5591 }
5592 index = matches[0];
5593 FREE(matches);
5594 return index;
5595 }
5596 */
5597
5598 int *
IIT_get_typed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type,bool sortp)5599 IIT_get_typed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int type, bool sortp) {
5600 int *sorted;
5601 int index;
5602 /* int divno; */
5603 int *typematches = NULL, *matches, nmatches, i, j;
5604 Interval_T interval;
5605
5606 *ntypematches = 0;
5607 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5608 for (i = 0; i < nmatches; i++) {
5609 index = matches[i];
5610 interval = &(this->intervals[0][index-1]);
5611 if (Interval_type(interval) == type) {
5612 (*ntypematches)++;
5613 }
5614 }
5615
5616 if (*ntypematches > 0) {
5617 typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5618 j = 0;
5619 for (i = 0; i < nmatches; i++) {
5620 index = matches[i];
5621 interval = &(this->intervals[0][index-1]);
5622 if (Interval_type(interval) == type) {
5623 typematches[j++] = index;
5624 }
5625 }
5626 }
5627
5628 if (matches != NULL) {
5629 FREE(matches);
5630 }
5631
5632 if (sortp == false) {
5633 return typematches;
5634 #if 0
5635 } else if (this->version <= 2) {
5636 sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5637 FREE(typematches);
5638 return sorted;
5639 #endif
5640 } else {
5641 /* divno = IIT_divint(this,divstring); */
5642 sorted = sort_matches_by_position(this,typematches,*ntypematches);
5643 FREE(typematches);
5644 return sorted;
5645 }
5646 }
5647
5648 int *
IIT_get_typed_with_divno(int * ntypematches,T this,int divno,Chrpos_T x,Chrpos_T y,int type,bool sortp)5649 IIT_get_typed_with_divno (int *ntypematches, T this, int divno, Chrpos_T x, Chrpos_T y,
5650 int type, bool sortp) {
5651 int *sorted;
5652 int index;
5653 int *typematches = NULL, *matches, nmatches, i, j;
5654 Interval_T interval;
5655
5656 if (divno < 0) {
5657 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
5658 *ntypematches = 0;
5659 return (int *) NULL;
5660 }
5661
5662 *ntypematches = 0;
5663 matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
5664 for (i = 0; i < nmatches; i++) {
5665 index = matches[i];
5666 interval = &(this->intervals[0][index-1]);
5667 if (Interval_type(interval) == type) {
5668 (*ntypematches)++;
5669 }
5670 }
5671
5672 if (*ntypematches > 0) {
5673 typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5674 j = 0;
5675 for (i = 0; i < nmatches; i++) {
5676 index = matches[i];
5677 interval = &(this->intervals[0][index-1]);
5678 if (Interval_type(interval) == type) {
5679 typematches[j++] = index;
5680 }
5681 }
5682 }
5683
5684 if (matches != NULL) {
5685 FREE(matches);
5686 }
5687
5688 if (sortp == false) {
5689 return typematches;
5690 #if 0
5691 } else if (this->version <= 2) {
5692 sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5693 FREE(typematches);
5694 return sorted;
5695 #endif
5696 } else {
5697 sorted = sort_matches_by_position(this,typematches,*ntypematches);
5698 FREE(typematches);
5699 return sorted;
5700 }
5701 }
5702
5703
5704 int *
IIT_get_typed_signed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type,int sign,bool sortp)5705 IIT_get_typed_signed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y,
5706 int type, int sign, bool sortp) {
5707 int *sorted;
5708 int index;
5709 /* int divno; */
5710 int *typematches = NULL, *matches, nmatches, i, j;
5711 Interval_T interval;
5712
5713 *ntypematches = 0;
5714 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5715 for (i = 0; i < nmatches; i++) {
5716 index = matches[i];
5717 interval = &(this->intervals[0][index-1]);
5718 if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5719 (*ntypematches)++;
5720 }
5721 }
5722
5723 if (*ntypematches > 0) {
5724 typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5725 j = 0;
5726 for (i = 0; i < nmatches; i++) {
5727 index = matches[i];
5728 interval = &(this->intervals[0][index-1]);
5729 if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5730 typematches[j++] = index;
5731 }
5732 }
5733 }
5734
5735 if (matches != NULL) {
5736 FREE(matches);
5737 }
5738
5739 if (sortp == false) {
5740 return typematches;
5741 #if 0
5742 } else if (this->version <= 2) {
5743 sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5744 FREE(typematches);
5745 return sorted;
5746 #endif
5747 } else {
5748 /* divno = IIT_divint(this,divstring); */
5749 sorted = sort_matches_by_position(this,typematches,*ntypematches);
5750 FREE(typematches);
5751 return sorted;
5752 }
5753 }
5754
5755
5756 int *
IIT_get_typed_signed_with_divno(int * ntypematches,T this,int divno,Chrpos_T x,Chrpos_T y,int type,int sign,bool sortp)5757 IIT_get_typed_signed_with_divno (int *ntypematches, T this, int divno, Chrpos_T x, Chrpos_T y,
5758 int type, int sign, bool sortp) {
5759 int *sorted;
5760 int index;
5761 int *typematches = NULL, *matches, nmatches, i, j;
5762 Interval_T interval;
5763
5764 if (divno < 0) {
5765 /* fprintf(stderr,"No div %s found in iit file\n",divstring); */
5766 *ntypematches = 0;
5767 return (int *) NULL;
5768 }
5769
5770 *ntypematches = 0;
5771 matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
5772 for (i = 0; i < nmatches; i++) {
5773 index = matches[i];
5774 interval = &(this->intervals[0][index-1]);
5775 if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5776 (*ntypematches)++;
5777 }
5778 }
5779
5780 if (*ntypematches > 0) {
5781 typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5782 j = 0;
5783 for (i = 0; i < nmatches; i++) {
5784 index = matches[i];
5785 interval = &(this->intervals[0][index-1]);
5786 if (Interval_type(interval) == type && (sign == 0 || Interval_sign(interval) == sign)) {
5787 typematches[j++] = index;
5788 }
5789 }
5790 }
5791
5792 if (matches != NULL) {
5793 FREE(matches);
5794 }
5795
5796 if (sortp == false) {
5797 return typematches;
5798 #if 0
5799 } else if (this->version <= 2) {
5800 sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/false);
5801 FREE(typematches);
5802 return sorted;
5803 #endif
5804 } else {
5805 sorted = sort_matches_by_position(this,typematches,*ntypematches);
5806 FREE(typematches);
5807 return sorted;
5808 }
5809 }
5810
5811
5812 int *
IIT_get_multiple_typed(int * ntypematches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int * types,int ntypes,bool sortp)5813 IIT_get_multiple_typed (int *ntypematches, T this, char *divstring, Chrpos_T x, Chrpos_T y,
5814 int *types, int ntypes, bool sortp) {
5815 int *sorted;
5816 int index;
5817 /* int divno; */
5818 int *typematches = NULL, *matches, nmatches, i, j, k;
5819 Interval_T interval;
5820
5821 *ntypematches = 0;
5822 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5823 for (i = 0; i < nmatches; i++) {
5824 index = matches[i];
5825 interval = &(this->intervals[0][index-1]);
5826 k = 0;
5827 while (k < ntypes && Interval_type(interval) != types[k]) {
5828 k++;
5829 }
5830 if (k < ntypes) {
5831 (*ntypematches)++;
5832 }
5833 }
5834
5835 if (*ntypematches > 0) {
5836 typematches = (int *) CALLOC(*ntypematches,sizeof(int));
5837 j = 0;
5838 for (i = 0; i < nmatches; i++) {
5839 index = matches[i];
5840 interval = &(this->intervals[0][index-1]);
5841 k = 0;
5842 while (k < ntypes && Interval_type(interval) != types[k]) {
5843 k++;
5844 }
5845 if (k < ntypes) {
5846 typematches[j++] = index;
5847 }
5848 }
5849 }
5850
5851 if (matches != NULL) {
5852 FREE(matches);
5853 }
5854
5855 if (sortp == false || this->version >= 3) {
5856 return typematches;
5857 #if 0
5858 } else if (this->version <= 2) {
5859 sorted = sort_matches_by_type(this,typematches,*ntypematches,/*alphabetizep*/true);
5860 FREE(typematches);
5861 return sorted;
5862 #endif
5863 } else {
5864 /* divno = IIT_divint(this,divstring); */
5865 sorted = sort_matches_by_position(this,typematches,*ntypematches);
5866 FREE(typematches);
5867 return sorted;
5868 }
5869 }
5870
5871 int
IIT_get_exact(T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5872 IIT_get_exact (T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5873 int index;
5874 int *matches, nmatches, i;
5875 Interval_T interval;
5876
5877 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5878 for (i = 0; i < nmatches; i++) {
5879 index = matches[i];
5880 interval = &(this->intervals[0][index-1]);
5881 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5882 Interval_type(interval) == type) {
5883 FREE(matches);
5884 return index;
5885 }
5886 }
5887
5888 FREE(matches);
5889 return -1;
5890 }
5891
5892 bool
IIT_exact_p(T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5893 IIT_exact_p (T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5894 int index;
5895 int *matches, nmatches, i;
5896 Interval_T interval;
5897
5898 if (x == y) {
5899 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5900 for (i = 0; i < nmatches; i++) {
5901 index = matches[i];
5902 interval = &(this->intervals[0][index-1]);
5903 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5904 Interval_sign(interval) == 0 && Interval_type(interval) == type) {
5905 FREE(matches);
5906 return true;
5907 }
5908 }
5909
5910 } else if (x < y) {
5911 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5912 for (i = 0; i < nmatches; i++) {
5913 index = matches[i];
5914 interval = &(this->intervals[0][index-1]);
5915 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5916 Interval_sign(interval) > 0 && Interval_type(interval) == type) {
5917 FREE(matches);
5918 return true;
5919 }
5920 }
5921
5922 } else {
5923 matches = IIT_get(&nmatches,this,divstring,y,x,/*sortp*/false);
5924 for (i = 0; i < nmatches; i++) {
5925 index = matches[i];
5926 interval = &(this->intervals[0][index-1]);
5927 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5928 Interval_sign(interval) < 0 && Interval_type(interval) == type) {
5929 FREE(matches);
5930 return true;
5931 }
5932 }
5933 }
5934
5935 FREE(matches);
5936 return false;
5937 }
5938
5939
5940 int *
IIT_get_exact_multiple(int * nexactmatches,T this,char * divstring,Chrpos_T x,Chrpos_T y,int type)5941 IIT_get_exact_multiple (int *nexactmatches, T this, char *divstring, Chrpos_T x, Chrpos_T y, int type) {
5942 int *exactmatches;
5943 int index;
5944 int *matches, nmatches, i, j;
5945 Interval_T interval;
5946
5947 *nexactmatches = 0;
5948 matches = IIT_get(&nmatches,this,divstring,x,y,/*sortp*/false);
5949 for (i = 0; i < nmatches; i++) {
5950 index = matches[i];
5951 interval = &(this->intervals[0][index-1]);
5952 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5953 Interval_type(interval) == type) {
5954 (*nexactmatches)++;
5955 }
5956 }
5957
5958 if (*nexactmatches == 0) {
5959 FREE(matches);
5960 return (int *) NULL;
5961 } else {
5962 exactmatches = (int *) CALLOC(*nexactmatches,sizeof(int));
5963 j = 0;
5964 for (i = 0; i < nmatches; i++) {
5965 index = matches[i];
5966 interval = &(this->intervals[0][index-1]);
5967 if (Interval_low(interval) == x && Interval_high(interval) == y &&
5968 Interval_type(interval) == type) {
5969 exactmatches[j++] = index;
5970 }
5971 }
5972 FREE(matches);
5973 return exactmatches;
5974 }
5975 }
5976
5977
#if 0
/* Previously called by print_splicesite_labels in pair.c */
/* Compiled out.  Variant of IIT_get_exact_multiple that takes a divno
   and queries through IIT_get_with_divno instead of taking a divstring.
   Stores the number of exact matches in *nexactmatches and returns
   either NULL (no exact match) or a CALLOC'ed array of the matching
   indices. */
int *
IIT_get_exact_multiple_with_divno (int *nexactmatches, T this, int divno, Chrpos_T x, Chrpos_T y, int type) {
  int *exactmatches;
  int index;
  int *matches, nmatches, i, j;
  Interval_T interval;

  /* First pass: count intervals with identical endpoints and type */
  *nexactmatches = 0;
  matches = IIT_get_with_divno(&nmatches,this,divno,x,y,/*sortp*/false);
  for (i = 0; i < nmatches; i++) {
    index = matches[i];
    interval = &(this->intervals[0][index-1]);
    if (Interval_low(interval) == x && Interval_high(interval) == y &&
        Interval_type(interval) == type) {
      (*nexactmatches)++;
    }
  }

  if (*nexactmatches == 0) {
    FREE(matches);
    return (int *) NULL;
  } else {
    /* Second pass: copy the matching indices into an exact-size array */
    exactmatches = (int *) CALLOC(*nexactmatches,sizeof(int));
    j = 0;
    for (i = 0; i < nmatches; i++) {
      index = matches[i];
      interval = &(this->intervals[0][index-1]);
      if (Interval_low(interval) == x && Interval_high(interval) == y &&
          Interval_type(interval) == type) {
        exactmatches[j++] = index;
      }
    }
    FREE(matches);
    return exactmatches;
  }
}
#endif
6017
6018
6019 /************************************************************************/
6020
6021 /* Modified from IIT_find */
6022 int *
IIT_get_values_between(int * nmatches,T this,double lowval,double highval)6023 IIT_get_values_between (int *nmatches, T this, double lowval, double highval) {
6024 int *matches = NULL, j;
6025 double val;
6026 int start, end;
6027 int low, middle, high, recno;
6028 bool foundp;
6029
6030 debug(printf("Entering IIT_get_values_between with %f to %f\n",lowval,highval));
6031
6032 /* Find start */
6033 foundp = false;
6034 low = 0;
6035 high = this->total_nintervals;
6036
6037 #ifdef DEBUG
6038 #ifndef WORDS_BIGENDIAN
6039 for (middle = low; middle < high; middle++) {
6040 printf("%d:%d:%f\n",middle,this->valueorder[middle],
6041 this->values[this->valueorder[middle]]);
6042 }
6043 printf("\n");
6044 #endif
6045 #endif
6046
6047 while (!foundp && low < high) {
6048 middle = (low+high)/2;
6049
6050 #ifdef DEBUG
6051 #ifndef WORDS_BIGENDIAN
6052 printf("low %d middle %d:%d:%f high %d\n",
6053 low,middle,this->valueorder[middle],
6054 this->values[this->valueorder[middle]],high);
6055 #endif
6056 #endif
6057
6058 #ifdef WORDS_BIGENDIAN
6059 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6060 #else
6061 val = this->values[this->valueorder[middle]];
6062 #endif
6063
6064 if (val > lowval) {
6065 high = middle;
6066 debug(printf("Decreasing high to %d\n",high));
6067 } else if (val < lowval) {
6068 low = middle + 1;
6069 debug(printf("Increasing low to %d\n",low));
6070 } else {
6071 foundp = true;
6072 }
6073 }
6074
6075 if (foundp == true) {
6076 start = middle;
6077 debug(printf("start is middle = %d\n\n",start));
6078
6079 #ifdef WORDS_BIGENDIAN
6080 while (start-1 >= 0 &&
6081 lowval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start-1])])) {
6082 start--;
6083 }
6084 #else
6085 while (start-1 >= 0 &&
6086 lowval == this->values[this->valueorder[start-1]]) {
6087 start--;
6088 debug(printf("Regressing start to %d\n",start));
6089 }
6090 #endif
6091
6092 } else if ((start = low) >= this->total_nintervals) {
6093 *nmatches = 0;
6094 return (int *) NULL;
6095
6096 } else {
6097 debug(printf("start is low = %d\n\n",start));
6098 #ifdef WORDS_BIGENDIAN
6099 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start])]);
6100 #else
6101 val = this->values[this->valueorder[start]];
6102 #endif
6103 debug(printf("Final value for low bound = %f\n",val));
6104 if (val < lowval) {
6105 *nmatches = 0;
6106 return (int *) NULL;
6107 }
6108 }
6109
6110
6111 /* Find end */
6112 foundp = false;
6113 low = 0;
6114 high = this->total_nintervals;
6115 while (!foundp && low < high) {
6116 middle = (low+high)/2;
6117
6118 #ifdef DEBUG
6119 #ifndef WORDS_BIGENDIAN
6120 printf("low %d middle %d:%d:%f high %d\n",
6121 low,middle,this->valueorder[middle],
6122 this->values[this->valueorder[middle]],high);
6123 #endif
6124 #endif
6125
6126 #ifdef WORDS_BIGENDIAN
6127 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6128 #else
6129 val = this->values[this->valueorder[middle]];
6130 #endif
6131
6132 if (val > highval) {
6133 high = middle;
6134 debug(printf("Decreasing high to %d\n",high));
6135 } else if (val < highval) {
6136 low = middle + 1;
6137 debug(printf("Increasing low to %d\n",low));
6138 } else {
6139 foundp = true;
6140 }
6141 }
6142
6143 if (foundp == true) {
6144 end = middle;
6145 debug(printf("end is middle = %d\n\n",end));
6146
6147 #ifdef WORDS_BIGENDIAN
6148 while (end+1 < this->total_nintervals &&
6149 highval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end+1])])) {
6150 end++;
6151 }
6152 #else
6153 while (end+1 < this->total_nintervals &&
6154 highval == this->values[this->valueorder[end+1]]) {
6155 end++;
6156 debug(printf("Advancing end to %d\n",end));
6157 }
6158 #endif
6159
6160 } else if ((end = high - 1) < 0) {
6161 *nmatches = 0;
6162 return (int *) NULL;
6163
6164 } else {
6165 debug(printf("end is high - 1 = %d\n\n",end));
6166
6167 #ifdef WORDS_BIGENDIAN
6168 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end])]);
6169 #else
6170 val = this->values[this->valueorder[end]];
6171 #endif
6172 debug(printf("Final value for high bound = %f\n",val));
6173
6174 if (val > highval) {
6175 *nmatches = 0;
6176 return (int *) NULL;
6177 }
6178 }
6179
6180 *nmatches = end - start + 1;
6181 if (*nmatches <= 0) {
6182 *nmatches = 0;
6183 return (int *) NULL;
6184 } else {
6185 matches = (int *) CALLOC(*nmatches,sizeof(int));
6186 j = 0;
6187 for (recno = start; recno <= end; recno++) {
6188 #ifdef WORDS_BIGENDIAN
6189 #ifdef DEBUG
6190 printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6191 #endif
6192 matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6193
6194 #else
6195 #ifdef DEBUG
6196 printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6197 #endif
6198 matches[j++] = this->valueorder[recno]+1;
6199 #endif
6200 }
6201
6202 return matches;
6203 }
6204 }
6205
6206
6207 int *
IIT_get_values_below(int * nmatches,T this,double highval)6208 IIT_get_values_below (int *nmatches, T this, double highval) {
6209 int *matches = NULL, j;
6210 double val;
6211 int start = 0, end;
6212 int low, middle, high, recno;
6213 bool foundp;
6214
6215 debug(printf("Entering IIT_get_values_below with %f\n",highval));
6216
6217 /* Find end */
6218 foundp = false;
6219 low = 0;
6220 high = this->total_nintervals;
6221 while (!foundp && low < high) {
6222 middle = (low+high)/2;
6223
6224 #ifdef DEBUG
6225 #ifndef WORDS_BIGENDIAN
6226 printf("low %d middle %d:%d:%f high %d\n",
6227 low,middle,this->valueorder[middle],
6228 this->values[this->valueorder[middle]],high);
6229 #endif
6230 #endif
6231
6232 #ifdef WORDS_BIGENDIAN
6233 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6234 #else
6235 val = this->values[this->valueorder[middle]];
6236 #endif
6237
6238 if (val > highval) {
6239 high = middle;
6240 debug(printf("Decreasing high to %d\n",high));
6241 } else if (val < highval) {
6242 low = middle + 1;
6243 debug(printf("Increasing low to %d\n",low));
6244 } else {
6245 foundp = true;
6246 }
6247 }
6248
6249 if (foundp == true) {
6250 end = middle;
6251 debug(printf("end is middle = %d\n\n",end));
6252
6253 #ifdef WORDS_BIGENDIAN
6254 while (end+1 < this->total_nintervals &&
6255 highval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end+1])])) {
6256 end++;
6257 }
6258 #else
6259 while (end+1 < this->total_nintervals &&
6260 highval == this->values[this->valueorder[end+1]]) {
6261 end++;
6262 debug(printf("Advancing end to %d\n",end));
6263 }
6264 #endif
6265
6266 } else if ((end = high - 1) < 0) {
6267 *nmatches = 0;
6268 return (int *) NULL;
6269
6270 } else {
6271 debug(printf("end is high - 1 = %d\n\n",end));
6272
6273 #ifdef WORDS_BIGENDIAN
6274 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[end])]);
6275 #else
6276 val = this->values[this->valueorder[end]];
6277 #endif
6278 debug(printf("Final value for high bound = %f\n",val));
6279
6280 if (val > highval) {
6281 *nmatches = 0;
6282 return (int *) NULL;
6283 }
6284 }
6285
6286
6287 *nmatches = end - start + 1;
6288 if (*nmatches <= 0) {
6289 *matches = 0;
6290 return (int *) NULL;
6291 } else {
6292 matches = (int *) CALLOC(*nmatches,sizeof(int));
6293 j = 0;
6294 for (recno = start; recno <= end; recno++) {
6295 #ifdef WORDS_BIGENDIAN
6296 #ifdef DEBUG
6297 printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6298 #endif
6299 matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6300
6301 #else
6302 #ifdef DEBUG
6303 printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6304 #endif
6305 matches[j++] = this->valueorder[recno]+1;
6306 #endif
6307 }
6308
6309 return matches;
6310 }
6311 }
6312
6313
6314 int *
IIT_get_values_above(int * nmatches,T this,double lowval)6315 IIT_get_values_above (int *nmatches, T this, double lowval) {
6316 int *matches = NULL, j;
6317 double val;
6318 int start, end = this->total_nintervals - 1;
6319 int low, middle, high, recno;
6320 bool foundp;
6321
6322 debug(printf("Entering IIT_get_values_above with %f\n",lowval));
6323
6324 /* Find start */
6325 foundp = false;
6326 low = 0;
6327 high = this->total_nintervals;
6328
6329 while (!foundp && low < high) {
6330 middle = (low+high)/2;
6331
6332 #ifdef DEBUG
6333 #ifndef WORDS_BIGENDIAN
6334 printf("low %d middle %d:%d:%f high %d\n",
6335 low,middle,this->valueorder[middle],
6336 this->values[this->valueorder[middle]],high);
6337 #endif
6338 #endif
6339
6340 #ifdef WORDS_BIGENDIAN
6341 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[middle])]);
6342 #else
6343 val = this->values[this->valueorder[middle]];
6344 #endif
6345
6346 if (val > lowval) {
6347 high = middle;
6348 debug(printf("Decreasing high to %d\n",high));
6349 } else if (val < lowval) {
6350 low = middle + 1;
6351 debug(printf("Increasing low to %d\n",low));
6352 } else {
6353 foundp = true;
6354 }
6355 }
6356
6357 if (foundp == true) {
6358 start = middle;
6359 debug(printf("start is middle = %d\n\n",start));
6360
6361 #ifdef WORDS_BIGENDIAN
6362 while (start-1 >= 0 &&
6363 lowval == Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start-1])])) {
6364 start--;
6365 }
6366 #else
6367 while (start-1 >= 0 &&
6368 lowval == this->values[this->valueorder[start-1]]) {
6369 start--;
6370 debug(printf("Regressing start to %d\n",start));
6371 }
6372 #endif
6373
6374 } else if ((start = low) >= this->total_nintervals) {
6375 *nmatches = 0;
6376 return (int *) NULL;
6377
6378 } else {
6379 debug(printf("start is low = %d\n\n",start));
6380 #ifdef WORDS_BIGENDIAN
6381 val = Bigendian_convert_double(this->values[Bigendian_convert_int(this->valueorder[start])]);
6382 #else
6383 val = this->values[this->valueorder[start]];
6384 #endif
6385 debug(printf("Final value for low bound = %f\n",val));
6386 if (val < lowval) {
6387 *nmatches = 0;
6388 return (int *) NULL;
6389 }
6390 }
6391
6392
6393 *nmatches = end - start + 1;
6394 if (*nmatches <= 0) {
6395 *matches = 0;
6396 return (int *) NULL;
6397 } else {
6398 matches = (int *) CALLOC(*nmatches,sizeof(int));
6399 j = 0;
6400 for (recno = start; recno <= end; recno++) {
6401 #ifdef WORDS_BIGENDIAN
6402 #ifdef DEBUG
6403 printf("Pushing %d:%d\n",recno,Bigendian_convert_int(this->valueorder[recno]));
6404 #endif
6405 matches[j++] = Bigendian_convert_int(this->valueorder[recno])+1;
6406
6407 #else
6408 #ifdef DEBUG
6409 printf("Pushing %d:%d\n",recno,this->valueorder[recno]);
6410 #endif
6411 matches[j++] = this->valueorder[recno]+1;
6412 #endif
6413 }
6414
6415 return matches;
6416 }
6417 }
6418
6419
6420
6421 /************************************************************************/
6422
#if 0
/* Need to work on */
/* Compiled out.  Retrieves intervals from an IIT where type > 0.  Used
   by gmapindex to construct altstrain_iit.  Here, the iit is a
   contig_iit.  Returns a list of interval copies; *labellist receives
   the corresponding labels and *seglength_list the segment lengths
   parsed from the annotations. */
/* NOTE(review): this code indexes this->intervals[i] and
   this->nintervals, whereas the live code in this file uses
   this->intervals[0][...] -- presumably it predates that layout;
   confirm before re-enabling. */
List_T
IIT_intervallist_typed (List_T *labellist, Uintlist_T *seglength_list, T this) {
  List_T intervallist = NULL;
  Interval_T interval;
  char *label, *annotation, *restofheader, firstchar;
  bool allocp;
  int i;
  Chrpos_T seglength;

  *labellist = NULL;
  *seglength_list = NULL;
  for (i = 0; i < this->nintervals; i++) {
    interval = &(this->intervals[i]);
    if (Interval_type(interval) > 0) {
      intervallist = List_push(intervallist,Interval_copy(interval));
      label = IIT_label(this,i+1,&allocp);
      *labellist = List_push(*labellist,label);

      if (this->version <= 1) {
        /* Annotation may be negative to indicate contig is reverse complement */
        annotation = IIT_annotation(&restofheader,this,i+1,&allocp);
        firstchar = annotation[0];
        if (firstchar == '-') {
          /* NOTE(review): seglength is parsed but not pushed on this path */
          seglength = (Chrpos_T) strtoul(&(annotation[1]),NULL,10);
        } else {
          seglength = (Chrpos_T) strtoul(annotation,NULL,10);
          *seglength_list = Uintlist_push(*seglength_list,seglength);
        }
        if (allocp == true) {
          FREE(restofheader);
        }
      } else {
        /* NOTE(review): annotation is not assigned on this path before
           it is parsed */
        seglength = (Chrpos_T) strtoul(annotation,NULL,10);
        *seglength_list = Uintlist_push(*seglength_list,seglength);
      }
    }
  }
  /* The lists were built by pushing, so reverse them */
  *labellist = List_reverse(*labellist);
  *seglength_list = Uintlist_reverse(*seglength_list);
  return List_reverse(intervallist);
}
#endif
6469
6470 List_T
IIT_typelist(T this)6471 IIT_typelist (T this) {
6472 List_T typelist = NULL;
6473 int i;
6474 char *typestring, *copy;
6475
6476 for (i = 0; i < this->ntypes; i++) {
6477 typestring = IIT_typestring(this,i);
6478 copy = (char *) CALLOC(strlen(typestring)+1,sizeof(char));
6479 strcpy(copy,typestring);
6480 typelist = List_push(typelist,copy);
6481 }
6482 return List_reverse(typelist);
6483 }
6484
6485
6486 /************************************************************************/
6487
6488 /* Assume 0-based index */
6489 static void
print_header(Filestring_T fp,T this,int recno,char * chr,bool relativep,Chrpos_T left,bool print_comment_p)6490 print_header (Filestring_T fp, T this, int recno, char *chr,
6491 bool relativep, Chrpos_T left, bool print_comment_p) {
6492 char *string, *restofheader, *p;
6493 Interval_T interval;
6494 bool allocp;
6495 #if 0
6496 int typeint;
6497 #endif
6498
6499 string = IIT_label(this,recno+1,&allocp);
6500
6501 FPRINTF(fp,"\t%s",this->name);
6502
6503 interval = &(this->intervals[0][recno]);
6504 if (relativep == true) {
6505 if (Interval_sign(interval) >= 0) {
6506 FPRINTF(fp,"\t%u..%u",Interval_low(interval)-left,Interval_high(interval)-left);
6507 } else {
6508 FPRINTF(fp,"\t%u..%u",Interval_high(interval)-left,Interval_low(interval)-left);
6509 }
6510 } else {
6511 if (Interval_sign(interval) >= 0) {
6512 FPRINTF(fp,"\t%s:%u..%u",chr,Interval_low(interval),Interval_high(interval));
6513 } else {
6514 FPRINTF(fp,"\t%s:%u..%u",chr,Interval_high(interval),Interval_low(interval));
6515 }
6516 }
6517
6518 #if 0
6519 if (map_bothstrands_p == true) {
6520 if ((typeint = Interval_type(interval)) <= 0) {
6521 FPRINTF(fp,"\t\t%s",string);
6522 } else {
6523 FPRINTF(fp,"\t%s\t%s",IIT_typestring(this,typeint),string);
6524 }
6525 } else {
6526 #endif
6527 FPRINTF(fp,"\t");
6528 p = string;
6529 while (*p != '\0' && *p != '\n') {
6530 PUTC(*p,fp);
6531 p++;
6532 }
6533
6534 #if 0
6535 }
6536 #endif
6537
6538 if (allocp == true) {
6539 FREE(string);
6540 }
6541
6542 if (print_comment_p == true) {
6543 p = IIT_annotation(&restofheader,this,recno+1,&allocp);
6544 FPRINTF(fp,"\t");
6545 while (*p != '\0' && *p != '\n') {
6546 PUTC(*p,fp);
6547 p++;
6548 }
6549
6550 if (allocp == true) {
6551 FREE(restofheader);
6552 }
6553 }
6554
6555 FPRINTF(fp,"\n");
6556
6557 return;
6558 }
6559
6560
6561 void
IIT_print_header(Filestring_T fp,T this,int * matches,int nmatches,char * chr,bool reversep,bool relativep,Chrpos_T left,bool print_comment_p)6562 IIT_print_header (Filestring_T fp, T this, int *matches, int nmatches,
6563 char *chr, bool reversep, bool relativep, Chrpos_T left,
6564 bool print_comment_p) {
6565 int recno, i;
6566
6567 if (reversep == true) {
6568 for (i = nmatches-1; i >= 0; i--) {
6569 recno = matches[i] - 1; /* Convert to 0-based */
6570 print_header(fp,this,recno,chr,relativep,left,print_comment_p);
6571 }
6572 } else {
6573 for (i = 0; i < nmatches; i++) {
6574 recno = matches[i] - 1; /* Convert to 0-based */
6575 print_header(fp,this,recno,chr,relativep,left,print_comment_p);
6576 }
6577 }
6578
6579 return;
6580 }
6581
6582
6583 Intlist_T
IIT_gene_exons_plus(int * chrnum,Uintlist_T * exonstarts,T genes_iit,int * genes_chrnum_crosstable,int index)6584 IIT_gene_exons_plus (int *chrnum, Uintlist_T *exonstarts, T genes_iit,
6585 int *genes_chrnum_crosstable, int index) {
6586 Intlist_T exonlengths = (Intlist_T) NULL;
6587 char *restofheader, *p;
6588 Chrpos_T exonstart, exonend;
6589 int divint;
6590 bool allocp;
6591
6592
6593 divint = IIT_divint_from_index(genes_iit,index);
6594 *chrnum = genes_chrnum_crosstable[divint];
6595 /* printf("index %d => divint %d => chrnum %d\n",index,divint,*chrnum); */
6596
6597 *exonstarts = (Uintlist_T) NULL;
6598
6599 p = IIT_annotation(&restofheader,genes_iit,index,&allocp);
6600
6601 /* Skip header */
6602 while (*p != '\0' && *p != '\n') {
6603 p++;
6604 }
6605 if (*p == '\n') p++;
6606
6607 while (*p != '\0') {
6608 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6609 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6610 abort();
6611 } else {
6612 exonlengths = Intlist_push(exonlengths,exonend - exonstart + 1);
6613 *exonstarts = Uintlist_push(*exonstarts,exonstart);
6614
6615 /* Advance to next exon */
6616 while (*p != '\0' && *p != '\n') p++;
6617 if (*p == '\n') p++;
6618 }
6619 }
6620
6621 if (allocp) {
6622 FREE(restofheader);
6623 }
6624
6625 *exonstarts = Uintlist_reverse(*exonstarts);
6626 return Intlist_reverse(exonlengths);
6627 }
6628
6629
6630 Intlist_T
IIT_gene_exons_minus(int * chrnum,Uintlist_T * exonstarts,T genes_iit,int * genes_chrnum_crosstable,int index)6631 IIT_gene_exons_minus (int *chrnum, Uintlist_T *exonstarts, T genes_iit,
6632 int *genes_chrnum_crosstable, int index) {
6633 Intlist_T exonlengths = (Intlist_T) NULL;
6634 char *restofheader, *p;
6635 Chrpos_T exonstart, exonend;
6636 int divint;
6637 bool allocp;
6638
6639
6640 divint = IIT_divint_from_index(genes_iit,index);
6641 *chrnum = genes_chrnum_crosstable[divint];
6642 /* printf("index %d => divint %d => chrnum %d\n",index,divint,*chrnum); */
6643
6644 *exonstarts = (Uintlist_T) NULL;
6645
6646 p = IIT_annotation(&restofheader,genes_iit,index,&allocp);
6647
6648 /* Skip header */
6649 while (*p != '\0' && *p != '\n') {
6650 p++;
6651 }
6652 if (*p == '\n') p++;
6653
6654 while (*p != '\0') {
6655 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6656 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6657 abort();
6658 } else {
6659 exonlengths = Intlist_push(exonlengths,exonstart - exonend + 1);
6660 *exonstarts = Uintlist_push(*exonstarts,exonstart);
6661
6662 /* Advance to next exon */
6663 while (*p != '\0' && *p != '\n') p++;
6664 if (*p == '\n') p++;
6665 }
6666 }
6667
6668 if (allocp) {
6669 FREE(restofheader);
6670 }
6671
6672 *exonstarts = Uintlist_reverse(*exonstarts);
6673 return Intlist_reverse(exonlengths);
6674 }
6675
6676
6677 int
IIT_gene_exons_array(int * transcript_genestrand,int ** exonbounds,unsigned int ** exonstarts,T alignment_iit,int alignment_index)6678 IIT_gene_exons_array (int *transcript_genestrand, int **exonbounds, unsigned int **exonstarts,
6679 T alignment_iit, int alignment_index) {
6680 int nexons;
6681 char *restofheader, *p;
6682 int exonbound = 0;
6683 Intlist_T exonbounds_list = NULL;
6684 Uintlist_T exonstarts_list = NULL;
6685 Chrpos_T exonstart, exonend;
6686 bool allocp;
6687
6688 *transcript_genestrand = IIT_interval_sign(alignment_iit,alignment_index);
6689 p = IIT_annotation(&restofheader,alignment_iit,alignment_index,&allocp);
6690
6691 /* Skip header */
6692 while (*p != '\0' && *p != '\n') {
6693 p++;
6694 }
6695 if (*p == '\n') p++;
6696
6697 if (*transcript_genestrand > 0) {
6698 while (*p != '\0') {
6699 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6700 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6701 abort();
6702 } else {
6703 exonbound += exonend - exonstart + 1;
6704 exonbounds_list = Intlist_push(exonbounds_list,exonbound);
6705 exonstarts_list = Uintlist_push(exonstarts_list,exonstart);
6706
6707 /* Advance to next exon */
6708 while (*p != '\0' && *p != '\n') p++;
6709 if (*p == '\n') p++;
6710 }
6711 }
6712 } else {
6713 while (*p != '\0') {
6714 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6715 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6716 abort();
6717 } else {
6718 exonbound += exonstart - exonend + 1;
6719 exonbounds_list = Intlist_push(exonbounds_list,exonbound);
6720 exonstarts_list = Uintlist_push(exonstarts_list,exonstart);
6721
6722 /* Advance to next exon */
6723 while (*p != '\0' && *p != '\n') p++;
6724 if (*p == '\n') p++;
6725 }
6726 }
6727 }
6728
6729 if (allocp) {
6730 FREE(restofheader);
6731 }
6732
6733 exonbounds_list = Intlist_reverse(exonbounds_list);
6734 *exonbounds = Intlist_to_array(&nexons,exonbounds_list);
6735 Intlist_free(&exonbounds_list);
6736
6737 exonstarts_list = Uintlist_reverse(exonstarts_list);
6738 *exonstarts = Uintlist_to_array(&nexons,exonstarts_list);
6739 Uintlist_free(&exonstarts_list);
6740
6741 return nexons;
6742 }
6743
6744
6745 Overlap_T
IIT_gene_overlap(T map_iit,int divno,Chrpos_T x,Chrpos_T y,bool favor_multiexon_p)6746 IIT_gene_overlap (T map_iit, int divno, Chrpos_T x, Chrpos_T y, bool favor_multiexon_p) {
6747 int *matches, index;
6748 int nmatches, i;
6749 Chrpos_T exonstart, exonend;
6750 int observed_genestrand;
6751 char *annot, *restofheader, *p;
6752 bool allocp = false;
6753 bool multiexon_p;
6754 bool foundp = false;
6755
6756 matches = IIT_get_with_divno(&nmatches,map_iit,divno,x,y,/*sortp*/false);
6757
6758 for (i = 0; i < nmatches; i++) {
6759 index = matches[i];
6760 observed_genestrand = IIT_interval_sign(map_iit,index);
6761 #if 0
6762 if (observed_genestrand > 0 && desired_genestrand < 0) {
6763 /* Inconsistent */
6764 } else if (observed_genestrand < 0 && desired_genestrand > 0) {
6765 /* Inconsistent */
6766 } else {
6767 #endif
6768 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
6769
6770 /* Skip header */
6771 p = annot;
6772 while (*p != '\0' && *p != '\n') {
6773 p++;
6774 }
6775 if (*p == '\n') p++;
6776
6777 if (observed_genestrand > 0) {
6778 multiexon_p = false;
6779 while (*p != '\0') {
6780 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6781 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6782 abort();
6783 } else {
6784 /* Advance to next exon */
6785 while (*p != '\0' && *p != '\n') p++;
6786 if (*p == '\n') p++;
6787 if (*p != '\0') {
6788 multiexon_p = true;
6789 }
6790
6791 if (exonend < x) {
6792 /* No overlap */
6793 } else if (exonstart > y) {
6794 /* No overlap */
6795 } else if (favor_multiexon_p == true) {
6796 if (multiexon_p == true) {
6797 FREE(matches);
6798 if (allocp) FREE(annot);
6799 return KNOWN_GENE_MULTIEXON;
6800 } else {
6801 /* Keep searching for a multi-exon gene */
6802 foundp = true;
6803 }
6804 } else {
6805 FREE(matches);
6806 if (allocp) FREE(annot);
6807 return KNOWN_GENE;
6808 }
6809 }
6810
6811 }
6812
6813 } else {
6814 multiexon_p = false;
6815 while (*p != '\0') {
6816 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6817 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6818 abort();
6819 } else {
6820 /* Advance to next exon */
6821 while (*p != '\0' && *p != '\n') p++;
6822 if (*p == '\n') p++;
6823 if (*p != '\0') {
6824 multiexon_p = true;
6825 }
6826
6827 if (exonstart < x) {
6828 /* No overlap */
6829 } else if (exonend > y) {
6830 /* No overlap */
6831 } else if (favor_multiexon_p == true) {
6832 if (multiexon_p == true) {
6833 FREE(matches);
6834 if (allocp) FREE(annot);
6835 return KNOWN_GENE_MULTIEXON;
6836 } else {
6837 /* Keep searching for a multi-exon gene */
6838 foundp = true;
6839 }
6840 } else {
6841 FREE(matches);
6842 if (allocp) FREE(annot);
6843 return KNOWN_GENE;
6844 }
6845 }
6846 }
6847 }
6848 #if 0
6849 }
6850 #endif
6851 }
6852
6853 FREE(matches);
6854 if (allocp) FREE(annot);
6855 if (foundp == true) {
6856 return KNOWN_GENE;
6857 } else {
6858 return NO_KNOWN_GENE;
6859 }
6860 }
6861
6862
6863 Chrpos_T
IIT_genestruct_chrpos(char * strand,char ** divstring,char ** gene,T map_iit,char * transcript,int querypos)6864 IIT_genestruct_chrpos (char *strand, char **divstring, char **gene,
6865 T map_iit, char *transcript, int querypos) {
6866 Interval_T interval0;
6867 int index0;
6868 Chrpos_T exonstart0, exonend0, exonlength;
6869 char *annot, *restofheader, *p;
6870 bool allocp = false;
6871
6872
6873 if ((index0 = IIT_find_one(map_iit,transcript)) < 0) {
6874 fprintf(stderr,"Could not find transcript %s in genes map\n",transcript);
6875 return (Chrpos_T) 0;
6876 } else {
6877 *divstring = IIT_divstring_from_index(map_iit,index0);
6878 interval0 = &(map_iit->intervals[0][index0-1]);
6879 annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
6880 }
6881
6882
6883 /* Get gene from header */
6884 p = annot;
6885 while (*p != '\0' && *p != '\n' && *p != ' ') {
6886 p++;
6887 }
6888 *gene = (char *) MALLOC((p - annot + 1)*sizeof(char));
6889 strncpy(*gene,annot,p - annot);
6890 (*gene)[p - annot] = '\0';
6891
6892 while (*p != '\0' && *p != '\n') {
6893 p++;
6894 }
6895 if (*p == '\n') p++;
6896
6897
6898 if (Interval_sign(interval0) > 0) {
6899 *strand = '+';
6900 while (*p != '\0') {
6901 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
6902 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6903 abort();
6904 } else {
6905 exonlength = exonend0 - exonstart0 + 1;
6906 if (exonlength < (Chrpos_T) querypos) {
6907 querypos -= exonlength;
6908 } else {
6909 if (allocp) {
6910 FREE(restofheader);
6911 }
6912 return exonstart0 + querypos - 1; /* Because both exonstart0 and querypos are 1-based */
6913 }
6914 }
6915
6916 /* Advance to the next exon */
6917 while (*p != '\0' && *p != '\n') p++;
6918 if (*p == '\n') p++;
6919 }
6920
6921 } else {
6922 *strand = '-';
6923 while (*p != '\0') {
6924 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
6925 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6926 abort();
6927 } else {
6928 exonlength = exonstart0 - exonend0 + 1;
6929 if (exonlength < (Chrpos_T) querypos) {
6930 querypos -= exonlength;
6931 } else {
6932 if (allocp) {
6933 FREE(restofheader);
6934 }
6935 return exonstart0 - querypos + 1; /* Because both exonstart and querypos are 1-based */
6936 }
6937 }
6938
6939 /* Advance to the next exon */
6940 while (*p != '\0' && *p != '\n') p++;
6941 if (*p == '\n') p++;
6942 }
6943 }
6944
6945 if (allocp) {
6946 FREE(restofheader);
6947 }
6948
6949 fprintf(stderr,"querypos is too long\n");
6950 return (Chrpos_T) 0;
6951 }
6952
6953
6954 bool
IIT_gene_overlapp(T map_iit,int index,Chrpos_T x,Chrpos_T y)6955 IIT_gene_overlapp (T map_iit, int index, Chrpos_T x, Chrpos_T y) {
6956 Chrpos_T exonstart, exonend;
6957 int observed_genestrand;
6958 char *annot, *restofheader, *p;
6959 bool allocp = false;
6960
6961 observed_genestrand = IIT_interval_sign(map_iit,index);
6962 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
6963
6964 /* Skip header */
6965 p = annot;
6966 while (*p != '\0' && *p != '\n') {
6967 p++;
6968 }
6969 if (*p == '\n') p++;
6970
6971 if (observed_genestrand > 0) {
6972 while (*p != '\0') {
6973 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6974 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6975 abort();
6976 } else {
6977 /* Advance to next exon */
6978 while (*p != '\0' && *p != '\n') p++;
6979 if (*p == '\n') p++;
6980
6981 if (exonend < x) {
6982 /* No overlap */
6983 } else if (exonstart > y) {
6984 /* No overlap */
6985 } else {
6986 if (allocp) FREE(annot);
6987 return true;
6988 }
6989 }
6990
6991 }
6992
6993 } else {
6994 while (*p != '\0') {
6995 if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
6996 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
6997 abort();
6998 } else {
6999 /* Advance to next exon */
7000 while (*p != '\0' && *p != '\n') p++;
7001 if (*p == '\n') p++;
7002
7003 if (exonstart < x) {
7004 /* No overlap */
7005 } else if (exonend > y) {
7006 /* No overlap */
7007 } else {
7008 if (allocp) FREE(annot);
7009 return true;
7010 }
7011 }
7012 }
7013 }
7014
7015 if (allocp) FREE(annot);
7016 return false;
7017 }
7018
7019
7020 /* Can handle only genes with the same direction as the given gene */
7021 Intlist_T
IIT_unique_positions(T map_iit,int index0,int divno)7022 IIT_unique_positions (T map_iit, int index0, int divno) {
7023 Intlist_T uniques = (Intlist_T) NULL;
7024 int nunique;
7025 Interval_T interval0;
7026 int *matches, index;
7027 int nmatches, i;
7028 Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
7029 char *annot, *restofheader, *p, *q;
7030 char **pointers;
7031 int npointers, ptri;
7032 bool allocp = false;
7033 bool uniquep;
7034
7035
7036 interval0 = &(map_iit->intervals[0][index0-1]);
7037 matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
7038 /*sortp*/false,Interval_sign(interval0));
7039 if (nmatches == 0) {
7040 /* No overlapping genes found */
7041 pointers = (char **) NULL;
7042 npointers = 0;
7043 } else {
7044 pointers = (char **) MALLOC(nmatches * sizeof(char *));
7045 npointers = 0;
7046 for (i = 0; i < nmatches; i++) {
7047 index = matches[i];
7048 if (index != index0) {
7049 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7050
7051 /* Skip header */
7052 p = annot;
7053 while (*p != '\0' && *p != '\n') {
7054 p++;
7055 }
7056 if (*p == '\n') p++;
7057
7058 pointers[npointers++] = p;
7059 }
7060 }
7061 FREE(matches);
7062 }
7063
7064 annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7065 /* Skip header */
7066 p = annot;
7067 while (*p != '\0' && *p != '\n') {
7068 p++;
7069 }
7070 if (*p == '\n') p++;
7071
7072 nunique = -1;
7073 if (Interval_sign(interval0) > 0) {
7074 while (*p != '\0') {
7075 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7076 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7077 abort();
7078 } else {
7079 if (nunique >= 0) {
7080 uniques = Intlist_push(uniques,nunique);
7081 }
7082 nunique = 0;
7083
7084 for (pos = exonstart0; pos <= exonend0; pos++) {
7085 uniquep = true;
7086 for (ptri = 0; ptri < npointers; ptri++) {
7087 q = pointers[ptri];
7088 if (*q == '\0') {
7089 /* Skip */
7090 exonstart = exonend = -1U;
7091 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7092 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7093 abort();
7094 }
7095
7096 /* Advance to appropriate exon if necessary */
7097 while (pos > exonend) {
7098 while (*q != '\0' && *q != '\n') q++;
7099 if (*q == '\n') q++;
7100
7101 if (*q == '\0') {
7102 exonstart = exonend = -1U;
7103 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7104 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7105 abort();
7106 }
7107 }
7108
7109 if (pos >= exonstart && pos <= exonend) {
7110 uniquep = false;
7111 }
7112
7113 pointers[ptri] = q;
7114 }
7115 if (uniquep == true) {
7116 nunique += 1;
7117 }
7118 }
7119
7120 /* Advance to the next exon */
7121 while (*p != '\0' && *p != '\n') p++;
7122 if (*p == '\n') p++;
7123 }
7124 }
7125
7126 } else {
7127 while (*p != '\0') {
7128 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7129 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7130 abort();
7131 } else {
7132 if (nunique >= 0) {
7133 uniques = Intlist_push(uniques,nunique);
7134 }
7135 nunique = 0;
7136
7137 for (pos = exonstart0; pos >= exonend0; --pos) {
7138 uniquep = true;
7139 for (ptri = 0; ptri < npointers; ptri++) {
7140 q = pointers[ptri];
7141 if (*q == '\0') {
7142 /* Skip */
7143 exonstart = exonend = 0;
7144 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7145 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7146 abort();
7147 }
7148
7149 /* Advance to appropriate exon if necessary */
7150 while (pos < exonend) {
7151 while (*q != '\0' && *q != '\n') q++;
7152 if (*q == '\n') q++;
7153
7154 if (*q == '\0') {
7155 exonstart = exonend = 0;
7156 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7157 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7158 abort();
7159 }
7160 }
7161
7162 if (pos <= exonstart && pos >= exonend) {
7163 uniquep = false;
7164 }
7165
7166 pointers[ptri] = q;
7167 }
7168 if (uniquep == true) {
7169 nunique += 1;
7170 }
7171 }
7172
7173 /* Advance to the next exon */
7174 while (*p != '\0' && *p != '\n') p++;
7175 if (*p == '\n') p++;
7176 }
7177 }
7178 }
7179
7180
7181 if (nunique >= 0) {
7182 uniques = Intlist_push(uniques,nunique);
7183 }
7184
7185 FREE(pointers);
7186 return Intlist_reverse(uniques);
7187 }
7188
7189
7190 /* Needed for a second round of gene expression assignment */
7191 Intlist_T
IIT_unique_positions_given_others(T map_iit,int index0,int * matches,int nmatches)7192 IIT_unique_positions_given_others (T map_iit, int index0, int *matches, int nmatches) {
7193 Intlist_T uniques = (Intlist_T) NULL;
7194 int nunique;
7195 Interval_T interval0;
7196 int index;
7197 int i;
7198 Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
7199 char *annot, *restofheader, *p, *q;
7200 char **pointers;
7201 int npointers, ptri;
7202 bool allocp = false;
7203 bool uniquep;
7204
7205
7206 interval0 = &(map_iit->intervals[0][index0-1]);
7207
7208 pointers = MALLOC(nmatches * sizeof(char *));
7209 npointers = 0;
7210 for (i = 0; i < nmatches; i++) {
7211 index = matches[i];
7212 if (index != index0) {
7213 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7214
7215 /* Skip header */
7216 p = annot;
7217 while (*p != '\0' && *p != '\n') {
7218 p++;
7219 }
7220 if (*p == '\n') p++;
7221
7222 pointers[npointers++] = p;
7223 }
7224 }
7225 /* FREE(matches); */
7226
7227 annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7228 /* Skip header */
7229 p = annot;
7230 while (*p != '\0' && *p != '\n') {
7231 p++;
7232 }
7233 if (*p == '\n') p++;
7234
7235 nunique = -1;
7236 if (Interval_sign(interval0) > 0) {
7237 while (*p != '\0') {
7238 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7239 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7240 abort();
7241 } else {
7242 if (nunique >= 0) {
7243 uniques = Intlist_push(uniques,nunique);
7244 }
7245 nunique = 0;
7246
7247 for (pos = exonstart0; pos <= exonend0; pos++) {
7248 uniquep = true;
7249 for (ptri = 0; ptri < npointers; ptri++) {
7250 q = pointers[ptri];
7251 if (*q == '\0') {
7252 /* Skip */
7253 exonstart = exonend = -1U;
7254 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7255 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7256 abort();
7257 }
7258
7259 /* Advance to appropriate exon if necessary */
7260 while (pos > exonend) {
7261 while (*q != '\0' && *q != '\n') q++;
7262 if (*q == '\n') q++;
7263
7264 if (*q == '\0') {
7265 exonstart = exonend = -1U;
7266 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7267 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7268 abort();
7269 }
7270 }
7271
7272 if (pos >= exonstart && pos <= exonend) {
7273 uniquep = false;
7274 }
7275
7276 pointers[ptri] = q;
7277 }
7278 if (uniquep == true) {
7279 nunique += 1;
7280 }
7281 }
7282
7283 /* Advance to the next exon */
7284 while (*p != '\0' && *p != '\n') p++;
7285 if (*p == '\n') p++;
7286 }
7287 }
7288
7289 } else {
7290 while (*p != '\0') {
7291 if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
7292 fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
7293 abort();
7294 } else {
7295 if (nunique >= 0) {
7296 uniques = Intlist_push(uniques,nunique);
7297 }
7298 nunique = 0;
7299
7300 for (pos = exonstart0; pos >= exonend0; --pos) {
7301 uniquep = true;
7302 for (ptri = 0; ptri < npointers; ptri++) {
7303 q = pointers[ptri];
7304 if (*q == '\0') {
7305 /* Skip */
7306 exonstart = exonend = 0;
7307 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7308 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7309 abort();
7310 }
7311
7312 /* Advance to appropriate exon if necessary */
7313 while (pos < exonend) {
7314 while (*q != '\0' && *q != '\n') q++;
7315 if (*q == '\n') q++;
7316
7317 if (*q == '\0') {
7318 exonstart = exonend = 0;
7319 } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
7320 fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
7321 abort();
7322 }
7323 }
7324
7325 if (pos <= exonstart && pos >= exonend) {
7326 uniquep = false;
7327 }
7328
7329 pointers[ptri] = q;
7330 }
7331 if (uniquep == true) {
7332 nunique += 1;
7333 }
7334 }
7335
7336 /* Advance to the next exon */
7337 while (*p != '\0' && *p != '\n') p++;
7338 if (*p == '\n') p++;
7339 }
7340 }
7341 }
7342
7343
7344 if (nunique >= 0) {
7345 uniques = Intlist_push(uniques,nunique);
7346 }
7347
7348 FREE(pointers);
7349 return Intlist_reverse(uniques);
7350 }
7351
7352
7353 /* Can handle only genes with the same direction as the given gene */
7354 /* Values or either 1 (unique) or 0 (not unique) */
7355 Intlist_T
IIT_unique_splicep(T map_iit,int index0,int divno)7356 IIT_unique_splicep (T map_iit, int index0, int divno) {
7357 Intlist_T uniques = (Intlist_T) NULL;
7358 Interval_T interval0;
7359 int *matches, index;
7360 int nmatches, i;
7361 Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
7362 exonstart, intronstart, intronend, exonend;
7363 char *annot, *restofheader, *p, *q;
7364 char **pointers;
7365 int npointers, ptri;
7366 bool allocp = false;
7367 bool uniquep, firstp;
7368
7369
7370 interval0 = &(map_iit->intervals[0][index0-1]);
7371 matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
7372 /*sortp*/false,Interval_sign(interval0));
7373 if (nmatches == 0) {
7374 /* No overlapping genes found */
7375 pointers = (char **) NULL;
7376 npointers = 0;
7377 } else {
7378 pointers = (char **) MALLOC(nmatches * sizeof(char *));
7379 npointers = 0;
7380 for (i = 0; i < nmatches; i++) {
7381 index = matches[i];
7382 if (index != index0) {
7383 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7384
7385 /* Skip header */
7386 p = annot;
7387 while (*p != '\0' && *p != '\n') {
7388 p++;
7389 }
7390 if (*p == '\n') p++;
7391
7392 pointers[npointers++] = p;
7393 }
7394 }
7395 FREE(matches);
7396 }
7397
7398 annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7399 /* Skip header */
7400 p = annot;
7401 while (*p != '\0' && *p != '\n') {
7402 p++;
7403 }
7404 if (*p == '\n') p++;
7405
7406 firstp = true;
7407 if (Interval_sign(interval0) > 0) {
7408 while (*p != '\0') {
7409 if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7410 /* Passed last intron */
7411 while (*p != '\0') p++;
7412 } else {
7413 if (firstp == false) {
7414 uniques = Intlist_push(uniques,(int) uniquep);
7415 }
7416 firstp = false;
7417
7418 uniquep = true;
7419 for (ptri = 0; ptri < npointers; ptri++) {
7420 q = pointers[ptri];
7421 if (*q == '\0') {
7422 /* Skip */
7423 intronstart = intronend = -1U;
7424 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7425 /* Passed last intron */
7426 intronstart = intronend = 0;
7427 while (*q != '\0') q++;
7428 }
7429
7430 /* Advance to appropriate exon if necessary */
7431 while (intronstart0 > intronstart) {
7432 while (*q != '\0' && *q != '\n') q++;
7433 if (*q == '\n') q++;
7434
7435 if (*q == '\0') {
7436 intronstart = intronend = -1U;
7437 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7438 intronstart = intronend = 0;
7439 while (*q != '\0') q++;
7440 }
7441 }
7442
7443 if (intronstart == intronstart0 && intronend == intronend0) {
7444 uniquep = false;
7445 }
7446
7447 pointers[ptri] = q;
7448 }
7449 }
7450
7451 /* Advance to the next exon */
7452 while (*p != '\0' && *p != '\n') p++;
7453 if (*p == '\n') p++;
7454 }
7455
7456 } else {
7457 while (*p != '\0') {
7458 if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7459 /* Passed last intron */
7460 while (*p != '\0') p++;
7461 } else {
7462 if (firstp == false) {
7463 uniques = Intlist_push(uniques,(int) uniquep);
7464 }
7465 firstp = false;
7466
7467 uniquep = true;
7468 for (ptri = 0; ptri < npointers; ptri++) {
7469 q = pointers[ptri];
7470 if (*q == '\0') {
7471 /* Skip */
7472 intronstart = intronend = 0;
7473 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7474 /* Passed last intron */
7475 intronstart = intronend = 0;
7476 while (*q != '\0') q++;
7477 }
7478
7479 /* Advance to appropriate exon if necessary */
7480 while (intronstart0 < intronstart) {
7481 while (*q != '\0' && *q != '\n') q++;
7482 if (*q == '\n') q++;
7483
7484 if (*q == '\0') {
7485 intronstart = intronend = 0;
7486 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7487 intronstart = intronend = 0;
7488 while (*q != '\0') q++;
7489 }
7490 }
7491
7492 if (intronstart == intronstart0 && intronend == intronend0) {
7493 uniquep = false;
7494 }
7495
7496 pointers[ptri] = q;
7497 }
7498 }
7499
7500 /* Advance to the next exon */
7501 while (*p != '\0' && *p != '\n') p++;
7502 if (*p == '\n') p++;
7503 }
7504 }
7505
7506 if (firstp == false) {
7507 uniques = Intlist_push(uniques,(int) uniquep);
7508 }
7509
7510 FREE(pointers);
7511 return Intlist_reverse(uniques);
7512 }
7513
7514
7515 /* Can handle only genes with the same direction as the given gene */
7516 /* Values or either 1 (unique) or 0 (not unique) */
7517 Intlist_T
IIT_unique_splicep_given_others(T map_iit,int index0,int * matches,int nmatches)7518 IIT_unique_splicep_given_others (T map_iit, int index0, int *matches, int nmatches) {
7519 Intlist_T uniques = (Intlist_T) NULL;
7520 Interval_T interval0;
7521 int index;
7522 int i;
7523 Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
7524 exonstart, intronstart, intronend, exonend;
7525 char *annot, *restofheader, *p, *q;
7526 char **pointers;
7527 int npointers, ptri;
7528 bool allocp = false;
7529 bool uniquep, firstp;
7530
7531
7532 interval0 = &(map_iit->intervals[0][index0-1]);
7533
7534 pointers = MALLOC(nmatches * sizeof(char *));
7535 npointers = 0;
7536 for (i = 0; i < nmatches; i++) {
7537 index = matches[i];
7538 if (index != index0) {
7539 annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
7540
7541 /* Skip header */
7542 p = annot;
7543 while (*p != '\0' && *p != '\n') {
7544 p++;
7545 }
7546 if (*p == '\n') p++;
7547
7548 pointers[npointers++] = p;
7549 }
7550 }
7551 /* FREE(matches); */
7552
7553 annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
7554 /* Skip header */
7555 p = annot;
7556 while (*p != '\0' && *p != '\n') {
7557 p++;
7558 }
7559 if (*p == '\n') p++;
7560
7561 firstp = true;
7562 if (Interval_sign(interval0) > 0) {
7563 while (*p != '\0') {
7564 if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7565 /* Passed last intron */
7566 while (*p != '\0') p++;
7567 } else {
7568 if (firstp == false) {
7569 uniques = Intlist_push(uniques,(int) uniquep);
7570 }
7571 firstp = false;
7572
7573 uniquep = true;
7574 for (ptri = 0; ptri < npointers; ptri++) {
7575 q = pointers[ptri];
7576 if (*q == '\0') {
7577 /* Skip */
7578 intronstart = intronend = -1U;
7579 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7580 /* Passed last intron */
7581 intronstart = intronend = 0;
7582 while (*q != '\0') q++;
7583 }
7584
7585 /* Advance to appropriate exon if necessary */
7586 while (intronstart0 > intronstart) {
7587 while (*q != '\0' && *q != '\n') q++;
7588 if (*q == '\n') q++;
7589
7590 if (*q == '\0') {
7591 intronstart = intronend = -1U;
7592 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7593 intronstart = intronend = 0;
7594 while (*q != '\0') q++;
7595 }
7596 }
7597
7598 if (intronstart == intronstart0 && intronend == intronend0) {
7599 uniquep = false;
7600 }
7601
7602 pointers[ptri] = q;
7603 }
7604 }
7605
7606 /* Advance to the next exon */
7607 while (*p != '\0' && *p != '\n') p++;
7608 if (*p == '\n') p++;
7609 }
7610
7611 } else {
7612 while (*p != '\0') {
7613 if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
7614 /* Passed last intron */
7615 while (*p != '\0') p++;
7616 } else {
7617 if (firstp == false) {
7618 uniques = Intlist_push(uniques,(int) uniquep);
7619 }
7620 firstp = false;
7621
7622 uniquep = true;
7623 for (ptri = 0; ptri < npointers; ptri++) {
7624 q = pointers[ptri];
7625 if (*q == '\0') {
7626 /* Skip */
7627 intronstart = intronend = 0;
7628 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7629 /* Passed last intron */
7630 intronstart = intronend = 0;
7631 while (*q != '\0') q++;
7632 }
7633
7634 /* Advance to appropriate exon if necessary */
7635 while (intronstart0 < intronstart) {
7636 while (*q != '\0' && *q != '\n') q++;
7637 if (*q == '\n') q++;
7638
7639 if (*q == '\0') {
7640 intronstart = intronend = 0;
7641 } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
7642 intronstart = intronend = 0;
7643 while (*q != '\0') q++;
7644 }
7645 }
7646
7647 if (intronstart == intronstart0 && intronend == intronend0) {
7648 uniquep = false;
7649 }
7650
7651 pointers[ptri] = q;
7652 }
7653 }
7654
7655 /* Advance to the next exon */
7656 while (*p != '\0' && *p != '\n') p++;
7657 if (*p == '\n') p++;
7658 }
7659 }
7660
7661 if (firstp == false) {
7662 uniques = Intlist_push(uniques,(int) uniquep);
7663 }
7664
7665 FREE(pointers);
7666 return Intlist_reverse(uniques);
7667 }
7668