1 static char rcsid[] = "$Id: iit-write.c 213998 2018-03-03 04:59:34Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #ifndef HAVE_MEMCPY
7 # define memcpy(d,s,n) bcopy((s),(d),(n))
8 #endif
9 #ifndef HAVE_MEMMOVE
10 # define memmove(d,s,n) bcopy((s),(d),(n))
11 #endif
12 
13 #include "iit-write.h"
14 
15 #ifdef WORDS_BIGENDIAN
16 #include "bigendian.h"
17 #else
18 #include "littleendian.h"
19 #endif
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>		/* For strlen and strcmp */
24 #include "assert.h"
25 #include "mem.h"
26 #include "fopen.h"
27 #include "interval.h"
28 #include "types.h"		/* For HAVE_64_BIT and UINT8 */
29 #include "doublelist.h"
30 
31 
32 /* Makes IIT files more efficient, because the integer arrays will be aligned */
33 #define PAD_STRINGS 1
34 
35 #ifdef DEBUG
36 #define debug(x) x
37 #else
38 #define debug(x)
39 #endif
40 
41 /* Padding */
42 #ifdef DEBUG1
43 #define debug1(x) x
44 #else
45 #define debug1(x)
46 #endif
47 
48 
49 typedef struct Node_T *Node_T;
50 struct Node_T {
51   int index;
52   Chrpos_T value;
53   int a;
54   int b;
55   Node_T left;
56   Node_T right;
57 };
58 
59 static Node_T
Node_new()60 Node_new () {
61   Node_T new = (Node_T) MALLOC(sizeof(*new));
62 
63   new->index = -1;
64   new->left = new->right = NULL;
65   return new;
66 }
67 
68 static void
Node_gc(Node_T * old)69 Node_gc (Node_T *old) {
70   if (*old) {
71     Node_gc(&((*old)->left));
72     Node_gc(&((*old)->right));
73     FREE(*old);
74   }
75   return;
76 }
77 
78 
79 #define T IIT_T
80 
81 
82 /************************************************************************/
83 
84 static int
is_right_split(int i,int j,int r,Chrpos_T value,int * sigmas,struct Interval_T * intervals)85 is_right_split (int i, int j, int r, Chrpos_T value, int *sigmas,
86 		struct Interval_T *intervals) {
87   int iota, lambda;
88 
89   iota = 0;
90   for (lambda = i; lambda <= j; lambda++) {
91     if (Interval_is_contained(value,intervals,sigmas[lambda]) == true) {
92       iota = lambda;
93     }
94   }
95   return (iota == r);
96 }
97 
98 static bool
is_sorted(int array[],int i,int j,Chrpos_T (* endpoint)(),struct Interval_T * intervals)99 is_sorted (int array[], int i, int j, Chrpos_T (*endpoint)(), struct Interval_T *intervals) {
100   int lambda;
101 
102   for (lambda = i; lambda <= j - 1; lambda++) {
103     if (endpoint(intervals,array[lambda]) > endpoint(intervals,array[lambda + 1])) {
104       return false;
105     }
106   }
107   return true;
108 }
109 
110 static bool
is_empty(int array[],int i,int j)111 is_empty (int array[], int i, int j) {
112   int lambda;
113 
114   for (lambda = i; lambda <= j; lambda++) {
115     if (array[lambda] != 0) {
116       return false;
117     }
118   }
119   return true;
120 }
121 
122 static bool
is_valid_input(int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)123 is_valid_input (int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals) {
124   int lambda, iota;
125 
126   assert(is_sorted (sigmas, i, j, Interval_array_low, intervals));
127   assert(is_empty (omegas, i, j));
128   for (lambda = i; lambda <= j; lambda++) {
129     iota = sigmas[lambda];
130     assert(Interval_is_contained(Interval_array_low(intervals,iota),intervals,iota)
131 	   || Interval_is_contained(Interval_array_high(intervals,iota),intervals,iota));
132 
133   }
134   return true;
135 }
136 
137 /************************************************************************/
138 
139 static bool
Node_is_valid_output(Node_T node,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)140 Node_is_valid_output (Node_T node, int i, int j, int *sigmas, int *omegas,
141 		      struct Interval_T *intervals) {
142   int lambda;
143 
144   assert((i <= node->a) && (node->b <= j));
145   assert(is_sorted (sigmas, node->a, node->b, Interval_array_low, intervals));
146   assert(is_sorted (omegas, node->a, node->b, Interval_array_high, intervals));
147   assert(is_right_split (i, j, node->b, node->value, sigmas, intervals));
148 
149   for (lambda = node->a; lambda <= node->b; lambda++) {
150     assert(Interval_is_contained(node->value,intervals,sigmas[lambda])
151 	   && Interval_is_contained(node->value,intervals,omegas[lambda]));
152   }
153 
154   for (lambda = i; lambda <= node->a - 1; lambda++) {
155     assert(Interval_is_contained(node->value,intervals,sigmas[lambda]) == false);
156   }
157   for (lambda = node->b + 1; lambda <= j; lambda++) {
158     assert(Interval_is_contained(node->value,intervals,sigmas[lambda]) == false);
159   }
160 
161   return true;
162 }
163 
164 
165 
166 /* Refinement of node_make().
167    Select proper value and split off "right of" triangles. */
168 static void
node_select(int * index,Chrpos_T * value,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)169 node_select (int *index, Chrpos_T *value, int i, int j,
170 	     int *sigmas, int *omegas, struct Interval_T *intervals) {
171   int r = j - (j - i) / 3;
172   Chrpos_T k = Interval_array_low(intervals,sigmas[r]);
173 
174   while ((r < j) && (Interval_array_low(intervals,sigmas[r + 1]) == k)) {
175     r ++;
176   }
177 
178   if (Interval_is_contained(k,intervals,sigmas[r]) == false) {
179     /* adjust r to the left for "open" intervals */
180     while ((r > i) && (Interval_is_contained(k,intervals,sigmas[r-1]) == false)) {
181       r --;
182       printf(" basic_iit: (-)\n");
183     }
184     if (Interval_is_contained(k,intervals,sigmas[r]) == false) {
185       r --;
186       printf(" basic_iit: [-]\n");
187       assert(r == i - 1);
188       printf(" basic_iit WARNING: empty NODE!?!\n");
189     }
190   }
191   assert(is_right_split (i, j, r, k, sigmas, intervals));
192   *index = r;
193   *value = k;
194   return;
195 }
196 
197 
198 /* Makes node out of sigmas[i..j], and recurses. */
199 static Node_T
Node_make(int * nnodes,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals,bool presortedp)200 Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals,
201 	   bool presortedp) {
202   Node_T node;
203   int lambda, iota;
204   int q, r;
205 
206   assert(is_valid_input (i, j, sigmas, omegas, intervals));
207   if (i > j) {
208     return (Node_T) NULL;
209   } else {
210     node = Node_new();
211     *nnodes += 1;
212 
213     /* select value & get "right of" intervals sigma[r+1..j] */
214     node_select(&r,&node->value,i,j,sigmas,omegas,intervals);
215 
216     /* mark "contains" intervals in sigma[i..r] to omega[i<q+1..r] */
217     q = r;
218     for (lambda = r; lambda >= i; lambda--) {
219       if (Interval_is_contained(node->value,intervals,sigmas[lambda]) == true) {
220 	omegas[q] = sigmas[lambda];
221 	sigmas[lambda] = 0;
222 	q --;
223       }
224     }
225 
226     /* move remaining "left of" intervals from sigma[i..r] to sigma[i..q] */
227     iota = i;
228     for (lambda = i; lambda <= r; lambda++) {
229       if (sigmas[lambda] != 0) {
230 	sigmas[iota] = sigmas[lambda];
231 	iota ++;
232       }
233     }
234     assert(iota == q + 1);
235 
236     /* copy omega[q+1..r] back to sigma[q+1..r] & sort omega[q+1..r] */
237     for (lambda = q+1; lambda <= r; lambda++) {
238       sigmas[lambda] = omegas[lambda];
239     }
240     Interval_qsort_by_omega(omegas,q+1,r,intervals,presortedp);
241     node->a = q + 1;
242     node->b = r;
243 
244 #if 0
245     fprintf(stderr," NODE=%u [%d..%d], left: %d, cont: %d, right: %d\n",
246 	    node->value, i, j, q - i + 1, r - q, j - r);
247 #endif
248 
249     assert(Node_is_valid_output (node, i, j, sigmas, omegas, intervals));
250 
251     /* recurse */
252     node->left  = Node_make(&(*nnodes),i,q,sigmas,omegas,intervals,presortedp);
253     node->right = Node_make(&(*nnodes),r+1,j,sigmas,omegas,intervals,presortedp);
254 
255     return node;
256   }
257 }
258 
259 
260 
261 static void
Node_index(Node_T node,int * index)262 Node_index (Node_T node, int *index) {
263   if (node != NULL) {
264     node->index = (*index)++;
265     Node_index(node->left,&(*index));
266     Node_index(node->right,&(*index));
267   }
268   return;
269 }
270 
271 
272 static int
IIT_count_nnodes(List_T intervallist,bool presortedp)273 IIT_count_nnodes (List_T intervallist, bool presortedp) {
274   int nnodes;
275   Node_T root;
276   int nintervals, i;
277   List_T p;
278   struct Interval_T *intervals;
279   int *sigmas;
280   int *omegas;
281 
282   if ((nintervals = List_length(intervallist)) == 0) {
283     return 0;
284   } else {
285     intervals = (struct Interval_T *) CALLOC(nintervals,sizeof(struct Interval_T));
286     for (p = intervallist, i = 0; i < nintervals; i++, p = List_next(p)) {
287       memcpy(&(intervals[i]),List_head(p),sizeof(struct Interval_T));
288     }
289 
290     /* IIT ordering of intervals */
291     nnodes = 0;
292     sigmas = (int *) CALLOC(nintervals+1,sizeof(int));
293     for (i = 1; i <= nintervals; i++) {
294       sigmas[i] = i;
295     }
296 
297     /* Sort sigmas with respect to Interval_array_low */
298     Interval_qsort_by_sigma(sigmas,1,nintervals,intervals,presortedp);
299 
300     omegas = (int *) CALLOC(nintervals+1,sizeof(int));
301 
302     /* make first node, and recurse... */
303     root = Node_make(&nnodes,1,nintervals,sigmas,omegas,intervals,presortedp);
304 
305     Node_gc(&root);
306     FREE(omegas);
307     FREE(sigmas);
308     FREE(intervals);
309 
310     return nnodes;
311   }
312 }
313 
314 static void
IIT_build_one_div(Node_T * root,struct Interval_T ** intervals,int ** alphas,int ** betas,int ** sigmas,int ** omegas,int * nnodes,List_T intervallist,int nintervals,bool presortedp)315 IIT_build_one_div (Node_T *root, struct Interval_T **intervals, int **alphas, int **betas, int **sigmas, int **omegas,
316 		   int *nnodes, List_T intervallist, int nintervals, bool presortedp) {
317   int index = 0;		/* Must be initialized to 0 */
318   int i;
319   List_T p;
320 
321   if (nintervals == 0) {
322     *intervals = (struct Interval_T *) NULL;
323   } else {
324     *intervals = (struct Interval_T *) CALLOC(nintervals,sizeof(struct Interval_T));
325     for (p = intervallist, i = 0; i < nintervals; i++, p = List_next(p)) {
326       memcpy(&((*intervals)[i]),List_head(p),sizeof(struct Interval_T));
327     }
328   }
329 
330   /* Strict ordering of intervals */
331 
332   *alphas = (int *) CALLOC(nintervals+1,sizeof(int));
333   *betas = (int *) CALLOC(nintervals+1,sizeof(int));
334   for (i = 1; i <= nintervals; i++) {
335     (*alphas)[i] = (*betas)[i] = i;
336   }
337   Interval_qsort_by_sigma(*alphas,1,nintervals,*intervals,presortedp);
338   Interval_qsort_by_omega(*betas,1,nintervals,*intervals,presortedp);
339 
340 
341   /* IIT ordering of intervals */
342   *nnodes = 0;
343   *sigmas = (int *) CALLOC(nintervals+1,sizeof(int));
344   for (i = 1; i <= nintervals; i++) {
345     (*sigmas)[i] = i;
346   }
347 
348   /* Sort sigmas with respect to Interval_array_low */
349   Interval_qsort_by_sigma(*sigmas,1,nintervals,*intervals,presortedp);
350 
351   *omegas = (int *) CALLOC(nintervals+1,sizeof(int));
352 
353   /* make first node, and recurse... */
354   *root = Node_make(&(*nnodes),1,nintervals,*sigmas,*omegas,*intervals,presortedp);
355   Node_index(*root,&index);
356 
357   return;
358 }
359 
360 /************************************************************************
361  *   Output procedures
362  ************************************************************************/
363 
364 /************************************************************************
365  * File format:
366  *   0: sizeof(int)  [in version >= 2]
367  *   version: sizeof(int)  [in version >= 2]
368  *   label_pointer_size: sizeof(int) [in version >= 5]
369  *   annot_pointer_size: sizeof(int) [in version >= 5]
370  *
371  *   total_nintervals: sizeof(int)  [so maximum is 2 billion]
372  *      If >= 0, then using 4-byte quantities (unsigned int) for interval coordinates
373  *      If < 0, then using 8-byte quantities for interval coordinates (generally only used for v1)
374  *   ntypes: sizeof(int)
375  *   nfields: sizeof(int)  [in version >= 2]
376  *
377  *   ndivs: sizeof(int) [in version >= 3.  In version < 3, set ndivs = 1.]
378  *   nintervals: ndivs*sizeof(int) [in version >= 3]
379  *   cum_nintervals: (ndivs+1)*sizeof(int) [in version >= 3]
380  *   nnodes: ndivs*sizeof(int)
381  *   cum_nnodes: (ndivs+1)*sizeof(int) [in version >= 3]
382  *
383  *   divsort: sizeof(int) [in version >= 3: 0 = none, 1 = alpha, 2 = chrom]
384  *   divpointers: (ndivs+1)*sizeof(UINT4)  [in version >= 3]
385  *   divs: ndivs*(variable length strings, including '\0')  [in version >= 3]
386  *
387  *   for each div (recall that in version < 3, ndivs = 1):
388  *     alphas: (nintervals[div]+1)*sizeof(int)  [in version >= 2]
389  *     betas: (nintervals[div]+1)*sizeof(int)  [in version >= 2]
390  *     sigmas: (nintervals[div]+1)*sizeof(int)
391  *     omegas: (nintervals[div]+1)*sizeof(int)
392  *     nodes: nnodes[div]*sizeof(struct FNode_T)
393  *
394  *   intervals: total_nintervals[div]*sizeof(struct Interval_T)
395  *      [3 ints in version 1; 4 ints or longs in version >= 2, depending on whether total_nintervals > 0]
396  *
397  *   typepointers: (ntypes+1)*sizeof(UINT4)
398  *   types: ntypes*(variable length strings, including '\0')
399  *
400  *   fieldpointers: (nfields+1)*sizeof(UINT4)  [in version >= 2]
401  *   fields: nfields*(variable length strings, including '\0')  [in version >= 2]
402  *
403  *   valueorder: total_nintervals*sizeof(int)  [if version == 6]
404  *   values: total_nintervals*sizeof(double)   [if version == 6]
405  *
406  *   labelorder: total_nintervals*sizeof(int);
407  *   labelpointers: (total_nintervals+1)*sizeof(UINT4)  [changes to long unsigned int in v4 or if label_pointer_size = 8 in v5]
408  *   labels: total_nintervals*(variable length strings, including '\0')
409  *
410  *   annotpointers: (total_nintervals+1)*sizeof(UINT4)  [changes to long unsigned int in v4 or if annot_pointer_size = 8 in v5]
411  *   annotations: total_nintervals*(variable length strings, including '\0')
412  *
413  *   Note: labels and annotations are organized by division, so we can
414  *   find the label/annot by looking up index = cum_nintervals[div]+recno.
415  *   This is because alphas/betas/sigmas/omegas run from 1 to nintervals[div]
416  ************************************************************************/
417 
418 /* For making valueorder */
419 struct Valueitem_T {
420   int divno;
421   int recno;
422   double value;
423 };
424 
425 static int
Valueitem_cmp(const void * x,const void * y)426 Valueitem_cmp (const void *x, const void *y) {
427   struct Valueitem_T *a = (struct Valueitem_T *) x;
428   struct Valueitem_T *b = (struct Valueitem_T *) y;
429 
430   if (a->value < b->value) {
431     return -1;
432   } else if (b->value < a->value) {
433     return +1;
434   } else {
435     return 0;
436   }
437 }
438 
439 
440 static int *
get_valueorder(List_T divlist,Table_T valuetable,int * cum_nintervals,int total_nintervals)441 get_valueorder (List_T divlist, Table_T valuetable, int *cum_nintervals, int total_nintervals) {
442   int *valueorder, divno, recno, i, k = 0;
443   struct Valueitem_T *valueitems;
444   char *divstring;
445   List_T d;
446   Doublelist_T valuelist, v;
447 
448   valueorder = (int *) CALLOC(total_nintervals,sizeof(int));
449   valueitems = (struct Valueitem_T *) CALLOC(total_nintervals,sizeof(struct Valueitem_T));
450   divno = 0;
451 
452   for (d = divlist; d != NULL; d = List_next(d)) {
453     divstring = (char *) List_head(d);
454     valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
455     recno = 0;
456     for (v = valuelist; v != NULL; v = Doublelist_next(v)) {
457       valueitems[k].divno = divno;
458       valueitems[k].recno = recno;
459       valueitems[k].value = Doublelist_head(v);
460       k++;
461       recno++;
462     }
463     divno++;
464   }
465 
466   qsort(valueitems,total_nintervals,sizeof(struct Valueitem_T),Valueitem_cmp);
467   for (i = 0; i < total_nintervals; i++) {
468     valueorder[i] = cum_nintervals[valueitems[i].divno] + valueitems[i].recno;
469   }
470   FREE(valueitems);
471   return valueorder;
472 }
473 
474 
475 
476 /* For making labelorder */
477 struct Sortitem_T {
478   int divno;
479   int recno;
480   char *label;
481 };
482 
483 static int
Sortitem_cmp(const void * x,const void * y)484 Sortitem_cmp (const void *x, const void *y) {
485   struct Sortitem_T *a = (struct Sortitem_T *) x;
486   struct Sortitem_T *b = (struct Sortitem_T *) y;
487 
488   return strcmp(a->label,b->label);
489 }
490 
491 static int *
get_labelorder(List_T divlist,Table_T labeltable,int * cum_nintervals,int total_nintervals)492 get_labelorder (List_T divlist, Table_T labeltable, int *cum_nintervals, int total_nintervals) {
493   int *labelorder, divno, recno, i, k = 0;
494   struct Sortitem_T *sortitems;
495   char *divstring;
496   List_T labellist, d, p;
497 
498   labelorder = (int *) CALLOC(total_nintervals,sizeof(int));
499   sortitems = (struct Sortitem_T *) CALLOC(total_nintervals,sizeof(struct Sortitem_T));
500   divno = 0;
501 
502   for (d = divlist; d != NULL; d = List_next(d)) {
503     divstring = (char *) List_head(d);
504     labellist = (List_T) Table_get(labeltable,(void *) divstring);
505     recno = 0;
506     for (p = labellist; p != NULL; p = List_next(p)) {
507       sortitems[k].divno = divno;
508       sortitems[k].recno = recno;
509       sortitems[k].label = (char *) List_head(p);
510       k++;
511       recno++;
512     }
513     divno++;
514   }
515 
516   qsort(sortitems,total_nintervals,sizeof(struct Sortitem_T),Sortitem_cmp);
517   for (i = 0; i < total_nintervals; i++) {
518     labelorder[i] = cum_nintervals[sortitems[i].divno] + sortitems[i].recno;
519   }
520   FREE(sortitems);
521   return labelorder;
522 }
523 
524 
525 /* Prints in DFS order */
526 static void
Node_fwrite(FILE * fp,Node_T node)527 Node_fwrite (FILE *fp, Node_T node) {
528   int leftindex, rightindex;
529 
530   if (node != NULL) {
531     if (node->left == NULL) {
532       leftindex = -1;
533     } else {
534       leftindex = node->left->index;
535     }
536 
537     if (node->right == NULL) {
538       rightindex = -1;
539     } else {
540       rightindex = node->right->index;
541     }
542 
543     debug(printf("Writing node %u %d %d\n",node->value,node->a,node->b));
544     FWRITE_UINT(node->value,fp);
545     FWRITE_INT(node->a,fp);
546     FWRITE_INT(node->b,fp);
547     FWRITE_INT(leftindex,fp);
548     FWRITE_INT(rightindex,fp);
549 
550     Node_fwrite(fp,node->left);
551     Node_fwrite(fp,node->right);
552   }
553   return;
554 }
555 
556 
557 /* Prints in DFS order */
558 static void
Node_create(T new,int divno,int * i,Node_T node)559 Node_create (T new, int divno, int *i, Node_T node) {
560   int leftindex, rightindex;
561 
562   if (node != NULL) {
563     if (node->left == NULL) {
564       leftindex = -1;
565     } else {
566       leftindex = node->left->index;
567     }
568 
569     if (node->right == NULL) {
570       rightindex = -1;
571     } else {
572       rightindex = node->right->index;
573     }
574 
575     debug(printf("Writing node %u %d %d\n",node->value,node->a,node->b));
576     new->nodes[divno][*i].value = node->value;
577     new->nodes[divno][*i].a = node->a;
578     new->nodes[divno][*i].b = node->b;
579     new->nodes[divno][*i].leftindex = leftindex;
580     new->nodes[divno][*i].rightindex = rightindex;
581     *i += 1;
582 
583     Node_create(new,divno,&(*i),node->left);
584     Node_create(new,divno,&(*i),node->right);
585   }
586   return;
587 }
588 
589 
590 #if 0
591 /* Stores in DFS order */
592 static void
593 Node_store (int *fnodei, struct FNode_T *fnodes, Node_T node) {
594   int leftindex, rightindex;
595 
596   if (node != NULL) {
597     if (node->left == NULL) {
598       leftindex = -1;
599     } else {
600       leftindex = node->left->index;
601     }
602 
603     if (node->right == NULL) {
604       rightindex = -1;
605     } else {
606       rightindex = node->right->index;
607     }
608 
609     fnodes[*fnodei].value = node->value;
610     fnodes[*fnodei].a = node->a;
611     fnodes[*fnodei].b = node->b;
612     fnodes[*fnodei].leftindex = leftindex;
613     fnodes[*fnodei].rightindex = rightindex;
614     (*fnodei)++;
615 
616     Node_store(&(*fnodei),fnodes,node->left);
617     Node_store(&(*fnodei),fnodes,node->right);
618   }
619   return;
620 }
621 #endif
622 
623 
624 static void
IIT_write_div_header(FILE * fp,int total_nintervals,int ntypes,int nfields,int ndivs,int * nintervals,int * nnodes,int * cum_nintervals,int * cum_nnodes,List_T divlist,Sorttype_T divsort,int version,bool label_pointers_8p,bool annot_pointers_8p)625 IIT_write_div_header (FILE *fp, int total_nintervals, int ntypes, int nfields, int ndivs,
626 		      int *nintervals, int *nnodes, int *cum_nintervals, int *cum_nnodes,
627 		      List_T divlist, Sorttype_T divsort, int version,
628 		      bool label_pointers_8p, bool annot_pointers_8p) {
629   int new_format_indicator = 0, label_pointer_size, annot_pointer_size;
630   int divno;
631   UINT4 pointer = 0U, padded;
632   List_T p;
633   char *divstring;
634 
635   assert(version >= 2);
636   FWRITE_INT(new_format_indicator,fp); /* Indicates new format, since nintervals > 0 */
637   FWRITE_INT(version,fp);
638 
639   if (version >= 5) {
640     if (label_pointers_8p == false) {
641       label_pointer_size = 4;
642     } else {
643       label_pointer_size = 8;
644     }
645     if (annot_pointers_8p == false) {
646       annot_pointer_size = 4;
647     } else {
648       annot_pointer_size = 8;
649     }
650     FWRITE_INT(label_pointer_size,fp);
651     FWRITE_INT(annot_pointer_size,fp);
652   }
653 
654   FWRITE_INT(total_nintervals,fp);
655   FWRITE_INT(ntypes,fp);
656   FWRITE_INT(nfields,fp);
657 
658   if (version >= 3) {
659     FWRITE_INT(ndivs,fp);
660     for (divno = 0; divno < ndivs; divno++) {
661       FWRITE_INT(nintervals[divno],fp);
662     }
663     for (divno = 0; divno <= ndivs; divno++) {
664       FWRITE_INT(cum_nintervals[divno],fp);
665     }
666   }
667   for (divno = 0; divno < ndivs; divno++) {
668     FWRITE_INT(nnodes[divno],fp);
669   }
670   if (version >= 3) {
671     for (divno = 0; divno <= ndivs; divno++) {
672       FWRITE_INT(cum_nnodes[divno],fp);
673     }
674   }
675 
676   if (version >= 3) {
677     FWRITE_INT(divsort,fp);
678   }
679 
680   if (version >= 3) {
681     /* Write div pointers */
682     pointer = 0U;
683     for (p = divlist; p != NULL; p = List_next(p)) {
684       FWRITE_UINT(pointer,fp);
685       divstring = (char *) List_head(p);
686       pointer += (UINT4) strlen(divstring)+1U;	/* Add count for '\0' */
687     }
688 #ifdef PAD_STRINGS
689     /* Pad last pointer to next multiple of 4 */
690     padded = (pointer + 3) & ~3;
691     debug1(printf("divstrings: converting pointer %d to padded %d\n",pointer,padded));
692     FWRITE_UINT(padded,fp);
693 #else
694     FWRITE_UINT(pointer,fp);
695 #endif
696 
697     /* Write divs */
698     for (p = divlist; p != NULL; p = List_next(p)) {
699       divstring = (char *) List_head(p);
700       FWRITE_CHARS(divstring,strlen(divstring)+1,fp); /* Write '\0' */
701     }
702 #ifdef PAD_STRINGS
703     /* Add padding */
704     while (pointer++ < padded) {
705       debug1(printf("divstrings: adding 0 as padding\n"));
706       fputc('\0',fp);
707     }
708 #endif
709   }
710 
711   return;
712 }
713 
714 
715 static void
IIT_create_div_header(T new,int total_nintervals,int ntypes,int nfields,int ndivs,int * nintervals,int * nnodes,int * cum_nintervals,int * cum_nnodes,List_T divlist,Sorttype_T divsort,int version)716 IIT_create_div_header (T new, int total_nintervals, int ntypes, int nfields, int ndivs,
717 		       int *nintervals, int *nnodes, int *cum_nintervals, int *cum_nnodes,
718 		       List_T divlist, Sorttype_T divsort, int version) {
719   int divno;
720   UINT4 pointer = 0U, padded;
721   List_T p;
722   char *divstring;
723   UINT4 stringlen, k;
724 
725   new->version = version;
726   new->total_nintervals = total_nintervals;
727   new->ntypes = ntypes;
728   new->nfields = nfields;
729 
730   new->ndivs = ndivs;
731   new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
732   for (divno = 0; divno < ndivs; divno++) {
733     new->nintervals[divno] = nintervals[divno];
734   }
735 
736   new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
737   for (divno = 0; divno <= ndivs; divno++) {
738     new->cum_nintervals[divno] = cum_nintervals[divno];
739   }
740 
741   new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
742   for (divno = 0; divno < ndivs; divno++) {
743     new->nnodes[divno] = nnodes[divno];
744   }
745 
746   new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
747   for (divno = 0; divno <= ndivs; divno++) {
748     new->cum_nnodes[divno] = cum_nnodes[divno];
749   }
750 
751   new->divsort = divsort;
752 
753   new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
754   /* Create div pointers */
755   k = 0;
756   pointer = 0U;
757   for (p = divlist; p != NULL; p = List_next(p)) {
758     new->divpointers[k++] = pointer;
759     divstring = (char *) List_head(p);
760     pointer += (UINT4) strlen(divstring)+1U;	/* Add count for '\0' */
761   }
762 #ifdef PAD_STRINGS
763   /* Pad last pointer to next multiple of 4 */
764   padded = (pointer + 3) & ~3;
765   debug1(printf("divstrings: converting pointer %d to padded %d\n",pointer,padded));
766   new->divpointers[k++] = padded;
767 #else
768   new->divpointers[k++] = pointer;
769 #endif
770 
771   stringlen = new->divpointers[new->ndivs];
772   new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
773   /* Create divs */
774   k = 0;
775   for (p = divlist; p != NULL; p = List_next(p)) {
776     divstring = (char *) List_head(p);
777     strcpy(&(new->divstrings[k]),divstring);
778     k += strlen(divstring);
779     new->divstrings[k++] = '\0';
780   }
781 #ifdef PAD_STRINGS
782   /* Add padding */
783   while (k < padded) {
784     debug1(printf("divstrings: adding 0 as padding\n"));
785     new->divstrings[k++] = '\0';
786   }
787 #endif
788 
789   return;
790 }
791 
792 
793 static void
IIT_write_one_div(FILE * fp,Node_T root,int * alphas,int * betas,int * sigmas,int * omegas,int nintervals,int version)794 IIT_write_one_div (FILE *fp, Node_T root, int *alphas, int *betas, int *sigmas, int *omegas,
795 		   int nintervals, int version) {
796 #ifdef DEBUG
797   int i;
798 #endif
799 
800   assert(version >= 2);
801   FWRITE_INTS(alphas,nintervals + 1,fp);
802   FWRITE_INTS(betas,nintervals + 1,fp);
803 
804   debug(
805 	printf("alphas:");
806 	for (i = 0; i < nintervals + 1; i++) {
807 	  printf(" %d",alphas[i]);
808 	}
809 	printf("\n");
810 
811 	printf("betas:");
812 	for (i = 0; i < nintervals + 1; i++) {
813 	  printf(" %d",betas[i]);
814 	}
815 	printf("\n");
816 	);
817 
818   FWRITE_INTS(sigmas,nintervals + 1,fp);
819   FWRITE_INTS(omegas,nintervals + 1,fp);
820 
821   debug(
822 	printf("sigmas:");
823 	for (i = 0; i < nintervals + 1; i++) {
824 	  printf(" %d",sigmas[i]);
825 	}
826 	printf("\n");
827 
828 	printf("omegas:");
829 	for (i = 0; i < nintervals + 1; i++) {
830 	  printf(" %d",omegas[i]);
831 	}
832 	printf("\n");
833 	);
834 
835   debug(printf("Starting to write nodes\n"));
836   Node_fwrite(fp,root);
837   debug(printf("\n"));
838 
839   return;
840 }
841 
842 
843 static void
IIT_create_one_div(T new,int divno,Node_T root,int * alphas,int * betas,int * sigmas,int * omegas,int nintervals)844 IIT_create_one_div (T new, int divno, Node_T root, int *alphas, int *betas, int *sigmas, int *omegas,
845 		    int nintervals) {
846   int i;
847 
848   new->alphas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
849   for (i = 0; i <= nintervals; i++) {
850     new->alphas[divno][i] = alphas[i];
851   }
852 
853   new->betas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
854   for (i = 0; i <= nintervals; i++) {
855     new->betas[divno][i] = betas[i];
856   }
857 
858   new->sigmas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
859   for (i = 0; i <= nintervals; i++) {
860     new->sigmas[divno][i] = sigmas[i];
861   }
862 
863   new->omegas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
864   for (i = 0; i <= nintervals; i++) {
865     new->omegas[divno][i] = omegas[i];
866   }
867 
868   if (new->nnodes[divno] == 0) {
869     new->nodes[divno] = (struct FNode_T *) NULL;
870   } else {
871     new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
872     i = 0;
873     Node_create(new,divno,&i,root);
874   }
875 
876   return;
877 }
878 
879 
880 static void
IIT_write_div_footer(FILE * fp,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,int * cum_nintervals,int total_nintervals,int version,bool label_pointers_8p,bool annot_pointers_8p)881 IIT_write_div_footer (FILE *fp, List_T divlist, List_T typelist, List_T fieldlist,
882 		      Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
883 		      int *cum_nintervals, int total_nintervals, int version,
884 		      bool label_pointers_8p, bool annot_pointers_8p) {
885   List_T intervallist, labellist, annotlist, d, p;
886   Doublelist_T valuelist, v;
887   Interval_T interval;
888 #ifdef HAVE_64_BIT
889   UINT8 pointer8 = 0LU, padded8;
890 #endif
891   UINT4 pointer = 0U, padded;
892   int *labelorder, *valueorder;
893   double value;
894   char *divstring, *typestring, *fieldstring, *label, *annot;
895 
896 #ifndef HAVE_64_BIT
897   if (label_pointers_8p == true || annot_pointers_8p == true) {
898     fprintf(stderr,"Machine is not 64-bit, so cannot write 8-byte pointers.\n");
899     exit(9);
900   }
901 #endif
902 
903   assert(version >= 2);
904   for (d = divlist; d != NULL; d = List_next(d)) {
905     divstring = (char *) List_head(d);
906     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
907     for (p = intervallist; p != NULL; p = List_next(p)) {
908       interval = (Interval_T) List_head(p);
909       FWRITE_UINT(interval->low,fp);
910       FWRITE_UINT(interval->high,fp);
911       FWRITE_INT(interval->sign,fp);
912       FWRITE_INT(interval->type,fp);
913     }
914   }
915 
916   /* Write type pointers */
917   pointer = 0U;
918   for (p = typelist; p != NULL; p = List_next(p)) {
919     FWRITE_UINT(pointer,fp);
920     typestring = (char *) List_head(p);
921     if (typestring == NULL) {
922       abort();			/* Even type 0 has an empty string */
923     } else {
924       pointer += (UINT4) strlen(typestring)+1U;	/* Add count for '\0' */
925     }
926   }
927 #ifdef PAD_STRINGS
928   /* Pad last pointer to next multiple of 4 */
929   padded = (pointer + 3) & ~3;
930   debug1(printf("typestrings: converting pointer %d to padded %d\n",pointer,padded));
931   FWRITE_UINT(padded,fp);
932 #else
933   FWRITE_UINT(pointer,fp);
934 #endif
935 
936 
937   /* Write types */
938   for (p = typelist; p != NULL; p = List_next(p)) {
939     typestring = (char *) List_head(p);
940     FWRITE_CHARS(typestring,strlen(typestring)+1,fp); /* Write '\0' */
941   }
942 #ifdef PAD_STRINGS
943   /* Add padding */
944   while (pointer++ < padded) {
945     debug1(printf("typestrings: adding 0 as padding\n"));
946     fputc('\0',fp);
947   }
948 #endif
949 
950 
951   /* Write field pointers */
952   pointer = 0U;
953   for (p = fieldlist; p != NULL; p = List_next(p)) {
954     FWRITE_UINT(pointer,fp);
955     fieldstring = (char *) List_head(p);
956     pointer += (UINT4) strlen(fieldstring)+1U;	/* Add count for '\0' */
957   }
958 #ifdef PAD_STRINGS
959   /* Pad last pointer to next multiple of 4 */
960   padded = (pointer + 3) & ~3;
961   debug1(printf("fieldstrings: converting pointer %d to padded %d\n",pointer,padded));
962   FWRITE_UINT(padded,fp);
963 #else
964   FWRITE_UINT(pointer,fp);
965 #endif
966 
967 
968   /* Write fields */
969   for (p = fieldlist; p != NULL; p = List_next(p)) {
970     fieldstring = (char *) List_head(p);
971     FWRITE_CHARS(fieldstring,strlen(fieldstring)+1,fp); /* Write '\0' */
972   }
973 #ifdef PAD_STRINGS
974   /* Add padding */
975   while (pointer++ < padded) {
976     debug1(printf("fieldstrings: adding 0 as padding\n"));
977     fputc('\0',fp);
978   }
979 #endif
980 
981 
982   /* Write valueorder (if values present) */
983   if (valuetable != NULL) {
984     valueorder = get_valueorder(divlist,valuetable,cum_nintervals,total_nintervals);
985     FWRITE_INTS(valueorder,total_nintervals,fp);
986     FREE(valueorder);
987 
988     /* Write values */
989     for (d = divlist; d != NULL; d = List_next(d)) {
990       divstring = (char *) List_head(d);
991       valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
992       for (v = valuelist; v != NULL; v = Doublelist_next(v)) {
993 	value = Doublelist_head(v);
994 	FWRITE_DOUBLE(value,fp);
995       }
996     }
997   }
998 
999   /* Write labelorder */
1000   labelorder = get_labelorder(divlist,labeltable,cum_nintervals,total_nintervals);
1001   FWRITE_INTS(labelorder,total_nintervals,fp);
1002   FREE(labelorder);
1003 
1004   /* Write label pointers */
1005 #ifdef HAVE_64_BIT
1006   if (label_pointers_8p == true) {
1007     pointer8 = 0LU;
1008   } else {
1009     pointer = 0U;
1010   }
1011 #else
1012   pointer = 0U;
1013 #endif
1014 
1015   for (d = divlist; d != NULL; d = List_next(d)) {
1016     divstring = (char *) List_head(d);
1017     labellist = (List_T) Table_get(labeltable,(void *) divstring);
1018     for (p = labellist; p != NULL; p = List_next(p)) {
1019 #ifdef HAVE_64_BIT
1020       if (label_pointers_8p == true) {
1021 	FWRITE_UINT8(pointer8,fp);
1022       } else {
1023 	FWRITE_UINT(pointer,fp);
1024       }
1025 #else
1026       FWRITE_UINT(pointer,fp);
1027 #endif
1028 
1029       label = (char *) List_head(p);
1030 #ifdef HAVE_64_BIT
1031       if (label_pointers_8p == true) {
1032 	pointer8 += (UINT8) strlen(label)+1LU;	/* Add count for '\0' */
1033       } else {
1034 	pointer += (UINT4) strlen(label)+1U;	/* Add count for '\0' */
1035       }
1036 #else
1037       pointer += (UINT4) strlen(label)+1U;	/* Add count for '\0' */
1038 #endif
1039     }
1040   }
1041 
1042 #ifdef PAD_STRINGS
1043   /* Pad last pointer to next multiple of 4 */
1044 #ifdef HAVE_64_BIT
1045   if (label_pointers_8p == true) {
1046     padded8 = (pointer8 + 3) & ~3;
1047     FWRITE_UINT8(padded8,fp);
1048   } else {
1049     padded = (pointer + 3) & ~3;
1050     debug1(printf("labels: converting pointer %d to padded %d\n",pointer,padded));
1051     FWRITE_UINT(padded,fp);
1052   }
1053 #else
1054   padded = (pointer + 3) & ~3;
1055   FWRITE_UINT(padded,fp);
1056 #endif
1057 #else
1058 #ifdef HAVE_64_BIT
1059   if (label_pointers_8p == true) {
1060     FWRITE_UINT8(pointer8,fp);
1061   } else {
1062     FWRITE_UINT(pointer,fp);
1063   }
1064 #else
1065   FWRITE_UINT(pointer,fp);
1066 #endif
1067 #endif
1068 
1069 
1070   /* Write labels */
1071   for (d = divlist; d != NULL; d = List_next(d)) {
1072     divstring = (char *) List_head(d);
1073     labellist = (List_T) Table_get(labeltable,(void *) divstring);
1074     for (p = labellist; p != NULL; p = List_next(p)) {
1075       label = (char *) List_head(p);
1076       FWRITE_CHARS(label,strlen(label)+1,fp); /* Write '\0' */
1077     }
1078   }
1079 #ifdef PAD_STRINGS
1080   /* Add padding */
1081 #ifdef HAVE_64_BIT
1082   if (label_pointers_8p == true) {
1083     while (pointer8++ < padded8) {
1084       fputc('\0',fp);
1085     }
1086   } else {
1087     while (pointer++ < padded) {
1088       debug1(printf("labels: adding 0 as padding\n"));
1089       fputc('\0',fp);
1090     }
1091   }
1092 #else
1093   while (pointer++ < padded) {
1094     fputc('\0',fp);
1095   }
1096 #endif
1097 #endif
1098 
1099 
1100 
1101   /* Write annot pointers */
1102 #ifdef HAVE_64_BIT
1103   if (annot_pointers_8p == true) {
1104     pointer8 = 0LU;
1105   } else {
1106     pointer = 0U;
1107   }
1108 #else
1109   pointer = 0U;
1110 #endif
1111 
1112   for (d = divlist; d != NULL; d = List_next(d)) {
1113     divstring = (char *) List_head(d);
1114     annotlist = (List_T) Table_get(annottable,(void *) divstring);
1115     for (p = annotlist; p != NULL; p = List_next(p)) {
1116 #ifdef HAVE_64_BIT
1117       if (annot_pointers_8p == true) {
1118 	FWRITE_UINT8(pointer8,fp);
1119       } else {
1120 	FWRITE_UINT(pointer,fp);
1121       }
1122 #else
1123       FWRITE_UINT(pointer,fp);
1124 #endif
1125 
1126       annot = (char *) List_head(p);
1127 #ifdef HAVE_64_BIT
1128       if (annot_pointers_8p == true) {
1129 	pointer8 += (UINT8) strlen(annot)+1LU;	/* Add count for '\0' */
1130       } else {
1131 	pointer += (UINT4) strlen(annot)+1;	/* Add count for '\0' */
1132       }
1133 #else
1134       pointer += (UINT4) strlen(annot)+1;	/* Add count for '\0' */
1135 #endif
1136     }
1137   }
1138 
1139 #ifdef PAD_STRINGS
1140   /* Pad last pointer to next multiple of 4 */
1141 #ifdef HAVE_64_BIT
1142   if (annot_pointers_8p == true) {
1143     padded8 = (pointer8 + 3) & ~3;
1144     FWRITE_UINT8(padded8,fp);
1145   } else {
1146     padded = (pointer + 3) & ~3;
1147     debug1(printf("annotations: converting pointer %d to padded %d\n",pointer,padded));
1148     FWRITE_UINT(padded,fp);
1149   }
1150 #else
1151   padded = (pointer + 3) & ~3;
1152   FWRITE_UINT(padded,fp);
1153 #endif
1154 #else
1155 #ifdef HAVE_64_BIT
1156   if (annot_pointers_8p == true) {
1157     FWRITE_UINT8(pointer8,fp);
1158   } else {
1159     FWRITE_UINT(pointer,fp);
1160   }
1161 #else
1162   FWRITE_UINT(pointer,fp);
1163 #endif
1164 #endif
1165 
1166 
1167   /* Write annotations */
1168   for (d = divlist; d != NULL; d = List_next(d)) {
1169     divstring = (char *) List_head(d);
1170     annotlist = (List_T) Table_get(annottable,(void *) divstring);
1171     for (p = annotlist; p != NULL; p = List_next(p)) {
1172       annot = (char *) List_head(p);
1173       FWRITE_CHARS(annot,strlen(annot)+1,fp); /* Write '\0' */
1174     }
1175   }
1176 #ifdef PAD_STRINGS
1177   /* Add padding */
1178 #ifdef HAVE_64_BIT
1179   if (annot_pointers_8p == true) {
1180     while (pointer8++ < padded8) {
1181       fputc('\0',fp);
1182     }
1183   } else {
1184     while (pointer++ < padded) {
1185       debug1(printf("annotations: adding 0 as padding\n"));
1186       fputc('\0',fp);
1187     }
1188   }
1189 #else
1190   while (pointer++ < padded) {
1191     fputc('\0',fp);
1192   }
1193 #endif
1194 #endif
1195 
1196 
1197   return;
1198 }
1199 
1200 
1201 static void
IIT_create_div_footer(T new,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T labeltable,Table_T datatable,int * cum_nintervals,int total_nintervals)1202 IIT_create_div_footer (T new, List_T divlist, List_T typelist, List_T fieldlist,
1203 		       Table_T intervaltable, Table_T labeltable, Table_T datatable,
1204 		       int *cum_nintervals, int total_nintervals) {
1205   List_T intervallist, labellist, datalist, d, p;
1206   Interval_T interval;
1207   UINT4 pointer = 0U;
1208   char *divstring, *typestring, *fieldstring, *label;
1209   int stringlen;
1210   int divno, k;
1211 
1212   /* Create intervals */
1213   new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
1214   new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
1215   for (divno = 1; divno < new->ndivs; divno++) {
1216     new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
1217   }
1218 
1219   k = 0;
1220   for (d = divlist; d != NULL; d = List_next(d)) {
1221     divstring = (char *) List_head(d);
1222     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1223     for (p = intervallist; p != NULL; p = List_next(p)) {
1224       interval = (Interval_T) List_head(p);
1225       new->intervals[0][k].low = interval->low;
1226       new->intervals[0][k].high = interval->high;
1227       new->intervals[0][k].sign = interval->sign;
1228       new->intervals[0][k].type = interval->type;
1229       k++;
1230     }
1231   }
1232 
1233 
1234   /* Create type pointers */
1235   new->typepointers = (UINT4 *) CALLOC(new->ntypes+1,sizeof(UINT4));
1236   k = 0;
1237   new->typepointers[k++] = pointer = 0U;
1238   for (p = typelist; p != NULL; p = List_next(p)) {
1239     typestring = (char *) List_head(p);
1240     if (typestring == NULL) {
1241       abort();			/* Even type 0 has an empty string */
1242     } else {
1243       pointer += (UINT4) strlen(typestring)+1U;	/* Add count for '\0' */
1244       new->typepointers[k++] = pointer;
1245     }
1246   }
1247 
1248   /* Create types */
1249   if ((stringlen = new->typepointers[new->ntypes]) > 0) {
1250     new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
1251     k = 0;
1252     for (p = typelist; p != NULL; p = List_next(p)) {
1253       typestring = (char *) List_head(p);
1254       strcpy(&(new->typestrings[k]),typestring);
1255       k += strlen(typestring);
1256       new->typestrings[k++] = '\0';
1257     }
1258   }
1259 
1260 
1261   /* Create field pointers */
1262   new->fieldpointers = (UINT4 *) CALLOC(new->nfields+1,sizeof(UINT4));
1263   k = 0;
1264   new->fieldpointers[k++] = pointer = 0U;
1265   for (p = fieldlist; p != NULL; p = List_next(p)) {
1266     fieldstring = (char *) List_head(p);
1267     if (fieldstring == NULL) {
1268       abort();			/* Even type 0 has an empty string */
1269     } else {
1270       pointer += (UINT4) strlen(fieldstring)+1U;	/* Add count for '\0' */
1271       new->fieldpointers[k++] = pointer;
1272     }
1273   }
1274 
1275   /* Create fields */
1276   if ((stringlen = new->fieldpointers[new->nfields]) > 0) {
1277     new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
1278     k = 0;
1279     for (p = fieldlist; p != NULL; p = List_next(p)) {
1280       fieldstring = (char *) List_head(p);
1281       strcpy(&(new->fieldstrings[k]),fieldstring);
1282       k += strlen(fieldstring);
1283       new->fieldstrings[k++] = '\0';
1284     }
1285   }
1286 
1287 
1288   /* Create labelorder */
1289   new->labelorder = get_labelorder(divlist,labeltable,cum_nintervals,total_nintervals);
1290 
1291   /* Create label pointers */
1292   new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
1293   k = 0;
1294   new->labelpointers[k++] = pointer = 0U;
1295   for (d = divlist; d != NULL; d = List_next(d)) {
1296     divstring = (char *) List_head(d);
1297     labellist = (List_T) Table_get(labeltable,(void *) divstring);
1298     for (p = labellist; p != NULL; p = List_next(p)) {
1299       label = (char *) List_head(p);
1300       pointer += (UINT4) strlen(label)+1U;	/* Add count for '\0' */
1301       new->labelpointers[k++] = pointer;
1302     }
1303   }
1304 
1305   /* Create labels */
1306   stringlen = new->labelpointers[new->total_nintervals];
1307   new->labels = (char *) CALLOC(stringlen,sizeof(char));
1308   k = 0;
1309   for (d = divlist; d != NULL; d = List_next(d)) {
1310     divstring = (char *) List_head(d);
1311     labellist = (List_T) Table_get(labeltable,(void *) divstring);
1312     for (p = labellist; p != NULL; p = List_next(p)) {
1313       label = (char *) List_head(p);
1314       strcpy(&(new->labels[k]),label);
1315       k += strlen(label);
1316       new->labels[k++] = '\0';
1317     }
1318   }
1319 
1320 
1321   /* Create data pointers */
1322   new->datapointers = (void **) CALLOC(new->total_nintervals,sizeof(void *));
1323   k = 0;
1324   for (d = divlist; d != NULL; d = List_next(d)) {
1325     divstring = (char *) List_head(d);
1326     datalist = (List_T) Table_get(datatable,(void *) divstring);
1327     for (p = datalist; p != NULL; p = List_next(p)) {
1328       new->datapointers[k++] = List_head(p);
1329     }
1330   }
1331 
1332   return;
1333 }
1334 
1335 
1336 #if 0
1337 static void
1338 compute_flanking (T this) {
1339   int i;
1340 
1341   this->alphas = (int *) CALLOC(this->nintervals+1,sizeof(int));
1342   this->betas = (int *) CALLOC(this->nintervals+1,sizeof(int));
1343   for (i = 1; i <= this->nintervals; i++) {
1344     this->alphas[i] = this->betas[i] = i;
1345   }
1346   Interval_qsort_by_sigma(this->alphas,1,this->nintervals,this->intervals);
1347   Interval_qsort_by_omega(this->betas,1,this->nintervals,this->intervals);
1348   return;
1349 }
1350 
1351 
1352 /* Used only by iit_update.c */
1353 void
1354 IIT_output_direct (char *iitfile, T this, int version) {
1355   FILE *fp;
1356   off_t stringlen;
1357   int new_format_indicator = 0, i;
1358   FNode_T node;
1359 
1360   if ((fp = FOPEN_WRITE_BINARY(iitfile)) == NULL) {
1361     fprintf(stderr,"Error: can't open file %s\n",iitfile);
1362     exit(9);
1363   }
1364 
1365   assert(version >= 2);
1366   FWRITE_INT(new_format_indicator,fp); /* Indicates new format, since nintervals > 0 */
1367   FWRITE_INT(version,fp);
1368 
1369   FWRITE_INT(this->nintervals,fp);
1370   FWRITE_INT(this->ntypes,fp);
1371   FWRITE_INT(this->nfields,fp);
1372   FWRITE_INT(this->nnodes,fp);
1373 
1374   if (this->alphas == NULL) {
1375     compute_flanking(this);
1376   }
1377   FWRITE_INTS(this->alphas,this->nintervals + 1,fp);
1378   FWRITE_INTS(this->betas,this->nintervals + 1,fp);
1379 
1380   FWRITE_INTS(this->sigmas,this->nintervals + 1,fp);
1381   FWRITE_INTS(this->omegas,this->nintervals + 1,fp);
1382 
1383   /* Write nodes directly */
1384   for (i = 0; i < this->nnodes; i++) {
1385     node = &(this->nodes[i]);
1386     FWRITE_UINT(node->value,fp);
1387     FWRITE_INT(node->a,fp);
1388     FWRITE_INT(node->b,fp);
1389     FWRITE_INT(node->leftindex,fp);
1390     FWRITE_INT(node->rightindex,fp);
1391   }
1392 
1393   for (i = 0; i < this->nintervals; i++) {
1394     FWRITE_UINT(this->intervals[i].low,fp);
1395     FWRITE_UINT(this->intervals[i].high,fp);
1396     FWRITE_INT(this->intervals[i].sign,fp);
1397     FWRITE_INT(this->intervals[i].type,fp);
1398   }
1399 
1400   /* Write types directly */
1401   for (i = 0; i < this->ntypes+1; i++) {
1402     FWRITE_UINT(this->typepointers[i],fp);
1403   }
1404   if ((stringlen = this->typepointers[this->ntypes]) == 0) {
1405     fprintf(stderr,"Error in writing types: type stringlen is 0.\n");
1406     exit(9);
1407   } else {
1408     FWRITE_CHARS(this->typestrings,stringlen,fp);
1409   }
1410 
1411   /* Write fields directly */
1412   for (i = 0; i < this->nfields+1; i++) {
1413     FWRITE_UINT(this->fieldpointers[i],fp);
1414   }
1415   stringlen = this->fieldpointers[this->nfields];
1416   if (stringlen > 0) {
1417     FWRITE_CHARS(this->fieldstrings,stringlen,fp);
1418   }
1419 
1420   /* Write labelorder */
1421   FWRITE_INTS(this->labelorder,this->nintervals,fp);
1422 
1423   /* Write labels directly */
1424 #ifdef HAVE_64_BIT
1425   if (this->label_pointers_8p == true) {
1426     FWRITE_UINT8S(this->labelpointers,this->nintervals+1,fp);
1427   } else {
1428     FWRITE_UINTS(this->labelpointers,this->nintervals+1,fp);
1429   }
1430 #else
1431   FWRITE_UINTS(this->labelpointers,this->nintervals+1,fp);
1432 #endif
1433   if ((stringlen = this->labelpointers[this->nintervals]) == 0) {
1434     fprintf(stderr,"Error in writing labels: label stringlen is 0.\n");
1435     exit(9);
1436   } else {
1437     FWRITE_CHARS(this->labels,stringlen,fp);
1438   }
1439 
1440   /* Write annotations directly */
1441 #ifdef HAVE_64_BIT
1442   if (this->annot_pointers_8p == true) {
1443     FWRITE_UINT8S(this->annotpointers,this->nintervals+1,fp);
1444   } else {
1445     FWRITE_UINTS(this->annotpointers,this->nintervals+1,fp);
1446   }
1447 #else
1448   FWRITE_UINTS(this->annotpointers,this->nintervals+1,fp);
1449 #endif
1450   if ((stringlen = this->annotpointers[this->nintervals]) == 0) {
1451     fprintf(stderr,"Error in writing annotations: annotation stringlen is 0.\n");
1452     exit(9);
1453   } else {
1454     FWRITE_CHARS(this->annotations,stringlen,fp);
1455   }
1456 
1457   fclose(fp);
1458 
1459   return;
1460 }
1461 #endif
1462 
1463 
1464 /* If annotlist is NULL, X's are written */
1465 void
IIT_write(char * iitfile,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,Sorttype_T divsort,int version,bool label_pointers_8p,bool annot_pointers_8p)1466 IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist,
1467 	   Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
1468 	   Sorttype_T divsort, int version, bool label_pointers_8p, bool annot_pointers_8p) {
1469   Node_T root;
1470   FILE *fp;
1471   List_T intervallist, d;
1472   char *divstring;
1473   int ndivs, total_nintervals, *nintervals, *nnodes, nnodes_one_div, divno;
1474   int *cum_nintervals, *cum_nnodes;
1475   struct Interval_T *intervals;
1476   int *alphas, *betas, *sigmas, *omegas;
1477 
1478   if ((fp = FOPEN_WRITE_BINARY(iitfile)) == NULL) {
1479     fprintf(stderr,"Error: can't open file %s\n",iitfile);
1480     exit(9);
1481   } else {
1482     ndivs = List_length(divlist);
1483     nintervals = (int *) CALLOC(ndivs,sizeof(int));
1484     nnodes = (int *) CALLOC(ndivs,sizeof(int));
1485     for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1486       divstring = (char *) List_head(d);
1487       intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1488       nintervals[divno] = List_length(intervallist);
1489       nnodes[divno] = IIT_count_nnodes(intervallist,/*presortedp*/false);
1490     }
1491 
1492     cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
1493     cum_nintervals[0] = 0;
1494     for (divno = 1; divno <= ndivs; divno++) {
1495       cum_nintervals[divno] = cum_nintervals[divno-1] + nintervals[divno-1];
1496     }
1497     total_nintervals = cum_nintervals[ndivs];
1498     if (total_nintervals == 0) {
1499       fprintf(stderr,"Warning: No intervals were seen in input file\n");
1500       FREE(cum_nintervals);
1501       FREE(nnodes);
1502       FREE(nintervals);
1503       fclose(fp);
1504       return;
1505     }
1506 
1507     cum_nnodes = (int *) CALLOC(ndivs+1,sizeof(int));
1508     cum_nnodes[0] = 0;
1509     for (divno = 1; divno <= ndivs; divno++) {
1510       cum_nnodes[divno] = cum_nnodes[divno-1] + nnodes[divno-1];
1511     }
1512 
1513     fprintf(stderr,"Writing IIT file header information...");
1514     IIT_write_div_header(fp,total_nintervals,List_length(typelist),List_length(fieldlist),
1515 			 ndivs,nintervals,nnodes,cum_nintervals,cum_nnodes,divlist,divsort,version,
1516 			 label_pointers_8p,annot_pointers_8p);
1517     fprintf(stderr,"done\n");
1518 
1519     for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1520       divstring = (char *) List_head(d);
1521       intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1522 
1523       if (divstring[0] == '\0') {
1524 	fprintf(stderr,"Processing null division/chromosome...sorting...");
1525       } else {
1526 	fprintf(stderr,"Processing division/chromosome %s...sorting...",divstring);
1527       }
1528       IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
1529 			/*presortedp*/false);
1530 
1531       fprintf(stderr,"writing...");
1532       IIT_write_one_div(fp,root,alphas,betas,sigmas,omegas,nintervals[divno],version);
1533       fprintf(stderr,"done (%d intervals)\n",nintervals[divno]);
1534 
1535       Node_gc(&root);
1536       FREE(omegas);
1537       FREE(sigmas);
1538       FREE(betas);
1539       FREE(alphas);
1540       FREE(intervals);
1541     }
1542 
1543     fprintf(stderr,"Writing IIT file footer information...");
1544     IIT_write_div_footer(fp,divlist,typelist,fieldlist,intervaltable,
1545 			 valuetable,labeltable,annottable,cum_nintervals,total_nintervals,version,
1546 			 label_pointers_8p,annot_pointers_8p);
1547     fprintf(stderr,"done\n");
1548     FREE(cum_nnodes);
1549     FREE(nnodes);
1550     FREE(cum_nintervals);
1551     FREE(nintervals);
1552 
1553     fclose(fp);
1554     return;
1555   }
1556 }
1557 
1558 
1559 /* If annotlist is NULL, X's are written */
1560 T
IIT_create(List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T labeltable,Table_T datatable,Sorttype_T divsort,int version,bool presortedp)1561 IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervaltable,
1562 	    Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version,
1563 	    bool presortedp) {
1564   T new;
1565   Node_T root;
1566   List_T intervallist, d;
1567   char *divstring;
1568   int ndivs, total_nintervals, *nintervals, *nnodes, nnodes_one_div, divno;
1569   int *cum_nintervals, *cum_nnodes;
1570   struct Interval_T *intervals;
1571   int *alphas, *betas, *sigmas, *omegas;
1572 
1573   ndivs = List_length(divlist);
1574   nintervals = (int *) CALLOC(ndivs,sizeof(int));
1575   nnodes = (int *) CALLOC(ndivs,sizeof(int));
1576   for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1577     divstring = (char *) List_head(d);
1578     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1579     nintervals[divno] = List_length(intervallist);
1580     nnodes[divno] = IIT_count_nnodes(intervallist,presortedp);
1581   }
1582 
1583   cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
1584   cum_nintervals[0] = 0;
1585   for (divno = 1; divno <= ndivs; divno++) {
1586     cum_nintervals[divno] = cum_nintervals[divno-1] + nintervals[divno-1];
1587   }
1588   total_nintervals = cum_nintervals[ndivs];
1589   if (total_nintervals == 0) {
1590     fprintf(stderr,"Warning: No intervals were given to IIT_create\n");
1591     FREE(cum_nintervals);
1592     FREE(nnodes);
1593     FREE(nintervals);
1594     return (T) NULL;
1595   } else {
1596     new = (T) MALLOC(sizeof(*new));
1597   }
1598 
1599   cum_nnodes = (int *) CALLOC(ndivs+1,sizeof(int));
1600   cum_nnodes[0] = 0;
1601   for (divno = 1; divno <= ndivs; divno++) {
1602     cum_nnodes[divno] = cum_nnodes[divno-1] + nnodes[divno-1];
1603   }
1604 
1605   IIT_create_div_header(new,total_nintervals,List_length(typelist),List_length(fieldlist),
1606 			ndivs,nintervals,nnodes,cum_nintervals,cum_nnodes,divlist,divsort,version);
1607 
1608   new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
1609   new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
1610   new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
1611   new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
1612   new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
1613 
1614   for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1615     divstring = (char *) List_head(d);
1616     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1617     IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
1618 		      presortedp);
1619     IIT_create_one_div(new,divno,root,alphas,betas,sigmas,omegas,nintervals[divno]);
1620 
1621     Node_gc(&root);
1622     FREE(omegas);
1623     FREE(sigmas);
1624     FREE(betas);
1625     FREE(alphas);
1626     FREE(intervals);
1627   }
1628 
1629   IIT_create_div_footer(new,divlist,typelist,fieldlist,intervaltable,
1630 			labeltable,datatable,cum_nintervals,total_nintervals);
1631   FREE(cum_nintervals);
1632 
1633   return new;
1634 }
1635 
1636 
1637 #if 0
1638 /* Problematic because this file no longer defines T */
1639 void
1640 IIT_backfill_sequence (T this, int index, int offset, char *Buffer) {
1641   int recno;
1642   char *ptr;
1643 
1644   recno = index - 1;
1645   ptr = &(this->annotations[this->annotpointers[recno] + offset]);
1646   if (strncpy(ptr,Buffer,strlen(Buffer)) == NULL) {
1647     fprintf(stderr,"Unable to write %s into iit file\n",Buffer);
1648     exit(9);
1649   }
1650   return;
1651 }
1652 #endif
1653 
1654