1 static char rcsid[] = "$Id: iit-write.c 213998 2018-03-03 04:59:34Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #ifndef HAVE_MEMCPY
7 # define memcpy(d,s,n) bcopy((s),(d),(n))
8 #endif
9 #ifndef HAVE_MEMMOVE
10 # define memmove(d,s,n) bcopy((s),(d),(n))
11 #endif
12
13 #include "iit-write.h"
14
15 #ifdef WORDS_BIGENDIAN
16 #include "bigendian.h"
17 #else
18 #include "littleendian.h"
19 #endif
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h> /* For strlen and strcmp */
24 #include "assert.h"
25 #include "mem.h"
26 #include "fopen.h"
27 #include "interval.h"
28 #include "types.h" /* For HAVE_64_BIT and UINT8 */
29 #include "doublelist.h"
30
31
32 /* Makes IIT files more efficient, because the integer arrays will be aligned */
33 #define PAD_STRINGS 1
34
35 #ifdef DEBUG
36 #define debug(x) x
37 #else
38 #define debug(x)
39 #endif
40
41 /* Padding */
42 #ifdef DEBUG1
43 #define debug1(x) x
44 #else
45 #define debug1(x)
46 #endif
47
48
49 typedef struct Node_T *Node_T;
50 struct Node_T {
51 int index;
52 Chrpos_T value;
53 int a;
54 int b;
55 Node_T left;
56 Node_T right;
57 };
58
59 static Node_T
Node_new()60 Node_new () {
61 Node_T new = (Node_T) MALLOC(sizeof(*new));
62
63 new->index = -1;
64 new->left = new->right = NULL;
65 return new;
66 }
67
68 static void
Node_gc(Node_T * old)69 Node_gc (Node_T *old) {
70 if (*old) {
71 Node_gc(&((*old)->left));
72 Node_gc(&((*old)->right));
73 FREE(*old);
74 }
75 return;
76 }
77
78
79 #define T IIT_T
80
81
82 /************************************************************************/
83
84 static int
is_right_split(int i,int j,int r,Chrpos_T value,int * sigmas,struct Interval_T * intervals)85 is_right_split (int i, int j, int r, Chrpos_T value, int *sigmas,
86 struct Interval_T *intervals) {
87 int iota, lambda;
88
89 iota = 0;
90 for (lambda = i; lambda <= j; lambda++) {
91 if (Interval_is_contained(value,intervals,sigmas[lambda]) == true) {
92 iota = lambda;
93 }
94 }
95 return (iota == r);
96 }
97
98 static bool
is_sorted(int array[],int i,int j,Chrpos_T (* endpoint)(),struct Interval_T * intervals)99 is_sorted (int array[], int i, int j, Chrpos_T (*endpoint)(), struct Interval_T *intervals) {
100 int lambda;
101
102 for (lambda = i; lambda <= j - 1; lambda++) {
103 if (endpoint(intervals,array[lambda]) > endpoint(intervals,array[lambda + 1])) {
104 return false;
105 }
106 }
107 return true;
108 }
109
110 static bool
is_empty(int array[],int i,int j)111 is_empty (int array[], int i, int j) {
112 int lambda;
113
114 for (lambda = i; lambda <= j; lambda++) {
115 if (array[lambda] != 0) {
116 return false;
117 }
118 }
119 return true;
120 }
121
122 static bool
is_valid_input(int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)123 is_valid_input (int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals) {
124 int lambda, iota;
125
126 assert(is_sorted (sigmas, i, j, Interval_array_low, intervals));
127 assert(is_empty (omegas, i, j));
128 for (lambda = i; lambda <= j; lambda++) {
129 iota = sigmas[lambda];
130 assert(Interval_is_contained(Interval_array_low(intervals,iota),intervals,iota)
131 || Interval_is_contained(Interval_array_high(intervals,iota),intervals,iota));
132
133 }
134 return true;
135 }
136
137 /************************************************************************/
138
139 static bool
Node_is_valid_output(Node_T node,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)140 Node_is_valid_output (Node_T node, int i, int j, int *sigmas, int *omegas,
141 struct Interval_T *intervals) {
142 int lambda;
143
144 assert((i <= node->a) && (node->b <= j));
145 assert(is_sorted (sigmas, node->a, node->b, Interval_array_low, intervals));
146 assert(is_sorted (omegas, node->a, node->b, Interval_array_high, intervals));
147 assert(is_right_split (i, j, node->b, node->value, sigmas, intervals));
148
149 for (lambda = node->a; lambda <= node->b; lambda++) {
150 assert(Interval_is_contained(node->value,intervals,sigmas[lambda])
151 && Interval_is_contained(node->value,intervals,omegas[lambda]));
152 }
153
154 for (lambda = i; lambda <= node->a - 1; lambda++) {
155 assert(Interval_is_contained(node->value,intervals,sigmas[lambda]) == false);
156 }
157 for (lambda = node->b + 1; lambda <= j; lambda++) {
158 assert(Interval_is_contained(node->value,intervals,sigmas[lambda]) == false);
159 }
160
161 return true;
162 }
163
164
165
166 /* Refinement of node_make().
167 Select proper value and split off "right of" triangles. */
168 static void
node_select(int * index,Chrpos_T * value,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals)169 node_select (int *index, Chrpos_T *value, int i, int j,
170 int *sigmas, int *omegas, struct Interval_T *intervals) {
171 int r = j - (j - i) / 3;
172 Chrpos_T k = Interval_array_low(intervals,sigmas[r]);
173
174 while ((r < j) && (Interval_array_low(intervals,sigmas[r + 1]) == k)) {
175 r ++;
176 }
177
178 if (Interval_is_contained(k,intervals,sigmas[r]) == false) {
179 /* adjust r to the left for "open" intervals */
180 while ((r > i) && (Interval_is_contained(k,intervals,sigmas[r-1]) == false)) {
181 r --;
182 printf(" basic_iit: (-)\n");
183 }
184 if (Interval_is_contained(k,intervals,sigmas[r]) == false) {
185 r --;
186 printf(" basic_iit: [-]\n");
187 assert(r == i - 1);
188 printf(" basic_iit WARNING: empty NODE!?!\n");
189 }
190 }
191 assert(is_right_split (i, j, r, k, sigmas, intervals));
192 *index = r;
193 *value = k;
194 return;
195 }
196
197
198 /* Makes node out of sigmas[i..j], and recurses. */
199 static Node_T
Node_make(int * nnodes,int i,int j,int * sigmas,int * omegas,struct Interval_T * intervals,bool presortedp)200 Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals,
201 bool presortedp) {
202 Node_T node;
203 int lambda, iota;
204 int q, r;
205
206 assert(is_valid_input (i, j, sigmas, omegas, intervals));
207 if (i > j) {
208 return (Node_T) NULL;
209 } else {
210 node = Node_new();
211 *nnodes += 1;
212
213 /* select value & get "right of" intervals sigma[r+1..j] */
214 node_select(&r,&node->value,i,j,sigmas,omegas,intervals);
215
216 /* mark "contains" intervals in sigma[i..r] to omega[i<q+1..r] */
217 q = r;
218 for (lambda = r; lambda >= i; lambda--) {
219 if (Interval_is_contained(node->value,intervals,sigmas[lambda]) == true) {
220 omegas[q] = sigmas[lambda];
221 sigmas[lambda] = 0;
222 q --;
223 }
224 }
225
226 /* move remaining "left of" intervals from sigma[i..r] to sigma[i..q] */
227 iota = i;
228 for (lambda = i; lambda <= r; lambda++) {
229 if (sigmas[lambda] != 0) {
230 sigmas[iota] = sigmas[lambda];
231 iota ++;
232 }
233 }
234 assert(iota == q + 1);
235
236 /* copy omega[q+1..r] back to sigma[q+1..r] & sort omega[q+1..r] */
237 for (lambda = q+1; lambda <= r; lambda++) {
238 sigmas[lambda] = omegas[lambda];
239 }
240 Interval_qsort_by_omega(omegas,q+1,r,intervals,presortedp);
241 node->a = q + 1;
242 node->b = r;
243
244 #if 0
245 fprintf(stderr," NODE=%u [%d..%d], left: %d, cont: %d, right: %d\n",
246 node->value, i, j, q - i + 1, r - q, j - r);
247 #endif
248
249 assert(Node_is_valid_output (node, i, j, sigmas, omegas, intervals));
250
251 /* recurse */
252 node->left = Node_make(&(*nnodes),i,q,sigmas,omegas,intervals,presortedp);
253 node->right = Node_make(&(*nnodes),r+1,j,sigmas,omegas,intervals,presortedp);
254
255 return node;
256 }
257 }
258
259
260
261 static void
Node_index(Node_T node,int * index)262 Node_index (Node_T node, int *index) {
263 if (node != NULL) {
264 node->index = (*index)++;
265 Node_index(node->left,&(*index));
266 Node_index(node->right,&(*index));
267 }
268 return;
269 }
270
271
272 static int
IIT_count_nnodes(List_T intervallist,bool presortedp)273 IIT_count_nnodes (List_T intervallist, bool presortedp) {
274 int nnodes;
275 Node_T root;
276 int nintervals, i;
277 List_T p;
278 struct Interval_T *intervals;
279 int *sigmas;
280 int *omegas;
281
282 if ((nintervals = List_length(intervallist)) == 0) {
283 return 0;
284 } else {
285 intervals = (struct Interval_T *) CALLOC(nintervals,sizeof(struct Interval_T));
286 for (p = intervallist, i = 0; i < nintervals; i++, p = List_next(p)) {
287 memcpy(&(intervals[i]),List_head(p),sizeof(struct Interval_T));
288 }
289
290 /* IIT ordering of intervals */
291 nnodes = 0;
292 sigmas = (int *) CALLOC(nintervals+1,sizeof(int));
293 for (i = 1; i <= nintervals; i++) {
294 sigmas[i] = i;
295 }
296
297 /* Sort sigmas with respect to Interval_array_low */
298 Interval_qsort_by_sigma(sigmas,1,nintervals,intervals,presortedp);
299
300 omegas = (int *) CALLOC(nintervals+1,sizeof(int));
301
302 /* make first node, and recurse... */
303 root = Node_make(&nnodes,1,nintervals,sigmas,omegas,intervals,presortedp);
304
305 Node_gc(&root);
306 FREE(omegas);
307 FREE(sigmas);
308 FREE(intervals);
309
310 return nnodes;
311 }
312 }
313
314 static void
IIT_build_one_div(Node_T * root,struct Interval_T ** intervals,int ** alphas,int ** betas,int ** sigmas,int ** omegas,int * nnodes,List_T intervallist,int nintervals,bool presortedp)315 IIT_build_one_div (Node_T *root, struct Interval_T **intervals, int **alphas, int **betas, int **sigmas, int **omegas,
316 int *nnodes, List_T intervallist, int nintervals, bool presortedp) {
317 int index = 0; /* Must be initialized to 0 */
318 int i;
319 List_T p;
320
321 if (nintervals == 0) {
322 *intervals = (struct Interval_T *) NULL;
323 } else {
324 *intervals = (struct Interval_T *) CALLOC(nintervals,sizeof(struct Interval_T));
325 for (p = intervallist, i = 0; i < nintervals; i++, p = List_next(p)) {
326 memcpy(&((*intervals)[i]),List_head(p),sizeof(struct Interval_T));
327 }
328 }
329
330 /* Strict ordering of intervals */
331
332 *alphas = (int *) CALLOC(nintervals+1,sizeof(int));
333 *betas = (int *) CALLOC(nintervals+1,sizeof(int));
334 for (i = 1; i <= nintervals; i++) {
335 (*alphas)[i] = (*betas)[i] = i;
336 }
337 Interval_qsort_by_sigma(*alphas,1,nintervals,*intervals,presortedp);
338 Interval_qsort_by_omega(*betas,1,nintervals,*intervals,presortedp);
339
340
341 /* IIT ordering of intervals */
342 *nnodes = 0;
343 *sigmas = (int *) CALLOC(nintervals+1,sizeof(int));
344 for (i = 1; i <= nintervals; i++) {
345 (*sigmas)[i] = i;
346 }
347
348 /* Sort sigmas with respect to Interval_array_low */
349 Interval_qsort_by_sigma(*sigmas,1,nintervals,*intervals,presortedp);
350
351 *omegas = (int *) CALLOC(nintervals+1,sizeof(int));
352
353 /* make first node, and recurse... */
354 *root = Node_make(&(*nnodes),1,nintervals,*sigmas,*omegas,*intervals,presortedp);
355 Node_index(*root,&index);
356
357 return;
358 }
359
360 /************************************************************************
361 * Output procedures
362 ************************************************************************/
363
364 /************************************************************************
365 * File format:
366 * 0: sizeof(int) [in version >= 2]
367 * version: sizeof(int) [in version >= 2]
368 * label_pointer_size: sizeof(int) [in version >= 5]
369 * annot_pointer_size: sizeof(int) [in version >= 5]
370 *
371 * total_nintervals: sizeof(int) [so maximum is 2 billion]
372 * If >= 0, then using 4-byte quantities (unsigned int) for interval coordinates
373 * If < 0, then using 8-byte quantities for interval coordinates (generally only used for v1)
374 * ntypes: sizeof(int)
375 * nfields: sizeof(int) [in version >= 2]
376 *
377 * ndivs: sizeof(int) [in version >= 3. In version < 3, set ndivs = 1.]
378 * nintervals: ndivs*sizeof(int) [in version >= 3]
379 * cum_nintervals: (ndivs+1)*sizeof(int) [in version >= 3]
380 * nnodes: ndivs*sizeof(int)
381 * cum_nnodes: (ndivs+1)*sizeof(int) [in version >= 3]
382 *
383 * divsort: sizeof(int) [in version >= 3: 0 = none, 1 = alpha, 2 = chrom]
384 * divpointers: (ndivs+1)*sizeof(UINT4) [in version >= 3]
385 * divs: ndivs*(variable length strings, including '\0') [in version >= 3]
386 *
387 * for each div (recall that in version < 3, ndivs = 1):
388 * alphas: (nintervals[div]+1)*sizeof(int) [in version >= 2]
389 * betas: (nintervals[div]+1)*sizeof(int) [in version >= 2]
390 * sigmas: (nintervals[div]+1)*sizeof(int)
391 * omegas: (nintervals[div]+1)*sizeof(int)
392 * nodes: nnodes[div]*sizeof(struct FNode_T)
393 *
394 * intervals: total_nintervals[div]*sizeof(struct Interval_T)
395 * [3 ints in version 1; 4 ints or longs in version >= 2, depending on whether total_nintervals > 0]
396 *
397 * typepointers: (ntypes+1)*sizeof(UINT4)
398 * types: ntypes*(variable length strings, including '\0')
399 *
400 * fieldpointers: (nfields+1)*sizeof(UINT4) [in version >= 2]
401 * fields: nfields*(variable length strings, including '\0') [in version >= 2]
402 *
403 * valueorder: total_nintervals*sizeof(int) [if version == 6]
404 * values: total_nintervals*sizeof(double) [if version == 6]
405 *
406 * labelorder: total_nintervals*sizeof(int);
407 * labelpointers: (total_nintervals+1)*sizeof(UINT4) [changes to long unsigned int in v4 or if label_pointer_size = 8 in v5]
408 * labels: total_nintervals*(variable length strings, including '\0')
409 *
410 * annotpointers: (total_nintervals+1)*sizeof(UINT4) [changes to long unsigned int in v4 or if annot_pointer_size = 8 in v5]
411 * annotations: total_nintervals*(variable length strings, including '\0')
412 *
413 * Note: labels and annotations are organized by division, so we can
414 * find the label/annot by looking up index = cum_nintervals[div]+recno.
415 * This is because alphas/betas/sigmas/omegas run from 1 to nintervals[div]
416 ************************************************************************/
417
418 /* For making valueorder */
419 struct Valueitem_T {
420 int divno;
421 int recno;
422 double value;
423 };
424
425 static int
Valueitem_cmp(const void * x,const void * y)426 Valueitem_cmp (const void *x, const void *y) {
427 struct Valueitem_T *a = (struct Valueitem_T *) x;
428 struct Valueitem_T *b = (struct Valueitem_T *) y;
429
430 if (a->value < b->value) {
431 return -1;
432 } else if (b->value < a->value) {
433 return +1;
434 } else {
435 return 0;
436 }
437 }
438
439
440 static int *
get_valueorder(List_T divlist,Table_T valuetable,int * cum_nintervals,int total_nintervals)441 get_valueorder (List_T divlist, Table_T valuetable, int *cum_nintervals, int total_nintervals) {
442 int *valueorder, divno, recno, i, k = 0;
443 struct Valueitem_T *valueitems;
444 char *divstring;
445 List_T d;
446 Doublelist_T valuelist, v;
447
448 valueorder = (int *) CALLOC(total_nintervals,sizeof(int));
449 valueitems = (struct Valueitem_T *) CALLOC(total_nintervals,sizeof(struct Valueitem_T));
450 divno = 0;
451
452 for (d = divlist; d != NULL; d = List_next(d)) {
453 divstring = (char *) List_head(d);
454 valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
455 recno = 0;
456 for (v = valuelist; v != NULL; v = Doublelist_next(v)) {
457 valueitems[k].divno = divno;
458 valueitems[k].recno = recno;
459 valueitems[k].value = Doublelist_head(v);
460 k++;
461 recno++;
462 }
463 divno++;
464 }
465
466 qsort(valueitems,total_nintervals,sizeof(struct Valueitem_T),Valueitem_cmp);
467 for (i = 0; i < total_nintervals; i++) {
468 valueorder[i] = cum_nintervals[valueitems[i].divno] + valueitems[i].recno;
469 }
470 FREE(valueitems);
471 return valueorder;
472 }
473
474
475
476 /* For making labelorder */
477 struct Sortitem_T {
478 int divno;
479 int recno;
480 char *label;
481 };
482
483 static int
Sortitem_cmp(const void * x,const void * y)484 Sortitem_cmp (const void *x, const void *y) {
485 struct Sortitem_T *a = (struct Sortitem_T *) x;
486 struct Sortitem_T *b = (struct Sortitem_T *) y;
487
488 return strcmp(a->label,b->label);
489 }
490
491 static int *
get_labelorder(List_T divlist,Table_T labeltable,int * cum_nintervals,int total_nintervals)492 get_labelorder (List_T divlist, Table_T labeltable, int *cum_nintervals, int total_nintervals) {
493 int *labelorder, divno, recno, i, k = 0;
494 struct Sortitem_T *sortitems;
495 char *divstring;
496 List_T labellist, d, p;
497
498 labelorder = (int *) CALLOC(total_nintervals,sizeof(int));
499 sortitems = (struct Sortitem_T *) CALLOC(total_nintervals,sizeof(struct Sortitem_T));
500 divno = 0;
501
502 for (d = divlist; d != NULL; d = List_next(d)) {
503 divstring = (char *) List_head(d);
504 labellist = (List_T) Table_get(labeltable,(void *) divstring);
505 recno = 0;
506 for (p = labellist; p != NULL; p = List_next(p)) {
507 sortitems[k].divno = divno;
508 sortitems[k].recno = recno;
509 sortitems[k].label = (char *) List_head(p);
510 k++;
511 recno++;
512 }
513 divno++;
514 }
515
516 qsort(sortitems,total_nintervals,sizeof(struct Sortitem_T),Sortitem_cmp);
517 for (i = 0; i < total_nintervals; i++) {
518 labelorder[i] = cum_nintervals[sortitems[i].divno] + sortitems[i].recno;
519 }
520 FREE(sortitems);
521 return labelorder;
522 }
523
524
525 /* Prints in DFS order */
526 static void
Node_fwrite(FILE * fp,Node_T node)527 Node_fwrite (FILE *fp, Node_T node) {
528 int leftindex, rightindex;
529
530 if (node != NULL) {
531 if (node->left == NULL) {
532 leftindex = -1;
533 } else {
534 leftindex = node->left->index;
535 }
536
537 if (node->right == NULL) {
538 rightindex = -1;
539 } else {
540 rightindex = node->right->index;
541 }
542
543 debug(printf("Writing node %u %d %d\n",node->value,node->a,node->b));
544 FWRITE_UINT(node->value,fp);
545 FWRITE_INT(node->a,fp);
546 FWRITE_INT(node->b,fp);
547 FWRITE_INT(leftindex,fp);
548 FWRITE_INT(rightindex,fp);
549
550 Node_fwrite(fp,node->left);
551 Node_fwrite(fp,node->right);
552 }
553 return;
554 }
555
556
557 /* Prints in DFS order */
558 static void
Node_create(T new,int divno,int * i,Node_T node)559 Node_create (T new, int divno, int *i, Node_T node) {
560 int leftindex, rightindex;
561
562 if (node != NULL) {
563 if (node->left == NULL) {
564 leftindex = -1;
565 } else {
566 leftindex = node->left->index;
567 }
568
569 if (node->right == NULL) {
570 rightindex = -1;
571 } else {
572 rightindex = node->right->index;
573 }
574
575 debug(printf("Writing node %u %d %d\n",node->value,node->a,node->b));
576 new->nodes[divno][*i].value = node->value;
577 new->nodes[divno][*i].a = node->a;
578 new->nodes[divno][*i].b = node->b;
579 new->nodes[divno][*i].leftindex = leftindex;
580 new->nodes[divno][*i].rightindex = rightindex;
581 *i += 1;
582
583 Node_create(new,divno,&(*i),node->left);
584 Node_create(new,divno,&(*i),node->right);
585 }
586 return;
587 }
588
589
590 #if 0
591 /* Stores in DFS order */
592 static void
593 Node_store (int *fnodei, struct FNode_T *fnodes, Node_T node) {
594 int leftindex, rightindex;
595
596 if (node != NULL) {
597 if (node->left == NULL) {
598 leftindex = -1;
599 } else {
600 leftindex = node->left->index;
601 }
602
603 if (node->right == NULL) {
604 rightindex = -1;
605 } else {
606 rightindex = node->right->index;
607 }
608
609 fnodes[*fnodei].value = node->value;
610 fnodes[*fnodei].a = node->a;
611 fnodes[*fnodei].b = node->b;
612 fnodes[*fnodei].leftindex = leftindex;
613 fnodes[*fnodei].rightindex = rightindex;
614 (*fnodei)++;
615
616 Node_store(&(*fnodei),fnodes,node->left);
617 Node_store(&(*fnodei),fnodes,node->right);
618 }
619 return;
620 }
621 #endif
622
623
624 static void
IIT_write_div_header(FILE * fp,int total_nintervals,int ntypes,int nfields,int ndivs,int * nintervals,int * nnodes,int * cum_nintervals,int * cum_nnodes,List_T divlist,Sorttype_T divsort,int version,bool label_pointers_8p,bool annot_pointers_8p)625 IIT_write_div_header (FILE *fp, int total_nintervals, int ntypes, int nfields, int ndivs,
626 int *nintervals, int *nnodes, int *cum_nintervals, int *cum_nnodes,
627 List_T divlist, Sorttype_T divsort, int version,
628 bool label_pointers_8p, bool annot_pointers_8p) {
629 int new_format_indicator = 0, label_pointer_size, annot_pointer_size;
630 int divno;
631 UINT4 pointer = 0U, padded;
632 List_T p;
633 char *divstring;
634
635 assert(version >= 2);
636 FWRITE_INT(new_format_indicator,fp); /* Indicates new format, since nintervals > 0 */
637 FWRITE_INT(version,fp);
638
639 if (version >= 5) {
640 if (label_pointers_8p == false) {
641 label_pointer_size = 4;
642 } else {
643 label_pointer_size = 8;
644 }
645 if (annot_pointers_8p == false) {
646 annot_pointer_size = 4;
647 } else {
648 annot_pointer_size = 8;
649 }
650 FWRITE_INT(label_pointer_size,fp);
651 FWRITE_INT(annot_pointer_size,fp);
652 }
653
654 FWRITE_INT(total_nintervals,fp);
655 FWRITE_INT(ntypes,fp);
656 FWRITE_INT(nfields,fp);
657
658 if (version >= 3) {
659 FWRITE_INT(ndivs,fp);
660 for (divno = 0; divno < ndivs; divno++) {
661 FWRITE_INT(nintervals[divno],fp);
662 }
663 for (divno = 0; divno <= ndivs; divno++) {
664 FWRITE_INT(cum_nintervals[divno],fp);
665 }
666 }
667 for (divno = 0; divno < ndivs; divno++) {
668 FWRITE_INT(nnodes[divno],fp);
669 }
670 if (version >= 3) {
671 for (divno = 0; divno <= ndivs; divno++) {
672 FWRITE_INT(cum_nnodes[divno],fp);
673 }
674 }
675
676 if (version >= 3) {
677 FWRITE_INT(divsort,fp);
678 }
679
680 if (version >= 3) {
681 /* Write div pointers */
682 pointer = 0U;
683 for (p = divlist; p != NULL; p = List_next(p)) {
684 FWRITE_UINT(pointer,fp);
685 divstring = (char *) List_head(p);
686 pointer += (UINT4) strlen(divstring)+1U; /* Add count for '\0' */
687 }
688 #ifdef PAD_STRINGS
689 /* Pad last pointer to next multiple of 4 */
690 padded = (pointer + 3) & ~3;
691 debug1(printf("divstrings: converting pointer %d to padded %d\n",pointer,padded));
692 FWRITE_UINT(padded,fp);
693 #else
694 FWRITE_UINT(pointer,fp);
695 #endif
696
697 /* Write divs */
698 for (p = divlist; p != NULL; p = List_next(p)) {
699 divstring = (char *) List_head(p);
700 FWRITE_CHARS(divstring,strlen(divstring)+1,fp); /* Write '\0' */
701 }
702 #ifdef PAD_STRINGS
703 /* Add padding */
704 while (pointer++ < padded) {
705 debug1(printf("divstrings: adding 0 as padding\n"));
706 fputc('\0',fp);
707 }
708 #endif
709 }
710
711 return;
712 }
713
714
715 static void
IIT_create_div_header(T new,int total_nintervals,int ntypes,int nfields,int ndivs,int * nintervals,int * nnodes,int * cum_nintervals,int * cum_nnodes,List_T divlist,Sorttype_T divsort,int version)716 IIT_create_div_header (T new, int total_nintervals, int ntypes, int nfields, int ndivs,
717 int *nintervals, int *nnodes, int *cum_nintervals, int *cum_nnodes,
718 List_T divlist, Sorttype_T divsort, int version) {
719 int divno;
720 UINT4 pointer = 0U, padded;
721 List_T p;
722 char *divstring;
723 UINT4 stringlen, k;
724
725 new->version = version;
726 new->total_nintervals = total_nintervals;
727 new->ntypes = ntypes;
728 new->nfields = nfields;
729
730 new->ndivs = ndivs;
731 new->nintervals = (int *) CALLOC(new->ndivs,sizeof(int));
732 for (divno = 0; divno < ndivs; divno++) {
733 new->nintervals[divno] = nintervals[divno];
734 }
735
736 new->cum_nintervals = (int *) CALLOC(new->ndivs+1,sizeof(int));
737 for (divno = 0; divno <= ndivs; divno++) {
738 new->cum_nintervals[divno] = cum_nintervals[divno];
739 }
740
741 new->nnodes = (int *) CALLOC(new->ndivs,sizeof(int));
742 for (divno = 0; divno < ndivs; divno++) {
743 new->nnodes[divno] = nnodes[divno];
744 }
745
746 new->cum_nnodes = (int *) CALLOC(new->ndivs+1,sizeof(int));
747 for (divno = 0; divno <= ndivs; divno++) {
748 new->cum_nnodes[divno] = cum_nnodes[divno];
749 }
750
751 new->divsort = divsort;
752
753 new->divpointers = (UINT4 *) CALLOC(new->ndivs+1,sizeof(UINT4));
754 /* Create div pointers */
755 k = 0;
756 pointer = 0U;
757 for (p = divlist; p != NULL; p = List_next(p)) {
758 new->divpointers[k++] = pointer;
759 divstring = (char *) List_head(p);
760 pointer += (UINT4) strlen(divstring)+1U; /* Add count for '\0' */
761 }
762 #ifdef PAD_STRINGS
763 /* Pad last pointer to next multiple of 4 */
764 padded = (pointer + 3) & ~3;
765 debug1(printf("divstrings: converting pointer %d to padded %d\n",pointer,padded));
766 new->divpointers[k++] = padded;
767 #else
768 new->divpointers[k++] = pointer;
769 #endif
770
771 stringlen = new->divpointers[new->ndivs];
772 new->divstrings = (char *) CALLOC(stringlen,sizeof(char));
773 /* Create divs */
774 k = 0;
775 for (p = divlist; p != NULL; p = List_next(p)) {
776 divstring = (char *) List_head(p);
777 strcpy(&(new->divstrings[k]),divstring);
778 k += strlen(divstring);
779 new->divstrings[k++] = '\0';
780 }
781 #ifdef PAD_STRINGS
782 /* Add padding */
783 while (k < padded) {
784 debug1(printf("divstrings: adding 0 as padding\n"));
785 new->divstrings[k++] = '\0';
786 }
787 #endif
788
789 return;
790 }
791
792
793 static void
IIT_write_one_div(FILE * fp,Node_T root,int * alphas,int * betas,int * sigmas,int * omegas,int nintervals,int version)794 IIT_write_one_div (FILE *fp, Node_T root, int *alphas, int *betas, int *sigmas, int *omegas,
795 int nintervals, int version) {
796 #ifdef DEBUG
797 int i;
798 #endif
799
800 assert(version >= 2);
801 FWRITE_INTS(alphas,nintervals + 1,fp);
802 FWRITE_INTS(betas,nintervals + 1,fp);
803
804 debug(
805 printf("alphas:");
806 for (i = 0; i < nintervals + 1; i++) {
807 printf(" %d",alphas[i]);
808 }
809 printf("\n");
810
811 printf("betas:");
812 for (i = 0; i < nintervals + 1; i++) {
813 printf(" %d",betas[i]);
814 }
815 printf("\n");
816 );
817
818 FWRITE_INTS(sigmas,nintervals + 1,fp);
819 FWRITE_INTS(omegas,nintervals + 1,fp);
820
821 debug(
822 printf("sigmas:");
823 for (i = 0; i < nintervals + 1; i++) {
824 printf(" %d",sigmas[i]);
825 }
826 printf("\n");
827
828 printf("omegas:");
829 for (i = 0; i < nintervals + 1; i++) {
830 printf(" %d",omegas[i]);
831 }
832 printf("\n");
833 );
834
835 debug(printf("Starting to write nodes\n"));
836 Node_fwrite(fp,root);
837 debug(printf("\n"));
838
839 return;
840 }
841
842
843 static void
IIT_create_one_div(T new,int divno,Node_T root,int * alphas,int * betas,int * sigmas,int * omegas,int nintervals)844 IIT_create_one_div (T new, int divno, Node_T root, int *alphas, int *betas, int *sigmas, int *omegas,
845 int nintervals) {
846 int i;
847
848 new->alphas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
849 for (i = 0; i <= nintervals; i++) {
850 new->alphas[divno][i] = alphas[i];
851 }
852
853 new->betas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
854 for (i = 0; i <= nintervals; i++) {
855 new->betas[divno][i] = betas[i];
856 }
857
858 new->sigmas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
859 for (i = 0; i <= nintervals; i++) {
860 new->sigmas[divno][i] = sigmas[i];
861 }
862
863 new->omegas[divno] = (int *) CALLOC(nintervals+1,sizeof(int));
864 for (i = 0; i <= nintervals; i++) {
865 new->omegas[divno][i] = omegas[i];
866 }
867
868 if (new->nnodes[divno] == 0) {
869 new->nodes[divno] = (struct FNode_T *) NULL;
870 } else {
871 new->nodes[divno] = (struct FNode_T *) CALLOC(new->nnodes[divno],sizeof(struct FNode_T));
872 i = 0;
873 Node_create(new,divno,&i,root);
874 }
875
876 return;
877 }
878
879
880 static void
IIT_write_div_footer(FILE * fp,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,int * cum_nintervals,int total_nintervals,int version,bool label_pointers_8p,bool annot_pointers_8p)881 IIT_write_div_footer (FILE *fp, List_T divlist, List_T typelist, List_T fieldlist,
882 Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
883 int *cum_nintervals, int total_nintervals, int version,
884 bool label_pointers_8p, bool annot_pointers_8p) {
885 List_T intervallist, labellist, annotlist, d, p;
886 Doublelist_T valuelist, v;
887 Interval_T interval;
888 #ifdef HAVE_64_BIT
889 UINT8 pointer8 = 0LU, padded8;
890 #endif
891 UINT4 pointer = 0U, padded;
892 int *labelorder, *valueorder;
893 double value;
894 char *divstring, *typestring, *fieldstring, *label, *annot;
895
896 #ifndef HAVE_64_BIT
897 if (label_pointers_8p == true || annot_pointers_8p == true) {
898 fprintf(stderr,"Machine is not 64-bit, so cannot write 8-byte pointers.\n");
899 exit(9);
900 }
901 #endif
902
903 assert(version >= 2);
904 for (d = divlist; d != NULL; d = List_next(d)) {
905 divstring = (char *) List_head(d);
906 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
907 for (p = intervallist; p != NULL; p = List_next(p)) {
908 interval = (Interval_T) List_head(p);
909 FWRITE_UINT(interval->low,fp);
910 FWRITE_UINT(interval->high,fp);
911 FWRITE_INT(interval->sign,fp);
912 FWRITE_INT(interval->type,fp);
913 }
914 }
915
916 /* Write type pointers */
917 pointer = 0U;
918 for (p = typelist; p != NULL; p = List_next(p)) {
919 FWRITE_UINT(pointer,fp);
920 typestring = (char *) List_head(p);
921 if (typestring == NULL) {
922 abort(); /* Even type 0 has an empty string */
923 } else {
924 pointer += (UINT4) strlen(typestring)+1U; /* Add count for '\0' */
925 }
926 }
927 #ifdef PAD_STRINGS
928 /* Pad last pointer to next multiple of 4 */
929 padded = (pointer + 3) & ~3;
930 debug1(printf("typestrings: converting pointer %d to padded %d\n",pointer,padded));
931 FWRITE_UINT(padded,fp);
932 #else
933 FWRITE_UINT(pointer,fp);
934 #endif
935
936
937 /* Write types */
938 for (p = typelist; p != NULL; p = List_next(p)) {
939 typestring = (char *) List_head(p);
940 FWRITE_CHARS(typestring,strlen(typestring)+1,fp); /* Write '\0' */
941 }
942 #ifdef PAD_STRINGS
943 /* Add padding */
944 while (pointer++ < padded) {
945 debug1(printf("typestrings: adding 0 as padding\n"));
946 fputc('\0',fp);
947 }
948 #endif
949
950
951 /* Write field pointers */
952 pointer = 0U;
953 for (p = fieldlist; p != NULL; p = List_next(p)) {
954 FWRITE_UINT(pointer,fp);
955 fieldstring = (char *) List_head(p);
956 pointer += (UINT4) strlen(fieldstring)+1U; /* Add count for '\0' */
957 }
958 #ifdef PAD_STRINGS
959 /* Pad last pointer to next multiple of 4 */
960 padded = (pointer + 3) & ~3;
961 debug1(printf("fieldstrings: converting pointer %d to padded %d\n",pointer,padded));
962 FWRITE_UINT(padded,fp);
963 #else
964 FWRITE_UINT(pointer,fp);
965 #endif
966
967
968 /* Write fields */
969 for (p = fieldlist; p != NULL; p = List_next(p)) {
970 fieldstring = (char *) List_head(p);
971 FWRITE_CHARS(fieldstring,strlen(fieldstring)+1,fp); /* Write '\0' */
972 }
973 #ifdef PAD_STRINGS
974 /* Add padding */
975 while (pointer++ < padded) {
976 debug1(printf("fieldstrings: adding 0 as padding\n"));
977 fputc('\0',fp);
978 }
979 #endif
980
981
982 /* Write valueorder (if values present) */
983 if (valuetable != NULL) {
984 valueorder = get_valueorder(divlist,valuetable,cum_nintervals,total_nintervals);
985 FWRITE_INTS(valueorder,total_nintervals,fp);
986 FREE(valueorder);
987
988 /* Write values */
989 for (d = divlist; d != NULL; d = List_next(d)) {
990 divstring = (char *) List_head(d);
991 valuelist = (Doublelist_T) Table_get(valuetable,(void *) divstring);
992 for (v = valuelist; v != NULL; v = Doublelist_next(v)) {
993 value = Doublelist_head(v);
994 FWRITE_DOUBLE(value,fp);
995 }
996 }
997 }
998
999 /* Write labelorder */
1000 labelorder = get_labelorder(divlist,labeltable,cum_nintervals,total_nintervals);
1001 FWRITE_INTS(labelorder,total_nintervals,fp);
1002 FREE(labelorder);
1003
1004 /* Write label pointers */
1005 #ifdef HAVE_64_BIT
1006 if (label_pointers_8p == true) {
1007 pointer8 = 0LU;
1008 } else {
1009 pointer = 0U;
1010 }
1011 #else
1012 pointer = 0U;
1013 #endif
1014
1015 for (d = divlist; d != NULL; d = List_next(d)) {
1016 divstring = (char *) List_head(d);
1017 labellist = (List_T) Table_get(labeltable,(void *) divstring);
1018 for (p = labellist; p != NULL; p = List_next(p)) {
1019 #ifdef HAVE_64_BIT
1020 if (label_pointers_8p == true) {
1021 FWRITE_UINT8(pointer8,fp);
1022 } else {
1023 FWRITE_UINT(pointer,fp);
1024 }
1025 #else
1026 FWRITE_UINT(pointer,fp);
1027 #endif
1028
1029 label = (char *) List_head(p);
1030 #ifdef HAVE_64_BIT
1031 if (label_pointers_8p == true) {
1032 pointer8 += (UINT8) strlen(label)+1LU; /* Add count for '\0' */
1033 } else {
1034 pointer += (UINT4) strlen(label)+1U; /* Add count for '\0' */
1035 }
1036 #else
1037 pointer += (UINT4) strlen(label)+1U; /* Add count for '\0' */
1038 #endif
1039 }
1040 }
1041
1042 #ifdef PAD_STRINGS
1043 /* Pad last pointer to next multiple of 4 */
1044 #ifdef HAVE_64_BIT
1045 if (label_pointers_8p == true) {
1046 padded8 = (pointer8 + 3) & ~3;
1047 FWRITE_UINT8(padded8,fp);
1048 } else {
1049 padded = (pointer + 3) & ~3;
1050 debug1(printf("labels: converting pointer %d to padded %d\n",pointer,padded));
1051 FWRITE_UINT(padded,fp);
1052 }
1053 #else
1054 padded = (pointer + 3) & ~3;
1055 FWRITE_UINT(padded,fp);
1056 #endif
1057 #else
1058 #ifdef HAVE_64_BIT
1059 if (label_pointers_8p == true) {
1060 FWRITE_UINT8(pointer8,fp);
1061 } else {
1062 FWRITE_UINT(pointer,fp);
1063 }
1064 #else
1065 FWRITE_UINT(pointer,fp);
1066 #endif
1067 #endif
1068
1069
1070 /* Write labels */
1071 for (d = divlist; d != NULL; d = List_next(d)) {
1072 divstring = (char *) List_head(d);
1073 labellist = (List_T) Table_get(labeltable,(void *) divstring);
1074 for (p = labellist; p != NULL; p = List_next(p)) {
1075 label = (char *) List_head(p);
1076 FWRITE_CHARS(label,strlen(label)+1,fp); /* Write '\0' */
1077 }
1078 }
1079 #ifdef PAD_STRINGS
1080 /* Add padding */
1081 #ifdef HAVE_64_BIT
1082 if (label_pointers_8p == true) {
1083 while (pointer8++ < padded8) {
1084 fputc('\0',fp);
1085 }
1086 } else {
1087 while (pointer++ < padded) {
1088 debug1(printf("labels: adding 0 as padding\n"));
1089 fputc('\0',fp);
1090 }
1091 }
1092 #else
1093 while (pointer++ < padded) {
1094 fputc('\0',fp);
1095 }
1096 #endif
1097 #endif
1098
1099
1100
1101 /* Write annot pointers */
1102 #ifdef HAVE_64_BIT
1103 if (annot_pointers_8p == true) {
1104 pointer8 = 0LU;
1105 } else {
1106 pointer = 0U;
1107 }
1108 #else
1109 pointer = 0U;
1110 #endif
1111
1112 for (d = divlist; d != NULL; d = List_next(d)) {
1113 divstring = (char *) List_head(d);
1114 annotlist = (List_T) Table_get(annottable,(void *) divstring);
1115 for (p = annotlist; p != NULL; p = List_next(p)) {
1116 #ifdef HAVE_64_BIT
1117 if (annot_pointers_8p == true) {
1118 FWRITE_UINT8(pointer8,fp);
1119 } else {
1120 FWRITE_UINT(pointer,fp);
1121 }
1122 #else
1123 FWRITE_UINT(pointer,fp);
1124 #endif
1125
1126 annot = (char *) List_head(p);
1127 #ifdef HAVE_64_BIT
1128 if (annot_pointers_8p == true) {
1129 pointer8 += (UINT8) strlen(annot)+1LU; /* Add count for '\0' */
1130 } else {
1131 pointer += (UINT4) strlen(annot)+1; /* Add count for '\0' */
1132 }
1133 #else
1134 pointer += (UINT4) strlen(annot)+1; /* Add count for '\0' */
1135 #endif
1136 }
1137 }
1138
1139 #ifdef PAD_STRINGS
1140 /* Pad last pointer to next multiple of 4 */
1141 #ifdef HAVE_64_BIT
1142 if (annot_pointers_8p == true) {
1143 padded8 = (pointer8 + 3) & ~3;
1144 FWRITE_UINT8(padded8,fp);
1145 } else {
1146 padded = (pointer + 3) & ~3;
1147 debug1(printf("annotations: converting pointer %d to padded %d\n",pointer,padded));
1148 FWRITE_UINT(padded,fp);
1149 }
1150 #else
1151 padded = (pointer + 3) & ~3;
1152 FWRITE_UINT(padded,fp);
1153 #endif
1154 #else
1155 #ifdef HAVE_64_BIT
1156 if (annot_pointers_8p == true) {
1157 FWRITE_UINT8(pointer8,fp);
1158 } else {
1159 FWRITE_UINT(pointer,fp);
1160 }
1161 #else
1162 FWRITE_UINT(pointer,fp);
1163 #endif
1164 #endif
1165
1166
1167 /* Write annotations */
1168 for (d = divlist; d != NULL; d = List_next(d)) {
1169 divstring = (char *) List_head(d);
1170 annotlist = (List_T) Table_get(annottable,(void *) divstring);
1171 for (p = annotlist; p != NULL; p = List_next(p)) {
1172 annot = (char *) List_head(p);
1173 FWRITE_CHARS(annot,strlen(annot)+1,fp); /* Write '\0' */
1174 }
1175 }
1176 #ifdef PAD_STRINGS
1177 /* Add padding */
1178 #ifdef HAVE_64_BIT
1179 if (annot_pointers_8p == true) {
1180 while (pointer8++ < padded8) {
1181 fputc('\0',fp);
1182 }
1183 } else {
1184 while (pointer++ < padded) {
1185 debug1(printf("annotations: adding 0 as padding\n"));
1186 fputc('\0',fp);
1187 }
1188 }
1189 #else
1190 while (pointer++ < padded) {
1191 fputc('\0',fp);
1192 }
1193 #endif
1194 #endif
1195
1196
1197 return;
1198 }
1199
1200
1201 static void
IIT_create_div_footer(T new,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T labeltable,Table_T datatable,int * cum_nintervals,int total_nintervals)1202 IIT_create_div_footer (T new, List_T divlist, List_T typelist, List_T fieldlist,
1203 Table_T intervaltable, Table_T labeltable, Table_T datatable,
1204 int *cum_nintervals, int total_nintervals) {
1205 List_T intervallist, labellist, datalist, d, p;
1206 Interval_T interval;
1207 UINT4 pointer = 0U;
1208 char *divstring, *typestring, *fieldstring, *label;
1209 int stringlen;
1210 int divno, k;
1211
1212 /* Create intervals */
1213 new->intervals = (struct Interval_T **) CALLOC(new->ndivs,sizeof(struct Interval_T *));
1214 new->intervals[0] = (struct Interval_T *) CALLOC(new->total_nintervals,sizeof(struct Interval_T));
1215 for (divno = 1; divno < new->ndivs; divno++) {
1216 new->intervals[divno] = &(new->intervals[divno-1][new->nintervals[divno-1]]);
1217 }
1218
1219 k = 0;
1220 for (d = divlist; d != NULL; d = List_next(d)) {
1221 divstring = (char *) List_head(d);
1222 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1223 for (p = intervallist; p != NULL; p = List_next(p)) {
1224 interval = (Interval_T) List_head(p);
1225 new->intervals[0][k].low = interval->low;
1226 new->intervals[0][k].high = interval->high;
1227 new->intervals[0][k].sign = interval->sign;
1228 new->intervals[0][k].type = interval->type;
1229 k++;
1230 }
1231 }
1232
1233
1234 /* Create type pointers */
1235 new->typepointers = (UINT4 *) CALLOC(new->ntypes+1,sizeof(UINT4));
1236 k = 0;
1237 new->typepointers[k++] = pointer = 0U;
1238 for (p = typelist; p != NULL; p = List_next(p)) {
1239 typestring = (char *) List_head(p);
1240 if (typestring == NULL) {
1241 abort(); /* Even type 0 has an empty string */
1242 } else {
1243 pointer += (UINT4) strlen(typestring)+1U; /* Add count for '\0' */
1244 new->typepointers[k++] = pointer;
1245 }
1246 }
1247
1248 /* Create types */
1249 if ((stringlen = new->typepointers[new->ntypes]) > 0) {
1250 new->typestrings = (char *) CALLOC(stringlen,sizeof(char));
1251 k = 0;
1252 for (p = typelist; p != NULL; p = List_next(p)) {
1253 typestring = (char *) List_head(p);
1254 strcpy(&(new->typestrings[k]),typestring);
1255 k += strlen(typestring);
1256 new->typestrings[k++] = '\0';
1257 }
1258 }
1259
1260
1261 /* Create field pointers */
1262 new->fieldpointers = (UINT4 *) CALLOC(new->nfields+1,sizeof(UINT4));
1263 k = 0;
1264 new->fieldpointers[k++] = pointer = 0U;
1265 for (p = fieldlist; p != NULL; p = List_next(p)) {
1266 fieldstring = (char *) List_head(p);
1267 if (fieldstring == NULL) {
1268 abort(); /* Even type 0 has an empty string */
1269 } else {
1270 pointer += (UINT4) strlen(fieldstring)+1U; /* Add count for '\0' */
1271 new->fieldpointers[k++] = pointer;
1272 }
1273 }
1274
1275 /* Create fields */
1276 if ((stringlen = new->fieldpointers[new->nfields]) > 0) {
1277 new->fieldstrings = (char *) CALLOC(stringlen,sizeof(char));
1278 k = 0;
1279 for (p = fieldlist; p != NULL; p = List_next(p)) {
1280 fieldstring = (char *) List_head(p);
1281 strcpy(&(new->fieldstrings[k]),fieldstring);
1282 k += strlen(fieldstring);
1283 new->fieldstrings[k++] = '\0';
1284 }
1285 }
1286
1287
1288 /* Create labelorder */
1289 new->labelorder = get_labelorder(divlist,labeltable,cum_nintervals,total_nintervals);
1290
1291 /* Create label pointers */
1292 new->labelpointers = (UINT4 *) CALLOC(new->total_nintervals+1,sizeof(UINT4));
1293 k = 0;
1294 new->labelpointers[k++] = pointer = 0U;
1295 for (d = divlist; d != NULL; d = List_next(d)) {
1296 divstring = (char *) List_head(d);
1297 labellist = (List_T) Table_get(labeltable,(void *) divstring);
1298 for (p = labellist; p != NULL; p = List_next(p)) {
1299 label = (char *) List_head(p);
1300 pointer += (UINT4) strlen(label)+1U; /* Add count for '\0' */
1301 new->labelpointers[k++] = pointer;
1302 }
1303 }
1304
1305 /* Create labels */
1306 stringlen = new->labelpointers[new->total_nintervals];
1307 new->labels = (char *) CALLOC(stringlen,sizeof(char));
1308 k = 0;
1309 for (d = divlist; d != NULL; d = List_next(d)) {
1310 divstring = (char *) List_head(d);
1311 labellist = (List_T) Table_get(labeltable,(void *) divstring);
1312 for (p = labellist; p != NULL; p = List_next(p)) {
1313 label = (char *) List_head(p);
1314 strcpy(&(new->labels[k]),label);
1315 k += strlen(label);
1316 new->labels[k++] = '\0';
1317 }
1318 }
1319
1320
1321 /* Create data pointers */
1322 new->datapointers = (void **) CALLOC(new->total_nintervals,sizeof(void *));
1323 k = 0;
1324 for (d = divlist; d != NULL; d = List_next(d)) {
1325 divstring = (char *) List_head(d);
1326 datalist = (List_T) Table_get(datatable,(void *) divstring);
1327 for (p = datalist; p != NULL; p = List_next(p)) {
1328 new->datapointers[k++] = List_head(p);
1329 }
1330 }
1331
1332 return;
1333 }
1334
1335
1336 #if 0
1337 static void
1338 compute_flanking (T this) {
1339 int i;
1340
1341 this->alphas = (int *) CALLOC(this->nintervals+1,sizeof(int));
1342 this->betas = (int *) CALLOC(this->nintervals+1,sizeof(int));
1343 for (i = 1; i <= this->nintervals; i++) {
1344 this->alphas[i] = this->betas[i] = i;
1345 }
1346 Interval_qsort_by_sigma(this->alphas,1,this->nintervals,this->intervals);
1347 Interval_qsort_by_omega(this->betas,1,this->nintervals,this->intervals);
1348 return;
1349 }
1350
1351
1352 /* Used only by iit_update.c */
1353 void
1354 IIT_output_direct (char *iitfile, T this, int version) {
1355 FILE *fp;
1356 off_t stringlen;
1357 int new_format_indicator = 0, i;
1358 FNode_T node;
1359
1360 if ((fp = FOPEN_WRITE_BINARY(iitfile)) == NULL) {
1361 fprintf(stderr,"Error: can't open file %s\n",iitfile);
1362 exit(9);
1363 }
1364
1365 assert(version >= 2);
1366 FWRITE_INT(new_format_indicator,fp); /* Indicates new format, since nintervals > 0 */
1367 FWRITE_INT(version,fp);
1368
1369 FWRITE_INT(this->nintervals,fp);
1370 FWRITE_INT(this->ntypes,fp);
1371 FWRITE_INT(this->nfields,fp);
1372 FWRITE_INT(this->nnodes,fp);
1373
1374 if (this->alphas == NULL) {
1375 compute_flanking(this);
1376 }
1377 FWRITE_INTS(this->alphas,this->nintervals + 1,fp);
1378 FWRITE_INTS(this->betas,this->nintervals + 1,fp);
1379
1380 FWRITE_INTS(this->sigmas,this->nintervals + 1,fp);
1381 FWRITE_INTS(this->omegas,this->nintervals + 1,fp);
1382
1383 /* Write nodes directly */
1384 for (i = 0; i < this->nnodes; i++) {
1385 node = &(this->nodes[i]);
1386 FWRITE_UINT(node->value,fp);
1387 FWRITE_INT(node->a,fp);
1388 FWRITE_INT(node->b,fp);
1389 FWRITE_INT(node->leftindex,fp);
1390 FWRITE_INT(node->rightindex,fp);
1391 }
1392
1393 for (i = 0; i < this->nintervals; i++) {
1394 FWRITE_UINT(this->intervals[i].low,fp);
1395 FWRITE_UINT(this->intervals[i].high,fp);
1396 FWRITE_INT(this->intervals[i].sign,fp);
1397 FWRITE_INT(this->intervals[i].type,fp);
1398 }
1399
1400 /* Write types directly */
1401 for (i = 0; i < this->ntypes+1; i++) {
1402 FWRITE_UINT(this->typepointers[i],fp);
1403 }
1404 if ((stringlen = this->typepointers[this->ntypes]) == 0) {
1405 fprintf(stderr,"Error in writing types: type stringlen is 0.\n");
1406 exit(9);
1407 } else {
1408 FWRITE_CHARS(this->typestrings,stringlen,fp);
1409 }
1410
1411 /* Write fields directly */
1412 for (i = 0; i < this->nfields+1; i++) {
1413 FWRITE_UINT(this->fieldpointers[i],fp);
1414 }
1415 stringlen = this->fieldpointers[this->nfields];
1416 if (stringlen > 0) {
1417 FWRITE_CHARS(this->fieldstrings,stringlen,fp);
1418 }
1419
1420 /* Write labelorder */
1421 FWRITE_INTS(this->labelorder,this->nintervals,fp);
1422
1423 /* Write labels directly */
1424 #ifdef HAVE_64_BIT
1425 if (this->label_pointers_8p == true) {
1426 FWRITE_UINT8S(this->labelpointers,this->nintervals+1,fp);
1427 } else {
1428 FWRITE_UINTS(this->labelpointers,this->nintervals+1,fp);
1429 }
1430 #else
1431 FWRITE_UINTS(this->labelpointers,this->nintervals+1,fp);
1432 #endif
1433 if ((stringlen = this->labelpointers[this->nintervals]) == 0) {
1434 fprintf(stderr,"Error in writing labels: label stringlen is 0.\n");
1435 exit(9);
1436 } else {
1437 FWRITE_CHARS(this->labels,stringlen,fp);
1438 }
1439
1440 /* Write annotations directly */
1441 #ifdef HAVE_64_BIT
1442 if (this->annot_pointers_8p == true) {
1443 FWRITE_UINT8S(this->annotpointers,this->nintervals+1,fp);
1444 } else {
1445 FWRITE_UINTS(this->annotpointers,this->nintervals+1,fp);
1446 }
1447 #else
1448 FWRITE_UINTS(this->annotpointers,this->nintervals+1,fp);
1449 #endif
1450 if ((stringlen = this->annotpointers[this->nintervals]) == 0) {
1451 fprintf(stderr,"Error in writing annotations: annotation stringlen is 0.\n");
1452 exit(9);
1453 } else {
1454 FWRITE_CHARS(this->annotations,stringlen,fp);
1455 }
1456
1457 fclose(fp);
1458
1459 return;
1460 }
1461 #endif
1462
1463
1464 /* If annotlist is NULL, X's are written */
1465 void
IIT_write(char * iitfile,List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T valuetable,Table_T labeltable,Table_T annottable,Sorttype_T divsort,int version,bool label_pointers_8p,bool annot_pointers_8p)1466 IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist,
1467 Table_T intervaltable, Table_T valuetable, Table_T labeltable, Table_T annottable,
1468 Sorttype_T divsort, int version, bool label_pointers_8p, bool annot_pointers_8p) {
1469 Node_T root;
1470 FILE *fp;
1471 List_T intervallist, d;
1472 char *divstring;
1473 int ndivs, total_nintervals, *nintervals, *nnodes, nnodes_one_div, divno;
1474 int *cum_nintervals, *cum_nnodes;
1475 struct Interval_T *intervals;
1476 int *alphas, *betas, *sigmas, *omegas;
1477
1478 if ((fp = FOPEN_WRITE_BINARY(iitfile)) == NULL) {
1479 fprintf(stderr,"Error: can't open file %s\n",iitfile);
1480 exit(9);
1481 } else {
1482 ndivs = List_length(divlist);
1483 nintervals = (int *) CALLOC(ndivs,sizeof(int));
1484 nnodes = (int *) CALLOC(ndivs,sizeof(int));
1485 for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1486 divstring = (char *) List_head(d);
1487 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1488 nintervals[divno] = List_length(intervallist);
1489 nnodes[divno] = IIT_count_nnodes(intervallist,/*presortedp*/false);
1490 }
1491
1492 cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
1493 cum_nintervals[0] = 0;
1494 for (divno = 1; divno <= ndivs; divno++) {
1495 cum_nintervals[divno] = cum_nintervals[divno-1] + nintervals[divno-1];
1496 }
1497 total_nintervals = cum_nintervals[ndivs];
1498 if (total_nintervals == 0) {
1499 fprintf(stderr,"Warning: No intervals were seen in input file\n");
1500 FREE(cum_nintervals);
1501 FREE(nnodes);
1502 FREE(nintervals);
1503 fclose(fp);
1504 return;
1505 }
1506
1507 cum_nnodes = (int *) CALLOC(ndivs+1,sizeof(int));
1508 cum_nnodes[0] = 0;
1509 for (divno = 1; divno <= ndivs; divno++) {
1510 cum_nnodes[divno] = cum_nnodes[divno-1] + nnodes[divno-1];
1511 }
1512
1513 fprintf(stderr,"Writing IIT file header information...");
1514 IIT_write_div_header(fp,total_nintervals,List_length(typelist),List_length(fieldlist),
1515 ndivs,nintervals,nnodes,cum_nintervals,cum_nnodes,divlist,divsort,version,
1516 label_pointers_8p,annot_pointers_8p);
1517 fprintf(stderr,"done\n");
1518
1519 for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1520 divstring = (char *) List_head(d);
1521 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1522
1523 if (divstring[0] == '\0') {
1524 fprintf(stderr,"Processing null division/chromosome...sorting...");
1525 } else {
1526 fprintf(stderr,"Processing division/chromosome %s...sorting...",divstring);
1527 }
1528 IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
1529 /*presortedp*/false);
1530
1531 fprintf(stderr,"writing...");
1532 IIT_write_one_div(fp,root,alphas,betas,sigmas,omegas,nintervals[divno],version);
1533 fprintf(stderr,"done (%d intervals)\n",nintervals[divno]);
1534
1535 Node_gc(&root);
1536 FREE(omegas);
1537 FREE(sigmas);
1538 FREE(betas);
1539 FREE(alphas);
1540 FREE(intervals);
1541 }
1542
1543 fprintf(stderr,"Writing IIT file footer information...");
1544 IIT_write_div_footer(fp,divlist,typelist,fieldlist,intervaltable,
1545 valuetable,labeltable,annottable,cum_nintervals,total_nintervals,version,
1546 label_pointers_8p,annot_pointers_8p);
1547 fprintf(stderr,"done\n");
1548 FREE(cum_nnodes);
1549 FREE(nnodes);
1550 FREE(cum_nintervals);
1551 FREE(nintervals);
1552
1553 fclose(fp);
1554 return;
1555 }
1556 }
1557
1558
1559 /* If annotlist is NULL, X's are written */
1560 T
IIT_create(List_T divlist,List_T typelist,List_T fieldlist,Table_T intervaltable,Table_T labeltable,Table_T datatable,Sorttype_T divsort,int version,bool presortedp)1561 IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervaltable,
1562 Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version,
1563 bool presortedp) {
1564 T new;
1565 Node_T root;
1566 List_T intervallist, d;
1567 char *divstring;
1568 int ndivs, total_nintervals, *nintervals, *nnodes, nnodes_one_div, divno;
1569 int *cum_nintervals, *cum_nnodes;
1570 struct Interval_T *intervals;
1571 int *alphas, *betas, *sigmas, *omegas;
1572
1573 ndivs = List_length(divlist);
1574 nintervals = (int *) CALLOC(ndivs,sizeof(int));
1575 nnodes = (int *) CALLOC(ndivs,sizeof(int));
1576 for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1577 divstring = (char *) List_head(d);
1578 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1579 nintervals[divno] = List_length(intervallist);
1580 nnodes[divno] = IIT_count_nnodes(intervallist,presortedp);
1581 }
1582
1583 cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
1584 cum_nintervals[0] = 0;
1585 for (divno = 1; divno <= ndivs; divno++) {
1586 cum_nintervals[divno] = cum_nintervals[divno-1] + nintervals[divno-1];
1587 }
1588 total_nintervals = cum_nintervals[ndivs];
1589 if (total_nintervals == 0) {
1590 fprintf(stderr,"Warning: No intervals were given to IIT_create\n");
1591 FREE(cum_nintervals);
1592 FREE(nnodes);
1593 FREE(nintervals);
1594 return (T) NULL;
1595 } else {
1596 new = (T) MALLOC(sizeof(*new));
1597 }
1598
1599 cum_nnodes = (int *) CALLOC(ndivs+1,sizeof(int));
1600 cum_nnodes[0] = 0;
1601 for (divno = 1; divno <= ndivs; divno++) {
1602 cum_nnodes[divno] = cum_nnodes[divno-1] + nnodes[divno-1];
1603 }
1604
1605 IIT_create_div_header(new,total_nintervals,List_length(typelist),List_length(fieldlist),
1606 ndivs,nintervals,nnodes,cum_nintervals,cum_nnodes,divlist,divsort,version);
1607
1608 new->alphas = (int **) CALLOC(new->ndivs,sizeof(int *));
1609 new->betas = (int **) CALLOC(new->ndivs,sizeof(int *));
1610 new->sigmas = (int **) CALLOC(new->ndivs,sizeof(int *));
1611 new->omegas = (int **) CALLOC(new->ndivs,sizeof(int *));
1612 new->nodes = (struct FNode_T **) CALLOC(new->ndivs,sizeof(struct FNode_T *));
1613
1614 for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
1615 divstring = (char *) List_head(d);
1616 intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
1617 IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
1618 presortedp);
1619 IIT_create_one_div(new,divno,root,alphas,betas,sigmas,omegas,nintervals[divno]);
1620
1621 Node_gc(&root);
1622 FREE(omegas);
1623 FREE(sigmas);
1624 FREE(betas);
1625 FREE(alphas);
1626 FREE(intervals);
1627 }
1628
1629 IIT_create_div_footer(new,divlist,typelist,fieldlist,intervaltable,
1630 labeltable,datatable,cum_nintervals,total_nintervals);
1631 FREE(cum_nintervals);
1632
1633 return new;
1634 }
1635
1636
1637 #if 0
1638 /* Problematic because this file no longer defines T */
1639 void
1640 IIT_backfill_sequence (T this, int index, int offset, char *Buffer) {
1641 int recno;
1642 char *ptr;
1643
1644 recno = index - 1;
1645 ptr = &(this->annotations[this->annotpointers[recno] + offset]);
1646 if (strncpy(ptr,Buffer,strlen(Buffer)) == NULL) {
1647 fprintf(stderr,"Unable to write %s into iit file\n",Buffer);
1648 exit(9);
1649 }
1650 return;
1651 }
1652 #endif
1653
1654