1 static char rcsid[] = "$Id: splicetrie_build.c 184476 2016-02-18 00:13:26Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "splicetrie_build.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h>		/* For qsort */
11 
12 #include "assert.h"
13 #include "mem.h"
14 #include "iitdef.h"
15 #include "interval.h"
16 
17 
18 #define MININTRONLEN 9
19 #define SPLICEDIST_EXTRA 10 /* Reported intron length does not include dinucleotides */
20 
21 #define EMPTY_POINTER 0U
22 #define INTRON_HIGH_TO_LOW 1U /* Because splicesites based on Interval_low (x..x+1 and y..y+1), but introns go from low to high (x..y+1) */
23 
24 
25 /* Generating trie */
26 #ifdef DEBUG
27 #define debug(x) x
28 #else
29 #define debug(x)
30 #endif
31 
32 /* Generating trie, details */
33 #ifdef DEBUG0
34 #define debug0(x) x
35 #else
36 #define debug0(x)
37 #endif
38 
39 /* Full dump */
40 #ifdef DEBUG2
41 #define debug2(x) x
42 #else
43 #define debug2(x)
44 #endif
45 
46 
47 char *
Splicetype_string(Splicetype_T splicetype)48 Splicetype_string (Splicetype_T splicetype) {
49   switch (splicetype) {
50   case DONOR: return "donor";
51   case ACCEPTOR: return "acceptor";
52   case ANTIDONOR: return "antidonor";
53   case ANTIACCEPTOR: return "antiacceptor";
54   default: abort();
55   }
56 }
57 
58 
59 /*                      87654321 */
60 #define LOW_TWO_BITS  0x00000003
61 
62 static void
splicefrag_nt_leftward(char * nt,Genomecomp_T splicefrag)63 splicefrag_nt_leftward (char *nt, Genomecomp_T splicefrag) {
64   int i, j;
65   Genomecomp_T highbits, lowbits;
66 
67   highbits = splicefrag >> 16;
68   lowbits = splicefrag /* & 0x0000FFFF */;
69 
70   j = 15;
71   for (i = 0; i < 16; i++) {
72     switch (((highbits & 0x01) << 1) | (lowbits & 0x01)) {
73     case RIGHT_A: nt[j] = 'A'; break;
74     case RIGHT_C: nt[j] = 'C'; break;
75     case RIGHT_G: nt[j] = 'G'; break;
76     case RIGHT_T: nt[j] = 'T'; break;
77     }
78     highbits >>= 1;
79     lowbits >>= 1;
80     j--;
81   }
82 
83   return;
84 }
85 
86 static void
splicefrag_nt_rightward(char * nt,Genomecomp_T splicefrag)87 splicefrag_nt_rightward (char *nt, Genomecomp_T splicefrag) {
88   int i, j;
89   Genomecomp_T highbits, lowbits;
90 
91   highbits = splicefrag >> 16;
92   lowbits = splicefrag /* & 0x0000FFFF */;
93 
94   j = 0;
95   for (i = 0; i < 16; i++) {
96     switch (((highbits & 0x01) << 1) | (lowbits & 0x01)) {
97     case RIGHT_A: nt[j] = 'A'; break;
98     case RIGHT_C: nt[j] = 'C'; break;
99     case RIGHT_G: nt[j] = 'G'; break;
100     case RIGHT_T: nt[j] = 'T'; break;
101     }
102     highbits >>= 1;
103     lowbits >>= 1;
104     j++;
105   }
106 
107   return;
108 }
109 
110 
111 
112 /* Splicetype is for the anchor splice */
113 void
Splicetrie_dump(Trieoffset_T * triestart,Univcoord_T * splicesites,Splicetype_T splicetype,Genomecomp_T * splicefrags_ref)114 Splicetrie_dump (Trieoffset_T *triestart, Univcoord_T *splicesites,
115 		 Splicetype_T splicetype, Genomecomp_T *splicefrags_ref) {
116   Triecontent_T leaf;
117   int nleaves, i;
118   Univcoord_T position;
119   Triecontent_T offseta, offsetc, offsetg, offsett;
120   char gbuffer[17];
121 
122   gbuffer[16] = '\0';
123 
124   if (single_leaf_p(leaf = triestart[0])) {
125     position = splicesites[leaf];
126     printf("%d %llu",(int) leaf,(unsigned long long) position);
127     if (splicetype == DONOR || splicetype == ANTIACCEPTOR) {
128       splicefrag_nt_rightward(gbuffer,splicefrags_ref[leaf]);
129       printf(" %s (rightward)\n",gbuffer);
130     } else {
131       splicefrag_nt_leftward(gbuffer,splicefrags_ref[leaf]);
132       printf(" %s (leftward)\n",gbuffer);
133     }
134 
135   } else if (multiple_leaf_p(leaf)) {
136     nleaves = (int) (-leaf);
137     for (i = 1; i <= nleaves; i++) {
138       leaf = triestart[i];
139       position = splicesites[leaf];
140       printf("%d %llu",(int) leaf,(unsigned long long) position);
141       if (splicetype == DONOR || splicetype == ANTIACCEPTOR) {
142 	splicefrag_nt_rightward(gbuffer,splicefrags_ref[leaf]);
143 	printf(" %s (rightward)\n",gbuffer);
144       } else {
145 	splicefrag_nt_leftward(gbuffer,splicefrags_ref[leaf]);
146 	printf(" %s (leftward)\n",gbuffer);
147       }
148     }
149 
150   } else {
151 #ifdef USE_2BYTE_RELOFFSETS
152     get_offsets(&offseta,&offsetc,&offsetg,&offsett,triestart[1],triestart[2]);
153 #else
154     offseta = (int) triestart[1];
155     offsetc = (int) triestart[2];
156     offsetg = (int) triestart[3];
157     offsett = (int) triestart[4];
158 #endif
159 
160     if (offseta > 0) {
161       Splicetrie_dump(&(triestart[-offseta]),splicesites,splicetype,splicefrags_ref);
162     }
163     if (offsetc > 0) {
164       Splicetrie_dump(&(triestart[-offsetc]),splicesites,splicetype,splicefrags_ref);
165     }
166     if (offsetg > 0) {
167       Splicetrie_dump(&(triestart[-offsetg]),splicesites,splicetype,splicefrags_ref);
168     }
169     if (offsett > 0) {
170       Splicetrie_dump(&(triestart[-offsett]),splicesites,splicetype,splicefrags_ref);
171     }
172   }
173 
174   return;
175 }
176 
177 
178 /*               87654321 */
179 #define UINT4_LEFT_A 0x00000000
180 #define UINT4_LEFT_C 0x40000000
181 #define UINT4_LEFT_G 0x80000000
182 #define UINT4_LEFT_T 0xC0000000
183 #define UINT4_HIGH2  0xC0000000
184 
185 
186 /* Puts leftmost character into lowest bits */
187 /* For right splicestrings, we want the leftmost character in the highest bits */
188 
189 static Genomecomp_T
compress16(bool * saw_n_p,char * buffer)190 compress16 (bool *saw_n_p, char *buffer) {
191   Genomecomp_T low = 0U;
192   int c;
193   int i;
194 
195   /* *saw_n_p = false; -- Want to check both ref and alt, so rely on caller to set */
196   for (i = 0; i < 16; i++) {
197     c = buffer[i];
198     low >>= 2;
199     switch (c) {
200     case 'A': break;
201     case 'C': low |= UINT4_LEFT_C; break;
202     case 'G': low |= UINT4_LEFT_G; break;
203     case 'T': low |= UINT4_LEFT_T; break;
204     default: *saw_n_p = true; break;
205     }
206   }
207 
208   return low;
209 }
210 
211 
212 static Genomecomp_T
uint4_reverse(Genomecomp_T forward)213 uint4_reverse (Genomecomp_T forward) {
214   Genomecomp_T reverse = 0U;
215   int c;
216   int i;
217 
218   for (i = 0; i < 16; i++) {
219     c = forward & 0x03;
220     reverse <<= 2;
221     reverse |= c;
222     forward >>= 2;
223   }
224 
225   return reverse;
226 }
227 
228 
229 
230 /*                   87654321 */
231 #define LEFT_SET   0x80000000
232 #define LEFT_CLEAR 0x00000000
233 
234 /* Puts the odd bits in the top 16 bits, and the even bits in the
235    bottom 16 bits.  Needed to match genomebits format, although that
236    has 32-bit blocks instead of 16 bits. */
237 static Genomecomp_T
unshuffle16(bool * saw_n_p,char * buffer)238 unshuffle16 (bool *saw_n_p, char *buffer) {
239   Genomecomp_T bits = 0U;
240   int c;
241   int i;
242 
243   /* *saw_n_p = false; -- Want to check both ref and alt, so rely on caller to set */
244   /* Low bits */
245   for (i = 0; i < 16; i++) {
246     c = buffer[i];
247     bits >>= 1;
248     switch (c) {
249     case 'A': /* bits |= LEFT_CLEAR; */ break;
250     case 'C':    bits |= LEFT_SET; break;
251     case 'G': /* bits |= LEFT_CLEAR; */ break;
252     case 'T':    bits |= LEFT_SET; break;
253     default: *saw_n_p = true; break;
254     }
255   }
256 
257   /* High bits */
258   for (i = 0; i < 16; i++) {
259     c = buffer[i];
260     bits >>= 1;
261     switch (c) {
262     case 'A': /* bits |= LEFT_CLEAR; */ break;
263     case 'C': /* bits |= LEFT_CLEAR; */ break;
264     case 'G':    bits |= LEFT_SET; break;
265     case 'T':    bits |= LEFT_SET; break;
266     default: *saw_n_p = true; break;
267     }
268   }
269 
270   return bits;
271 }
272 
273 
274 #if 0
275 /* Replaced by procedures in splicestringpool.c */
276 static void
277 Splicestring_free (Splicestring_T *old) {
278   FREE(*old);
279   return;
280 }
281 
282 void
283 Splicestring_gc (List_T *splicestrings, int nsplicesites) {
284   int i;
285   List_T list, p;
286   Splicestring_T splicestring;
287 
288   for (i = 0; i < nsplicesites; i++) {
289     list = splicestrings[i];
290     for (p = list; p != NULL; p = List_next(p)) {
291       splicestring = (Splicestring_T) List_head(p);
292       Splicestring_free(&splicestring);
293     }
294     List_free(&list);
295   }
296   FREE(splicestrings);
297   return;
298 }
299 
300 static Splicestring_T
301 Splicestring_new (Genomecomp_T string, Genomecomp_T splicesite, int splicesite_i) {
302   Splicestring_T new = (Splicestring_T) MALLOC(sizeof(*new));
303 
304   new->string = string;
305   new->splicesite = splicesite;
306   new->splicesite_i = (Genomecomp_T) splicesite_i;
307   return new;
308 }
309 #endif
310 
311 static int
Splicestring_cmp(const void * a,const void * b)312 Splicestring_cmp (const void *a, const void *b) {
313   Splicestring_T x = * (Splicestring_T *) a;
314   Splicestring_T y = * (Splicestring_T *) b;
315 
316 #ifdef USE_STRINGS
317   return strcmp(x->string,y->string);
318 #else
319   if (x->string < y->string) {
320     return -1;
321   } else if (x->string > y->string) {
322     return +1;
323   } else {
324     return 0;
325   }
326 #endif
327 }
328 
329 
330 static List_T
allelic_combinations(Genomecomp_T refstring,Genomecomp_T altstring,Genomecomp_T splicesite,int splicesite_i,Splicestringpool_T splicestringpool)331 allelic_combinations (Genomecomp_T refstring, Genomecomp_T altstring, Genomecomp_T splicesite, int splicesite_i,
332 		      Splicestringpool_T splicestringpool) {
333   List_T splicestrings = NULL;
334   Uintlist_T combinations, newcombinations, temp, p;
335   int refc, altc;
336   int i;
337 
338   combinations = Uintlist_push(NULL,0U);
339   for (i = 0; i < 16; i++) {
340     newcombinations = NULL;
341 
342     refc = (refstring & UINT4_HIGH2) >> 30;
343     altc = (altstring & UINT4_HIGH2) >> 30;
344     refstring <<= 2;
345     altstring <<= 2;
346     if (refc != altc) {
347       for (p = combinations; p != NULL; p = Uintlist_next(p)) {
348 	newcombinations = Uintlist_push(newcombinations,(Uintlist_head(p) << 2) | refc);
349 	newcombinations = Uintlist_push(newcombinations,(Uintlist_head(p) << 2) | altc);
350       }
351     } else {
352       for (p = combinations; p != NULL; p = Uintlist_next(p)) {
353 	newcombinations = Uintlist_push(newcombinations,(Uintlist_head(p) << 2) | refc);
354       }
355     }
356 
357     temp = combinations;
358     combinations = Uintlist_reverse(newcombinations);
359     Uintlist_free(&temp);
360   }
361 
362   for (p = combinations; p != NULL; p = Uintlist_next(p)) {
363     splicestrings = Splicestringpool_push(splicestrings,splicestringpool,
364 					  Uintlist_head(p),splicesite,splicesite_i);
365   }
366 
367   Uintlist_free(&combinations);
368   return splicestrings;
369 }
370 
371 
372 
373 
374 
375 
376 /* If distances are provided, splicedists are the observed distances */
377 Univcoord_T *
Splicetrie_retrieve_via_splicesites(bool * distances_observed_p,Genomecomp_T ** splicecomp,Splicetype_T ** splicetypes,Chrpos_T ** splicedists,List_T ** splicestrings,Genomecomp_T ** splicefrags_ref,Genomecomp_T ** splicefrags_alt,int * nsplicesites,IIT_T splicing_iit,int * splicing_divint_crosstable,int donor_typeint,int acceptor_typeint,Univ_IIT_T chromosome_iit,Genome_T genome,Genome_T genomealt,Chrpos_T shortsplicedist,Splicestringpool_T splicestringpool)378 Splicetrie_retrieve_via_splicesites (bool *distances_observed_p,
379 #ifdef GSNAP
380 				     Genomecomp_T **splicecomp,
381 #endif
382 				     Splicetype_T **splicetypes, Chrpos_T **splicedists,
383 				     List_T **splicestrings, Genomecomp_T **splicefrags_ref, Genomecomp_T **splicefrags_alt,
384 				     int *nsplicesites, IIT_T splicing_iit, int *splicing_divint_crosstable,
385 				     int donor_typeint, int acceptor_typeint, Univ_IIT_T chromosome_iit,
386 				     Genome_T genome, Genome_T genomealt, Chrpos_T shortsplicedist,
387 				     Splicestringpool_T splicestringpool) {
388   Univcoord_T *splicesites, chroffset, chrhigh, position;
389   Chrpos_T chrlength, chrpos;
390   Univcoord_T last_donor, last_antidonor, last_acceptor, last_antiacceptor;
391   int last_donor_k, last_antidonor_k, last_acceptor_k, last_antiacceptor_k;
392   int distance;
393   Genomecomp_T refstring, altstring;
394   int *splicesites1;
395   int divno, nsplicesites1, i, k;
396   Chrnum_T chrnum;
397   Interval_T *intervals, interval;
398   struct Interval_T *interval_structs;
399   char gbuffer_ref[17], gbuffer_alt[17], *chr;
400   char *restofheader;
401   /* char *annot; */
402   bool firstp = true, saw_n_p, allocp, alloc_header_p;
403   int ntoolong = 0;
404 
405 #ifdef DEBUG2
406   List_T p;
407   Splicestring_T splicestring;
408 #endif
409 
410 #ifdef GSNAP
411   int nblocks;
412 #endif
413 
414   k = 0;
415   for (chrnum = 1; chrnum <= Univ_IIT_total_nintervals(chromosome_iit); chrnum++) {
416     if ((divno = splicing_divint_crosstable[chrnum]) > 0) {
417       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
418       /* chrlength = Univ_IIT_length(chromosome_iit,chrnum); */
419       splicesites1 = IIT_get_with_divno(&nsplicesites1,splicing_iit,divno,
420 					0U,chrlength-1U,/*sortp*/false);
421       if (nsplicesites1 > 0) {
422 	if (firstp == true) {
423 	  /* annot = */ IIT_annotation(&restofheader,splicing_iit,splicesites1[0],&alloc_header_p);
424 	  if (restofheader[0] == '\0') {
425 	    fprintf(stderr,"splice distances absent...");
426 	    *distances_observed_p = false;
427 	  } else if (sscanf(restofheader,"%d",&distance) < 1) {
428 	    fprintf(stderr,"splice distances absent...");
429 	    *distances_observed_p = false;
430 	  } else {
431 	    fprintf(stderr,"splice distances present...");
432 	    *distances_observed_p = true;
433 	  }
434 	  if (alloc_header_p == true) {
435 	    FREE(restofheader);
436 	  }
437 	  firstp = false;
438 	}
439 
440 	intervals = (Interval_T *) CALLOC(nsplicesites1,sizeof(Interval_T));
441 	for (i = 0; i < nsplicesites1; i++) {
442 	  intervals[i] = &(splicing_iit->intervals[divno][i]);
443 	}
444 	qsort(intervals,nsplicesites1,sizeof(Interval_T),Interval_cmp_low);
445 
446 	last_donor = last_antidonor = last_acceptor = last_antiacceptor = 0U;
447 	for (i = 0; i < nsplicesites1; i++) {
448 	  interval = intervals[i];
449 	  chrpos = Interval_low(interval);
450 	  position = chrpos + chroffset;
451 
452 	  if (position >= chrhigh) {
453 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
454 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
455 		    chr,chrpos,chrlength);
456 	    if (allocp) FREE(chr);
457 
458 	  } else if (Interval_type(interval) == donor_typeint) {
459 	    if (Interval_sign(interval) > 0) {
460 	      if (position != last_donor) {
461 		last_donor = position;
462 		k++;
463 	      }
464 	    } else {
465 	      if (position != last_antidonor) {
466 		last_antidonor = position;
467 		k++;
468 	      }
469 	    }
470 	  } else if (Interval_type(interval) == acceptor_typeint) {
471 	    if (Interval_sign(interval) > 0) {
472 	      if (position != last_acceptor) {
473 		last_acceptor = position;
474 		k++;
475 	      }
476 	    } else {
477 	      if (position != last_antiacceptor) {
478 		last_antiacceptor = position;
479 		k++;
480 	      }
481 	    }
482 	  }
483 	}
484 	FREE(intervals);
485 	FREE(splicesites1);
486       }
487     }
488   }
489 
490   *nsplicesites = k;
491   debug(printf("total unique splicesites: %d\n",*nsplicesites));
492   fprintf(stderr,"%d unique splicesites...",*nsplicesites);
493 
494   /* The above procedure determines number of unique splicesites */
495 
496   if (*nsplicesites == 0) {
497 #ifdef GSNAP
498     *splicecomp = (Genomecomp_T *) NULL;
499 #endif
500     splicesites = (Univcoord_T *) NULL;
501     *splicetypes = (Splicetype_T *) NULL;
502     *splicedists = (Chrpos_T *) NULL;
503     *splicestrings = (List_T *) NULL;
504     *splicefrags_ref = (Genomecomp_T *) NULL;
505     *splicefrags_alt = (Genomecomp_T *) NULL;
506     return splicesites;
507   }
508 
509 #ifdef GSNAP
510   nblocks = (Genome_totallength(genome)+31)/32U;
511   *splicecomp = (Genomecomp_T *) CALLOC(nblocks,sizeof(Genomecomp_T));
512 #endif
513   splicesites = (Univcoord_T *) CALLOC((*nsplicesites) + 1,sizeof(Univcoord_T));
514   *splicetypes = (Splicetype_T *) CALLOC(*nsplicesites,sizeof(Splicetype_T));
515   *splicedists = (Chrpos_T *) CALLOC(*nsplicesites,sizeof(Chrpos_T));
516   *splicestrings = (List_T *) CALLOC(*nsplicesites,sizeof(List_T));
517   *splicefrags_ref = (Genomecomp_T *) CALLOC(*nsplicesites,sizeof(Genomecomp_T));
518   if (genomealt == NULL) {
519     *splicefrags_alt = *splicefrags_ref;
520   } else {
521     *splicefrags_alt = (Genomecomp_T *) CALLOC(*nsplicesites,sizeof(Genomecomp_T));
522   }
523 
524   /* Use interval_structs instead of intervals, because we want to
525      copy information and avoid creating many Interval_T objects */
526 
527   k = 0;
528   for (chrnum = 1; chrnum <= Univ_IIT_total_nintervals(chromosome_iit); chrnum++) {
529     if ((divno = splicing_divint_crosstable[chrnum]) > 0) {
530       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
531       /* chrlength = Univ_IIT_length(chromosome_iit,chrnum); */
532       splicesites1 = IIT_get_with_divno(&nsplicesites1,splicing_iit,divno,
533 					0U,chrlength-1U,/*sortp*/false);
534       if (nsplicesites1 > 0) {
535 	interval_structs = (struct Interval_T *) CALLOC(nsplicesites1,sizeof(struct Interval_T));
536 	for (i = 0; i < nsplicesites1; i++) {
537 	  /* intervals[i] = &(splicing_iit->intervals[divno][i]); */
538 	  /* Copy so we can store distance information in Interval_high */
539 	  Interval_copy_existing(&(interval_structs[i]),&(splicing_iit->intervals[divno][i]));
540 	  if (*distances_observed_p == false) {
541 	    /* No, want to have essentially zero distance */
542 	    /* Interval_store_length(intervals[i],shortsplicedist); */
543 	  } else {
544 	    /* annot = */ IIT_annotation(&restofheader,splicing_iit,splicesites1[i],&alloc_header_p);
545 	    if (sscanf(restofheader,"%d",&distance) != 1) {
546 	      fprintf(stderr,"splicesites file missing distance in entry %s...exiting\n",
547 		      IIT_label(splicing_iit,splicesites1[i],&allocp));
548 	      exit(9);
549 	    } else if (distance < 0) {
550 	      fprintf(stderr,"splicesites file has a negative distance %d in entry %s...exiting\n",
551 		      distance,IIT_label(splicing_iit,splicesites1[i],&allocp));
552 	      exit(9);
553 	    } else if (distance > (int) shortsplicedist) {
554 	      ntoolong++;
555 	      Interval_store_length(&(interval_structs[i]),distance + SPLICEDIST_EXTRA);  /* Previously stored shortsplicedist */
556 	    } else {
557 	      Interval_store_length(&(interval_structs[i]),distance + SPLICEDIST_EXTRA);
558 	    }
559 	    if (alloc_header_p == true) {
560 	      FREE(restofheader);
561 	    }
562 	  }
563 	}
564 
565 	qsort(interval_structs,nsplicesites1,sizeof(struct Interval_T),Interval_cmp_low_struct);
566 
567 	last_donor = last_antidonor = last_acceptor = last_antiacceptor = 0U;
568 	for (i = 0; i < nsplicesites1; i++) {
569 	  interval = &(interval_structs[i]);
570 	  chrpos = Interval_low(interval);
571 	  position = chrpos + chroffset;
572 
573 	  if (position >= chrhigh) {
574 #if 0
575 	    /* Warning given previously */
576 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
577 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
578 		    chr,chrpos,chrlength);
579 	    if (allocp) FREE(chr);
580 #endif
581 
582 	  } else if (Interval_type(interval) == donor_typeint) {
583 	    if (Interval_sign(interval) > 0) {
584 	      if (position == last_donor) {
585 		if (Interval_length(interval) > (*splicedists)[last_donor_k]) {
586 		  (*splicedists)[last_donor_k] = Interval_length(interval);
587 		}
588 
589 	      } else {
590 		last_donor_k = k;
591 		last_donor = splicesites[k] = position;
592 #ifdef GSNAP
593 		assert(position/32U < (unsigned int) nblocks);
594 		(*splicecomp)[position/32U] |= (1 << (position % 32));
595 #endif
596 		(*splicetypes)[k] = DONOR;
597 		(*splicedists)[k] = Interval_length(interval);
598 
599 		saw_n_p = false;
600 		Genome_fill_buffer_simple(genome,position-16,16,gbuffer_ref);
601 		refstring = compress16(&saw_n_p,gbuffer_ref);
602 		(*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
603 		if (genomealt) {
604 		  Genome_fill_buffer_simple_alt(genome,genomealt,position-16,16,gbuffer_alt);
605 		  altstring = compress16(&saw_n_p,gbuffer_alt);
606 		  (*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
607 		}
608 
609 		if (saw_n_p == true) {
610 		  chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
611 		  fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
612 			  chr,chrpos);
613 		  if (allocp) FREE(chr);
614 
615 		} else if (genomealt) {
616 		  (*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
617 		  k++;
618 		} else {
619 		  (*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
620 		  k++;
621 		}
622 	      }
623 
624 	    } else {
625 	      if (position == last_antidonor) {
626 		if (Interval_length(interval) > (*splicedists)[last_antidonor_k]) {
627 		  (*splicedists)[last_antidonor_k] = Interval_length(interval);
628 		}
629 
630 	      } else {
631 		last_antidonor_k = k;
632 		last_antidonor = splicesites[k] = position;
633 #ifdef GSNAP
634 		assert(position/32U < (unsigned int) nblocks);
635 		(*splicecomp)[position/32U] |= (1 << (position % 32));
636 #endif
637 		(*splicetypes)[k] = ANTIDONOR;
638 		(*splicedists)[k] = Interval_length(interval);
639 
640 		saw_n_p = false;
641 		Genome_fill_buffer_simple(genome,position,16,gbuffer_ref);
642 		refstring = uint4_reverse(compress16(&saw_n_p,gbuffer_ref));
643 		(*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
644 		if (genomealt) {
645 		  Genome_fill_buffer_simple_alt(genome,genomealt,position,16,gbuffer_alt);
646 		  altstring = uint4_reverse(compress16(&saw_n_p,gbuffer_alt));
647 		  (*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
648 		}
649 
650 		if (saw_n_p == true) {
651 		  chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
652 		  fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
653 			  chr,chrpos);
654 		  if (allocp) FREE(chr);
655 		} else if (genomealt) {
656 		  (*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
657 		  k++;
658 		} else {
659 		  (*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
660 		  k++;
661 		}
662 	      }
663 	    }
664 
665 	  } else if (Interval_type(interval) == acceptor_typeint) {
666 	    if (Interval_sign(interval) > 0) {
667 	      if (position == last_acceptor) {
668 		if (Interval_length(interval) > (*splicedists)[last_acceptor_k]) {
669 		  (*splicedists)[last_acceptor_k] = Interval_length(interval);
670 		}
671 
672 	      } else {
673 		last_acceptor_k = k;
674 		last_acceptor = splicesites[k] = position;
675 #ifdef GSNAP
676 		assert(position/32U < (unsigned int) nblocks);
677 		(*splicecomp)[position/32U] |= (1 << (position % 32));
678 #endif
679 		(*splicetypes)[k] = ACCEPTOR;
680 		(*splicedists)[k] = Interval_length(interval);
681 
682 		saw_n_p = false;
683 		Genome_fill_buffer_simple(genome,position,16,gbuffer_ref);
684 		refstring = uint4_reverse(compress16(&saw_n_p,gbuffer_ref));
685 		(*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
686 		if (genomealt) {
687 		  Genome_fill_buffer_simple_alt(genome,genomealt,position,16,gbuffer_alt);
688 		  altstring = uint4_reverse(compress16(&saw_n_p,gbuffer_alt));
689 		  (*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
690 		}
691 
692 		if (saw_n_p == true) {
693 		  chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
694 		  fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
695 			  chr,chrpos);
696 		  if (allocp) FREE(chr);
697 		} else if (genomealt) {
698 		  (*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
699 		  k++;
700 		} else {
701 		  (*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
702 		  k++;
703 		}
704 	      }
705 
706 	    } else {
707 	      if (position == last_antiacceptor) {
708 		if (Interval_length(interval) > (*splicedists)[last_antiacceptor_k]) {
709 		  (*splicedists)[last_antiacceptor_k] = Interval_length(interval);
710 		}
711 
712 	      } else {
713 		last_antiacceptor_k = k;
714 		last_antiacceptor = splicesites[k] = position;
715 #ifdef GSNAP
716 		assert(position/32U < (unsigned int) nblocks);
717 		(*splicecomp)[position/32U] |= (1 << (position % 32));
718 #endif
719 		(*splicetypes)[k] = ANTIACCEPTOR;
720 		(*splicedists)[k] = Interval_length(interval);
721 
722 		saw_n_p = false;
723 		Genome_fill_buffer_simple(genome,position-16,16,gbuffer_ref);
724 		refstring = compress16(&saw_n_p,gbuffer_ref);
725 		(*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
726 		if (genomealt) {
727 		  Genome_fill_buffer_simple_alt(genome,genomealt,position-16,16,gbuffer_alt);
728 		  altstring = compress16(&saw_n_p,gbuffer_alt);
729 		  (*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
730 		}
731 
732 		if (saw_n_p == true) {
733 		  chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
734 		  fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
735 			  chr,chrpos);
736 		  if (allocp) FREE(chr);
737 		} else if (genomealt) {
738 		  (*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
739 		  k++;
740 		} else {
741 		  (*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
742 		  k++;
743 		}
744 	      }
745 
746 	    }
747 	  }
748 	}
749 
750 	FREE(interval_structs);
751 	FREE(splicesites1);
752       }
753     }
754   }
755 
756   *nsplicesites = k;
757   splicesites[*nsplicesites] = (Univcoord_T) -1; /* Marker for comparison in identify_all_segments */
758   fprintf(stderr,"\n%d splicesites are valid...",*nsplicesites);
759 
760 #ifdef DEBUG2
761   for (k = 0; k < *nsplicesites; k++) {
762     printf("%d: %u %s %08X\n",k,splicesites[k],Splicetype_string((*splicetypes)[k]),(*splicefrags_ref)[k]);
763     for (p = (*splicestrings)[k]; p != NULL; p = List_next(p)) {
764       splicestring = (Splicestring_T) List_head(p);
765       printf("  %u %u %u\n",splicestring->string,splicestring->splicesite,splicestring->splicesite_i);
766     }
767   }
768 #endif
769 
770   if (ntoolong > 0) {
771     fprintf(stderr,"%d entries with distance > %d specified for local splice distance...",ntoolong,shortsplicedist);
772   }
773 
774   return splicesites;
775 }
776 
777 
778 struct Cell_T {
779   int k;			/* original k */
780   Univcoord_T pos;		/* splicesite pos */
781 };
782 
783 static int
Cell_position_cmp(const void * a,const void * b)784 Cell_position_cmp (const void *a, const void *b) {
785   struct Cell_T x = * (struct Cell_T *) a;
786   struct Cell_T y = * (struct Cell_T *) b;
787 
788   if (x.pos < y.pos) {
789     return -1;
790   } else if (y.pos < x.pos) {
791     return +1;
792   } else {
793     return 0;
794   }
795 }
796 
797 
798 Univcoord_T *
Splicetrie_retrieve_via_introns(Genomecomp_T ** splicecomp,Splicetype_T ** splicetypes,Chrpos_T ** splicedists,List_T ** splicestrings,Genomecomp_T ** splicefrags_ref,Genomecomp_T ** splicefrags_alt,int * nsplicesites,IIT_T splicing_iit,int * splicing_divint_crosstable,Univ_IIT_T chromosome_iit,Genome_T genome,Genome_T genomealt,Splicestringpool_T splicestringpool)799 Splicetrie_retrieve_via_introns (
800 #ifdef GSNAP
801 				 Genomecomp_T **splicecomp,
802 #endif
803 				 Splicetype_T **splicetypes, Chrpos_T **splicedists,
804 				 List_T **splicestrings, Genomecomp_T **splicefrags_ref, Genomecomp_T **splicefrags_alt,
805 				 int *nsplicesites, IIT_T splicing_iit, int *splicing_divint_crosstable,
806 				 Univ_IIT_T chromosome_iit, Genome_T genome, Genome_T genomealt,
807 				 Splicestringpool_T splicestringpool) {
808   Univcoord_T *splicesites, chroffset, chrhigh, position;
809   Chrpos_T chrlength, chrpos;
810   Univcoord_T last_donor, last_antidonor, last_acceptor, last_antiacceptor;
811   int last_donor_k, last_antidonor_k, last_acceptor_k, last_antiacceptor_k;
812   Genomecomp_T refstring, altstring;
813   int *introns1;
814   int divno, nintrons1, i, k, j;
815   Chrnum_T chrnum;
816   Interval_T *intervals, interval;
817   char gbuffer_ref[17], gbuffer_alt[17], *chr;
818   bool saw_n_p, allocp;
819 
820   struct Cell_T *cells;
821   Univcoord_T *temp_splicesites;
822   Splicetype_T *temp_splicetypes;
823   Chrpos_T *temp_splicedists;
824   List_T *temp_splicestrings, p;
825   UINT4 *temp_splicefrags_ref, *temp_splicefrags_alt;
826   Splicestring_T splicestring;
827 
828 #ifdef GSNAP
829   int nblocks;
830 #endif
831 
832   k = 0;
833   for (chrnum = 1; chrnum <= Univ_IIT_total_nintervals(chromosome_iit); chrnum++) {
834     if ((divno = splicing_divint_crosstable[chrnum]) > 0) {
835       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
836       /* chrlength = Univ_IIT_length(chromosome_iit,chrnum); */
837       introns1 = IIT_get_with_divno(&nintrons1,splicing_iit,divno,0U,chrlength-1U,/*sortp*/false);
838       if (nintrons1 > 0) {
839 	intervals = (Interval_T *) CALLOC(nintrons1,sizeof(Interval_T));
840 	for (i = 0; i < nintrons1; i++) {
841 	  intervals[i] = &(splicing_iit->intervals[divno][i]);
842 	}
843 
844 	qsort(intervals,nintrons1,sizeof(Interval_T),Interval_cmp_low);
845 	last_donor = last_antiacceptor = 0U;
846 	for (i = 0; i < nintrons1; i++) {
847 	  interval = intervals[i];
848 	  chrpos = Interval_low(interval);
849 	  position = chrpos + chroffset;
850 
851 	  if (position >= chrhigh) {
852 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
853 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
854 		    chr,chrpos,chrlength);
855 	    if (allocp) FREE(chr);
856 
857 	  } else if (Interval_sign(interval) > 0) {
858 	    if (position != last_donor) {
859 	      last_donor = position;
860 	      k++;
861 	    }
862 	  } else {
863 	    if (position != last_antiacceptor) {
864 	      last_antiacceptor = position;
865 	      k++;
866 	    }
867 	  }
868 	}
869 
870 	qsort(intervals,nintrons1,sizeof(Interval_T),Interval_cmp_high);
871 	last_acceptor = last_antidonor = 0U;
872 	for (i = 0; i < nintrons1; i++) {
873 	  interval = intervals[i];
874 	  chrpos = Interval_high(interval) - INTRON_HIGH_TO_LOW;
875 	  position = chrpos + chroffset;
876 
877 	  if (position >= chrhigh) {
878 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
879 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
880 		    chr,chrpos,chrlength);
881 	    if (allocp) FREE(chr);
882 
883 	  } else if (Interval_sign(interval) > 0) {
884 	    if (position != last_acceptor) {
885 	      last_acceptor = position;
886 	      k++;
887 	    }
888 	  } else {
889 	    if (position != last_antidonor) {
890 	      last_antidonor = position;
891 	      k++;
892 	    }
893 	  }
894 	}
895 
896 	FREE(intervals);
897 	FREE(introns1);
898       }
899     }
900   }
901 
902   *nsplicesites = k;
903   debug(printf("total unique splicesites: %d\n",*nsplicesites));
904   fprintf(stderr,"%d unique splicesites...",*nsplicesites);
905 
906   /* The above procedure determines number of unique splicesites */
907 
908   if (*nsplicesites == 0) {
909 #ifdef GSNAP
910     *splicecomp = (Genomecomp_T *) NULL;
911 #endif
912     splicesites = (Univcoord_T *) NULL;
913     *splicetypes = (Splicetype_T *) NULL;
914     *splicedists = (Chrpos_T *) NULL;
915     *splicestrings = (List_T *) NULL;
916     *splicefrags_ref = (Genomecomp_T *) NULL;
917     *splicefrags_alt = (Genomecomp_T *) NULL;
918 
919     return splicesites;
920   }
921 
922 #ifdef GSNAP
923   nblocks = (Genome_totallength(genome)+31)/32U;
924   *splicecomp = (Genomecomp_T *) CALLOC(nblocks,sizeof(Genomecomp_T));
925 #endif
926   splicesites = (Univcoord_T *) CALLOC((*nsplicesites) + 1,sizeof(Univcoord_T));
927   *splicetypes = (Splicetype_T *) CALLOC(*nsplicesites,sizeof(Splicetype_T));
928   *splicedists = (Chrpos_T *) CALLOC(*nsplicesites,sizeof(Chrpos_T));
929   *splicestrings = (List_T *) CALLOC(*nsplicesites,sizeof(List_T));
930   *splicefrags_ref = (Genomecomp_T *) CALLOC(*nsplicesites,sizeof(Genomecomp_T));
931   if (genomealt == NULL) {
932     *splicefrags_alt = *splicefrags_ref;
933   } else {
934     *splicefrags_alt = (Genomecomp_T *) CALLOC(*nsplicesites,sizeof(Genomecomp_T));
935   }
936 
937 
938   k = 0;
939   for (chrnum = 1; chrnum <= Univ_IIT_total_nintervals(chromosome_iit); chrnum++) {
940     if ((divno = splicing_divint_crosstable[chrnum]) > 0) {
941       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
942       /* chrlength = Univ_IIT_length(chromosome_iit,chrnum); */
943       introns1 = IIT_get_with_divno(&nintrons1,splicing_iit,divno,0U,chrlength-1U,/*sortp*/false);
944       if (nintrons1 > 0) {
945 	intervals = (Interval_T *) CALLOC(nintrons1,sizeof(Interval_T));
946 	for (i = 0; i < nintrons1; i++) {
947 	  intervals[i] = &(splicing_iit->intervals[divno][i]);
948 	}
949 
950 	qsort(intervals,nintrons1,sizeof(Interval_T),Interval_cmp_low);
951 	last_donor = last_antiacceptor = 0U;
952 	for (i = 0; i < nintrons1; i++) {
953 	  interval = intervals[i];
954 	  chrpos = Interval_low(interval);
955 	  position = chrpos + chroffset;
956 
957 	  if (position >= chrhigh) {
958 #if 0
959 	    /* Warning given previously */
960 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
961 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
962 		    chr,chrpos,chrlength);
963 	    if (allocp) FREE(chr);
964 #endif
965 
966 	  } else if (Interval_sign(interval) > 0) {
967 	    if (position == last_donor) {
968 	      if (Interval_length(interval) > (*splicedists)[last_donor_k]) {
969 		(*splicedists)[last_donor_k] = Interval_length(interval);
970 	      }
971 
972 	    } else {
973 	      last_donor_k = k;
974 	      last_donor = splicesites[k] = position;
975 #ifdef GSNAP
976 	      assert(position/32U < (unsigned int) nblocks);
977 	      (*splicecomp)[position/32U] |= (1 << (position % 32));
978 #endif
979 	      (*splicetypes)[k] = DONOR;
980 	      (*splicedists)[k] = Interval_length(interval);
981 
982 	      saw_n_p = false;
983 	      Genome_fill_buffer_simple(genome,position-16,16,gbuffer_ref);
984 	      refstring = compress16(&saw_n_p,gbuffer_ref);
985 	      (*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
986 	      if (genomealt) {
987 		Genome_fill_buffer_simple_alt(genome,genomealt,position-16,16,gbuffer_alt);
988 		altstring = compress16(&saw_n_p,gbuffer_alt);
989 		(*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
990 	      }
991 
992 	      if (saw_n_p == true) {
993 		chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
994 		fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
995 			chr,chrpos);
996 		if (allocp) FREE(chr);
997 	      } else if (genomealt) {
998 		(*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
999 		k++;
1000 	      } else {
1001 		(*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
1002 		k++;
1003 	      }
1004 	    }
1005 
1006 	  } else {
1007 	    if (position == last_antiacceptor) {
1008 	      if (Interval_length(interval) > (*splicedists)[last_antiacceptor_k]) {
1009 		(*splicedists)[last_antiacceptor_k] = Interval_length(interval);
1010 	      }
1011 
1012 	    } else {
1013 	      last_antiacceptor_k = k;
1014 	      last_antiacceptor = splicesites[k] = position;
1015 #ifdef GSNAP
1016 	      assert(position/32U < (unsigned int) nblocks);
1017 	      (*splicecomp)[position/32U] |= (1 << (position % 32));
1018 #endif
1019 	      (*splicetypes)[k] = ANTIACCEPTOR;
1020 	      (*splicedists)[k] = Interval_length(interval);
1021 
1022 	      saw_n_p = false;
1023 	      Genome_fill_buffer_simple(genome,position-16,16,gbuffer_ref);
1024 	      refstring = compress16(&saw_n_p,gbuffer_ref);
1025 	      (*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
1026 	      if (genomealt) {
1027 		Genome_fill_buffer_simple_alt(genome,genomealt,position-16,16,gbuffer_alt);
1028 		altstring = compress16(&saw_n_p,gbuffer_alt);
1029 		(*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
1030 	      }
1031 
1032 	      if (saw_n_p == true) {
1033 		chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
1034 		fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
1035 			chr,chrpos);
1036 		if (allocp) FREE(chr);
1037 	      } else if (genomealt) {
1038 		(*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
1039 		k++;
1040 	      } else {
1041 		(*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
1042 		k++;
1043 	      }
1044 	    }
1045 
1046 	  }
1047 	}
1048 
1049 	qsort(intervals,nintrons1,sizeof(Interval_T),Interval_cmp_high);
1050 	last_acceptor = last_antidonor = 0U;
1051 	for (i = 0; i < nintrons1; i++) {
1052 	  interval = intervals[i];
1053 	  chrpos = Interval_high(interval) - INTRON_HIGH_TO_LOW;
1054 	  position = chrpos + chroffset;
1055 
1056 	  if (position >= chrhigh) {
1057 #if 0
1058 	    /* Warning given previously */
1059 	    chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
1060 	    fprintf(stderr,"\nSplice site %s:%u extends beyond chromosome length %u.  Discarding...",
1061 		    chr,chrpos,chrlength);
1062 	    if (allocp) FREE(chr);
1063 #endif
1064 
1065 	  } else if (Interval_sign(interval) > 0) {
1066 	    if (position == last_acceptor) {
1067 	      if (Interval_length(interval) > (*splicedists)[last_acceptor_k]) {
1068 		(*splicedists)[last_acceptor_k] = Interval_length(interval);
1069 	      }
1070 
1071 	    } else {
1072 	      last_acceptor_k = k;
1073 	      last_acceptor = splicesites[k] = position;
1074 #ifdef GSNAP
1075 	      assert(position/32U < (unsigned int) nblocks);
1076 	      (*splicecomp)[position/32U] |= (1 << (position % 32));
1077 #endif
1078 	      (*splicetypes)[k] = ACCEPTOR;
1079 	      (*splicedists)[k] = Interval_length(interval);
1080 
1081 	      saw_n_p = false;
1082 	      Genome_fill_buffer_simple(genome,position,16,gbuffer_ref);
1083 	      refstring = uint4_reverse(compress16(&saw_n_p,gbuffer_ref));
1084 	      (*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
1085 	      if (genomealt) {
1086 		Genome_fill_buffer_simple_alt(genome,genomealt,position,16,gbuffer_alt);
1087 		altstring = uint4_reverse(compress16(&saw_n_p,gbuffer_alt));
1088 		(*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
1089 	      }
1090 
1091 	      if (saw_n_p == true) {
1092 		chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
1093 		fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
1094 			chr,chrpos);
1095 		if (allocp) FREE(chr);
1096 	      } else if (genomealt) {
1097 		(*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
1098 		k++;
1099 	      } else {
1100 		(*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
1101 		k++;
1102 	      }
1103 	    }
1104 
1105 	  } else {
1106 	    if (position == last_antidonor) {
1107 	      if (Interval_length(interval) > (*splicedists)[last_antidonor_k]) {
1108 		(*splicedists)[last_antidonor_k] = Interval_length(interval);
1109 	      }
1110 
1111 	    } else {
1112 	      last_antidonor_k = k;
1113 	      last_antidonor = splicesites[k] = position;
1114 #ifdef GSNAP
1115 	      assert(position/32U < (unsigned int) nblocks);
1116 	      (*splicecomp)[position/32U] |= (1 << (position % 32));
1117 #endif
1118 	      (*splicetypes)[k] = ANTIDONOR;
1119 	      (*splicedists)[k] = Interval_length(interval);
1120 
1121 	      saw_n_p = false;
1122 	      Genome_fill_buffer_simple(genome,position,16,gbuffer_ref);
1123 	      refstring = uint4_reverse(compress16(&saw_n_p,gbuffer_ref));
1124 	      (*splicefrags_ref)[k] = unshuffle16(&saw_n_p,gbuffer_ref);
1125 	      if (genomealt) {
1126 		Genome_fill_buffer_simple_alt(genome,genomealt,position,16,gbuffer_alt);
1127 		altstring = uint4_reverse(compress16(&saw_n_p,gbuffer_alt));
1128 		(*splicefrags_alt)[k] = unshuffle16(&saw_n_p,gbuffer_alt);
1129 	      }
1130 
1131 	      if (saw_n_p == true) {
1132 		chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
1133 		fprintf(stderr,"\nNon-standard nucleotide N near splice site %s:%u.  Discarding...",
1134 			chr,chrpos);
1135 		if (allocp) FREE(chr);
1136 	      } else if (genomealt) {
1137 		(*splicestrings)[k] = allelic_combinations(refstring,altstring,position,k,splicestringpool);
1138 		k++;
1139 	      } else {
1140 		(*splicestrings)[k] = Splicestringpool_push(NULL,splicestringpool,refstring,position,k);
1141 		k++;
1142 	      }
1143 	    }
1144 	  }
1145 	}
1146 
1147 	FREE(intervals);
1148 	FREE(introns1);
1149       }
1150     }
1151   }
1152 
1153   *nsplicesites = k;
1154   fprintf(stderr,"\n%d splicesites are valid...",*nsplicesites);
1155 
1156 
1157   /* Need to sort by individual splicesites */
1158   cells = (struct Cell_T *) CALLOC(*nsplicesites,sizeof(struct Cell_T));
1159   for (k = 0; k < *nsplicesites; k++) {
1160     cells[k].k = k;
1161     cells[k].pos = splicesites[k];
1162   }
1163   qsort(cells,*nsplicesites,sizeof(struct Cell_T),Cell_position_cmp);
1164 
1165 
1166   /* Save unordered information */
1167   temp_splicesites = splicesites;
1168   temp_splicetypes = *splicetypes;
1169   temp_splicedists = *splicedists;
1170   temp_splicestrings = *splicestrings;
1171   temp_splicefrags_ref = *splicefrags_ref;
1172   if (genomealt == NULL) {
1173     temp_splicefrags_alt = temp_splicefrags_ref;
1174   } else {
1175     temp_splicefrags_alt = *splicefrags_alt;
1176   }
1177 
1178   /* Allocate ordered information */
1179   splicesites = (Univcoord_T *) CALLOC((*nsplicesites) + 1,sizeof(Univcoord_T));
1180   *splicetypes = (Splicetype_T *) CALLOC(*nsplicesites,sizeof(Splicetype_T));
1181   *splicedists = (Chrpos_T *) CALLOC(*nsplicesites,sizeof(Chrpos_T));
1182   *splicestrings = (List_T *) CALLOC(*nsplicesites,sizeof(List_T));
1183   *splicefrags_ref = (UINT4 *) CALLOC(*nsplicesites,sizeof(UINT4));
1184   if (genomealt == NULL) {
1185     *splicefrags_alt = *splicefrags_ref;
1186   } else {
1187     *splicefrags_alt = (UINT4 *) CALLOC(*nsplicesites,sizeof(UINT4));
1188   }
1189 
1190 
1191   /* Order all information */
1192   for (j = 0; j < *nsplicesites; j++) {
1193     k = cells[j].k;
1194     splicesites[j] = temp_splicesites[k];
1195     (*splicetypes)[j] = temp_splicetypes[k];
1196     (*splicedists)[j] = temp_splicedists[k];
1197     (*splicestrings)[j] = temp_splicestrings[k];
1198     for (p = (*splicestrings)[j]; p != NULL; p = List_next(p)) {
1199       /* Fix reference back to splicesite */
1200       splicestring = (Splicestring_T) List_head(p);
1201       splicestring->splicesite_i = j;
1202     }
1203     (*splicefrags_ref)[j] = temp_splicefrags_ref[k];
1204     (*splicefrags_alt)[j] = temp_splicefrags_alt[k];
1205   }
1206 
1207   splicesites[*nsplicesites] = (Univcoord_T) -1; /* Marker for comparison in identify_all_segments */
1208 
1209   FREE(cells);
1210   FREE(temp_splicesites);
1211   FREE(temp_splicetypes);
1212   FREE(temp_splicedists);
1213   FREE(temp_splicestrings);
1214   FREE(temp_splicefrags_ref);
1215   if (genomealt != NULL) {
1216     FREE(temp_splicefrags_alt);
1217   }
1218 
1219 
1220 #ifdef DEBUG2
1221   for (k = 0; k < *nsplicesites; k++) {
1222     printf("%d: %u %s %08X\n",k,splicesites[k],Splicetype_string((*splicetypes)[k]),(*splicefrags_ref)[k]);
1223     for (p = (*splicestrings)[k]; p != NULL; p = List_next(p)) {
1224       splicestring = (Splicestring_T) List_head(p);
1225       printf("  %u %u %u\n",splicestring->string,splicestring->splicesite,splicestring->splicesite_i);
1226     }
1227   }
1228 #endif
1229 
1230   return splicesites;
1231 }
1232 
1233 
1234 #if 0
1235 static bool
1236 get_exons (Uintlist_T *donors, Uintlist_T *acceptors, char *annot) {
1237   Chrpos_T exonstart, exonend;
1238   char *p;
1239 
1240   /* Skip header */
1241   p = annot;
1242   while (*p != '\0' && *p != '\n') {
1243     p++;
1244   }
1245   if (*p == '\n') p++;
1246 
1247   *donors = *acceptors = (Uintlist_T) NULL;
1248   while (*p != '\0') {
1249     if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
1250       return false;
1251     } else {
1252       *donors = Uintlist_push(*donors,exonend);
1253       *acceptors = Uintlist_push(*acceptors,exonstart);
1254     }
1255 
1256     /* Advance to next exon to see if this is the last one */
1257     while (*p != '\0' && *p != '\n') p++;
1258     if (*p == '\n') p++;
1259   }
1260 
1261   if (*donors == NULL || *acceptors == NULL) {
1262     return false;
1263   } else {
1264     *donors = Uintlist_pop(*donors,&exonend);
1265     *donors = Uintlist_reverse(*donors);
1266 
1267     *acceptors = Uintlist_reverse(*acceptors);
1268     *acceptors = Uintlist_pop(*acceptors,&exonstart);
1269 
1270     return true;
1271   }
1272 }
1273 #endif
1274 
1275 
1276 /************************************************************************/
1277 
1278 
1279 
1280 typedef struct Trie_T *Trie_T;
1281 struct Trie_T {
1282   Splicestring_T leaf;
1283 
1284   int nsites;
1285   int na;
1286   int nc;
1287   int ng;
1288   int nt;
1289 
1290   Trie_T triea;
1291   Trie_T triec;
1292   Trie_T trieg;
1293   Trie_T triet;
1294 };
1295 
1296 
1297 #if 0
1298 static void
1299 Trie_free (Trie_T *old) {
1300   if ((*old) == NULL) {
1301     return;
1302   } else if ((*old)->leaf != NULL) {
1303     FREE(*old);
1304     return;
1305   } else {
1306     Trie_free(&(*old)->triea);
1307     Trie_free(&(*old)->triec);
1308     Trie_free(&(*old)->trieg);
1309     Trie_free(&(*old)->triet);
1310     FREE(*old);
1311     return;
1312   }
1313 }
1314 #endif
1315 
1316 /************************************************************************
1317  *   Building splicetries
1318  ************************************************************************/
1319 
1320 #if 0
1321 static Trie_T
1322 Trie_new (Splicestring_T *sites, int nsites, int charpos) {
1323   Trie_T trie;
1324   int i, ptr;
1325 
1326   if (nsites == 0) {
1327     return (Trie_T) NULL;
1328 
1329   } else {
1330 
1331     if (nsites == 1) {
1332       trie = (Trie_T) MALLOC(sizeof(*trie));
1333       trie->nsites = 1;
1334       trie->na = trie->nc = trie->ng = trie->nt = 0;
1335       trie->triea = trie->triec = trie->trieg = trie->triet = (Trie_T) NULL;
1336       trie->leaf = sites[0];
1337       /* trie->remainder = &(sites[0]->string[charpos]); */
1338       return trie;
1339 
1340     } else if (charpos >= 16) {
1341       return (Trie_T) NULL;
1342 
1343     } else {
1344       trie = (Trie_T) MALLOC(sizeof(*trie));
1345       trie->nsites = nsites;
1346       trie->na = trie->nc = trie->ng = trie->nt = 0;
1347       trie->leaf = (Splicestring_T) NULL;
1348       /* trie->remainder = (char *) NULL; */
1349 
1350       for (i = 0; i < nsites; i++) {
1351 	switch ((sites[i]->string >> (30 - 2*charpos)) & 0x03) {
1352 	case RIGHT_A: trie->na++; break;
1353 	case RIGHT_C: trie->nc++; break;
1354 	case RIGHT_G: trie->ng++; break;
1355 	case RIGHT_T: trie->nt++; break;
1356 	default: abort();
1357 	}
1358       }
1359 
1360       ptr = 0;
1361       trie->triea = Trie_new(&(sites[ptr]),trie->na,charpos+1);
1362       ptr += trie->na;
1363 
1364       trie->triec = Trie_new(&(sites[ptr]),trie->nc,charpos+1);
1365       ptr += trie->nc;
1366 
1367       trie->trieg = Trie_new(&(sites[ptr]),trie->ng,charpos+1);
1368       ptr += trie->ng;
1369 
1370       trie->triet = Trie_new(&(sites[ptr]),trie->nt,charpos+1);
1371       /* ptr += trie->nt; */
1372 
1373       return trie;
1374     }
1375   }
1376 }
1377 #endif
1378 
1379 
1380 /* Based on original Trie_output_new, which used repeated calls to Uintlist_push */
1381 static int
Trie_output_size(int size,Splicestring_T * sites,int nsites,int charpos)1382 Trie_output_size (int size, Splicestring_T *sites, int nsites, int charpos) {
1383   int i, k;
1384   /* unsigned int posa, posc, posg, post; */
1385   int na, nc, ng, nt;
1386 
1387   if (nsites == 0) {
1388     debug0(printf("nsites == 0, so not adding anything\n"));
1389 
1390   } else if (nsites == 1) {
1391     debug0(printf("nsites == 1, so adding 1\n"));
1392     size += 1;
1393 
1394   } else if (charpos >= 16) {
1395     if (nsites > MAX_DUPLICATES) {
1396       fprintf(stderr,"Warning: Splicetrie exceeded max duplicates value of %d\n",MAX_DUPLICATES);
1397     } else {
1398       size += 1;
1399 
1400       for (i = 0; i < nsites; i++) {
1401 	debug0(printf(" %d",sites[i]->splicesite_i));
1402 	size += 1;
1403       }
1404       debug0(printf("\n"));
1405     }
1406 
1407   } else {
1408     na = nc = ng = nt = 0;
1409     for (i = 0; i < nsites; i++) {
1410       switch ((sites[i]->string >> (30 - 2*charpos)) & 0x03) {
1411       case RIGHT_A: na++; break;
1412       case RIGHT_C: nc++; break;
1413       case RIGHT_G: ng++; break;
1414       case RIGHT_T: nt++; break;
1415       default: abort();
1416       }
1417     }
1418     debug0(printf("%d A, %d C, %d G, %d T\n",na,nc,ng,nt));
1419 
1420     k = 0;
1421     size = Trie_output_size(size,&(sites[k]),na,charpos+1);
1422 
1423     k += na;
1424     size = Trie_output_size(size,&(sites[k]),nc,charpos+1);
1425 
1426     k += nc;
1427     size = Trie_output_size(size,&(sites[k]),ng,charpos+1);
1428 
1429     k += ng;
1430     size = Trie_output_size(size,&(sites[k]),nt,charpos+1);
1431     /* k += nt; */
1432 
1433     size += 5;
1434   }
1435 
1436   return size;
1437 }
1438 
1439 
1440 
1441 #ifdef USE_LIST
1442 /* Combination of Trie_new and Trie_output */
1443 /* Note that when *ptr = NULL_POINTER, we do not need to advance
1444    nprinted, because no entry is made into triecontents_list */
1445 static Uintlist_T
Trie_output_list(unsigned int * ptr,int * nprinted,Uintlist_T triecontents_list,Splicestring_T * sites,int nsites,int charpos)1446 Trie_output_list (unsigned int *ptr, int *nprinted, Uintlist_T triecontents_list,
1447 		  Splicestring_T *sites, int nsites, int charpos) {
1448   int i, k;
1449   unsigned int posa, posc, posg, post;
1450   int na, nc, ng, nt;
1451 
1452   if (nsites == 0) {
1453     debug0(printf("nsites == 0, so NULL\n"));
1454     *ptr = NULL_POINTER;
1455 
1456   } else if (nsites == 1) {
1457     debug0(printf("nsites == 1, so pushing %d\n",sites[0]->splicesite_i));
1458     *ptr = *nprinted;
1459     triecontents_list = Uintlist_push(triecontents_list,sites[0]->splicesite_i);
1460     *nprinted += 1;
1461 
1462   } else if (charpos >= 16) {
1463     if (nsites > MAX_DUPLICATES) {
1464       fprintf(stderr,"Warning: Splicetrie exceeded max duplicates value of %d\n",MAX_DUPLICATES);
1465       *ptr = NULL_POINTER;
1466     } else {
1467       *ptr = *nprinted;
1468 
1469       triecontents_list = Uintlist_push(triecontents_list,(Triecontent_T) (-nsites));
1470       *nprinted += 1;
1471       for (i = 0; i < nsites; i++) {
1472 	debug0(printf(" %d",sites[i]->splicesite_i));
1473 	triecontents_list = Uintlist_push(triecontents_list,sites[i]->splicesite_i);
1474 	*nprinted += 1;
1475       }
1476       debug0(printf("\n"));
1477     }
1478 
1479   } else {
1480     na = nc = ng = nt = 0;
1481     for (i = 0; i < nsites; i++) {
1482       switch ((sites[i]->string >> (30 - 2*charpos)) & 0x03) {
1483       case RIGHT_A: na++; break;
1484       case RIGHT_C: nc++; break;
1485       case RIGHT_G: ng++; break;
1486       case RIGHT_T: nt++; break;
1487       default: abort();
1488       }
1489     }
1490     debug0(printf("%d A, %d C, %d G, %d T\n",na,nc,ng,nt));
1491 
1492     k = 0;
1493     triecontents_list = Trie_output_list(&posa,&(*nprinted),triecontents_list,
1494 					&(sites[k]),na,charpos+1);
1495     k += na;
1496     triecontents_list = Trie_output_list(&posc,&(*nprinted),triecontents_list,
1497 					&(sites[k]),nc,charpos+1);
1498     k += nc;
1499     triecontents_list = Trie_output_list(&posg,&(*nprinted),triecontents_list,
1500 					&(sites[k]),ng,charpos+1);
1501     k += ng;
1502     triecontents_list = Trie_output_list(&post,&(*nprinted),triecontents_list,
1503 					&(sites[k]),nt,charpos+1);
1504     /* k += nt; */
1505 
1506     *ptr = *nprinted;
1507     triecontents_list = Uintlist_push(triecontents_list,INTERNAL_NODE);
1508 
1509     if (posa == NULL_POINTER) {
1510       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1511     } else {
1512       triecontents_list = Uintlist_push(triecontents_list,*ptr - posa);
1513     }
1514     if (posc == NULL_POINTER) {
1515       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1516     } else {
1517       triecontents_list = Uintlist_push(triecontents_list,*ptr - posc);
1518     }
1519     if (posg == NULL_POINTER) {
1520       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1521     } else {
1522       triecontents_list = Uintlist_push(triecontents_list,*ptr - posg);
1523     }
1524     if (post == NULL_POINTER) {
1525       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1526     } else {
1527       triecontents_list = Uintlist_push(triecontents_list,*ptr - post);
1528     }
1529     *nprinted += 5;
1530   }
1531 
1532   return triecontents_list;
1533 }
1534 
1535 #else
1536 
1537 /* ptr is for offsets */
1538 static unsigned int *
Trie_output_array(unsigned int * ptr,int * nprinted,unsigned int * contents,Splicestring_T * sites,int nsites,int charpos)1539 Trie_output_array (unsigned int *ptr, int *nprinted, unsigned int *contents,
1540 		   Splicestring_T *sites, int nsites, int charpos) {
1541   int i, k;
1542   unsigned int posa, posc, posg, post;
1543   int na, nc, ng, nt;
1544 
1545   if (nsites == 0) {
1546     debug0(printf("nsites == 0, so NULL\n"));
1547     *ptr = NULL_POINTER;
1548 
1549   } else if (nsites == 1) {
1550     debug0(printf("nsites == 1, so pushing %d\n",sites[0]->splicesite_i));
1551     *ptr = *nprinted;
1552     *contents++ = sites[0]->splicesite_i;
1553     *nprinted += 1;
1554 
1555   } else if (charpos >= 16) {
1556     if (nsites > MAX_DUPLICATES) {
1557       fprintf(stderr,"Warning: Splicetrie exceeded max duplicates value of %d\n",MAX_DUPLICATES);
1558       *ptr = NULL_POINTER;
1559     } else {
1560       *ptr = *nprinted;
1561 
1562       *contents++ = (Triecontent_T) (-nsites);
1563       *nprinted += 1;
1564       for (i = 0; i < nsites; i++) {
1565 	debug0(printf(" %d",sites[i]->splicesite_i));
1566 	*contents++ = sites[i]->splicesite_i;
1567 	*nprinted += 1;
1568       }
1569       debug0(printf("\n"));
1570     }
1571 
1572   } else {
1573     na = nc = ng = nt = 0;
1574     for (i = 0; i < nsites; i++) {
1575       switch ((sites[i]->string >> (30 - 2*charpos)) & 0x03) {
1576       case RIGHT_A: na++; break;
1577       case RIGHT_C: nc++; break;
1578       case RIGHT_G: ng++; break;
1579       case RIGHT_T: nt++; break;
1580       default: abort();
1581       }
1582     }
1583     debug0(printf("%d A, %d C, %d G, %d T\n",na,nc,ng,nt));
1584 
1585     k = 0;
1586     contents = Trie_output_array(&posa,&(*nprinted),contents,&(sites[k]),na,charpos+1);
1587 
1588     k += na;
1589     contents = Trie_output_array(&posc,&(*nprinted),contents,&(sites[k]),nc,charpos+1);
1590 
1591     k += nc;
1592     contents = Trie_output_array(&posg,&(*nprinted),contents,&(sites[k]),ng,charpos+1);
1593 
1594     k += ng;
1595     contents = Trie_output_array(&post,&(*nprinted),contents,&(sites[k]),nt,charpos+1);
1596     /* k += nt; */
1597 
1598     *ptr = *nprinted;
1599     *contents++ = INTERNAL_NODE;
1600 
1601     if (posa == NULL_POINTER) {
1602       *contents++ = EMPTY_POINTER;
1603     } else {
1604       *contents++ = *ptr - posa;
1605     }
1606     if (posc == NULL_POINTER) {
1607       *contents++ = EMPTY_POINTER;
1608     } else {
1609       *contents++ = *ptr - posc;
1610     }
1611     if (posg == NULL_POINTER) {
1612       *contents++ = EMPTY_POINTER;
1613     } else {
1614       *contents++ = *ptr - posg;
1615     }
1616     if (post == NULL_POINTER) {
1617       *contents++ = EMPTY_POINTER;
1618     } else {
1619       *contents++ = *ptr - post;
1620     }
1621     *nprinted += 5;
1622   }
1623 
1624   return contents;
1625 }
1626 #endif
1627 
1628 
1629 #if 0
1630 static void
1631 Trie_print (Trie_T trie, int level) {
1632   int i;
1633 
1634   if (trie == NULL) {
1635     printf("\n");
1636 
1637   } else {
1638     if (trie->leaf != NULL) {
1639       printf(" %d@%u\n",(int) trie->leaf->splicesite_i,trie->leaf->splicesite);
1640 
1641     } else {
1642       printf("\n");
1643       for (i = 0; i < level; i++) {
1644 	printf("\t");
1645       }
1646       printf("A (%d)",trie->na);
1647       Trie_print(trie->triea,level+1);
1648 
1649       for (i = 0; i < level; i++) {
1650 	printf("\t");
1651       }
1652       printf("C (%d)",trie->nc);
1653       Trie_print(trie->triec,level+1);
1654 
1655       for (i = 0; i < level; i++) {
1656 	printf("\t");
1657       }
1658       printf("G (%d)",trie->ng);
1659       Trie_print(trie->trieg,level+1);
1660 
1661       for (i = 0; i < level; i++) {
1662 	printf("\t");
1663       }
1664       printf("T (%d)",trie->nt);
1665       Trie_print(trie->triet,level+1);
1666     }
1667   }
1668 
1669   return;
1670 }
1671 #endif
1672 
1673 #if 0
1674 static int
1675 Trie_print_compact (int *ptr, FILE *fp, Trie_T trie, int nprinted) {
1676   int posa, posc, posg, post;
1677 
1678   if (trie == NULL) {
1679     *ptr = NULL_POINTER;
1680     return nprinted;
1681 
1682   } else {
1683     if (trie->leaf != NULL) {
1684       printf("%d. %d@%u\n",nprinted,(int) trie->leaf->splicesite_i,trie->leaf->splicesite);
1685       *ptr = nprinted;
1686       return nprinted + 1;
1687 
1688     } else {
1689       nprinted = Trie_print_compact(&posa,fp,trie->triea,nprinted);
1690       nprinted = Trie_print_compact(&posc,fp,trie->triec,nprinted);
1691       nprinted = Trie_print_compact(&posg,fp,trie->trieg,nprinted);
1692       nprinted = Trie_print_compact(&post,fp,trie->triet,nprinted);
1693 
1694       *ptr = nprinted;
1695 
1696       printf("%d. %u\n",nprinted,INTERNAL_NODE);
1697 
1698       printf("relative: %d %d %d %d\n",
1699 	     *ptr - posa,*ptr - posc,*ptr - posg,*ptr - post);
1700       printf("absolute: %d %d %d %d\n",
1701 	     posa,posc,posg,post);
1702 
1703       return nprinted + 3;
1704     }
1705   }
1706 }
1707 #endif
1708 
1709 
1710 #ifdef USE_2BYTE_RELOFFSETS
1711 #define TRIE_EMPTY_SIZE 3
1712 #else
1713 #define TRIE_EMPTY_SIZE 5
1714 #endif
1715 
1716 
1717 #ifdef USE_LIST
1718 static Uintlist_T
Trie_output_empty_list(unsigned int * ptr,int * nprinted,Uintlist_T triecontents_list)1719 Trie_output_empty_list (unsigned int *ptr, int *nprinted, Uintlist_T triecontents_list) {
1720   *ptr = (unsigned int) *nprinted;
1721   triecontents_list = Uintlist_push(triecontents_list,INTERNAL_NODE);
1722 #ifdef USE_2BYTE_RELOFFSETS
1723   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1724   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1725   *nprinted += 3;
1726 #else
1727   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1728   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1729   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1730   triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1731   *nprinted += 5;
1732 #endif
1733   return triecontents_list;
1734 }
1735 
1736 #else
1737 static unsigned int *
Trie_output_empty_array(unsigned int * ptr,int * nprinted,unsigned int * contents)1738 Trie_output_empty_array (unsigned int *ptr, int *nprinted, unsigned int *contents) {
1739   *ptr = (unsigned int) *nprinted;
1740   *contents++ = INTERNAL_NODE;
1741 #ifdef USE_2BYTE_RELOFFSETS
1742   *contents++ = EMPTY_POINTER;
1743   *contents++ = EMPTY_POINTER;
1744   *nprinted += 3;
1745 #else
1746   *contents++ = EMPTY_POINTER;
1747   *contents++ = EMPTY_POINTER;
1748   *contents++ = EMPTY_POINTER;
1749   *contents++ = EMPTY_POINTER;
1750   *nprinted += 5;
1751 #endif
1752   return contents;
1753 }
1754 
1755 #endif
1756 
1757 
1758 #if 0
1759 static Uintlist_T
1760 Trie_output (int *ptr, int *nprinted, Uintlist_T triecontents_list, Trie_T trie) {
1761   int posa, posc, posg, post;
1762 #ifdef USE_2BYTE_RELOFFSETS
1763   unsigned int pointersAC, pointersGT;
1764   int reloffset;
1765 #endif
1766 
1767   if (trie == NULL) {
1768     *ptr = NULL_POINTER;
1769 
1770   } else if (trie->leaf != NULL) {
1771     *ptr = *nprinted;
1772     /* Entering splicesite_i, rather than position */
1773     triecontents_list = Uintlist_push(triecontents_list,trie->leaf->splicesite_i);
1774     *nprinted += 1;
1775 
1776   } else {
1777     triecontents_list = Trie_output(&posa,&(*nprinted),triecontents_list,trie->triea);
1778     triecontents_list = Trie_output(&posc,&(*nprinted),triecontents_list,trie->triec);
1779     triecontents_list = Trie_output(&posg,&(*nprinted),triecontents_list,trie->trieg);
1780     triecontents_list = Trie_output(&post,&(*nprinted),triecontents_list,trie->triet);
1781 
1782     *ptr = *nprinted;
1783     triecontents_list = Uintlist_push(triecontents_list,INTERNAL_NODE);
1784 
1785 
1786 #ifdef USE_2BYTE_RELOFFSETS
1787     if (posa == NULL_POINTER) {
1788       pointersAC = 0;
1789     } else if ((reloffset = *ptr - posa) >= 65536) {
1790       fprintf(stderr,"Reloffset A %d = %d - %d is too big for 2 bytes\n",reloffset,*ptr,posa);
1791       abort();
1792     } else {
1793       pointersAC = reloffset;
1794     }
1795 
1796     if (posc == NULL_POINTER) {
1797       pointersAC = (pointersAC << 16);
1798     } else if ((reloffset = *ptr - posc) >= 65536) {
1799       fprintf(stderr,"Reloffset C %d = %d - %d is too big for 2 bytes\n",reloffset,*ptr,posc);
1800       abort();
1801     } else {
1802       pointersAC = (pointersAC << 16) + reloffset;
1803     }
1804 
1805     if (posg == NULL_POINTER) {
1806       pointersGT = 0;
1807     } else if ((reloffset = *ptr - posg) >= 65536) {
1808       fprintf(stderr,"Reloffset G %d = %d - %d is too big for 2 bytes\n",reloffset,*ptr,posg);
1809       abort();
1810     } else {
1811       pointersGT = reloffset;
1812     }
1813 
1814     if (post == NULL_POINTER) {
1815       pointersGT = (pointersGT << 16);
1816     } else if ((reloffset = *ptr - post) >= 65536) {
1817       fprintf(stderr,"Reloffset T %d = %d - %d is too big for 2 bytes\n",reloffset,*ptr,post);
1818       abort();
1819     } else {
1820       pointersGT = (pointersGT << 16) + reloffset;
1821     }
1822 
1823     triecontents_list = Uintlist_push(triecontents_list,pointersAC);
1824     triecontents_list = Uintlist_push(triecontents_list,pointersGT);
1825 
1826     *nprinted += 3;
1827 #else
1828     if (posa == NULL_POINTER) {
1829       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1830     } else {
1831       triecontents_list = Uintlist_push(triecontents_list,*ptr - posa);
1832     }
1833     if (posc == NULL_POINTER) {
1834       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1835     } else {
1836       triecontents_list = Uintlist_push(triecontents_list,*ptr - posc);
1837     }
1838     if (posg == NULL_POINTER) {
1839       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1840     } else {
1841       triecontents_list = Uintlist_push(triecontents_list,*ptr - posg);
1842     }
1843     if (post == NULL_POINTER) {
1844       triecontents_list = Uintlist_push(triecontents_list,EMPTY_POINTER);
1845     } else {
1846       triecontents_list = Uintlist_push(triecontents_list,*ptr - post);
1847     }
1848     *nprinted += 5;
1849 #endif
1850 
1851   }
1852   return triecontents_list;
1853 }
1854 #endif
1855 
1856 
1857 void
Splicetrie_npartners(int ** nsplicepartners_skip,int ** nsplicepartners_obs,int ** nsplicepartners_max,Univcoord_T * splicesites,Splicetype_T * splicetypes,Chrpos_T * splicedists,List_T * splicestrings,int nsplicesites,Univ_IIT_T chromosome_iit,Chrpos_T max_distance,bool distances_observed_p)1858 Splicetrie_npartners (int **nsplicepartners_skip, int **nsplicepartners_obs, int **nsplicepartners_max,
1859 		      Univcoord_T *splicesites, Splicetype_T *splicetypes,
1860 		      Chrpos_T *splicedists, List_T *splicestrings, int nsplicesites,
1861 		      Univ_IIT_T chromosome_iit, Chrpos_T max_distance, bool distances_observed_p) {
1862   int nsites_skip, nsites_obs, nsites_max, j, j1;
1863   Univcoord_T chroffset, chrhigh, leftbound_obs, leftbound_max, rightbound_obs, rightbound_max;
1864   Univcoord_T leftbound_min, rightbound_min;
1865   Chrpos_T chrlength;
1866   Chrnum_T chrnum;
1867 
1868 
1869   (*nsplicepartners_skip) = (int *) CALLOC(nsplicesites,sizeof(int));
1870   if (distances_observed_p == true) {
1871     (*nsplicepartners_obs) = (int *) CALLOC(nsplicesites,sizeof(int));
1872   } else {
1873     (*nsplicepartners_obs) = (int *) NULL;
1874   }
1875   (*nsplicepartners_max) = (int *) CALLOC(nsplicesites,sizeof(int));
1876 
1877   chrhigh = 0U;
1878   for (j = 0; j < nsplicesites; j++) {
1879     if (splicesites[j] > chrhigh) {
1880       chrnum = Univ_IIT_get_one(chromosome_iit,splicesites[j],splicesites[j]);
1881       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
1882       /* chrhigh += 1U; */
1883     }
1884 
1885     switch (splicetypes[j]) {
1886     case DONOR:
1887       nsites_skip = nsites_obs = nsites_max = 0;
1888       j1 = j + 1;
1889 
1890       if ((rightbound_min = splicesites[j] + MININTRONLEN) > chrhigh) {
1891 	rightbound_min = chrhigh;
1892       }
1893       while (j1 < nsplicesites && splicesites[j1] < rightbound_min) {
1894 	nsites_skip++;
1895 	j1++;
1896       }
1897 
1898       if (distances_observed_p) {
1899 	if ((rightbound_obs = splicesites[j] + splicedists[j]) > chrhigh) {
1900 	  rightbound_obs = chrhigh;
1901 	}
1902 	while (j1 < nsplicesites && splicesites[j1] < rightbound_obs) {
1903 	  if (splicetypes[j1] == ACCEPTOR) {
1904 	    nsites_obs += List_length(splicestrings[j1]);
1905 	  }
1906 	  j1++;
1907 	}
1908       }
1909 
1910       if ((rightbound_max = splicesites[j] + max_distance) > chrhigh) {
1911 	rightbound_max = chrhigh;
1912       }
1913       while (j1 < nsplicesites && splicesites[j1] < rightbound_max) {
1914 	if (splicetypes[j1] == ACCEPTOR) {
1915 	  nsites_max += List_length(splicestrings[j1]);
1916 	}
1917 	j1++;
1918       }
1919 
1920       break;
1921 
1922     case ACCEPTOR:
1923       nsites_skip = nsites_obs = nsites_max = 0;
1924       j1 = j - 1;
1925 
1926       if (splicesites[j] < chroffset + MININTRONLEN) {
1927 	leftbound_min = chroffset;
1928       } else {
1929 	leftbound_min = splicesites[j] - MININTRONLEN;
1930       }
1931       while (j1 >= 0 && splicesites[j1] > leftbound_min) {
1932 	nsites_skip++;
1933 	j1--;
1934       }
1935 
1936       if (distances_observed_p) {
1937 	if (splicesites[j] < chroffset + splicedists[j]) {
1938 	  leftbound_obs = chroffset;
1939 	} else {
1940 	  leftbound_obs = splicesites[j] - splicedists[j];
1941 	}
1942 	while (j1 >= 0 && splicesites[j1] > leftbound_obs) {
1943 	  if (splicetypes[j1] == DONOR) {
1944 	    nsites_obs += List_length(splicestrings[j1]);
1945 	  }
1946 	  j1--;
1947 	}
1948       }
1949 
1950       if (splicesites[j] < chroffset + max_distance) {
1951 	leftbound_max = chroffset;
1952       } else {
1953 	leftbound_max = splicesites[j] - max_distance;
1954       }
1955       while (j1 >= 0 && splicesites[j1] > leftbound_max) {
1956 	if (splicetypes[j1] == DONOR) {
1957 	  nsites_max += List_length(splicestrings[j1]);
1958 	}
1959 	j1--;
1960       }
1961 
1962       break;
1963 
1964     case ANTIDONOR:
1965       nsites_skip = nsites_obs = nsites_max = 0;
1966       j1 = j - 1;
1967 
1968       if (splicesites[j] < chroffset + MININTRONLEN) {
1969 	leftbound_min = chroffset;
1970       } else {
1971 	leftbound_min = splicesites[j] - MININTRONLEN;
1972       }
1973       while (j1 >= 0 && splicesites[j1] > leftbound_min) {
1974 	nsites_skip++;
1975 	j1--;
1976       }
1977 
1978       if (distances_observed_p) {
1979 	if (splicesites[j] < chroffset + splicedists[j]) {
1980 	  leftbound_obs = chroffset;
1981 	} else {
1982 	  leftbound_obs = splicesites[j] - splicedists[j];
1983 	}
1984 	while (j1 >= 0 && splicesites[j1] > leftbound_obs) {
1985 	  if (splicetypes[j1] == ANTIACCEPTOR) {
1986 	    nsites_obs += List_length(splicestrings[j1]);
1987 	  }
1988 	  j1--;
1989 	}
1990       }
1991 
1992       if (splicesites[j] < chroffset + max_distance) {
1993 	leftbound_max = chroffset;
1994       } else {
1995 	leftbound_max = splicesites[j] - max_distance;
1996       }
1997       while (j1 >= 0 && splicesites[j1] > leftbound_max) {
1998 	if (splicetypes[j1] == ANTIACCEPTOR) {
1999 	  nsites_max += List_length(splicestrings[j1]);
2000 	}
2001 	j1--;
2002       }
2003 
2004       break;
2005 
2006     case ANTIACCEPTOR:
2007       nsites_skip = nsites_obs = nsites_max = 0;
2008       j1 = j + 1;
2009 
2010       if ((rightbound_min = splicesites[j] + MININTRONLEN) > chrhigh) {
2011 	rightbound_min = chrhigh;
2012       }
2013       while (j1 < nsplicesites && splicesites[j1] < rightbound_min) {
2014 	nsites_skip++;
2015 	j1++;
2016       }
2017 
2018       if (distances_observed_p) {
2019 	if ((rightbound_obs = splicesites[j] + splicedists[j]) > chrhigh) {
2020 	  rightbound_obs = chrhigh;
2021 	}
2022 	while (j1 < nsplicesites && splicesites[j1] < rightbound_obs) {
2023 	  if (splicetypes[j1] == ANTIDONOR) {
2024 	    nsites_obs += List_length(splicestrings[j1]);
2025 	  }
2026 	  j1++;
2027 	}
2028       }
2029 
2030       if ((rightbound_max = splicesites[j] + max_distance) > chrhigh) {
2031 	rightbound_max = chrhigh;
2032       }
2033       while (j1 < nsplicesites && splicesites[j1] < rightbound_max) {
2034 	if (splicetypes[j1] == ANTIDONOR) {
2035 	  nsites_max += List_length(splicestrings[j1]);
2036 	}
2037 	j1++;
2038       }
2039 
2040       break;
2041 
2042     default:
2043       fprintf(stderr,"Unexpected splicetype %d\n",splicetypes[j]);
2044       abort();
2045     }
2046 
2047     (*nsplicepartners_skip)[j] = nsites_skip;
2048     if (distances_observed_p) {
2049       (*nsplicepartners_obs)[j] = nsites_obs;
2050     }
2051     (*nsplicepartners_max)[j] = nsites_max;
2052   }
2053 
2054   return;
2055 }
2056 
2057 
2058 #define MAX_SITES_ALLOCATED 1000
2059 
2060 static void
build_via_splicesites_size(int * trie_obs_size,int * trie_max_size,int * nsplicepartners_skip,int * nsplicepartners_obs,int * nsplicepartners_max,Splicetype_T * splicetypes,List_T * splicestrings,int nsplicesites,bool distances_observed_p)2061 build_via_splicesites_size (int *trie_obs_size, int *trie_max_size,
2062 			    int *nsplicepartners_skip, int *nsplicepartners_obs, int *nsplicepartners_max,
2063 			    Splicetype_T *splicetypes, List_T *splicestrings, int nsplicesites,
2064 			    bool distances_observed_p) {
2065   List_T p;
2066   int nsites, j, j1;
2067   Splicestring_T *sites, sites_allocated[MAX_SITES_ALLOCATED];
2068   int npartners;
2069 
2070   for (j = 0; j < nsplicesites; j++) {
2071     switch (splicetypes[j]) {
2072     case DONOR:
2073       j1 = j + 1 + nsplicepartners_skip[j];
2074 
2075       if (distances_observed_p) {
2076 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2077 #if 0
2078 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2079 #else
2080 	  (*trie_obs_size) += TRIE_EMPTY_SIZE;
2081 #endif
2082 
2083 	} else {
2084 	  if (npartners > MAX_SITES_ALLOCATED) {
2085 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2086 	  } else {
2087 	    sites = &(sites_allocated[0]);
2088 	  }
2089 	  nsites = 0;
2090 	  while (nsites < nsplicepartners_obs[j]) {
2091 	    if (splicetypes[j1] == ACCEPTOR) {
2092 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2093 		sites[nsites++] = (Splicestring_T) List_head(p);
2094 	      }
2095 	    }
2096 	    j1++;
2097 	  }
2098 	  assert(nsites == nsplicepartners_obs[j]);
2099 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2100 #if 0
2101 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2102 						  sites,nsites,/*charpos*/0);
2103 #else
2104 	  *trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2105 #endif
2106 	  if (npartners > MAX_SITES_ALLOCATED) {
2107 	    FREE(sites);
2108 	  }
2109 	}
2110       }
2111 
2112       if ((npartners = nsplicepartners_max[j]) == 0) {
2113 #if 0
2114 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2115 #else
2116 	(*trie_max_size) += TRIE_EMPTY_SIZE;
2117 #endif
2118 
2119       } else {
2120 	if (npartners > MAX_SITES_ALLOCATED) {
2121 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2122 	} else {
2123 	  sites = &(sites_allocated[0]);
2124 	}
2125 	nsites = 0;
2126 	while (nsites < nsplicepartners_max[j]) {
2127 	  if (splicetypes[j1] == ACCEPTOR) {
2128 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2129 	      sites[nsites++] = (Splicestring_T) List_head(p);
2130 	    }
2131 	  }
2132 	  j1++;
2133 	}
2134 	assert(nsites == nsplicepartners_max[j]);
2135 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2136 #if 0
2137 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2138 						sites,nsites,/*charpos*/0);
2139 #else
2140 	*trie_max_size = Trie_output_size(*trie_max_size,sites,nsites,/*charpos*/0);
2141 #endif
2142 	if (npartners > MAX_SITES_ALLOCATED) {
2143 	  FREE(sites);
2144 	}
2145       }
2146 
2147       break;
2148 
2149     case ACCEPTOR:
2150 
2151       j1 = j - 1 - nsplicepartners_skip[j];
2152 
2153       if (distances_observed_p) {
2154 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2155 #if 0
2156 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2157 #else
2158 	  (*trie_obs_size) += TRIE_EMPTY_SIZE;
2159 #endif
2160 
2161 	} else {
2162 	  if (npartners > MAX_SITES_ALLOCATED) {
2163 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2164 	  } else {
2165 	    sites = &(sites_allocated[0]);
2166 	  }
2167 	  nsites = 0;
2168 	  while (nsites < nsplicepartners_obs[j]) {
2169 	    if (splicetypes[j1] == DONOR) {
2170 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2171 		sites[nsites++] = (Splicestring_T) List_head(p);
2172 	      }
2173 	    }
2174 	    j1--;
2175 	  }
2176 	  assert(nsites == nsplicepartners_obs[j]);
2177 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2178 #if 0
2179 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2180 						  sites,nsites,/*charpos*/0);
2181 #else
2182 	  *trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2183 #endif
2184 	  if (npartners > MAX_SITES_ALLOCATED) {
2185 	    FREE(sites);
2186 	  }
2187 	}
2188       }
2189 
2190       if ((npartners = nsplicepartners_max[j]) == 0) {
2191 #if 0
2192 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2193 #else
2194 	(*trie_max_size) += TRIE_EMPTY_SIZE;
2195 #endif
2196 
2197       } else {
2198 	if (npartners > MAX_SITES_ALLOCATED) {
2199 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2200 	} else {
2201 	  sites = &(sites_allocated[0]);
2202 	}
2203 	nsites = 0;
2204 	while (nsites < nsplicepartners_max[j]) {
2205 	  if (splicetypes[j1] == DONOR) {
2206 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2207 	      sites[nsites++] = (Splicestring_T) List_head(p);
2208 	    }
2209 	  }
2210 	  j1--;
2211 	}
2212 	assert(nsites == nsplicepartners_max[j]);
2213 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2214 #if 0
2215 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2216 						sites,nsites,/*charpos*/0);
2217 #else
2218 	*trie_max_size = Trie_output_size(*trie_max_size,sites,nsites,/*charpos*/0);
2219 #endif
2220 	if (npartners > MAX_SITES_ALLOCATED) {
2221 	  FREE(sites);
2222 	}
2223       }
2224 
2225       break;
2226 
2227     case ANTIDONOR:
2228 
2229       j1 = j - 1 - nsplicepartners_skip[j];
2230 
2231       if (distances_observed_p) {
2232 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2233 #if 0
2234 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2235 #else
2236 	  (*trie_obs_size) += TRIE_EMPTY_SIZE;
2237 #endif
2238 
2239 	} else {
2240 	  if (npartners > MAX_SITES_ALLOCATED) {
2241 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2242 	  } else {
2243 	    sites = &(sites_allocated)[0];
2244 	  }
2245 	  nsites = 0;
2246 	  while (nsites < nsplicepartners_obs[j]) {
2247 	    if (splicetypes[j1] == ANTIACCEPTOR) {
2248 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2249 		sites[nsites++] = (Splicestring_T) List_head(p);
2250 	      }
2251 	    }
2252 	    j1--;
2253 	  }
2254 	  assert(nsites == nsplicepartners_obs[j]);
2255 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2256 #if 0
2257 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2258 						  sites,nsites,/*charpos*/0);
2259 #else
2260 	  *trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2261 #endif
2262 	  if (npartners > MAX_SITES_ALLOCATED) {
2263 	    FREE(sites);
2264 	  }
2265 	}
2266       }
2267 
2268       if ((npartners = nsplicepartners_max[j]) == 0) {
2269 #if 0
2270 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2271 #else
2272 	(*trie_max_size) += TRIE_EMPTY_SIZE;
2273 #endif
2274 
2275       } else {
2276 	if (npartners > MAX_SITES_ALLOCATED) {
2277 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2278 	} else {
2279 	  sites = &(sites_allocated[0]);
2280 	}
2281 	nsites = 0;
2282 	while (nsites < nsplicepartners_max[j]) {
2283 	  if (splicetypes[j1] == ANTIACCEPTOR) {
2284 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2285 	      sites[nsites++] = (Splicestring_T) List_head(p);
2286 	    }
2287 	  }
2288 	  j1--;
2289 	}
2290 	assert(nsites == nsplicepartners_max[j]);
2291 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2292 #if 0
2293 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2294 						sites,nsites,/*charpos*/0);
2295 #else
2296 	*trie_max_size = Trie_output_size(*trie_max_size,sites,nsites,/*charpos*/0);
2297 #endif
2298 	if (npartners > MAX_SITES_ALLOCATED) {
2299 	  FREE(sites);
2300 	}
2301       }
2302 
2303       break;
2304 
2305     case ANTIACCEPTOR:
2306 
2307       j1 = j + 1 + nsplicepartners_skip[j];
2308 
2309       if (distances_observed_p) {
2310 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2311 #if 0
2312 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2313 #else
2314 	  (*trie_obs_size) += TRIE_EMPTY_SIZE;
2315 #endif
2316 
2317 	} else {
2318 	  if (npartners > MAX_SITES_ALLOCATED) {
2319 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2320 	  } else {
2321 	    sites = &(sites_allocated[0]);
2322 	  }
2323 	  nsites = 0;
2324 	  while (nsites < nsplicepartners_obs[j]) {
2325 	    if (splicetypes[j1] == ANTIDONOR) {
2326 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2327 		sites[nsites++] = (Splicestring_T) List_head(p);
2328 	      }
2329 	    }
2330 	    j1++;
2331 	  }
2332 	  assert(nsites == nsplicepartners_obs[j]);
2333 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2334 #if 0
2335 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2336 						  sites,nsites,/*charpos*/0);
2337 #else
2338 	  *trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2339 #endif
2340 	  if (npartners > MAX_SITES_ALLOCATED) {
2341 	    FREE(sites);
2342 	  }
2343 	}
2344       }
2345 
2346       if ((npartners = nsplicepartners_max[j]) == 0) {
2347 #if 0
2348 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2349 #else
2350 	(*trie_max_size) += TRIE_EMPTY_SIZE;
2351 #endif
2352 
2353       } else {
2354 	if (npartners > MAX_SITES_ALLOCATED) {
2355 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2356 	} else {
2357 	  sites = &(sites_allocated[0]);
2358 	}
2359 	nsites = 0;
2360 	while (nsites < nsplicepartners_max[j]) {
2361 	  if (splicetypes[j1] == ANTIDONOR) {
2362 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2363 	      sites[nsites++] = (Splicestring_T) List_head(p);
2364 	    }
2365 	  }
2366 	  j1++;
2367 	}
2368 	assert(nsites == nsplicepartners_max[j]);
2369 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2370 #if 0
2371 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2372 						sites,nsites,/*charpos*/0);
2373 #else
2374 	*trie_max_size = Trie_output_size(*trie_max_size,sites,nsites,/*charpos*/0);
2375 #endif
2376 	if (npartners > MAX_SITES_ALLOCATED) {
2377 	  FREE(sites);
2378 	}
2379       }
2380 
2381       break;
2382 
2383     default:
2384       fprintf(stderr,"Unexpected splicetype %d\n",splicetypes[j]);
2385       abort();
2386     }
2387   }
2388 
2389   return;
2390 }
2391 
2392 
2393 
2394 void
Splicetrie_build_via_splicesites(Triecontent_T ** triecontents_obs,Trieoffset_T ** trieoffsets_obs,Triecontent_T ** triecontents_max,Trieoffset_T ** trieoffsets_max,int * nsplicepartners_skip,int * nsplicepartners_obs,int * nsplicepartners_max,Splicetype_T * splicetypes,List_T * splicestrings,int nsplicesites)2395 Splicetrie_build_via_splicesites (Triecontent_T **triecontents_obs, Trieoffset_T **trieoffsets_obs,
2396 				  Triecontent_T **triecontents_max, Trieoffset_T **trieoffsets_max,
2397 				  int *nsplicepartners_skip, int *nsplicepartners_obs, int *nsplicepartners_max,
2398 				  Splicetype_T *splicetypes, List_T *splicestrings, int nsplicesites) {
2399 #ifdef USE_LIST
2400   Uintlist_T triecontents_obs_list = NULL, triecontents_max_list = NULL;
2401 #else
2402   unsigned int *triecontents_obs_ptr, *triecontents_max_ptr;
2403 #endif
2404   List_T p;
2405   int nsites, j, j1;
2406   Splicestring_T *sites, sites_allocated[MAX_SITES_ALLOCATED];
2407   int npartners;
2408   int nprinted_obs = 0, nprinted_max = 0;
2409   int trie_obs_size = 0, trie_max_size = 0;
2410   bool distances_observed_p;
2411 
2412   if (nsplicepartners_obs == NULL) {
2413     distances_observed_p = false;
2414   } else {
2415     distances_observed_p = true;
2416   }
2417 
2418   if (distances_observed_p == true) {
2419     *trieoffsets_obs = (Trieoffset_T *) CALLOC(nsplicesites,sizeof(Trieoffset_T));
2420   } else {
2421     *trieoffsets_obs = (Trieoffset_T *) NULL;
2422   }
2423   *trieoffsets_max = (Trieoffset_T *) CALLOC(nsplicesites,sizeof(Trieoffset_T));
2424 
2425   build_via_splicesites_size(&trie_obs_size,&trie_max_size,
2426 			     nsplicepartners_skip,nsplicepartners_obs,nsplicepartners_max,
2427 			     splicetypes,splicestrings,nsplicesites,distances_observed_p);
2428 #ifndef USE_LIST
2429   if (distances_observed_p) {
2430     triecontents_obs_ptr = *triecontents_obs = (Triecontent_T *) MALLOC(trie_obs_size * sizeof(Triecontent_T));
2431   } else {
2432     *triecontents_obs = (Triecontent_T *) NULL;
2433   }
2434   triecontents_max_ptr = *triecontents_max = (Triecontent_T *) MALLOC(trie_max_size * sizeof(Triecontent_T));
2435 #endif
2436 
2437 
2438   for (j = 0; j < nsplicesites; j++) {
2439     switch (splicetypes[j]) {
2440     case DONOR:
2441       debug(
2442 	    if (distances_observed_p == true) {
2443 	      printf("donor #%d (%d partners obs, %d partners max):",
2444 		     j,nsplicepartners_obs[j],nsplicepartners_max[j]);
2445 	    } else {
2446 	      printf("donor #%d (%d partners max):",
2447 		     j,nsplicepartners_max[j]);
2448 	    });
2449 
2450       j1 = j + 1 + nsplicepartners_skip[j];
2451 
2452       if (distances_observed_p) {
2453 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2454 #ifdef USE_LIST
2455 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2456 #else
2457 	  triecontents_obs_ptr = Trie_output_empty_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr);
2458 #endif
2459 
2460 	} else {
2461 	  if (npartners > MAX_SITES_ALLOCATED) {
2462 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2463 	  } else {
2464 	    sites = &(sites_allocated[0]);
2465 	  }
2466 	  nsites = 0;
2467 	  while (nsites < nsplicepartners_obs[j]) {
2468 	    if (splicetypes[j1] == ACCEPTOR) {
2469 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2470 		debug(printf(" %d",j1));
2471 		sites[nsites++] = (Splicestring_T) List_head(p);
2472 	      }
2473 	    }
2474 	    j1++;
2475 	  }
2476 	  assert(nsites == nsplicepartners_obs[j]);
2477 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2478 #ifdef USE_LIST
2479 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2480 						  sites,nsites,/*charpos*/0);
2481 #else
2482 	  triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
2483 						  sites,nsites,/*charpos*/0);
2484 #endif
2485 	  if (npartners > MAX_SITES_ALLOCATED) {
2486 	    FREE(sites);
2487 	  }
2488 	}
2489       }
2490 
2491       if ((npartners = nsplicepartners_max[j]) == 0) {
2492 #ifdef USE_LIST
2493 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2494 #else
2495 	triecontents_max_ptr = Trie_output_empty_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr);
2496 #endif
2497 
2498       } else {
2499 	if (npartners > MAX_SITES_ALLOCATED) {
2500 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2501 	} else {
2502 	  sites = &(sites_allocated[0]);
2503 	}
2504 	nsites = 0;
2505 	while (nsites < nsplicepartners_max[j]) {
2506 	  if (splicetypes[j1] == ACCEPTOR) {
2507 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2508 	      debug(printf(" %d",j1));
2509 	      sites[nsites++] = (Splicestring_T) List_head(p);
2510 	    }
2511 	  }
2512 	  j1++;
2513 	}
2514 	assert(nsites == nsplicepartners_max[j]);
2515 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2516 #ifdef USE_LIST
2517 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2518 						sites,nsites,/*charpos*/0);
2519 #else
2520 	triecontents_max_ptr = Trie_output_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr,
2521 						sites,nsites,/*charpos*/0);
2522 #endif
2523 	if (npartners > MAX_SITES_ALLOCATED) {
2524 	  FREE(sites);
2525 	}
2526       }
2527 
2528       debug(printf("\n"));
2529       break;
2530 
2531     case ACCEPTOR:
2532       debug(
2533 	    if (distances_observed_p == true) {
2534 	      printf("acceptor #%d (%d partners obs, %d partners max):",
2535 		     j,nsplicepartners_obs[j],nsplicepartners_max[j]);
2536 	    } else {
2537 	      printf("acceptor #%d (%d partners max):",
2538 		     j,nsplicepartners_max[j]);
2539 	    });
2540 
2541       j1 = j - 1 - nsplicepartners_skip[j];
2542 
2543       if (distances_observed_p) {
2544 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2545 #ifdef USE_LIST
2546 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2547 #else
2548 	  triecontents_obs_ptr = Trie_output_empty_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr);
2549 #endif
2550 
2551 	} else {
2552 	  if (npartners > MAX_SITES_ALLOCATED) {
2553 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2554 	  } else {
2555 	    sites = &(sites_allocated[0]);
2556 	  }
2557 	  nsites = 0;
2558 	  while (nsites < nsplicepartners_obs[j]) {
2559 	    if (splicetypes[j1] == DONOR) {
2560 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2561 		debug(printf(" %d",j1));
2562 		sites[nsites++] = (Splicestring_T) List_head(p);
2563 	      }
2564 	    }
2565 	    j1--;
2566 	  }
2567 	  assert(nsites == nsplicepartners_obs[j]);
2568 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2569 #ifdef USE_LIST
2570 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2571    	                                           sites,nsites,/*charpos*/0);
2572 #else
2573 	  triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
2574 	                                           sites,nsites,/*charpos*/0);
2575 #endif
2576 	  if (npartners > MAX_SITES_ALLOCATED) {
2577 	    FREE(sites);
2578 	  }
2579 	}
2580       }
2581 
2582       if ((npartners = nsplicepartners_max[j]) == 0) {
2583 #ifdef USE_LIST
2584 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2585 #else
2586 	triecontents_max_ptr = Trie_output_empty_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr);
2587 #endif
2588 
2589       } else {
2590 	if (npartners > MAX_SITES_ALLOCATED) {
2591 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2592 	} else {
2593 	  sites = &(sites_allocated[0]);
2594 	}
2595 	nsites = 0;
2596 	while (nsites < nsplicepartners_max[j]) {
2597 	  if (splicetypes[j1] == DONOR) {
2598 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2599 	      debug(printf(" %d",j1));
2600 	      sites[nsites++] = (Splicestring_T) List_head(p);
2601 	    }
2602 	  }
2603 	  j1--;
2604 	}
2605 	assert(nsites == nsplicepartners_max[j]);
2606 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2607 #ifdef USE_LIST
2608 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2609 						sites,nsites,/*charpos*/0);
2610 #else
2611 	triecontents_max_ptr = Trie_output_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr,
2612 						sites,nsites,/*charpos*/0);
2613 #endif
2614 	if (npartners > MAX_SITES_ALLOCATED) {
2615 	  FREE(sites);
2616 	}
2617       }
2618 
2619       debug(printf("\n"));
2620       break;
2621 
2622     case ANTIDONOR:
2623       debug(
2624 	    if (distances_observed_p == true) {
2625 	      printf("antidonor #%d (%d partners obs, %d partners max):",
2626 		     j,nsplicepartners_obs[j],nsplicepartners_max[j]);
2627 	    } else {
2628 	      printf("antidonor #%d (%d partners max):",
2629 		     j,nsplicepartners_max[j]);
2630 	    });
2631 
2632       j1 = j - 1 - nsplicepartners_skip[j];
2633 
2634       if (distances_observed_p) {
2635 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2636 #ifdef USE_LIST
2637 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2638 #else
2639 	  triecontents_obs_ptr = Trie_output_empty_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr);
2640 #endif
2641 
2642 	} else {
2643 	  if (npartners > MAX_SITES_ALLOCATED) {
2644 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2645 	  } else {
2646 	    sites = &(sites_allocated[0]);
2647 	  }
2648 	  nsites = 0;
2649 	  while (nsites < nsplicepartners_obs[j]) {
2650 	    if (splicetypes[j1] == ANTIACCEPTOR) {
2651 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2652 		debug(printf(" %d",j1));
2653 		sites[nsites++] = (Splicestring_T) List_head(p);
2654 	      }
2655 	    }
2656 	    j1--;
2657 	  }
2658 	  assert(nsites == nsplicepartners_obs[j]);
2659 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2660 #ifdef USE_LIST
2661 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2662 						  sites,nsites,/*charpos*/0);
2663 #else
2664 	  triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
2665 						  sites,nsites,/*charpos*/0);
2666 #endif
2667 	  if (npartners > MAX_SITES_ALLOCATED) {
2668 	    FREE(sites);
2669 	  }
2670 	}
2671       }
2672 
2673       if ((npartners = nsplicepartners_max[j]) == 0) {
2674 #ifdef USE_LIST
2675 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2676 #else
2677 	triecontents_max_ptr = Trie_output_empty_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr);
2678 #endif
2679 
2680       } else {
2681 	if (npartners > MAX_SITES_ALLOCATED) {
2682 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2683 	} else {
2684 	  sites = &(sites_allocated[0]);
2685 	}
2686 	nsites = 0;
2687 	while (nsites < nsplicepartners_max[j]) {
2688 	  if (splicetypes[j1] == ANTIACCEPTOR) {
2689 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2690 	      debug(printf(" %d",j1));
2691 	      sites[nsites++] = (Splicestring_T) List_head(p);
2692 	    }
2693 	  }
2694 	  j1--;
2695 	}
2696 	assert(nsites == nsplicepartners_max[j]);
2697 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2698 #ifdef USE_LIST
2699 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2700 						sites,nsites,/*charpos*/0);
2701 #else
2702 	triecontents_max_ptr = Trie_output_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr,
2703 						sites,nsites,/*charpos*/0);
2704 #endif
2705 	if (npartners > MAX_SITES_ALLOCATED) {
2706 	  FREE(sites);
2707 	}
2708       }
2709 
2710       debug(printf("\n"));
2711       break;
2712 
2713     case ANTIACCEPTOR:
2714       debug(
2715 	    if (distances_observed_p == true) {
2716 	      printf("antiacceptor #%d (%d partners obs, %d partners max):",
2717 		     j,nsplicepartners_obs[j],nsplicepartners_max[j]);
2718 	    } else {
2719 	      printf("antiacceptor #%d (%d partners max):",
2720 		     j,nsplicepartners_max[j]);
2721 	    });
2722 
2723       j1 = j + 1 + nsplicepartners_skip[j];
2724 
2725       if (distances_observed_p) {
2726 	if ((npartners = nsplicepartners_obs[j]) == 0) {
2727 #ifdef USE_LIST
2728 	  triecontents_obs_list = Trie_output_empty_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list);
2729 #else
2730 	  triecontents_obs_ptr = Trie_output_empty_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr);
2731 #endif
2732 
2733 	} else {
2734 	  if (npartners > MAX_SITES_ALLOCATED) {
2735 	    sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2736 	  } else {
2737 	    sites = &(sites_allocated[0]);
2738 	  }
2739 	  nsites = 0;
2740 	  while (nsites < nsplicepartners_obs[j]) {
2741 	    if (splicetypes[j1] == ANTIDONOR) {
2742 	      for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2743 		debug(printf(" %d",j1));
2744 		sites[nsites++] = (Splicestring_T) List_head(p);
2745 	      }
2746 	    }
2747 	    j1++;
2748 	  }
2749 	  assert(nsites == nsplicepartners_obs[j]);
2750 	  qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2751 #ifdef USE_LIST
2752 	  triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2753 						  sites,nsites,/*charpos*/0);
2754 #else
2755 	  triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
2756 						  sites,nsites,/*charpos*/0);
2757 #endif
2758 	  if (npartners > MAX_SITES_ALLOCATED) {
2759 	    FREE(sites);
2760 	  }
2761 	}
2762       }
2763 
2764       if ((npartners = nsplicepartners_max[j]) == 0) {
2765 #ifdef USE_LIST
2766 	triecontents_max_list = Trie_output_empty_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list);
2767 #else
2768 	triecontents_max_ptr = Trie_output_empty_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr);
2769 #endif
2770 
2771       } else {
2772 	if (npartners > MAX_SITES_ALLOCATED) {
2773 	  sites = (Splicestring_T *) CALLOC(npartners,sizeof(Splicestring_T));
2774 	} else {
2775 	  sites = &(sites_allocated[0]);
2776 	}
2777 	nsites = 0;
2778 	while (nsites < nsplicepartners_max[j]) {
2779 	  if (splicetypes[j1] == ANTIDONOR) {
2780 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2781 	      debug(printf(" %d",j1));
2782 	      sites[nsites++] = (Splicestring_T) List_head(p);
2783 	    }
2784 	  }
2785 	  j1++;
2786 	}
2787 	assert(nsites == nsplicepartners_max[j]);
2788 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2789 #ifdef USE_LIST
2790 	triecontents_max_list = Trie_output_list(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_list,
2791 						sites,nsites,/*charpos*/0);
2792 #else
2793 	triecontents_max_ptr = Trie_output_array(&((*trieoffsets_max)[j]),&nprinted_max,triecontents_max_ptr,
2794 						sites,nsites,/*charpos*/0);
2795 #endif
2796 	if (npartners > MAX_SITES_ALLOCATED) {
2797 	  FREE(sites);
2798 	}
2799       }
2800 
2801       debug(printf("\n"));
2802       break;
2803 
2804     default:
2805       fprintf(stderr,"Unexpected splicetype %d\n",splicetypes[j]);
2806       abort();
2807     }
2808   }
2809 
2810 
2811   assert(nprinted_obs == trie_obs_size);
2812   assert(nprinted_max == trie_max_size);
2813 
2814 
2815   if (distances_observed_p) {
2816     fprintf(stderr,"splicetrie_obs has %d entries...",nprinted_obs);
2817 #ifdef USE_LIST
2818     triecontents_obs_list = Uintlist_reverse(triecontents_obs_list);
2819     *triecontents_obs = Uintlist_to_array(&nprinted_obs,triecontents_obs_list);
2820     Uintlist_free(&triecontents_obs_list);
2821 #endif
2822   } else {
2823     *triecontents_obs = (Triecontent_T *) NULL;
2824   }
2825 
2826 
2827   fprintf(stderr,"splicetrie_max has %d entries...",nprinted_max);
2828 #ifdef USE_LIST
2829   triecontents_max_list = Uintlist_reverse(triecontents_max_list);
2830   *triecontents_max = Uintlist_to_array(&nprinted_max,triecontents_max_list);
2831   Uintlist_free(&triecontents_max_list);
2832 #endif
2833 
2834   return;
2835 }
2836 
2837 
2838 #ifdef DEBUG
2839 /* Splicetype is for the anchor splice */
2840 static void
dump_sites(Splicestring_T * sites,int nsites,Splicetype_T splicetype)2841 dump_sites (Splicestring_T *sites, int nsites, Splicetype_T splicetype) {
2842   int i;
2843   char gbuffer[17];
2844 
2845   gbuffer[16] = '\0';
2846 
2847   for (i = 0; i < nsites; i++) {
2848     printf("%d %u",sites[i]->splicesite_i,sites[i]->splicesite);
2849     if (splicetype == DONOR || splicetype == ANTIACCEPTOR) {
2850       splicefrag_nt_rightward(gbuffer,sites[i]->string);
2851       printf(" %s (rightward)\n",gbuffer);
2852     } else {
2853       splicefrag_nt_leftward(gbuffer,sites[i]->string);
2854       printf(" %s (leftward)\n",gbuffer);
2855     }
2856   }
2857   return;
2858 }
2859 #endif
2860 
2861 
2862 
2863 static void
build_via_introns_size(int * trie_obs_size,Univcoord_T * splicesites,Splicetype_T * splicetypes,List_T * splicestrings,int nsplicesites,Univ_IIT_T chromosome_iit,IIT_T splicing_iit,int * splicing_divint_crosstable)2864 build_via_introns_size (int *trie_obs_size, Univcoord_T *splicesites, Splicetype_T *splicetypes,
2865 			List_T *splicestrings, int nsplicesites,
2866 			Univ_IIT_T chromosome_iit, IIT_T splicing_iit, int *splicing_divint_crosstable) {
2867   List_T p;
2868   int nsites, j, j1;
2869   Splicestring_T *sites, sites_allocated[MAX_SITES_ALLOCATED];
2870 
2871   Univcoord_T chroffset, chrhigh;
2872   Chrpos_T chrlength;
2873   Chrpos_T *coords;
2874   int ncoords, k;
2875   Chrnum_T chrnum;
2876   int divno;
2877 
2878 
2879   chrhigh = 0U;
2880   for (j = 0; j < nsplicesites; j++) {
2881     if (splicesites[j] > chrhigh) {
2882       chrnum = Univ_IIT_get_one(chromosome_iit,splicesites[j],splicesites[j]);
2883       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
2884       /* chrhigh += 1U; */
2885 
2886       divno = splicing_divint_crosstable[chrnum];
2887       assert(divno > 0);
2888     }
2889 
2890     switch (splicetypes[j]) {
2891     case DONOR:
2892       nsites = 0;
2893       coords = IIT_get_highs_for_low(&ncoords,splicing_iit,divno,splicesites[j]-chroffset);
2894 
2895       j1 = j + 1;
2896       k = 0;
2897       while (j1 < nsplicesites && k < ncoords) {
2898 	if (splicetypes[j1] == ACCEPTOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
2899 	  nsites += List_length(splicestrings[j1]);
2900 	  k++;
2901 	}
2902 	j1++;
2903       }
2904       /* assertion may not hold if last few introns are invalidated */
2905       /* assert(k == ncoords); */
2906 
2907       if (nsites == 0) {
2908 #if 0
2909 	(*trieoffsets_obs)[j] = NULL_POINTER;
2910 #endif
2911 
2912       } else {
2913 	if (nsites > MAX_SITES_ALLOCATED) {
2914 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
2915 	} else {
2916 	  sites = &(sites_allocated[0]);
2917 	}
2918 	nsites = 0;
2919 	j1 = j + 1;
2920 	k = 0;
2921 	while (j1 < nsplicesites && k < ncoords) {
2922 	  if (splicetypes[j1] == ACCEPTOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
2923 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2924 	      sites[nsites++] = (Splicestring_T) List_head(p);
2925 	    }
2926 	    k++;
2927 	  }
2928 	  j1++;
2929 	}
2930 
2931 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2932 #if 0
2933 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2934 						sites,nsites,/*charpos*/0);
2935 #else
2936 	*trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2937 #endif
2938 	if (nsites > MAX_SITES_ALLOCATED) {
2939 	  FREE(sites);
2940 	}
2941       }
2942 
2943       FREE(coords);
2944       break;
2945 
2946     case ACCEPTOR:
2947       nsites = 0;
2948       coords = IIT_get_lows_for_high(&ncoords,splicing_iit,divno,splicesites[j] + INTRON_HIGH_TO_LOW - chroffset);
2949 
2950       j1 = j - 1;
2951       k = 0;
2952       while (j1 >= 0 && k < ncoords) {
2953 	if (splicetypes[j1] == DONOR && splicesites[j1] == coords[k] + chroffset) {
2954 	  nsites += List_length(splicestrings[j1]);
2955 	  k++;
2956 	}
2957 	j1--;
2958       }
2959       /* assertion may not hold if last few introns are invalidated */
2960       /* assert(k == ncoords); */
2961 
2962       if (nsites == 0) {
2963 #if 0
2964 	(*trieoffsets_obs)[j] = NULL_POINTER;
2965 #endif
2966 
2967       } else {
2968 	if (nsites > MAX_SITES_ALLOCATED) {
2969 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
2970 	} else {
2971 	  sites = &(sites_allocated[0]);
2972 	}
2973 	nsites = 0;
2974 	j1 = j - 1;
2975 	k = 0;
2976 	while (j1 >= 0 && k < ncoords) {
2977 	  if (splicetypes[j1] == DONOR && splicesites[j1] == coords[k] + chroffset) {
2978 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
2979 	      sites[nsites++] = (Splicestring_T) List_head(p);
2980 	    }
2981 	    k++;
2982 	  }
2983 	  j1--;
2984 	}
2985 
2986 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
2987 #if 0
2988 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
2989 						sites,nsites,/*charpos*/0);
2990 #else
2991 	*trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
2992 #endif
2993 	if (nsites > MAX_SITES_ALLOCATED) {
2994 	  FREE(sites);
2995 	}
2996       }
2997 
2998       FREE(coords);
2999       break;
3000 
3001     case ANTIDONOR:
3002       nsites = 0;
3003       coords = IIT_get_lows_for_high(&ncoords,splicing_iit,divno,splicesites[j] + INTRON_HIGH_TO_LOW - chroffset);
3004 
3005       j1 = j - 1;
3006       k = 0;
3007       while (j1 >= 0 && k < ncoords) {
3008 	if (splicetypes[j1] == ANTIACCEPTOR && splicesites[j1] == coords[k] + chroffset) {
3009 	  nsites += List_length(splicestrings[j1]);
3010 	  k++;
3011 	}
3012 	j1--;
3013       }
3014       /* assertion may not hold if last few introns are invalidated */
3015       /* assert(k == ncoords); */
3016 
3017       if (nsites == 0) {
3018 #if 0
3019 	(*trieoffsets_obs)[j] = NULL_POINTER;
3020 #endif
3021 
3022       } else {
3023 	if (nsites > MAX_SITES_ALLOCATED) {
3024 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3025 	} else {
3026 	  sites = &(sites_allocated[0]);
3027 	}
3028 	nsites = 0;
3029 	j1 = j - 1;
3030 	k = 0;
3031 	while (j1 >= 0 && k < ncoords) {
3032 	  if (splicetypes[j1] == ANTIACCEPTOR && splicesites[j1] == coords[k] + chroffset) {
3033 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3034 	      sites[nsites++] = (Splicestring_T) List_head(p);
3035 	    }
3036 	    k++;
3037 	  }
3038 	  j1--;
3039 	}
3040 
3041 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3042 #if 0
3043 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3044 						sites,nsites,/*charpos*/0);
3045 #else
3046 	*trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
3047 #endif
3048 	if (nsites > MAX_SITES_ALLOCATED) {
3049 	  FREE(sites);
3050 	}
3051       }
3052 
3053       FREE(coords);
3054       break;
3055 
3056     case ANTIACCEPTOR:
3057       nsites = 0;
3058       coords = IIT_get_highs_for_low(&ncoords,splicing_iit,divno,splicesites[j]-chroffset);
3059 
3060       j1 = j + 1;
3061       k = 0;
3062       while (j1 < nsplicesites && k < ncoords) {
3063 	if (splicetypes[j1] == ANTIDONOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3064 	  nsites += List_length(splicestrings[j1]);
3065 	  k++;
3066 	}
3067 	j1++;
3068       }
3069       /* assertion may not hold if last few introns are invalidated */
3070       /* assert(k == ncoords); */
3071 
3072       if (nsites == 0) {
3073 #if 0
3074 	(*trieoffsets_obs)[j] = NULL_POINTER;
3075 #endif
3076 
3077       } else {
3078 	if (nsites > MAX_SITES_ALLOCATED) {
3079 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3080 	} else {
3081 	  sites = &(sites_allocated[0]);
3082 	}
3083 	nsites = 0;
3084 	j1 = j + 1;
3085 	k = 0;
3086 	while (j1 < nsplicesites && k < ncoords) {
3087 	  if (splicetypes[j1] == ANTIDONOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3088 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3089 	      sites[nsites++] = (Splicestring_T) List_head(p);
3090 	    }
3091 	    k++;
3092 	  }
3093 	  j1++;
3094 	}
3095 
3096 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3097 #if 0
3098 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3099 						sites,nsites,/*charpos*/0);
3100 #else
3101 	*trie_obs_size = Trie_output_size(*trie_obs_size,sites,nsites,/*charpos*/0);
3102 #endif
3103 	if (nsites > MAX_SITES_ALLOCATED) {
3104 	  FREE(sites);
3105 	}
3106       }
3107 
3108       FREE(coords);
3109       break;
3110 
3111     default:
3112       fprintf(stderr,"Unexpected splicetype %d\n",splicetypes[j]);
3113       abort();
3114     }
3115   }
3116 
3117   return;
3118 }
3119 
3120 
3121 void
Splicetrie_build_via_introns(Triecontent_T ** triecontents_obs,Trieoffset_T ** trieoffsets_obs,Univcoord_T * splicesites,Splicetype_T * splicetypes,List_T * splicestrings,int nsplicesites,Univ_IIT_T chromosome_iit,IIT_T splicing_iit,int * splicing_divint_crosstable)3122 Splicetrie_build_via_introns (Triecontent_T **triecontents_obs, Trieoffset_T **trieoffsets_obs,
3123 			      Univcoord_T *splicesites, Splicetype_T *splicetypes,
3124 			      List_T *splicestrings, int nsplicesites,
3125 			      Univ_IIT_T chromosome_iit, IIT_T splicing_iit, int *splicing_divint_crosstable) {
3126 #ifdef USE_LIST
3127   Uintlist_T triecontents_obs_list = NULL;
3128 #else
3129   unsigned int *triecontents_obs_ptr;
3130 #endif
3131   List_T p;
3132   int nsites, j, j1;
3133   Splicestring_T *sites, sites_allocated[MAX_SITES_ALLOCATED];
3134   int nprinted_obs = 0;
3135   int trie_obs_size = 0;
3136 
3137   Univcoord_T chroffset, chrhigh;
3138   Chrpos_T chrlength;
3139   Chrpos_T *coords;
3140   int ncoords, k;
3141   Chrnum_T chrnum;
3142   int divno;
3143 
3144 
3145   *trieoffsets_obs = (Trieoffset_T *) CALLOC(nsplicesites,sizeof(Trieoffset_T));
3146 
3147   build_via_introns_size(&trie_obs_size,splicesites,splicetypes,splicestrings,nsplicesites,
3148 			 chromosome_iit,splicing_iit,splicing_divint_crosstable);
3149 #ifndef USE_LIST
3150   triecontents_obs_ptr = *triecontents_obs = (Triecontent_T *) MALLOC(trie_obs_size * sizeof(Triecontent_T));
3151 #endif
3152 
3153 
3154   chrhigh = 0U;
3155   for (j = 0; j < nsplicesites; j++) {
3156     if (splicesites[j] > chrhigh) {
3157       chrnum = Univ_IIT_get_one(chromosome_iit,splicesites[j],splicesites[j]);
3158       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,/*circular_typeint*/-1);
3159       /* chrhigh += 1U; */
3160 
3161       divno = splicing_divint_crosstable[chrnum];
3162       assert(divno > 0);
3163     }
3164 
3165     switch (splicetypes[j]) {
3166     case DONOR:
3167       nsites = 0;
3168       coords = IIT_get_highs_for_low(&ncoords,splicing_iit,divno,splicesites[j]-chroffset);
3169 
3170       j1 = j + 1;
3171       k = 0;
3172       while (j1 < nsplicesites && k < ncoords) {
3173 	if (splicetypes[j1] == ACCEPTOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3174 	  nsites += List_length(splicestrings[j1]);
3175 	  k++;
3176 	}
3177 	j1++;
3178       }
3179       /* assertion may not hold if last few introns are invalidated */
3180       /* assert(k == ncoords); */
3181 
3182       if (nsites == 0) {
3183 	(*trieoffsets_obs)[j] = NULL_POINTER;
3184 
3185       } else {
3186 	if (nsites > MAX_SITES_ALLOCATED) {
3187 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3188 	} else {
3189 	  sites = &(sites_allocated[0]);
3190 	}
3191 	nsites = 0;
3192 	j1 = j + 1;
3193 	k = 0;
3194 	while (j1 < nsplicesites && k < ncoords) {
3195 	  if (splicetypes[j1] == ACCEPTOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3196 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3197 	      sites[nsites++] = (Splicestring_T) List_head(p);
3198 	    }
3199 	    k++;
3200 	  }
3201 	  j1++;
3202 	}
3203 
3204 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3205 #ifdef USE_LIST
3206 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3207 					      sites,nsites,/*charpos*/0);
3208 #else
3209 	triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
3210 					      sites,nsites,/*charpos*/0);
3211 #endif
3212 	if (nsites > MAX_SITES_ALLOCATED) {
3213 	  FREE(sites);
3214 	}
3215       }
3216 
3217       FREE(coords);
3218       break;
3219 
3220     case ACCEPTOR:
3221       nsites = 0;
3222       coords = IIT_get_lows_for_high(&ncoords,splicing_iit,divno,splicesites[j] + INTRON_HIGH_TO_LOW - chroffset);
3223 
3224       j1 = j - 1;
3225       k = 0;
3226       while (j1 >= 0 && k < ncoords) {
3227 	if (splicetypes[j1] == DONOR && splicesites[j1] == coords[k] + chroffset) {
3228 	  nsites += List_length(splicestrings[j1]);
3229 	  k++;
3230 	}
3231 	j1--;
3232       }
3233       /* assertion may not hold if last few introns are invalidated */
3234       /* assert(k == ncoords); */
3235 
3236       if (nsites == 0) {
3237 	(*trieoffsets_obs)[j] = NULL_POINTER;
3238 
3239       } else {
3240 	if (nsites > MAX_SITES_ALLOCATED) {
3241 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3242 	} else {
3243 	  sites = &(sites_allocated[0]);
3244 	}
3245 	nsites = 0;
3246 	j1 = j - 1;
3247 	k = 0;
3248 	while (j1 >= 0 && k < ncoords) {
3249 	  if (splicetypes[j1] == DONOR && splicesites[j1] == coords[k] + chroffset) {
3250 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3251 	      sites[nsites++] = (Splicestring_T) List_head(p);
3252 	    }
3253 	    k++;
3254 	  }
3255 	  j1--;
3256 	}
3257 
3258 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3259 #ifdef USE_LIST
3260 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3261 						 sites,nsites,/*charpos*/0);
3262 #else
3263 	triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
3264 						 sites,nsites,/*charpos*/0);
3265 #endif
3266 	if (nsites > MAX_SITES_ALLOCATED) {
3267 	  FREE(sites);
3268 	}
3269       }
3270 
3271       FREE(coords);
3272       break;
3273 
3274     case ANTIDONOR:
3275       nsites = 0;
3276       coords = IIT_get_lows_for_high(&ncoords,splicing_iit,divno,splicesites[j] + INTRON_HIGH_TO_LOW - chroffset);
3277 
3278       j1 = j - 1;
3279       k = 0;
3280       while (j1 >= 0 && k < ncoords) {
3281 	if (splicetypes[j1] == ANTIACCEPTOR && splicesites[j1] == coords[k] + chroffset) {
3282 	  nsites += List_length(splicestrings[j1]);
3283 	  k++;
3284 	}
3285 	j1--;
3286       }
3287       /* assertion may not hold if last few introns are invalidated */
3288       /* assert(k == ncoords); */
3289 
3290       if (nsites == 0) {
3291 	(*trieoffsets_obs)[j] = NULL_POINTER;
3292 
3293       } else {
3294 	if (nsites > MAX_SITES_ALLOCATED) {
3295 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3296 	} else {
3297 	  sites = &(sites_allocated[0]);
3298 	}
3299 	nsites = 0;
3300 	j1 = j - 1;
3301 	k = 0;
3302 	while (j1 >= 0 && k < ncoords) {
3303 	  if (splicetypes[j1] == ANTIACCEPTOR && splicesites[j1] == coords[k] + chroffset) {
3304 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3305 	      sites[nsites++] = (Splicestring_T) List_head(p);
3306 	    }
3307 	    k++;
3308 	  }
3309 	  j1--;
3310 	}
3311 
3312 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3313 #ifdef USE_LIST
3314 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3315 						sites,nsites,/*charpos*/0);
3316 #else
3317 	triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
3318                                         	 sites,nsites,/*charpos*/0);
3319 #endif
3320 	if (nsites > MAX_SITES_ALLOCATED) {
3321 	  FREE(sites);
3322 	}
3323       }
3324 
3325       FREE(coords);
3326       break;
3327 
3328     case ANTIACCEPTOR:
3329       nsites = 0;
3330       coords = IIT_get_highs_for_low(&ncoords,splicing_iit,divno,splicesites[j]-chroffset);
3331 
3332       j1 = j + 1;
3333       k = 0;
3334       while (j1 < nsplicesites && k < ncoords) {
3335 	if (splicetypes[j1] == ANTIDONOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3336 	  nsites += List_length(splicestrings[j1]);
3337 	  k++;
3338 	}
3339 	j1++;
3340       }
3341       /* assertion may not hold if last few introns are invalidated */
3342       /* assert(k == ncoords); */
3343 
3344       if (nsites == 0) {
3345 	(*trieoffsets_obs)[j] = NULL_POINTER;
3346 
3347       } else {
3348 	if (nsites > MAX_SITES_ALLOCATED) {
3349 	  sites = (Splicestring_T *) CALLOC(nsites,sizeof(Splicestring_T));
3350 	} else {
3351 	  sites = &(sites_allocated[0]);
3352 	}
3353 	nsites = 0;
3354 	j1 = j + 1;
3355 	k = 0;
3356 	while (j1 < nsplicesites && k < ncoords) {
3357 	  if (splicetypes[j1] == ANTIDONOR && splicesites[j1] == coords[k] - INTRON_HIGH_TO_LOW + chroffset) {
3358 	    for (p = splicestrings[j1]; p != NULL; p = List_next(p)) {
3359 	      sites[nsites++] = (Splicestring_T) List_head(p);
3360 	    }
3361 	    k++;
3362 	  }
3363 	  j1++;
3364 	}
3365 
3366 	qsort(sites,nsites,sizeof(Splicestring_T),Splicestring_cmp);
3367 #ifdef USE_LIST
3368 	triecontents_obs_list = Trie_output_list(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_list,
3369 						 sites,nsites,/*charpos*/0);
3370 #else
3371 	triecontents_obs_ptr = Trie_output_array(&((*trieoffsets_obs)[j]),&nprinted_obs,triecontents_obs_ptr,
3372 						 sites,nsites,/*charpos*/0);
3373 #endif
3374 	if (nsites > MAX_SITES_ALLOCATED) {
3375 	  FREE(sites);
3376 	}
3377       }
3378 
3379       FREE(coords);
3380       break;
3381 
3382     default:
3383       fprintf(stderr,"Unexpected splicetype %d\n",splicetypes[j]);
3384       abort();
3385     }
3386   }
3387 
3388   assert(nprinted_obs == trie_obs_size);
3389 
3390   fprintf(stderr,"splicetrie_obs has %d entries...",nprinted_obs);
3391 #ifdef USE_LIST
3392   triecontents_obs_list = Uintlist_reverse(triecontents_obs_list);
3393   *triecontents_obs = Uintlist_to_array(&nprinted_obs,triecontents_obs_list);
3394   Uintlist_free(&triecontents_obs_list);
3395 #endif
3396 
3397   return;
3398 }
3399 
3400