1 static char rcsid[] = "$Id: stage2.c 218187 2019-01-17 13:15:10Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "stage2.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h>
11 
12 #include "assert.h"
13 #include "mem.h"
14 #include "comp.h"
15 #include "pair.h"
16 #include "pairdef.h"
17 #include "intlist.h"
18 #include "diag.h"
19 #include "genome_sites.h"
20 #include "complement.h"
21 #include "maxent_hr.h"
22 
23 
24 /* Tests whether genomicseg == query in convert_to_nucleotides, and
25    whether oligoindex_hr gives same results as oligoindex */
26 /* #define EXTRACT_GENOMICSEG 1 */
27 
28 /* #define USE_DIAGPOOL 1 -- Defined in diagpool.h */
29 
30 /* #define SQUARE 1 */
31 
32 /* #define SLOW 1 */
33 
34 #define SUFF_PCTCOVERAGE_OLIGOINDEX 0.90
35 
36 /* #define SUFF_PCTCOVERAGE_STAGE2 0.10 */
37 #define SUFF_NCOVERED 200
38 #define SUFF_MAXNCONSECUTIVE 20
39 #define GREEDY_NCONSECUTIVE 100
40 
41 #define MAX_NACTIVE 100	/* 100 previously considered too low, but may
42 			   be okay in conjunction with
43 			   diagonalization */
44 #define MAX_GRAND_LOOKBACK 200
45 
46 /* Penalty for genomic distances */
47 
48 #define INTRON_PENALTY_UNKNOWN 8
49 #define INTRON_PENALTY_INCONSISTENT 16
50 
51 /* Needs to be high to avoid short exons, but needs to be low to identify short exons. */
52 /* On an example by Nathan Weeks, needed to set this value to be 1 or
53    0 to find a short exon.  Setting to 0 gives too many short exons.
54    Also found querydist_credit to be a bad idea. */
55 #define NINTRON_PENALTY_MISMATCH 1
56 /* #define USE_QUERYDIST_CREDIT 1 */
57 
58 #define NON_CANONICAL_PENALTY_ENDS 4
59 #define NON_CANONICAL_PENALTY_MIDDLE 4
60 #define MISS_BEHIND 16
61 #define GREEDY_ADVANCE 6
62 #define FINAL_SCORE_TOLERANCE 20 /* Was 8, but missed some paths on Y chromosome */
63 #define NONOVERLAPPING_SCORE_TOLERANCE 0.5
64 
65 #define ENOUGH_CONSECUTIVE 32
66 
67 #define INFINITE 1000000
68 
69 /* EQUAL_DISTANCE used to be 3 for PMAP and 6 for GMAP, but that
70    allowed indels in repetitive regions.  Now have separate
71    variables. */
72 #ifdef PMAP
73 #define EQUAL_DISTANCE_FOR_CONSECUTIVE 0
74 #define EQUAL_DISTANCE_NOT_SPLICING 3
75 #else
76 #define EQUAL_DISTANCE_FOR_CONSECUTIVE 0
77 #define EQUAL_DISTANCE_NOT_SPLICING 9
78 #endif
79 
80 
81 #define INTRON_DEFN 9		/* Cannot exceed 9 */
82 #define NEAR_END_LENGTH 20	/* Determines whether to ignore EXON_DEFN at ends */
83 #define EXON_DEFN 30
84 #define MAX_SKIPPED 3
85 
86 #define SCORE_FOR_RESTRICT 10
87 /* #define SUFFICIENT_ROOTNLINKS 10  */ /* Setting this too low can slow down program considerably */
88 
89 
90 #ifdef PMAP
91 #define SAMPLE_INTERVAL 1
92 #define NT_PER_MATCH 3
93 #define CONSEC_POINTS_PER_MATCH 3 /* Possible increase to reward consecutiveness */
94 #define NONCODON_INDEL_PENALTY 15
95 #else
96 #define SAMPLE_INTERVAL 2	/* For cases where adjacentp == false.
97 				   Means that we can find islands of
98 				   9-mers */
99 #define NT_PER_MATCH 1
100 #define NT_PER_CODON 3
101 #define CONSEC_POINTS_PER_MATCH 1 /* Possible increase to reward consecutiveness */
102 #define CONSEC_POINTS_PER_CODON 3 /* Possible increase to reward consecutiveness */
103 #endif
104 
/* Slack, in positions, added when looking for shifted dinucleotides
   (see its uses in print_last_dinucl and in the disabled
   find_shifted_canonical in this file) */
#define SHIFT_EXTRA 15

/* Divisors used by the distance penalties in the disabled reference
   scoring code later in this file (diffdistance/ONE and
   diffdistance/TEN_THOUSAND) */
#define ONE 1
#define TEN_THOUSAND 8192	/* Power of 2.  Despite the name, the
				   value is 8192 (2^13), not 10000 */
#define HUNDRED_THOUSAND 100000.0
#define ONE_MILLION 1000000.0
111 
112 
113 
114 static bool splicingp;
115 static bool use_canonical_middle_p;
116 static bool use_canonical_ends_p;
117 static int suboptimal_score_end;
118 static int suboptimal_score_start;
119 static Mode_T mode;
120 static bool snps_p;
121 static int sufflookback;
122 static int nsufflookback;
123 static int maxintronlen;
124 
125 
126 void
Stage2_setup(bool splicingp_in,bool cross_species_p,int suboptimal_score_start_in,int suboptimal_score_end_in,int sufflookback_in,int nsufflookback_in,int maxintronlen_in,Mode_T mode_in,bool snps_p_in)127 Stage2_setup (bool splicingp_in, bool cross_species_p,
128 	      int suboptimal_score_start_in, int suboptimal_score_end_in,
129 	      int sufflookback_in, int nsufflookback_in, int maxintronlen_in,
130 	      Mode_T mode_in, bool snps_p_in) {
131   splicingp = splicingp_in;
132   if (splicingp == true) {
133     use_canonical_ends_p = true;
134   } else {
135     use_canonical_ends_p = false;
136   }
137   if (cross_species_p == true) {
138     use_canonical_middle_p = true;
139   } else {
140     use_canonical_middle_p = false;
141   }
142   suboptimal_score_start = suboptimal_score_start_in;
143   suboptimal_score_end = suboptimal_score_end_in;
144 
145   sufflookback = sufflookback_in;
146   nsufflookback = nsufflookback_in;
147   maxintronlen = maxintronlen_in;
148 
149   mode = mode_in;
150   snps_p = snps_p_in;
151   return;
152 }
153 
154 
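/* Debugging macros.  Each debugN(x) below expands to x when the
   corresponding DEBUGN symbol is defined, and to nothing otherwise. */
/* General */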
156 #ifdef DEBUG
157 #define debug(x) x
158 #else
159 #define debug(x)
160 #endif
161 
162 /* Final results of stage 2 */
163 #ifdef DEBUG0
164 #define debug0(x) x
165 #else
166 #define debug0(x)
167 #endif
168 
169 /* Print all links */
170 #ifdef DEBUG1
171 #define debug1(x) x
172 #else
173 #define debug1(x)
174 #endif
175 
176 /* For generating a graph */
177 #ifdef DEBUG3
178 #define debug3(x) x
179 #else
180 #define debug3(x)
181 #endif
182 
183 /* Converting to nucleotides */
184 #ifdef DEBUG5
185 #define debug5(x) x
186 #else
187 #define debug5(x)
188 #endif
189 
190 /* revise_active */
191 #ifdef DEBUG6
192 #define debug6(x) x
193 #else
194 #define debug6(x)
195 #endif
196 
197 /* Shifted canonical */
198 #ifdef DEBUG7
199 #define debug7(x) x
200 #else
201 #define debug7(x)
202 #endif
203 
204 /* find_canonical_dinucleotides */
205 #ifdef DEBUG8
206 #define debug8(x) x
207 #else
208 #define debug8(x)
209 #endif
210 
211 /* Dynamic programming */
212 /* Can also define debug9(x) as: if (curr_querypos == XX) {x;} */
213 #ifdef DEBUG9
214 #define debug9(x) x
215 #else
216 #define debug9(x)
217 #endif
218 
219 /* binary search */
220 #ifdef DEBUG10
221 #define debug10(x) x
222 #else
223 #define debug10(x)
224 #endif
225 
226 /* Multiple alignments */
227 #ifdef DEBUG11
228 #define debug11(x) x
229 #else
230 #define debug11(x)
231 #endif
232 
233 /* Grand winner */
234 #ifdef DEBUG12
235 #define debug12(x) x
236 #else
237 #define debug12(x)
238 #endif
239 
240 
241 /* Filter unique */
242 #ifdef DEBUG13
243 #define debug13(x) x
244 #else
245 #define debug13(x)
246 #endif
247 
248 /* Filter unique, details of overlap */
249 #ifdef DEBUG13A
250 #define debug13a(x) x
251 #else
252 #define debug13a(x)
253 #endif
254 
255 
/* Work arrays for stage 2.  Stage2_alloc_new allocates every array
   below with max_querylength_alloc elements, and Stage2_alloc_free
   releases them. */
struct Stage2_alloc_T {
  int max_querylength_alloc;	/* Element count used for each array below */

  /* Presumably all indexed by query position -- TODO confirm against
     the code that uses this structure */
  bool *coveredp;
  Chrpos_T **mappings;
  int *npositions;
  unsigned int *minactive;
  unsigned int *maxactive;
  int *firstactive;
  int *nactive;
};
267 
268 void
Stage2_alloc_free(Stage2_alloc_T * old)269 Stage2_alloc_free (Stage2_alloc_T *old) {
270   FREE((*old)->firstactive);
271   FREE((*old)->nactive);
272   FREE((*old)->maxactive);
273   FREE((*old)->minactive);
274   FREE((*old)->npositions);
275   FREE((*old)->mappings);
276   FREE((*old)->coveredp);
277   FREE(*old);
278   return;
279 }
280 
281 Stage2_alloc_T
Stage2_alloc_new(int max_querylength_alloc)282 Stage2_alloc_new (int max_querylength_alloc) {
283   Stage2_alloc_T new = (Stage2_alloc_T) MALLOC(sizeof(*new));
284 
285   new->max_querylength_alloc = max_querylength_alloc;
286 
287   new->coveredp = (bool *) MALLOC(max_querylength_alloc * sizeof(bool));
288   new->mappings = (Chrpos_T **) MALLOC(max_querylength_alloc * sizeof(Chrpos_T *));
289   new->npositions = (int *) MALLOC(max_querylength_alloc * sizeof(int));
290   new->minactive = (unsigned int *) MALLOC(max_querylength_alloc * sizeof(unsigned int));
291   new->maxactive = (unsigned int *) MALLOC(max_querylength_alloc * sizeof(unsigned int));
292   new->firstactive = (int *) MALLOC(max_querylength_alloc * sizeof(int));
293   new->nactive = (int *) MALLOC(max_querylength_alloc * sizeof(int));
294 
295   return new;
296 }
297 
298 
#define T Stage2_T
/* Result of stage 2.  Built by Stage2_new and read through
   Stage2_middle, Stage2_all_starts, and Stage2_all_ends. */
struct T {
  List_T middle;
  List_T all_starts;		/* Each element is passed to Pair_dump_list by the DEBUG0 code in Stage2_new */
  List_T all_ends;		/* Same element type as all_starts */
};
305 
306 
307 void
Stage2_free(T * old)308 Stage2_free (T *old) {
309   /* List_free(&(*old)->middle); -- Not necessary because of pairpool */
310   /* List_free(&(*old)->all_starts); -- Handled by Stage3middle_free */
311   /* List_free(&(*old)->all_ends); -- Handled by Stage3middle_free */
312   FREE(*old);
313   return;
314 }
315 
316 static T
Stage2_new(List_T middle,List_T all_starts,List_T all_ends)317 Stage2_new (List_T middle, List_T all_starts, List_T all_ends) {
318   T new = (T) MALLOC(sizeof(*new));
319 #ifdef DEBUG0
320   List_T p;
321 #endif
322 
323   new->middle = middle;
324   new->all_starts = all_starts;
325   new->all_ends = all_ends;
326 
327 #ifdef DEBUG0
328   printf("Starts:\n");
329   for (p = all_starts; p != NULL; p = List_next(p)) {
330     Pair_dump_list(List_head(p),true);
331   }
332 
333   printf("Ends:\n");
334   for (p = all_ends; p != NULL; p = List_next(p)) {
335     Pair_dump_list(List_head(p),true);
336   }
337 #endif
338 
339   return new;
340 }
341 
342 List_T
Stage2_middle(T this)343 Stage2_middle (T this) {
344   return this->middle;
345 }
346 
347 List_T
Stage2_all_starts(T this)348 Stage2_all_starts (T this) {
349   return this->all_starts;
350 }
351 
352 List_T
Stage2_all_ends(T this)353 Stage2_all_ends (T this) {
354   return this->all_ends;
355 }
356 
357 
358 /************************************************************************/
359 
/* One cell of the link matrix.  The printing routines in this file
   index the matrix as links[querypos][hit].  Scores are not stored
   here (see the note on fwd_score below). */
typedef struct Link_T *Link_T;
struct Link_T {
  int fwd_consecutive;		/* Presumably the length of the consecutive run
				   ending here; the disabled reference scoring
				   code resets it to 0 on a non-consecutive
				   link -- TODO confirm against live code */
  int fwd_rootposition;
  /*int fwd_rootnlinks;*/		/* Number of links in last branch */
  /* int fwd_score; */                  /* Kept as a separate structure */

  /* Back pointer: query position and hit index of the previous link
     (followed this way by the disabled best_path_dump_R) */
  int fwd_pos;
  int fwd_hit;
  int fwd_tracei;		/* Corresponds to a distinct set of branches */

#ifdef DEBUG9
  /* Debugging counters for introns along the path */
  int fwd_intronnfwd;
  int fwd_intronnrev;
  int fwd_intronnunk;
#endif

#ifdef SEPARATE_FWD_REV
  /* No longer checking separate fwd/rev directions */
  int rev_consecutive;
  int rev_rootposition;
  /*int rev_rootnlinks;*/		/* Number of links in last branch */
  int rev_score;

  int rev_pos;
  int rev_hit;

#ifdef DEBUG9
  int rev_tracei;		/* Corresponds to a distinct set of branches */
  int rev_intronnfwd;
  int rev_intronnrev;
  int rev_intronnunk;
#endif	/* rev */

#endif
};
396 
397 
398 /* lengths2 is has length1 entries.  Note that lengths2 may have
399    negative entries */
400 static struct Link_T **
Linkmatrix_1d_new(int length1,int * lengths2,int totallength)401 Linkmatrix_1d_new (int length1, int *lengths2, int totallength) {
402   struct Link_T **links;
403   int i;
404 
405   /* Outer dimension can be MALLOC, but inner one must be CALLOC */
406   links = (struct Link_T **) MALLOC(length1 * sizeof(struct Link_T *));
407   links[0] = (struct Link_T *) CALLOC(totallength,sizeof(struct Link_T));
408   for (i = 1; i < length1; i++) {
409     if (lengths2[i-1] < 0) {
410       links[i] = links[i-1];
411     } else {
412       links[i] = &(links[i-1][lengths2[i-1]]);
413     }
414   }
415   return links;
416 }
417 
/* Releases a matrix made by Linkmatrix_1d_new: the single block of
   cells, which row 0 points to, and then the array of row pointers. */
static void
Linkmatrix_1d_free (struct Link_T ***links) {
  struct Link_T **rows = *links;

  FREE(rows[0]);
  FREE(*links);
}
424 
425 
426 static struct Link_T **
Linkmatrix_2d_new(int length1,int * lengths2)427 Linkmatrix_2d_new (int length1, int *lengths2) {
428   struct Link_T **links;
429   int i;
430 
431   links = (struct Link_T **) CALLOC(length1,sizeof(struct Link_T *));
432   for (i = 0; i < length1; i++) {
433     if (lengths2[i] <= 0) {
434       links[i] = (struct Link_T *) NULL;
435     } else {
436       links[i] = (struct Link_T *) CALLOC(lengths2[i],sizeof(struct Link_T));
437     }
438   }
439   return links;
440 }
441 
/* Releases a matrix made by Linkmatrix_2d_new.  length1 must be the
   row count given at creation.  Rows that are NULL are skipped. */
static void
Linkmatrix_2d_free (struct Link_T ***links, int length1) {
  int row = 0;

  while (row < length1) {
    if ((*links)[row] != NULL) {
      FREE((*links)[row]);
    }
    row++;
  }

  FREE(*links);
}
454 
455 
456 
457 #ifdef DEBUG1
458 #ifdef SEPARATE_FWD_REV
/* Debugging aid, compiled only when DEBUG1 and SEPARATE_FWD_REV are
   both defined.  For each query position 0..length1-indexsize, prints
   the oligomer starting there and, for each of its npositions[i]
   hits, the genomic position followed by the forward and reverse
   score, back pointer (pos,hit), and trace index.

   NOTE(review): struct Link_T as declared in this file has no
   fwd_score member (that declaration is commented out), and
   rev_tracei exists only when DEBUG9 is also defined, so this routine
   looks stale -- confirm before enabling SEPARATE_FWD_REV. */
static void
Linkmatrix_print_both (struct Link_T **links, Chrpos_T **mappings, int length1,
		       int *npositions, char *queryseq_ptr, int indexsize) {
  int i, j;
  char *oligo;

  /* Scratch buffer for one oligomer plus its terminator */
  oligo = (char *) MALLOCA((indexsize+1) * sizeof(char));
  for (i = 0; i <= length1-indexsize; i++) {
    strncpy(oligo,&(queryseq_ptr[i]),indexsize);
    oligo[indexsize] = '\0';	/* strncpy does not terminate when it copies indexsize chars */

    printf("Querypos %d (%s, %d positions):",i,oligo,npositions[i]);
    for (j = 0; j < npositions[i]; j++) {
      printf(" %d.%u:%d(%d,%d)[%u]-%d(%d,%d)[%u]",
	     j,mappings[i][j],links[i][j].fwd_score,
	     links[i][j].fwd_pos,links[i][j].fwd_hit,links[i][j].fwd_tracei,
	     links[i][j].rev_score,
	     links[i][j].rev_pos,links[i][j].rev_hit,links[i][j].rev_tracei);
    }
    printf("\n");
  }
  printf("\n");

  FREEA(oligo);

  return;
}
486 
487 #else
488 
489 /* For PMAP, indexsize is in aa */
490 static void
print_fwd(struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int length1,int * npositions,char * queryseq_ptr,int indexsize)491 print_fwd (struct Link_T **links, int **fwd_scores,
492 	   Chrpos_T **mappings, int length1,
493 	   int *npositions, char *queryseq_ptr, int indexsize) {
494   int i, j, lastpos;
495   char *oligo;
496 
497   oligo = (char *) MALLOCA((indexsize+1) * sizeof(char));
498   lastpos = length1 - indexsize;
499 
500   for (i = 0; i <= lastpos; i++) {
501     strncpy(oligo,&(queryseq_ptr[i]),indexsize);
502     oligo[indexsize] = '\0';
503 
504     printf("Querypos %d (%s, %d positions):",i,oligo,npositions[i]);
505     for (j = 0; j < npositions[i]; j++) {
506       printf(" %d.%u:%d(%d,%d)[%u]",
507 	     j,mappings[i][j],fwd_scores[i][j],
508 	     links[i][j].fwd_pos,links[i][j].fwd_hit,links[i][j].fwd_tracei);
509     }
510     printf("\n");
511   }
512   printf("\n");
513 
514   FREEA(oligo);
515 
516   return;
517 }
518 
519 #endif
520 #endif
521 
522 static void
mappings_dump_R(Chrpos_T ** mappings,int * npositions,int length1,int ** active,int * firstactive,int indexsize,char * varname)523 mappings_dump_R (Chrpos_T **mappings, int *npositions, int length1,
524 		 int **active, int *firstactive, int indexsize, char *varname) {
525   int querypos;
526   int lastpos, hit;
527   bool printp = false;
528 
529   lastpos = length1 - indexsize;
530   printf("%s <- matrix(c(\n",varname);
531   for (querypos = 0; querypos < lastpos; querypos++) {
532     if (firstactive) {
533       if (mappings[querypos] != NULL) {
534 	hit = firstactive[querypos];
535 	while (hit != -1) {
536 	  /* Last elt is for score */
537 	  if (printp == false) {
538 	    printp = true;
539 	  } else {
540 	    printf(",\n");
541 	  }
542 	  printf("%d,%d,%d,%d",querypos,mappings[querypos][hit],
543 		 hit,active[querypos][hit]);
544 	  hit = active[querypos][hit];
545 	}
546       }
547     } else {
548       for (hit = 0; hit < npositions[querypos]; hit++) {
549 	if (printp == false) {
550 	  printp = true;
551 	} else {
552 	  printf(",\n");
553 	}
554 	printf("%d,%d,%d",querypos,mappings[querypos][hit],hit);
555       }
556     }
557   }
558   printf("),ncol=2,byrow=T)\n");
559 
560   return;
561 }
562 
563 
#if 0
/* Disabled debugging aid.  Starting from (querypos, hit), follows the
   back pointers in links until querypos becomes negative, and writes
   two R matrices to stdout: varname, with rows (querypos, genomic
   position), and "scores", with rows (querypos, score).  fwdp selects
   the fwd_* fields; the rev_* fields are used only under
   SEPARATE_FWD_REV.

   NOTE(review): this reads links[..][..].fwd_score, which is no longer
   a member of struct Link_T in this file, so the code would need
   updating before it could be re-enabled. */
static void
best_path_dump_R (struct Link_T **links, Chrpos_T **mappings,
		  int querypos, int hit, bool fwdp, char *varname) {
  Chrpos_T position;
  int prev_querypos, prevhit, save_querypos, savehit;
  bool printp = false;

  /* Remember the starting cell so that the path can be walked twice */
  save_querypos = querypos;
  savehit = hit;

  printf("%s <- matrix(c(\n",varname);
  prev_querypos = querypos+1;	/* Overwritten inside the loop before it is read */
  while (querypos >= 0) {
    position = mappings[querypos][hit];

    if (printp == false) {
      printp = true;
    } else {
      printf(",\n");
    }
    printf("%d,%d",querypos,position);

    /* Step back to the previous link */
    prev_querypos = querypos;
    prevhit = hit;
    if (fwdp) {
      querypos = links[prev_querypos][prevhit].fwd_pos;
      hit = links[prev_querypos][prevhit].fwd_hit;
#ifdef SEPARATE_FWD_REV
    } else {
      querypos = links[prev_querypos][prevhit].rev_pos;
      hit = links[prev_querypos][prevhit].rev_hit;
#endif
    }
  }
  printf("),ncol=2,byrow=T)\n");

  /* Second pass over the same path, this time printing scores */
  querypos = save_querypos;
  hit = savehit;

  printp = false;
  printf("%s <- matrix(c(\n","scores");
  prev_querypos = querypos+1;
  while (querypos >= 0) {
    position = mappings[querypos][hit];	/* Not used in this pass */

    if (printp == false) {
      printp = true;
    } else {
      printf(",\n");
    }
    if (fwdp == true) {
      printf("%d,%d",querypos,links[querypos][hit].fwd_score);
#ifdef SEPARATE_FWD_REV
    } else {
      printf("%d,%d",querypos,links[querypos][hit].rev_score);
#endif
    }

    prev_querypos = querypos;
    prevhit = hit;
    if (fwdp) {
      querypos = links[prev_querypos][prevhit].fwd_pos;
      hit = links[prev_querypos][prevhit].fwd_hit;
#ifdef SEPARATE_FWD_REV
    } else {
      querypos = links[prev_querypos][prevhit].rev_pos;
      hit = links[prev_querypos][prevhit].rev_hit;
#endif
    }
  }
  printf("),ncol=2,byrow=T)\n");

  return;
}
#endif
640 
641 static void
active_bounds_dump_R(Chrpos_T * minactive,Chrpos_T * maxactive,int querylength)642 active_bounds_dump_R (Chrpos_T *minactive, Chrpos_T *maxactive,
643 		      int querylength) {
644   int querypos;
645   bool printp = false;
646 
647   printf("querypos <- 0:%d\n",querylength-1);
648   printf("%s <- c(\n","minactive");
649   for (querypos = 0; querypos < querylength; querypos++) {
650     if (printp == false) {
651       printp = true;
652     } else {
653       printf(",\n");
654     }
655     printf("%d",minactive[querypos]);
656   }
657   printf(")\n");
658 
659   printp = false;
660   printf("%s <- c(\n","maxactive");
661   for (querypos = 0; querypos < querylength; querypos++) {
662     if (printp == false) {
663       printp = true;
664     } else {
665       printf(",\n");
666     }
667     printf("%d",maxactive[querypos]);
668   }
669   printf(")\n");
670 
671   return;
672 }
673 
674 
675 #ifdef PMAP
676 #define QUERYDIST_PENALTY_FACTOR 2
677 #else
678 #define QUERYDIST_PENALTY_FACTOR 8
679 #endif
680 
681 
682 /************************************************************************
683  *  Procedures for finding canonical introns quickly
684  ************************************************************************/
685 
686 #ifdef DEBUG8
687 
688 static void
print_last_dinucl(int * last_dinucl,int genomiclength)689 print_last_dinucl (int *last_dinucl, int genomiclength) {
690   int pos;
691 
692   for (pos = 0; pos < genomiclength - 3 + SHIFT_EXTRA; pos++) {
693     printf("%d %d\n",pos,last_dinucl[pos]);
694   }
695   printf("\n");
696 
697   return;
698 }
699 
700 #endif
701 
702 
#if 0
/* Need this procedure because we are skipping some oligomers */
/* Disabled.  Decides whether a pair of sites, as reported by
   genome_left_position and genome_right_position, can be found at the
   same offset back from leftpos and rightpos.

   Both positions are first advanced by SHIFT_EXTRA (clamped to
   chrhigh - 3).  Each iteration queries both callbacks, computes how
   far back each reported site lies (leftmiss, rightmiss), and returns
   true when the two distances are equal.  Otherwise both positions
   move back by the larger distance and the search repeats, until the
   accumulated shift exceeds querydistance + 2*SHIFT_EXTRA.  Returns
   false when leftpos >= rightpos on entry, when leftpos drops below 3
   or passes rightpos, or when either callback returns -1.

   NOTE(review): skip_repetitive_p is referenced only inside the nested
   #if 0 section.
   NOTE(review): leftpos and rightpos are Chrpos_T but are compared
   with chrhigh - 3, where chrhigh is a Univcoord_T -- confirm that
   both are in the same coordinate system.
   NOTE(review): the callbacks are presumably expected to return the
   nearest site at or before the given position, no lower than the
   given bound -- TODO confirm. */
static bool
find_shifted_canonical (Chrpos_T leftpos, Chrpos_T rightpos, int querydistance,
			Chrpos_T (*genome_left_position)(Chrpos_T, Chrpos_T, Univcoord_T, Univcoord_T, bool),
			Chrpos_T (*genome_right_position)(Chrpos_T, Chrpos_T, Univcoord_T, Univcoord_T, bool),
			Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, bool skip_repetitive_p) {
  Chrpos_T leftdi, rightdi;
  Chrpos_T last_leftpos, last_rightpos;
  int shift, leftmiss, rightmiss;
  Chrpos_T left_chrbound, right_chrbound;

  /* leftpos = prevposition + querydistance + indexsize_nt - 1; */
  /* rightpos = position; */

  debug7(printf("Looking for shifted canonical at leftpos %u to rightpos %u, chroffset %u, chrhigh %u\n",
		leftpos,rightpos,chroffset,chrhigh));

#if 0
  /* previously checked against genomiclength */
  if (leftpos > genomiclength || rightpos > genomiclength) {
    return false;
  }
#else
  /* Checking just before call to genome_right_position */
#endif

  if (leftpos >= rightpos) {
    debug7(printf("leftpos %u >= rightpos %u, so returning false\n",leftpos,rightpos));
    return false;
  }

  /* Lower search bounds: 100 below the original (unshifted) positions,
     but never below 3 */
  if (leftpos < 103) {
    left_chrbound = 3;	/* Previously 0, but then can find splice site at beginning of segment */
  } else {
    left_chrbound = leftpos - 100;
  }

  if (rightpos < 103) {
    right_chrbound = 3;	/* Previously 0, but then can find splice site at beginning of segment */
  } else {
    right_chrbound = rightpos - 100;
  }

#if 0
  if (skip_repetitive_p == false) {

    last_leftpos = (*genome_left_position)(leftpos,left_chrbound,chroffset,chrhigh,plusp);
    last_rightpos = (*genome_right_position)(rightpos,right_chrbound,chroffset,chrhigh,plusp);
    debug7(printf("last_leftpos %u, last_rightpos %u\n",last_leftpos,last_rightpos));

    debug7(printf("skip_repetitive_p == false, so returning %u == %u && %u == %u\n",
		  leftpos,last_leftpos,rightpos,last_rightpos));
    return (leftpos == last_leftpos && rightpos == last_rightpos);
  }
#endif

  /* Allow canonical to be to right of match */
  leftpos += SHIFT_EXTRA;
  if (leftpos > chrhigh - 3) {
    leftpos = chrhigh - 3;
  }
  rightpos += SHIFT_EXTRA;
  if (rightpos > chrhigh - 3) {
    rightpos = chrhigh - 3;
  }
  debug7(printf("after shift, leftpos = %u, rightpos = %u\n",leftpos,rightpos));

  /* shift accumulates how far both positions have been moved back */
  shift = 0;
  while (shift <= querydistance + SHIFT_EXTRA + SHIFT_EXTRA) {

#if 0
    if (leftpos < 0) {
      return false;
    } else if (rightpos < 0) {
      /* Shouldn't need to check if leftpos >= 0 and rightpos >= leftpos, in the other two conditions) */
      return false;
    } else if (rightpos >= chrlength) {
      return false;
    }
#endif
    if (leftpos < 3) {
      return false;
    } else if (leftpos > rightpos) {
      return false;
    }

    last_leftpos = (*genome_left_position)(leftpos,left_chrbound,chroffset,chrhigh,plusp);
    debug7(printf("last_leftpos %u\n",last_leftpos));
    assert(last_leftpos != 0U);
    /* -1 is the "not found" value; if Chrpos_T is unsigned, this
       comparison relies on -1 being converted to the maximum value */
    if ((leftdi = last_leftpos) == -1) {
      debug7(printf("\n"));
      return false;
    } else {
      leftmiss = (int) (leftpos - leftdi);
    }

    last_rightpos = (*genome_right_position)(rightpos,right_chrbound,chroffset,chrhigh,plusp);
    debug7(printf("last_rightpos %u\n",last_rightpos));
    assert(last_rightpos != 0U);
    if ((rightdi = last_rightpos) == -1) {
      debug7(printf("\n"));
      return false;
    } else {
      rightmiss = (int) (rightpos - rightdi);
    }

    debug7(printf("shift %d/left %d (miss %d)/right %d (miss %d)\n",shift,leftpos,leftmiss,rightpos,rightmiss));
    if (leftmiss == rightmiss) {  /* was leftmiss == 0 && rightmiss == 0, which doesn't allow for a shift */
      debug7(printf(" => Success at %u..%u (fwd) or %u..%u (rev)\n\n",
		    leftpos-leftmiss+/*onebasedp*/1U,rightpos-rightmiss+/*onebasedp*/1U,
		    chrhigh-chroffset-(leftpos-leftmiss),chrhigh-chroffset-(rightpos-rightmiss)));
      return true;
    } else if (leftmiss >= rightmiss) {
      /* Move both positions back by the larger miss and try again */
      shift += leftmiss;
      leftpos -= leftmiss;
      rightpos -= leftmiss;
    } else {
      shift += rightmiss;
      leftpos -= rightmiss;
      rightpos -= rightmiss;
    }
  }

  debug7(printf("\n"));
  return false;
}
#endif
831 
832 
833 
834 
835 #if 0
/* General (non-SIMD) case for ranges in score_querypos.  This loop is
   disabled by the surrounding #if 0. */
837 while (prevhit != -1 && (prevposition = mappings[prev_querypos][prevhit]) + indexsize_nt <= position) {
838   /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
839   prevlink = &(links[prev_querypos][prevhit]);
840 
841   gendistance = position - prevposition - indexsize_nt;
842   /* diffdistance = abs(gendistance - querydistance); */
843   if (gendistance > querydistance) {
844     diffdistance = gendistance - querydistance;
845   } else {
846     diffdistance = querydistance - gendistance;
847   }
848 
849   if (diffdistance < maxintronlen) {
850     if (diffdistance <= EQUAL_DISTANCE_NOT_SPLICING) {
851       debug9(canonicalsgn = 9);
852       fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
853 #ifdef PMAP
854       if (diffdistance % 3 != 0) {
855 	fwd_score -= NONCODON_INDEL_PENALTY;
856       }
857 #endif
858     } else if (near_end_p == false && prevlink->fwd_consecutive < EXON_DEFN) {
859       debug9(canonicalsgn = 0);
860       if (splicingp == true) {
861 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
862       } else {
863 	fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
864       }
865 
866     } else if (splicingp == false) {
867       debug9(canonicalsgn = 0);
868       fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty;
869 
870     } else if (use_shifted_canonical_p == true) {
871       leftpos = prevposition + querydistance - 1;
872       /* printf("leftpos %d, last_leftpos %d, rightpos %d\n",leftpos,last_leftpos,rightpos); */
873       if (leftpos == last_leftpos) {
874 	canonicalp = last_canonicalp;
875       } else {
876 	debug7(printf("Calling find_shift_canonical fwd\n"));
877 	canonicalp = find_shifted_canonical(leftpos,rightpos,querydistance-indexsize_nt,
878 					    /* &lastGT,&lastAG, */
879 					    Genome_prev_donor_position,Genome_prev_acceptor_position,
880 					    chroffset,chrhigh,plusp,skip_repetitive_p);
881 	/* And need to check for shift_canonical_rev */
882 
883 	last_leftpos = leftpos;
884 	last_canonicalp = canonicalp;
885       }
886       if (canonicalp == true) {
887 	debug9(canonicalsgn = +1);
888 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
889       } else {
890 	debug9(canonicalsgn = 0);
891 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
892       }
893 
894     } else {
895       debug9(canonicalsgn = +1);
896       fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
897     }
898 
899     debug9(printf("\tD. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
900 		  prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
901 		  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
902 		  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
903 		  gendistance,querydistance,canonicalsgn));
904 
905     /* Allow ties, which should favor shorter intron */
906     if (fwd_score >= best_fwd_score) {
907       if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
908 	best_fwd_consecutive = prevlink->fwd_consecutive + (querydistance + indexsize_nt);
909 	/* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
910       } else {
911 	best_fwd_consecutive = 0;
912 	/* best_fwd_rootnlinks = 1; */
913       }
914       best_fwd_score = fwd_score;
915       best_fwd_prevpos = prev_querypos;
916       best_fwd_prevhit = prevhit;
917 #ifdef DEBUG9
918       best_fwd_tracei = ++*fwd_tracei;
919       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
920       best_fwd_intronnrev = prevlink->fwd_intronnrev;
921       best_fwd_intronnunk = prevlink->fwd_intronnunk;
922       switch (canonicalsgn) {
923       case 1: best_fwd_intronnfwd++; break;
924       case 0: best_fwd_intronnunk++; break;
925       }
926 #endif
927       debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
928     } else {
929       debug9(printf(" => Loses to %d\n",best_fwd_score));
930     }
931   }
932 
933   prevhit = active[prev_querypos][prevhit];
934  }
935 #endif
936 
937 
938 #if 0
939 /* SIMD version */
940 _positions = _mm_set1_epi32(position - indexsize_nt);
941 _querydistance = _mm_set1_epi32(querydistance);
942 _splicing_querydist_penalty = _mm_set1_epi32(querydist_penalty+1+NINTRON_PENALTY_MISMATCH);
943 _max_scores = _mm_set1_epi32(-1000);
944 
945 prevhit = low_hit;
946 while (prevhit + 4 < high_hit) {
947   /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
948   _prevpositions = _mm_loadu_epi32(&(mappings[prev_querypos][prevhit]));
949   _gendistance = _mm_sub_epi32(_positions,_prevpositions);
950   if (_mm_cmpgt_epi32(_gendistance,_zeroes) == 0) {
951     break;
952   } else {
953     _diffdistance = _mm_abs_epi32(_mm_sub_epi32(_gendistance,_querydistance));
954 
955     _prev_scores = _mm_loadu_epi32(&(fwd_scores[prev_querypos][prevhit]));
956 
957     _scores_close = _mm_add_epi32(_prev_scores,_mm_set1_epi32(CONSEC_POINTS_PER_MATCH));
958     /* Right shift of 13 bits gives division by 8192 */
959     _scores_splice = _mm_add_epi32(_prev_scores,_mm_sub_epi32(_mm_srli_epi32(_diffdistance,13),_splicing_querydist_penalty));
960 
961     _scores = _mm_blendv_ps(_scores_close,_scores_splice,_mm_cmpgt_epi32(_diffdistance,_mm_set1_epi32(EQUAL_DISTANCE_NOT_SPLICING)));
962 
963     _mm_storeu_epi32(_scores);
964 
965     _max_scores = _mm_max_epi32(_max_scores,_scores);
966     prevhit += 4;
967   }
968 }
969 
970 /* Take care of serial cases */
971 
972 
973 
974 
975 /* Compute overall max and return.  Caller can find prev_querypos with
976    largest max and store in fwd_pos[curr_querypos][currhit] and max in
977    fwd_max[curr_querypos][currhit].  During traceback, recompute at
978    prev_querypos and find prevhit that gives the max.  */
979 
980   if (diffdistance < maxintronlen) {
981     if (diffdistance <= EQUAL_DISTANCE_NOT_SPLICING) {
982       debug9(canonicalsgn = 9);
983       fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
984 #ifdef PMAP
985       if (diffdistance % 3 != 0) {
986 	fwd_score -= NONCODON_INDEL_PENALTY;
987       }
988 #endif
989     } else if (near_end_p == false && prevlink->fwd_consecutive < EXON_DEFN) {
990       debug9(canonicalsgn = 0);
991       if (splicingp == true) {
992 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
993       } else {
994 	fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
995       }
996 
997     } else if (splicingp == false) {
998       debug9(canonicalsgn = 0);
999       fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty;
1000 
1001     } else if (use_shifted_canonical_p == true) {
1002       leftpos = prevposition + querydistance - 1;
1003       /* printf("leftpos %d, last_leftpos %d, rightpos %d\n",leftpos,last_leftpos,rightpos); */
1004       if (leftpos == last_leftpos) {
1005 	canonicalp = last_canonicalp;
1006       } else {
1007 	debug7(printf("Calling find_shift_canonical fwd\n"));
1008 	canonicalp = find_shifted_canonical(leftpos,rightpos,querydistance-indexsize_nt,
1009 					    /* &lastGT,&lastAG, */
1010 					    Genome_prev_donor_position,Genome_prev_acceptor_position,
1011 					    chroffset,chrhigh,plusp,skip_repetitive_p);
1012 	/* And need to check for shift_canonical_rev */
1013 
1014 	last_leftpos = leftpos;
1015 	last_canonicalp = canonicalp;
1016       }
1017       if (canonicalp == true) {
1018 	debug9(canonicalsgn = +1);
1019 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
1020       } else {
1021 	debug9(canonicalsgn = 0);
1022 	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
1023       }
1024 
1025     } else {
1026       debug9(canonicalsgn = +1);
1027       fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
1028     }
1029 
1030     debug9(printf("\tD. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d, intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
1031 		  prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
1032 		  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,
1033 		  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
1034 		  gendistance,querydistance,canonicalsgn));
1035 
1036     /* Allow ties, which should favor shorter intron */
1037     if (fwd_score >= best_fwd_score) {
1038       if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
1039 	best_fwd_consecutive = prevlink->fwd_consecutive + (querydistance + indexsize_nt);
1040 	/* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
1041       } else {
1042 	best_fwd_consecutive = 0;
1043 	/* best_fwd_rootnlinks = 1; */
1044       }
1045       best_fwd_score = fwd_score;
1046       best_fwd_prevpos = prev_querypos;
1047       best_fwd_prevhit = prevhit;
1048 #ifdef DEBUG9
1049       best_fwd_tracei = ++*fwd_tracei;
1050       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1051       best_fwd_intronnrev = prevlink->fwd_intronnrev;
1052       best_fwd_intronnunk = prevlink->fwd_intronnunk;
1053       switch (canonicalsgn) {
1054       case 1: best_fwd_intronnfwd++; break;
1055       case 0: best_fwd_intronnunk++; break;
1056       }
1057 #endif
1058       debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
1059     } else {
1060       debug9(printf(" => Loses to %d\n",best_fwd_score));
1061     }
1062   }
1063 
1064   prevhit = active[prev_querypos][prevhit];
1065  }
1066 #endif
1067 
1068 
1069 static void
score_querypos_lookback_one(int * fwd_tracei,Link_T currlink,int curr_querypos,int currhit,unsigned int position,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int ** active,int * firstactive,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,Intlist_T processed,bool anchoredp,bool localp,bool splicingp,bool use_canonical_p,int non_canonical_penalty)1070 score_querypos_lookback_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit,
1071 			     unsigned int position,
1072 			     struct Link_T **links, int **fwd_scores, Chrpos_T **mappings,
1073 			     int **active, int *firstactive,
1074 			     Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
1075 			     int indexsize, Intlist_T processed,
1076 #ifdef MOVE_TO_STAGE3
1077 			     bool anchoredp,
1078 #endif
1079 			     bool localp, bool splicingp,
1080 			     bool use_canonical_p, int non_canonical_penalty) {
1081   Link_T prevlink;
1082   struct Link_T *prev_links;
1083   Chrpos_T *prev_mappings;
1084   int *prev_active;
1085 
1086   int best_fwd_consecutive = indexsize*NT_PER_MATCH;
1087   int best_fwd_rootposition = position;
1088   /* int best_fwd_rootnlinks = 1; */
1089   int best_fwd_score = 0, fwd_score;
1090   int best_fwd_prevpos = -1, best_fwd_prevhit = -1;
1091   int best_fwd_tracei, last_tracei;
1092 #ifdef DEBUG9
1093   int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
1094   int canonicalsgn = 0;
1095 #endif
1096   bool donep;
1097   int prev_querypos, prevhit;
1098   Chrpos_T prevposition;
1099   int gendistance;
1100   Univcoord_T prevpos, currpos;
1101   int querydistance, diffdistance, lookback, nlookback, nseen, indexsize_nt;
1102   /* int querydist_penalty; */
1103   int querydist_credit;
1104   /* bool near_end_p; */
1105   bool canonicalp;
1106 
1107 #ifdef PMAP
1108   indexsize_nt = indexsize*3;	/* Use when evaluating across genomic positions */
1109 #else
1110   indexsize_nt = indexsize;
1111 #endif
1112 #if 0
1113   indexsize_query = indexsize;	/* Use when evaluating across query positions */
1114 #endif
1115 
1116 
1117   /* Parameters for section D, assuming adjacent is false */
1118   /* adjacentp = false; */
1119   nlookback = nsufflookback;
1120   lookback = sufflookback;
1121 
1122   /* A. Evaluate adjacent position (at last one processed) */
1123   if (processed != NULL) {
1124     prev_querypos = Intlist_head(processed);
1125     prev_links = links[prev_querypos];
1126     prev_mappings = mappings[prev_querypos];
1127     prev_active = active[prev_querypos];
1128 
1129 #ifdef PMAP
1130     querydistance = (curr_querypos - prev_querypos)*3;
1131 #else
1132     querydistance = curr_querypos - prev_querypos;
1133 #endif
1134     prevhit = firstactive[prev_querypos];
1135     prevposition = position;	/* Prevents prevposition + querydistance == position */
1136     while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + querydistance < position) {
1137       prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1138     }
1139     if (prevposition + querydistance == position) {
1140       prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
1141       best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
1142       best_fwd_rootposition = prevlink->fwd_rootposition;
1143       /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
1144       best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance;
1145 
1146       best_fwd_prevpos = prev_querypos;
1147       best_fwd_prevhit = prevhit;
1148       best_fwd_tracei = prevlink->fwd_tracei;
1149 #ifdef DEBUG9
1150       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1151       best_fwd_intronnrev = prevlink->fwd_intronnrev;
1152       best_fwd_intronnunk = prevlink->fwd_intronnunk;
1153 #endif
1154       /* adjacentp = true; */
1155 
1156       /* Parameters for section D when adjacent is true, so we don't look so far back */
1157       nlookback = 1;
1158       lookback = sufflookback/2;
1159 
1160 
1161       debug9(printf("\tA. Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
1162 		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit],
1163 		    best_fwd_score,best_fwd_consecutive,best_fwd_tracei,
1164 		    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
1165     }
1166   }
1167 
1168 
1169 #ifdef MOVE_TO_STAGE3
1170   /* Check work list */
1171   if (anchoredp && curr_querypos - indexsize_query <= querystart) {
1172     /* Allow close prevpositions that overlap with anchor */
1173     /* Can give rise to false positives, and increases amount of dynamic programming work */
1174   } else if (0 && anchoredp && curr_querypos == queryend) {
1175     /* Test first position */
1176   } else if (0) {
1177     while (processed != NULL && (prev_querypos = Intlist_head(processed)) > curr_querypos - indexsize_query) {
1178       debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
1179       processed = Intlist_next(processed);
1180     }
1181   }
1182 #endif
1183 
1184   /* D. Evaluate for mismatches (all other previous querypos) */
1185   donep = false;
1186   nseen = 0;
1187   last_tracei = -1;
1188   for ( ; processed != NULL && best_fwd_consecutive < ENOUGH_CONSECUTIVE && donep == false;
1189 	processed = Intlist_next(processed), nseen++) {
1190     prev_querypos = Intlist_head(processed);
1191 
1192 #ifdef PMAP
1193     querydistance = (curr_querypos - prev_querypos)*3;
1194 #else
1195     querydistance = curr_querypos - prev_querypos;
1196 #endif
1197 
1198     if (nseen > nlookback && querydistance - indexsize_nt > lookback) {
1199       donep = true;
1200     }
1201 
1202     if ((prevhit = firstactive[prev_querypos]) != -1) {
1203       /* querydist_penalty = (querydistance - indexsize_nt)/QUERYDIST_PENALTY_FACTOR; */
1204       /* Actually a querydist_penalty */
1205       querydist_credit = -querydistance/indexsize_nt;
1206 
1207       prev_mappings = mappings[prev_querypos];
1208       prev_links = links[prev_querypos];
1209       prev_active = active[prev_querypos];
1210 
1211       /* Range 0 */
1212       while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
1213 	debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
1214 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1215       }
1216       if (prevhit != -1) {
1217 	last_tracei = prev_links[prevhit].fwd_tracei;
1218       }
1219 
1220       /* Range 1: From Infinity to maxintronlen */
1221       if (splicingp == true) {
1222 	/* This is equivalent to diffdistance >= maxintronlen, where
1223 	   diffdistance = abs(gendistance - querydistance) and
1224 	   gendistance = (position - prevposition - indexsize_nt) */
1225 	while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + maxintronlen + querydistance <= position) {
1226 	  /* Skip */
1227 	  /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
1228 	  prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1229 	}
1230       }
1231 
1232       /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
1233       /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
1234       while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + EQUAL_DISTANCE_NOT_SPLICING + querydistance < position) {
1235 	/* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
1236 	prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
1237 
1238 	gendistance = position - prevposition;
1239 	assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
1240 	diffdistance = gendistance - querydistance; /* No need for abs() */
1241 
1242 	fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
1243 	if (splicingp == true) {
1244 	  fwd_score -= (diffdistance/TEN_THOUSAND + 1);
1245 	} else {
1246 	  fwd_score -= (diffdistance/ONE + 1);
1247 	}
1248 
1249 	if (use_canonical_p == true) {
1250 
1251 	  /* prevpos is lower genomic coordinate than currpos */
1252 	  /* need to subtract from position and prevposition to compensate for greedy matches */
1253 	  /* need to add to position and prevposition to compensate for missed matches */
1254 	  if (plusp == true) {
1255 	    prevpos = chroffset + prevposition + indexsize_nt;
1256 	    currpos = chroffset + position - querydistance + indexsize_nt;
1257 	    assert(prevpos < currpos);
1258 
1259 	    if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
1260 	      canonicalp = false;
1261 	    } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
1262 					       /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
1263 					       /*acceptor_rightbound*/currpos + MISS_BEHIND,
1264 					       /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
1265 					       chroffset) == true) {
1266 	      debug9(printf("lookback plus: sense canonical\n"));
1267 	      canonicalp = true;
1268 	    } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
1269 						   /*donor_leftbound*/currpos - GREEDY_ADVANCE,
1270 						   /*acceptor_rightbound*/prevpos + MISS_BEHIND,
1271 						   /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
1272 						   chroffset) == true) {
1273 	      debug9(printf("lookback plus: antisense canonical\n"));
1274 	      canonicalp = true;
1275 	    } else {
1276 	      debug9(printf("lookback plus: not canonical\n"));
1277 	      canonicalp = false;
1278 	    }
1279 
1280 	  } else {
1281 	    prevpos = chrhigh + 1 - prevposition - indexsize_nt;
1282 	    currpos = chrhigh + 1 - position + querydistance - indexsize_nt;
1283 	    assert(currpos < prevpos);
1284 
1285 	    if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
1286 	      canonicalp = false;
1287 	    } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
1288 					       /*donor_leftbound*/currpos - MISS_BEHIND,
1289 					       /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
1290 					       /*acceptor_leftbound*/prevpos - MISS_BEHIND,
1291 					       chroffset) == true) {
1292 	      debug9(printf("lookback minus: sense canonical\n"));
1293 	      canonicalp = true;
1294 	    } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
1295 						   /*donor_leftbound*/prevpos - MISS_BEHIND,
1296 						   /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
1297 						   /*acceptor_leftbound*/currpos - MISS_BEHIND,
1298 						   chroffset) == true) {
1299 	      debug9(printf("lookback minus: antisense canonical\n"));
1300 	      canonicalp = true;
1301 	    } else {
1302 	      debug9(printf("lookback minus: not canonical\n"));
1303 	      canonicalp = false;
1304 	    }
1305 	  }
1306 
1307 	  if (canonicalp == true) {
1308 	    debug9(canonicalsgn = +1);
1309 	  } else {
1310 	    debug9(canonicalsgn = 0);
1311 	    fwd_score -= non_canonical_penalty;
1312 	  }
1313 
1314 	}
1315 
1316 	debug9(printf("\tD2. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
1317 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
1318 		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
1319 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
1320 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
1321 
1322 	/* Disallow ties, which should favor adjacent */
1323 	if (fwd_score > best_fwd_score) {
1324 	  if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
1325 	    best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
1326 	    /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
1327 	  } else {
1328 	    best_fwd_consecutive = 0;
1329 	    /* best_fwd_rootnlinks = 1; */
1330 	  }
1331 	  best_fwd_rootposition = prevlink->fwd_rootposition;
1332 	  best_fwd_score = fwd_score;
1333 	  best_fwd_prevpos = prev_querypos;
1334 	  best_fwd_prevhit = prevhit;
1335 	  best_fwd_tracei = ++*fwd_tracei;
1336 #ifdef DEBUG9
1337 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1338 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
1339 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
1340 	  switch (canonicalsgn) {
1341 	  case 1: best_fwd_intronnfwd++; break;
1342 	  case 0: best_fwd_intronnunk++; break;
1343 	  }
1344 #endif
1345 	  debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
1346 	} else {
1347 	  debug9(printf(" => Loses to %d\n",best_fwd_score));
1348 	}
1349 
1350 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1351       }
1352 
1353       /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
1354       /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
1355       /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
1356 
1357       /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
1358       while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + indexsize_nt <= position) {
1359 	/* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
1360 	prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
1361 
1362 	gendistance = position - prevposition;
1363 	/* was abs(gendistance - querydistance) */
1364 	diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
1365 
1366 #ifdef BAD_GMAX
1367 	fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
1368 #else
1369 	/* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
1370 	/* This is how version 2013-08-14 did it */
1371 	fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
1372 #endif
1373 
1374 #if 0
1375 	/* Used in range 4 but not in range 3 */
1376 	if (/*near_end_p == false &&*/ prevlink->fwd_consecutive < EXON_DEFN) {
1377 	  fwd_score -= NINTRON_PENALTY_MISMATCH;
1378 	}
1379 #endif
1380 
1381 	debug9(printf("\tD4. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
1382 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
1383 		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
1384 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
1385 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
1386 
1387 	/* Disallow ties, which should favor adjacent */
1388 	if (fwd_score > best_fwd_score) {
1389 	  if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
1390 	    best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
1391 	    /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
1392 	  } else {
1393 	    best_fwd_consecutive = 0;
1394 	    /* best_fwd_rootnlinks = 1; */
1395 	  }
1396 	  best_fwd_rootposition = prevlink->fwd_rootposition;
1397 	  best_fwd_score = fwd_score;
1398 	  best_fwd_prevpos = prev_querypos;
1399 	  best_fwd_prevhit = prevhit;
1400 	  /* best_fwd_tracei = ++*fwd_tracei; */
1401 	  best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
1402 #ifdef DEBUG9
1403 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1404 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
1405 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
1406 	  switch (canonicalsgn) {
1407 	  case 1: best_fwd_intronnfwd++; break;
1408 	  case 0: best_fwd_intronnunk++; break;
1409 	  }
1410 #endif
1411 	  debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
1412 	} else {
1413 	  debug9(printf(" => Loses to %d\n",best_fwd_score));
1414 	}
1415 
1416 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1417       }
1418     }
1419   }
1420 
1421   /* Best_score needs to beat something positive to prevent a
1422      small local extension from beating a good canonical intron.
1423      If querypos is too small, don't insert an intron.  */
1424   /* linksconsecutive already assigned above */
1425   currlink->fwd_consecutive = best_fwd_consecutive;
1426   currlink->fwd_rootposition = best_fwd_rootposition;
1427   /* currlink->fwd_rootnlinks = best_fwd_rootnlinks; */
1428   currlink->fwd_pos = best_fwd_prevpos;
1429   currlink->fwd_hit = best_fwd_prevhit;
1430   if (currlink->fwd_pos >= 0) {
1431     currlink->fwd_tracei = best_fwd_tracei;
1432     fwd_scores[curr_querypos][currhit] = best_fwd_score;
1433 #ifdef MOVE_TO_STAGE3
1434   } else if (anchoredp == true) {
1435     currlink->fwd_tracei = -1;
1436     fwd_scores[curr_querypos][currhit] = -100000;
1437 #endif
1438   } else if (localp == true) {
1439     currlink->fwd_tracei = ++*fwd_tracei;
1440     fwd_scores[curr_querypos][currhit] = indexsize_nt;
1441   } else {
1442     currlink->fwd_tracei = ++*fwd_tracei;
1443     fwd_scores[curr_querypos][currhit] = best_fwd_score;
1444   }
1445 
1446 #ifdef DEBUG9
1447   currlink->fwd_intronnfwd = best_fwd_intronnfwd;
1448   currlink->fwd_intronnrev = best_fwd_intronnrev;
1449   currlink->fwd_intronnunk = best_fwd_intronnunk;
1450 #endif
1451 
1452   debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
1453 		currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei));
1454   debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
1455 
1456   return;
1457 }
1458 
1459 
1460 
1461 
1462 static void
score_querypos_lookback_mult(int * fwd_tracei,int low_hit,int high_hit,int curr_querypos,unsigned int * positions,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int ** active,int * firstactive,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,Intlist_T processed,bool anchoredp,bool localp,bool splicingp,bool use_canonical_p,int non_canonical_penalty)1463 score_querypos_lookback_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos,
1464 			      unsigned int *positions,
1465 			      struct Link_T **links, int **fwd_scores,
1466 			      Chrpos_T **mappings, int **active, int *firstactive,
1467 			      Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
1468 			      int indexsize, Intlist_T processed,
1469 #ifdef MOVE_TO_STAGE3
1470 			      bool anchoredp,
1471 #endif
1472 			      bool localp, bool splicingp,
1473 			      bool use_canonical_p, int non_canonical_penalty) {
1474   Link_T prevlink, currlink;
1475   Intlist_T last_item, p;
1476   int nhits = high_hit - low_hit, nprocessed, hiti;
1477 
1478   struct Link_T *prev_links, *adj_links;
1479   Chrpos_T *prev_mappings, *adj_mappings;
1480   int *prev_active, *adj_active;
1481 
1482   int overall_fwd_consecutive, best_fwd_consecutive;
1483   int best_fwd_rootposition;
1484   int best_fwd_score, fwd_score;
1485   int best_fwd_prevpos, best_fwd_prevhit;
1486   int best_fwd_tracei, last_tracei;
1487 #ifdef DEBUG9
1488   int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk;
1489   int canonicalsgn = 0;
1490 #endif
1491   int adj_querypos, adj_querydistance, prev_querypos, prevhit, adj_frontier, *frontier;
1492   Chrpos_T prevposition, position;
1493   int gendistance;
1494   Univcoord_T prevpos, currpos;
1495   int querydistance, diffdistance, indexsize_nt;
1496   int max_nseen, max_adjacent_nseen, max_nonadjacent_nseen, nseen;
1497   int querydist_credit;
1498   bool canonicalp;
1499 
1500 #ifdef PMAP
1501   indexsize_nt = indexsize*3;	/* Use when evaluating across genomic positions */
1502 #else
1503   indexsize_nt = indexsize;
1504 #endif
1505 #if 0
1506   indexsize_query = indexsize;	/* Use when evaluating across query positions */
1507 #endif
1508 
1509 
1510   /* Determine work load */
1511   /* printf("Work load (lookback): %s\n",Intlist_to_string(processed)); */
1512   last_item = processed;
1513 #ifdef MOVE_TO_STAGE3
1514   if (anchoredp && curr_querypos - indexsize_query <= querystart) {
1515     /* Allow close prevpositions that overlap with anchor */
1516     /* Can give rise to false positives, and increases amount of dynamic programming work */
1517     /* debug9(printf("No skipping because close to anchor\n")); */
1518   } else if (0 && anchoredp && curr_querypos == queryend) {
1519     /* Test first position */
1520   } else if (0) {
1521     while (processed != NULL && (/*prev_querypos =*/ Intlist_head(processed)) > curr_querypos - indexsize_query) {
1522       debug9(printf("Skipping prev_querypos %d, because too close\n",Intlist_head(processed)));
1523       processed = Intlist_next(processed);
1524     }
1525   }
1526 #endif
1527 
1528   if (last_item == NULL) {
1529     for (hiti = 0; hiti < nhits; hiti++) {
1530       currlink = &(links[curr_querypos][hiti + low_hit]);
1531 
1532       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
1533       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
1534       currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
1535       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
1536 
1537 #ifdef MOVE_TO_STAGE3
1538       if (anchoredp == true) {
1539 	currlink->fwd_tracei = -1;
1540 	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
1541       } else
1542 #endif
1543       if (localp == true) {
1544 	currlink->fwd_tracei = ++*fwd_tracei;
1545 	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
1546       } else {
1547 	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
1548       }
1549     }
1550 
1551   } else if (processed == NULL) {
1552     debug9(printf("processed is NULL\n"));
1553     /* A. Evaluate adjacent position (at last one processed, if available).  Don't evaluate for mismatches (D). */
1554     adj_querypos = Intlist_head(last_item);
1555     adj_links = links[adj_querypos];
1556     adj_mappings = mappings[adj_querypos];
1557     adj_active = active[adj_querypos];
1558 
1559 #ifdef PMAP
1560     adj_querydistance = (curr_querypos - adj_querypos)*3;
1561 #else
1562     adj_querydistance = curr_querypos - adj_querypos;
1563 #endif
1564 
    /* Process prevhit and hiti in parallel.  Values are ascending along prevhit chain and from 0 to nhits-1. */
1566     prevhit = firstactive[adj_querypos];
1567     hiti = 0;
1568     while (prevhit != -1 && hiti < nhits) {
1569       if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < (position = positions[hiti])) {
1570 	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
1571 
1572       } else if (prevposition + adj_querydistance > position) {
1573 	currlink = &(links[curr_querypos][hiti + low_hit]);
1574 
1575 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
1576 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
1577 	currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
1578 	currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
1579 
1580 #ifdef MOVE_TO_STAGE3
1581 	if (anchoredp == true) {
1582 	  currlink->fwd_tracei = -1;
1583 	  fwd_scores[curr_querypos][hiti + low_hit] = -100000;
1584 	} else
1585 #endif
1586         if (localp == true) {
1587 	  currlink->fwd_tracei = ++*fwd_tracei;
1588 	  fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
1589 	} else {
1590 	  fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
1591 	}
1592 
1593 	hiti++;
1594 
1595       } else {
1596 	/* Adjacent position found for hiti */
1597 	currlink = &(links[curr_querypos][hiti + low_hit]);
1598 	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
1599 
1600 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance;
1601 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition;
1602 	currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos;
1603 	currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit;
1604 	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
1605 
1606 #ifdef DEBUG9
1607 	printf("\tA(1). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
1608 	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
1609 	       fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
1610 	       /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd,
1611 	       /*best_fwd_intronnrev*/prevlink->fwd_intronnrev,
1612 	       /*best_fwd_intronnunk*/prevlink->fwd_intronnunk);
1613 #endif
1614 
1615 	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
1616 	hiti++;
1617       }
1618     }
1619 
1620     while (hiti < nhits) {
1621       currlink = &(links[curr_querypos][hiti + low_hit]);
1622 
1623       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
1624       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
1625       currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
1626       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
1627 
1628 #ifdef MOVE_TO_STAGE3
1629       if (anchoredp == true) {
1630 	currlink->fwd_tracei = -1;
1631 	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
1632       } else
1633 #endif
1634       if (localp == true) {
1635 	currlink->fwd_tracei = ++*fwd_tracei;
1636 	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
1637       } else {
1638 	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
1639       }
1640 
1641       hiti++;
1642     }
1643 
1644   } else {
1645     adj_querypos = Intlist_head(last_item);
1646     adj_links = links[adj_querypos];
1647     adj_mappings = mappings[adj_querypos];
1648     adj_active = active[adj_querypos];
1649 
1650 #ifdef PMAP
1651     adj_querydistance = (curr_querypos - adj_querypos)*3;
1652 #else
1653     adj_querydistance = curr_querypos - adj_querypos;
1654 #endif
1655     nprocessed = Intlist_length(processed);
1656     frontier = (int *) MALLOCA(nprocessed * sizeof(int));
1657 
1658     nseen = 0;
1659     for (p = processed; p != NULL; p = Intlist_next(p)) {
1660       prev_querypos = Intlist_head(p);
1661 
1662       querydistance = curr_querypos - prev_querypos;
1663       if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) {
1664 	max_adjacent_nseen = nseen;
1665       }
1666       if (nseen <= /*nlookback*/nsufflookback || querydistance - indexsize_nt <= /*lookback*/sufflookback) {
1667 	max_nonadjacent_nseen = nseen;
1668       }
1669 
1670       frontier[nseen++] = firstactive[prev_querypos];
1671     }
1672 
1673 
1674     /* Look for overall_fwd_consecutive to see whether we can be greedy */
1675     overall_fwd_consecutive = 0;
1676     adj_frontier = firstactive[adj_querypos];
1677     for (hiti = 0; hiti < nhits; hiti++) {
1678       position = positions[hiti];
1679 
1680       /* A. Evaluate adjacent positions (at last one processed) */
1681       prevhit = adj_frontier;	/* Get information from last hiti */
1682       prevposition = position;	/* Prevents prevposition + adj_querydistance == position */
1683       while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < position) {
1684 	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
1685       }
1686       adj_frontier = prevhit;	/* Save information for next hiti */
1687 
1688       if (prevposition + adj_querydistance == position) {
1689 	/* Adjacent found */
1690 	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
1691 	if (prevlink->fwd_consecutive + adj_querydistance > overall_fwd_consecutive) {
1692 	  overall_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
1693 	}
1694       }
1695     }
1696     debug(printf("Overall fwd consecutive is %d\n",overall_fwd_consecutive));
1697 
1698 
1699     /* Now process */
1700     adj_frontier = firstactive[adj_querypos];
1701     for (hiti = 0; hiti < nhits; hiti++) {
1702       position = positions[hiti];
1703 
1704       /* A. Evaluate adjacent positions (at last one processed) */
1705       prevhit = adj_frontier;	/* Get information from last hiti */
1706       prevposition = position;	/* Prevents prevposition + adj_querydistance == position */
1707       while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < position) {
1708 	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
1709       }
1710       adj_frontier = prevhit;	/* Save information for next hiti */
1711 
1712       if (prevposition + adj_querydistance == position) {
1713 	/* Adjacent found */
1714 	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
1715 
1716 	best_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
1717 	best_fwd_rootposition = prevlink->fwd_rootposition;
1718 	best_fwd_prevpos = adj_querypos;
1719 	best_fwd_prevhit = prevhit;
1720 	best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
1721 	max_nseen = max_adjacent_nseen;	/* Look not so far back */
1722 	best_fwd_tracei = prevlink->fwd_tracei;
1723 
1724 #ifdef DEBUG9
1725 	best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1726 	best_fwd_intronnrev = prevlink->fwd_intronnrev;
1727 	best_fwd_intronnunk = prevlink->fwd_intronnunk;
1728 #endif
1729 	debug9(printf("\tA(2). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
1730 		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
1731 		      best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
1732 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
1733 
1734       } else {
1735 	/* Adjacent not found */
1736 	best_fwd_consecutive = indexsize*NT_PER_MATCH;
1737 	best_fwd_rootposition = position;
1738 	best_fwd_prevpos = -1;
1739 	best_fwd_prevhit = -1;
1740 	best_fwd_score = 0;
1741 	max_nseen = max_nonadjacent_nseen; /* Look farther back */
1742 	best_fwd_tracei = -1;
1743 
1744 #ifdef DEBUG9
1745 	best_fwd_intronnfwd = 0;
1746 	best_fwd_intronnrev = 0;
1747 	best_fwd_intronnunk = 0;
1748 #endif
1749       }
1750 
1751       if (overall_fwd_consecutive < GREEDY_NCONSECUTIVE) {
1752 	/* D. Evaluate for mismatches (all other previous querypos) */
1753 	nseen = 0;
1754 	last_tracei = -1;
1755 	for (p = processed; p != NULL && best_fwd_consecutive < ENOUGH_CONSECUTIVE && nseen <= max_nseen;
1756 	     p = Intlist_next(p), nseen++) {
1757 
1758 	  /* Making this check helps with efficiency */
1759 	  if ((prevhit = frontier[nseen]) != -1) { /* Retrieve starting point from last hiti */
1760 	    prev_querypos = Intlist_head(p);
1761 #ifdef PMAP
1762 	    querydistance = (curr_querypos - prev_querypos)*3;
1763 #else
1764 	    querydistance = curr_querypos - prev_querypos;
1765 #endif
1766 	    /* Actually a querydist_penalty */
1767 	    querydist_credit = -querydistance/indexsize_nt;
1768 
1769 	    prev_mappings = mappings[prev_querypos];
1770 	    prev_links = links[prev_querypos];
1771 	    prev_active = active[prev_querypos];
1772 
1773 	    /* Range 0 */
1774 	    while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
1775 	      debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
1776 	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1777 	    }
1778 	    if (prevhit != -1) {
1779 	      last_tracei = prev_links[prevhit].fwd_tracei;
1780 	    }
1781 
1782 	    /* Range 1: From Infinity to maxintronlen.  To be skipped.
1783 	       This is equivalent to diffdistance >= maxintronlen, where
1784 	       diffdistance = abs(gendistance - querydistance) and
1785 	       gendistance = (position - prevposition - indexsize_nt) */
1786 	    while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) + maxintronlen + querydistance <= position) {
1787 	      /* Accept within range 1 (ignore) */
1788 	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1789 	    }
1790 	    frontier[nseen] = prevhit;	/* Store as starting point for next hiti */
1791 
1792 	    /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
1793 	    /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
1794 	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + EQUAL_DISTANCE_NOT_SPLICING + querydistance < position) {
1795 	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
1796 
1797 	      gendistance = position - prevposition;
1798 	      assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
1799 	      diffdistance = gendistance - querydistance; /* No need for abs() */
1800 
1801 	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
1802 	      if (splicingp == true) {
1803 		fwd_score -= (diffdistance/TEN_THOUSAND + 1);
1804 	      } else {
1805 		fwd_score -= (diffdistance/ONE + 1);
1806 	      }
1807 
1808 	      if (use_canonical_p == true) {
1809 		/* prevpos is lower genomic coordinate than currpos */
1810 		/* need to subtract from position and prevposition to compensate for greedy matches */
1811 		/* need to add to position and prevposition to compensate for missed matches */
1812 		if (plusp == true) {
1813 		  prevpos = chroffset + prevposition + indexsize_nt;
1814 		  currpos = chroffset + position - querydistance + indexsize_nt;
1815 		  if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
1816 		    canonicalp = false;
1817 		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
1818 						     /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
1819 						     /*acceptor_rightbound*/currpos + MISS_BEHIND,
1820 						     /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
1821 						     chroffset) == true) {
1822 		    debug9(printf("lookback plus: sense canonical\n"));
1823 		    canonicalp = true;
1824 		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
1825 							 /*donor_leftbound*/currpos - GREEDY_ADVANCE,
1826 							 /*acceptor_rightbound*/prevpos + MISS_BEHIND,
1827 							 /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
1828 							 chroffset) == true) {
1829 		    debug9(printf("lookback plus: antisense canonical\n"));
1830 		    canonicalp = true;
1831 		  } else {
1832 		    debug9(printf("lookback plus: not canonical\n"));
1833 		    canonicalp = false;
1834 		  }
1835 
1836 		} else {
1837 		  prevpos = chrhigh + 1 - prevposition - indexsize_nt;
1838 		  currpos = chrhigh + 1 - position + querydistance - indexsize_nt;
1839 		  if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
1840 		    canonicalp = false;
1841 		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
1842 						     /*donor_leftbound*/currpos - MISS_BEHIND,
1843 						     /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
1844 						     /*acceptor_leftbound*/prevpos - MISS_BEHIND,
1845 						     chroffset) == true) {
1846 		    debug9(printf("lookback minus: sense canonical\n"));
1847 		    canonicalp = true;
1848 		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
1849 							 /*donor_leftbound*/prevpos - MISS_BEHIND,
1850 							 /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
1851 							 /*acceptor_leftbound*/currpos - MISS_BEHIND,
1852 							 chroffset) == true) {
1853 		    debug9(printf("lookback minus: antisense canonical\n"));
1854 		    canonicalp = true;
1855 		  } else {
1856 		    debug9(printf("lookback minus: not canonical\n"));
1857 		    canonicalp = false;
1858 		  }
1859 		}
1860 
1861 		if (canonicalp == true) {
1862 		  debug9(canonicalsgn = +1);
1863 		} else {
1864 		  debug9(canonicalsgn = 0);
1865 		  fwd_score -= non_canonical_penalty;
1866 		}
1867 	      }
1868 
1869 	      debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
1870 			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
1871 			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
1872 			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
1873 			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
1874 
1875 	      /* Disallow ties, which should favor adjacent */
1876 	      if (fwd_score > best_fwd_score) {
1877 		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
1878 		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
1879 		} else {
1880 		  best_fwd_consecutive = 0;
1881 		}
1882 		best_fwd_rootposition = prevlink->fwd_rootposition;
1883 		best_fwd_score = fwd_score;
1884 		best_fwd_prevpos = prev_querypos;
1885 		best_fwd_prevhit = prevhit;
1886 		best_fwd_tracei = ++*fwd_tracei;
1887 #ifdef DEBUG9
1888 		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1889 		best_fwd_intronnrev = prevlink->fwd_intronnrev;
1890 		best_fwd_intronnunk = prevlink->fwd_intronnunk;
1891 		switch (canonicalsgn) {
1892 		case 1: best_fwd_intronnfwd++; break;
1893 		case 0: best_fwd_intronnunk++; break;
1894 		}
1895 #endif
1896 		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
1897 	      } else {
1898 		debug9(printf(" => Loses to %d\n",best_fwd_score));
1899 	      }
1900 	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1901 	    }
1902 
1903 
1904 	    /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
1905 	    /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
1906 	    /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
1907 
1908 
1909 	    /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
1910 	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + indexsize_nt <= position) {
1911 	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
1912 
1913 	      gendistance = position - prevposition;
1914 	      /* was abs(gendistance - querydistance) */
1915 	      diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
1916 
1917 #ifdef BAD_GMAX
1918 	      fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
1919 #else
1920 	      /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
1921 	      /* This is how version 2013-08-14 did it */
1922 	      fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
1923 #endif
1924 
1925 	      debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
1926 			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
1927 			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
1928 			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
1929 			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
1930 
1931 	      /* Disallow ties, which should favor adjacent */
1932 	      if (fwd_score > best_fwd_score) {
1933 		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
1934 		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
1935 		} else {
1936 		  best_fwd_consecutive = 0;
1937 		}
1938 		best_fwd_rootposition = prevlink->fwd_rootposition;
1939 		best_fwd_score = fwd_score;
1940 		best_fwd_prevpos = prev_querypos;
1941 		best_fwd_prevhit = prevhit;
1942 		/* best_fwd_tracei = ++*fwd_tracei; */
1943 		best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
1944 #ifdef DEBUG9
1945 		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
1946 		best_fwd_intronnrev = prevlink->fwd_intronnrev;
1947 		best_fwd_intronnunk = prevlink->fwd_intronnunk;
1948 		switch (canonicalsgn) {
1949 		case 1: best_fwd_intronnfwd++; break;
1950 		case 0: best_fwd_intronnunk++; break;
1951 		}
1952 #endif
1953 		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
1954 	      } else {
1955 		debug9(printf(" => Loses to %d\n",best_fwd_score));
1956 	      }
1957 
1958 	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
1959 	    }
1960 	  }
1961 	}
1962       }
1963 
1964       /* Best_score needs to beat something positive to prevent a
1965 	 small local extension from beating a good canonical intron.
1966 	 If querypos is too small, don't insert an intron.  */
1967       /* linksconsecutive already assigned above */
1968       currlink = &(links[curr_querypos][hiti + low_hit]);
1969       currlink->fwd_consecutive = best_fwd_consecutive;
1970       currlink->fwd_rootposition = best_fwd_rootposition;
1971       currlink->fwd_pos = best_fwd_prevpos;
1972       currlink->fwd_hit = best_fwd_prevhit;
1973       if (currlink->fwd_pos >= 0) {
1974 	currlink->fwd_tracei = best_fwd_tracei;
1975 	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
1976 #ifdef MOVE_TO_STAGE3
1977       } else if (anchoredp == true) {
1978 	currlink->fwd_tracei = -1;
1979 	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
1980 #endif
1981       } else if (localp == true) {
1982 	currlink->fwd_tracei = ++*fwd_tracei;
1983 	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
1984       } else {
1985 	currlink->fwd_tracei = ++*fwd_tracei;
1986 	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
1987       }
1988 
1989 #ifdef DEBUG9
1990       currlink->fwd_intronnfwd = best_fwd_intronnfwd;
1991       currlink->fwd_intronnrev = best_fwd_intronnrev;
1992       currlink->fwd_intronnunk = best_fwd_intronnunk;
1993 #endif
1994 
1995       debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
1996 		    currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei));
1997       debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
1998     }
1999 
2000     FREEA(frontier);
2001   }
2002 
2003   return;
2004 }
2005 
2006 
/* Scores one hit in the look-forward direction and records its best
   predecessor.

   The hit being scored is hit number currhit at query position
   curr_querypos, with genomic coordinate position.  Candidate
   predecessors are the active hits of the query positions held in the
   processed list.  Query distance is computed as
   (prev_querypos - curr_querypos), so the entries of processed are
   presumably at higher query positions than curr_querypos -- TODO
   confirm against the caller.

   Steps:
     A. Look in the head of processed for a hit whose mapping equals
        position + querydistance.  If found, it becomes the initial
        best predecessor and the lookback limits used in D are reduced.
     D. Walk the processed list, and for each query position walk its
        active hits through Ranges 0, 1, 2 and 4 (see comments below),
        keeping the candidate with the strictly highest score.
     Finally, write the result into currlink and into
     fwd_scores[curr_querypos][currhit].

   Parameters:
     fwd_tracei      Pointer to a trace counter.  It is pre-incremented
                     each time this function hands out a new trace id.
     currlink        Link record of the hit being scored.  Fields
                     fwd_consecutive, fwd_rootposition, fwd_pos, fwd_hit
                     and fwd_tracei are written (plus the fwd_intronn*
                     fields under DEBUG9).
     curr_querypos,
     currhit         Indices of the slot in fwd_scores that receives the
                     final score.
     position        Genomic coordinate of the hit being scored.
     links,
     fwd_scores,
     mappings,
     active          Arrays indexed as [querypos][hit].  active gives the
                     next active hit for a hit, with -1 ending the chain.
     firstactive     firstactive[querypos] is the first hit of the active
                     chain at that query position, or -1.
     chroffset,
     chrhigh, plusp  Used only when use_canonical_p is true, to build the
                     coordinates handed to Genome_sense_canonicalp and
                     Genome_antisense_canonicalp.
     indexsize       Oligomer size; multiplied by 3 under PMAP to give
                     indexsize_nt.
     processed       List of previously processed query positions.
     localp          Selects the score stored when no predecessor was
                     found (see the end of the function).
     splicingp       Enables the Range 1 skip and selects the divisor
                     (TEN_THOUSAND versus ONE) of the Range 2 distance
                     penalty.
     use_canonical_p,
     non_canonical_penalty
                     When use_canonical_p is true, a Range 2 candidate
                     that is not found canonical has
                     non_canonical_penalty subtracted from its score.

   The function also reads nsufflookback, sufflookback and maxintronlen,
   none of which is declared here; presumably file-scope settings --
   TODO confirm.

   Returns nothing; all results are delivered through currlink,
   fwd_scores and *fwd_tracei. */
2007 static void
score_querypos_lookforward_one(int * fwd_tracei,Link_T currlink,int curr_querypos,int currhit,unsigned int position,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int ** active,int * firstactive,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,Intlist_T processed,bool anchoredp,bool localp,bool splicingp,bool use_canonical_p,int non_canonical_penalty)2008 score_querypos_lookforward_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit,
2009 				unsigned int position,
2010 				struct Link_T **links, int **fwd_scores,
2011 				Chrpos_T **mappings, int **active, int *firstactive,
2012 				Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
2013 				int indexsize, Intlist_T processed,
2014 #ifdef MOVE_TO_STAGE3
2015 				bool anchoredp,
2016 #endif
2017 				bool localp, bool splicingp,
2018 				bool use_canonical_p, int non_canonical_penalty) {
2019   Link_T prevlink;
2020   struct Link_T *prev_links;
2021   Chrpos_T *prev_mappings;
2022   int *prev_active;
2023 
       /* Defaults describe a hit with no predecessor: score 0, no
          previous position or hit, rooted at its own position. */
2024   int best_fwd_consecutive = indexsize*NT_PER_MATCH;
2025   int best_fwd_rootposition = position;
2026   int best_fwd_score = 0, fwd_score;
2027   int best_fwd_prevpos = -1, best_fwd_prevhit = -1;
       /* best_fwd_tracei is assigned only together with best_fwd_prevpos,
          and is read at the end only when a predecessor was found. */
2028   int best_fwd_tracei, last_tracei;
2029 #ifdef DEBUG9
2030   int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
2031   int canonicalsgn = 0;
2032 #endif
2033   bool donep;
2034   int prev_querypos, prevhit;
2035   Chrpos_T prevposition;
2036   int gendistance;
2037   Univcoord_T prevpos, currpos;
2038   int querydistance, diffdistance, lookback, nlookback, nseen, indexsize_nt;
2039   /* int querydist_penalty; */
2040   int querydist_credit;
2041   /* bool near_end_p; */
2042   bool canonicalp;
2043 
2044 #ifdef PMAP
2045   indexsize_nt = indexsize*3;
2046 #else
2047   indexsize_nt = indexsize;
2048 #endif
2049   /* indexsize_query = indexsize; */	/* Use when evaluating across query positions */
2050 
2051 
2052   /* Parameters for section D, assuming adjacent is false */
2053   /* adjacentp = false; */
2054   nlookback = nsufflookback;
2055   lookback = sufflookback;
2056 
2057   /* A. Evaluate adjacent position (at last one processed) */
2058   if (processed != NULL) {
2059     prev_querypos = Intlist_head(processed);
2060     prev_mappings = mappings[prev_querypos];
2061     prev_links = links[prev_querypos];
2062     prev_active = active[prev_querypos];
2063 
2064 #ifdef PMAP
2065     querydistance = (prev_querypos - curr_querypos)*3;
2066 #else
2067     querydistance = prev_querypos - curr_querypos;
2068 #endif
         /* Advance along the active chain past every mapping greater than
            position + querydistance.  The loop condition also leaves the
            last mapping examined in prevposition for the equality test
            below. */
2069     prevhit = firstactive[prev_querypos];
2070     prevposition = position;	/* Prevents prevposition == position + querydistance */
2071     while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) > position + querydistance) {
2072       prevhit = /*active[prev_querypos]*/prev_active[prevhit];
2073     }
2074     if (prevposition == position + querydistance) {
           /* Adjacent hit found: extend its run, root, score and trace */
2075       prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
2076       best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
2077       /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
2078       best_fwd_rootposition = prevlink->fwd_rootposition;
2079       best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance;
2080 
2081       best_fwd_prevpos = prev_querypos;
2082       best_fwd_prevhit = prevhit;
2083       best_fwd_tracei = prevlink->fwd_tracei;
2084 #ifdef DEBUG9
2085       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
2086       best_fwd_intronnrev = prevlink->fwd_intronnrev;
2087       best_fwd_intronnunk = prevlink->fwd_intronnunk;
2088 #endif
2089       /* adjacentp = true; */
2090       /* Parameters for section D when adjacent is true */
2091       nlookback = 1;
2092       lookback = sufflookback/2;
2093 
2094       debug9(printf("\tA. Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
2095 		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit],
2096 		    best_fwd_score,best_fwd_consecutive,best_fwd_tracei,
2097 		    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
2098     }
2099   }
2100 
2101   /* Check work list */
       /* This block is compiled only under MOVE_TO_STAGE3.  It refers to
          indexsize_query, queryend and querystart, none of which is
          declared in this function. */
2102 #ifdef MOVE_TO_STAGE3
2103   if (anchoredp && curr_querypos + indexsize_query >= queryend) {
2104     /* Allow close prevpositions that overlap with anchor */
2105     /* Can give rise to false positives, and increases amount of dynamic programming work */
2106     debug9(printf("No skipping because close to anchor\n"));
2107   } else if (0 && anchoredp && curr_querypos == querystart) {
2108     /* Test end position */
2109   } else if (0) {
2110     while (processed != NULL && (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) {
2111       debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
2112       processed = Intlist_next(processed);
2113     }
2114   }
2115 #endif
2116 
2117   /* D. Evaluate for mismatches (all other previous querypos) */
       /* The walk stops when the list is exhausted, when the best candidate
          has reached ENOUGH_CONSECUTIVE, or after donep has been set.  It
          starts again from the head of processed, so the query position
          examined in section A is visited here too. */
2118   donep = false;
2119   nseen = 0;
2120   last_tracei = -1;
2121   for ( ; processed != NULL && best_fwd_consecutive < ENOUGH_CONSECUTIVE && donep == false;
2122 	processed = Intlist_next(processed), nseen++) {
2123     prev_querypos = Intlist_head(processed);
2124 
2125 #ifdef PMAP
2126     querydistance = (prev_querypos - curr_querypos)*3;
2127 #else
2128     querydistance = prev_querypos - curr_querypos;
2129 #endif
2130 
         /* donep is tested only in the loop condition, so the current
            prev_querypos is still evaluated below before the loop ends. */
2131     if (nseen > nlookback && querydistance - indexsize_nt > lookback) {
2132       donep = true;
2133     }
2134 
2135     if ((prevhit = firstactive[prev_querypos]) != -1) {
2136       /* querydist_penalty = (querydistance - indexsize_nt)/QUERYDIST_PENALTY_FACTOR; */
2137       /* Actually a querydist_penalty */
2138       querydist_credit = -querydistance/indexsize_nt;
2139 
2140       prev_mappings = mappings[prev_querypos];
2141       prev_links = links[prev_querypos];
2142       prev_active = active[prev_querypos];
2143 
2144       /* Range 0 */
           /* Skip leading active hits that carry the trace id remembered in
              last_tracei, then remember the trace id of the first hit
              that remains. */
2145       while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
2146 	debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
2147 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
2148       }
2149       if (prevhit != -1) {
2150 	last_tracei = prev_links[prevhit].fwd_tracei;
2151       }
2152 
2153       /* Range 1: From Infinity to maxintronlen */
2154       if (splicingp == true) {
2155 	/* This is equivalent to diffdistance >= maxintronlen, where
2156 	   diffdistance = abs(gendistance - querydistance) and
2157 	   gendistance = (position - prevposition - indexsize_nt) */
2158 	while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + maxintronlen + querydistance) {
2159 	  /* Skip */
2160 	  /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
2161 	  prevhit = /*active[prev_querypos]*/prev_active[prevhit];
2162 	}
2163       }
2164 
2165       /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
2166       /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
2167       while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) > position + EQUAL_DISTANCE_NOT_SPLICING + querydistance) {
2168 	/* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
2169 	prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
2170 
2171 	gendistance = prevposition - position;
2172 	assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
2173 	diffdistance = gendistance - querydistance; /* No need for abs() */
2174 
	     /* Candidate score: predecessor's score plus querydist_credit,
	        minus a penalty that grows with diffdistance.  The penalty
	        divisor is TEN_THOUSAND when splicing and ONE otherwise. */
2175 	fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
2176 	if (splicingp == true) {
2177 	  fwd_score -= (diffdistance/TEN_THOUSAND + 1);
2178 	} else {
2179 	  fwd_score -= (diffdistance/ONE + 1);
2180 	}
2181 
2182 	if (use_canonical_p == true) {
2183 
2184 	  /* prevpos is higher genomic coordinate than currpos */
2185 	  /* need to add to position and prevposition to compensate for greedy matches */
2186 	  /* need to subtract from position and prevposition to compensate for missed matches */
2187 	  if (plusp == true) {
2188 	    prevpos = chroffset + prevposition;
2189 	    currpos = chroffset + position + querydistance;
	         /* Guard: the left bounds below subtract MISS_BEHIND from
	            these unsigned coordinates */
2190 	    if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
2191 	      canonicalp = false;
2192 	    } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
2193 					       /*donor_leftbound*/currpos - MISS_BEHIND,
2194 					       /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
2195 					       /*acceptor_leftbound*/prevpos - MISS_BEHIND,
2196 					       chroffset) == true) {
2197 	      debug9(printf("lookforward plus: sense canonical\n"));
2198 	      canonicalp = true;
2199 	    } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
2200 						   /*donor_leftbound*/prevpos - MISS_BEHIND,
2201 						   /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
2202 						   /*acceptor_leftbound*/currpos - MISS_BEHIND,
2203 						   chroffset) == true) {
2204 	      debug9(printf("lookforward plus: antisense canonical\n"));
2205 	      canonicalp = true;
2206 	    } else {
2207 	      debug9(printf("lookforward plus: not canonical\n"));
2208 	      canonicalp = false;
2209 	    }
2210 
2211 	  } else {
	         /* Minus strand: coordinates are measured down from
	            chrhigh + 1 */
2212 	    prevpos = chrhigh + 1 - prevposition;
2213 	    currpos = chrhigh + 1 - position - querydistance;
	         /* Guard: the left bounds below subtract GREEDY_ADVANCE from
	            these unsigned coordinates */
2214 	    if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
2215 	      canonicalp = false;
2216 	    } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
2217 					       /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
2218 					       /*acceptor_rightbound*/currpos + MISS_BEHIND,
2219 					       /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
2220 					       chroffset) == true) {
2221 	      debug9(printf("lookforward minus: sense canonical\n"));
2222 	      canonicalp = true;
2223 	    } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
2224 						   /*donor_leftbound*/currpos - GREEDY_ADVANCE,
2225 						   /*acceptor_rightbound*/prevpos + MISS_BEHIND,
2226 						   /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
2227 						   chroffset) == true) {
2228 	      debug9(printf("lookforward minus: antisense canonical\n"));
2229 	      canonicalp = true;
2230 	    } else {
2231 	      debug9(printf("lookforward minus: not canonical\n"));
2232 	      canonicalp = false;
2233 	    }
2234 	  }
2235 
2236 	  if (canonicalp == true) {
2237 	    debug9(canonicalsgn = +1);
2238 	  } else {
2239 	    debug9(canonicalsgn = 0);
2240 	    fwd_score -= non_canonical_penalty;
2241 	  }
2242 	}
2243 
2244 	debug9(printf("\tD2. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
2245 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
2246 		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
2247 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
2248 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
2249 
2250 	/* Disallow ties, which should favor adjacent */
2251 	if (fwd_score > best_fwd_score) {
2252 	  if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
2253 	    best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
2254 	    /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
2255 	  } else {
2256 	    best_fwd_consecutive = 0;
2257 	    /* best_fwd_rootnlinks = 1; */
2258 	  }
2259 	  best_fwd_rootposition = prevlink->fwd_rootposition;
2260 	  best_fwd_score = fwd_score;
2261 	  best_fwd_prevpos = prev_querypos;
2262 	  best_fwd_prevhit = prevhit;
	       /* A winning Range 2 candidate gets a fresh trace id, unlike
	          Range 4 below, which keeps the predecessor's */
2263 	  best_fwd_tracei = ++*fwd_tracei;
2264 #ifdef DEBUG9
2265 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
2266 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
2267 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
2268 	  switch (canonicalsgn) {
2269 	  case 1: best_fwd_intronnfwd++; break;
2270 	  case 0: best_fwd_intronnunk++; break;
2271 	  }
2272 #endif
2273 	  debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
2274 	} else {
2275 	  debug9(printf(" => Loses to %d\n",best_fwd_score));
2276 	}
2277 
2278 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
2279       }
2280 
2281       /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
2282       /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
2283       /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
2284 
2285       /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
           /* Continues from wherever Range 2 stopped on the same active
              chain, and takes every remaining hit whose mapping is at
              least indexsize_nt beyond position. */
2286       while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + indexsize_nt) {
2287 	/* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
2288 	prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
2289 
2290 	gendistance = prevposition - position;
2291 	/* was abs(gendistance - querydistance) */
2292 	diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
2293 
2294 #ifdef BAD_GMAX
2295 	fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
2296 #else
2297 	/* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
2298 	/* This is how version 2013-08-14 did it */
2299 	fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
2300 #endif
2301 #if 0
2302 	if (/*near_end_p == false &&*/ prevlink->fwd_consecutive < EXON_DEFN) {
2303 	  fwd_score -= NINTRON_PENALTY_MISMATCH;
2304 	}
2305 #endif
2306 
2307 	debug9(printf("\tD4. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
2308 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
2309 		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
2310 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
2311 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
2312 
2313 	/* Disallow ties, which should favor adjacent */
2314 	if (fwd_score > best_fwd_score) {
2315 	  if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
2316 	    best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
2317 	    /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
2318 	  } else {
2319 	    best_fwd_consecutive = 0;
2320 	    /* best_fwd_rootnlinks = 1; */
2321 	  }
2322 	  best_fwd_rootposition = prevlink->fwd_rootposition;
2323 	  best_fwd_score = fwd_score;
2324 	  best_fwd_prevpos = prev_querypos;
2325 	  best_fwd_prevhit = prevhit;
2326 	  /* best_fwd_tracei = ++*fwd_tracei; */
2327 	  best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
2328 #ifdef DEBUG9
2329 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
2330 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
2331 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
2332 	  switch (canonicalsgn) {
2333 	  case 1: best_fwd_intronnfwd++; break;
2334 	  case 0: best_fwd_intronnunk++; break;
2335 	  }
2336 #endif
2337 	  debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
2338 	} else {
2339 	  debug9(printf(" => Loses to %d\n",best_fwd_score));
2340 	}
2341 
2342 	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
2343       }
2344     }
2345   }
2346 
2347   /* Best_score needs to beat something positive to prevent a
2348      small local extension from beating a good canonical intron.
2349      If querypos is too small, don't insert an intron.  */
2350   /* linksconsecutive already assigned above */
2351   currlink->fwd_consecutive = best_fwd_consecutive;
2352   currlink->fwd_rootposition = best_fwd_rootposition;
2353   /* currlink->fwd_rootnlinks = best_fwd_rootnlinks; */
2354   currlink->fwd_pos = best_fwd_prevpos;
2355   currlink->fwd_hit = best_fwd_prevhit;
       /* With a predecessor (fwd_pos >= 0), store the winning trace and
          score.  Without one, the hit starts a new trace: under localp its
          score is indexsize_nt, otherwise it is best_fwd_score, which is
          still its initial 0 because the score and the predecessor are
          always updated together. */
2356   if (currlink->fwd_pos >= 0) {
2357     currlink->fwd_tracei = best_fwd_tracei;
2358     fwd_scores[curr_querypos][currhit] = best_fwd_score;
2359 #ifdef MOVE_TO_STAGE3
2360   } else if (anchoredp == true) {
2361     currlink->fwd_tracei = -1;
2362     fwd_scores[curr_querypos][currhit] = -100000;
2363 #endif
2364   } else if (localp == true) {
2365     currlink->fwd_tracei = ++*fwd_tracei;
2366     fwd_scores[curr_querypos][currhit] = indexsize_nt;
2367   } else {
2368     currlink->fwd_tracei = ++*fwd_tracei;
2369     fwd_scores[curr_querypos][currhit] = best_fwd_score;
2370   }
2371 
2372 #ifdef DEBUG9
2373   currlink->fwd_intronnfwd = best_fwd_intronnfwd;
2374   currlink->fwd_intronnrev = best_fwd_intronnrev;
2375   currlink->fwd_intronnunk = best_fwd_intronnunk;
2376 #endif
2377 
2378   debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
2379 		currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei));
2380   debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
2381 
2382   return;
2383 }
2384 
2385 
/* Scores every hit at curr_querypos against the query positions in
   the processed list.  Hits are visited from index nhits-1 down to 0,
   where nhits = high_hit - low_hit.

   For each hiti this fills links[curr_querypos][hiti + low_hit]
   (fwd_consecutive, fwd_rootposition, fwd_pos, fwd_hit and, on most
   paths, fwd_tracei) and fwd_scores[curr_querypos][hiti + low_hit].
   A fwd_pos/fwd_hit of -1 means that no predecessor was chosen.

   fwd_tracei: counter that is pre-incremented whenever a new trace id
     is handed out.
   positions: positions of the hits, indexed by hiti (not by
     hiti + low_hit).
   mappings, active, firstactive: for each earlier query position,
     firstactive[] gives the first hit of a chain, active[][] the next
     hit in that chain (-1 terminates) and mappings[][] the position
     of each hit.  The merging loops rely on positions descending
     along the chain, as the hits do from nhits-1 to 0.
   processed: list of previously processed query positions; its head
     is treated as the adjacent position.
   chroffset, chrhigh, plusp: used only to convert positions into the
     coordinates handed to the canonical-site checks.
   localp: when no predecessor is chosen, the hit starts with a score
     of indexsize_nt and a fresh trace id.
   splicingp: selects the divisor of the distance penalty in range 2
     (TEN_THOUSAND versus ONE).
   use_canonical_p, non_canonical_penalty: when use_canonical_p is
     true, a range-2 candidate for which neither canonical check
     succeeds loses non_canonical_penalty points.

   Also reads sufflookback, nsufflookback and maxintronlen, which are
   not declared in this function (presumably file-scope settings). */
static void
score_querypos_lookforward_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos,
				 unsigned int *positions,
				 struct Link_T **links, int **fwd_scores,
				 Chrpos_T **mappings, int **active, int *firstactive,
				 Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
				 int indexsize, Intlist_T processed,
#ifdef MOVE_TO_STAGE3
				 bool anchoredp,
#endif
				 bool localp, bool splicingp,
				 bool use_canonical_p, int non_canonical_penalty) {
  Link_T prevlink, currlink;
  Intlist_T last_item, p;
  int nhits = high_hit - low_hit, nprocessed, hiti;

  struct Link_T *prev_links, *adj_links;
  Chrpos_T *prev_mappings, *adj_mappings;
  int *prev_active, *adj_active;

  int overall_fwd_consecutive, best_fwd_consecutive;
  int best_fwd_rootposition;
  int best_fwd_score, fwd_score;
  int best_fwd_prevpos, best_fwd_prevhit;
  int best_fwd_tracei, last_tracei;
#ifdef DEBUG9
  int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk;
  int canonicalsgn = 0;
#endif
  int adj_querypos, adj_querydistance, prev_querypos, prevhit, adj_frontier, *frontier;
  Chrpos_T prevposition, position;
  int gendistance;
  Univcoord_T prevpos, currpos;
  int querydistance, diffdistance, indexsize_nt;
  int max_nseen, max_adjacent_nseen, max_nonadjacent_nseen, nseen;
  int querydist_credit;
  bool canonicalp;

#ifdef PMAP
  indexsize_nt = indexsize*3;
#else
  indexsize_nt = indexsize;
#endif
  /* indexsize_query = indexsize; */  /* Use when evaluating across query positions */


  /* Determine work load */
  /* printf("Work load (lookforward): %s\n",Intlist_to_string(processed)); */
  /* Remember the head of the list (the adjacent position) before any skipping below */
  last_item = processed;
#ifdef MOVE_TO_STAGE3
  if (anchoredp && curr_querypos + indexsize_query >= queryend) {
    /* Allow close prevpositions that overlap with anchor */
    /* Can give rise to false positives, and increases amount of dynamic programming work */
    /* debug9(printf("No skipping because close to anchor\n")); */
  } else if (0 && anchoredp && curr_querypos == querystart) {
    /* Test end position */
  } else if (0) {
    while (processed != NULL && (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) {
      debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
      processed = Intlist_next(processed);
    }
  }
#endif

  if (last_item == NULL) {
    /* Nothing processed yet: every hit starts its own chain */
    for (hiti = nhits - 1; hiti >= 0; hiti--) {
      currlink = &(links[curr_querypos][hiti + low_hit]);

      currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
      currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
      currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
      currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;

#ifdef MOVE_TO_STAGE3
      if (anchoredp == true) {
	currlink->fwd_tracei = -1;
	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
      } else
#endif
      if (localp == true) {
	currlink->fwd_tracei = ++*fwd_tracei;
	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
      } else {
	/* NOTE(review): fwd_tracei is not assigned on this path */
	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
      }
    }

  } else if (processed == NULL) {
    /* NOTE(review): processed is advanced only inside the `else if (0)'
       loop above, so once last_item != NULL this branch looks
       unreachable as the code stands. */
    /* A. Evaluate adjacent position (at last one processed, if available).  Don't evaluate for mismatches (D). */
    adj_querypos = Intlist_head(last_item);
    adj_links = links[adj_querypos];
    adj_mappings = mappings[adj_querypos];
    adj_active = active[adj_querypos];

#ifdef PMAP
    adj_querydistance = (adj_querypos - curr_querypos)*3;
#else
    adj_querydistance = adj_querypos - curr_querypos;
#endif

    /* Process prevhit and hiti in parallel.  Values are descending along prevhit chain and from nhits-1 to 0. */
    prevhit = firstactive[adj_querypos];
    hiti = nhits - 1;
    while (prevhit != -1 && hiti >= 0) {
      if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) > (position = positions[hiti]) + adj_querydistance) {
	prevhit = /*active[adj_querypos]*/adj_active[prevhit];

      } else if (prevposition < position + adj_querydistance) {
	/* Adjacent position not found for hiti */
	currlink = &(links[curr_querypos][hiti + low_hit]);

	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
	currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
	currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;

#ifdef MOVE_TO_STAGE3
	if (anchoredp == true) {
	  currlink->fwd_tracei = -1;
	  fwd_scores[curr_querypos][hiti + low_hit] = -100000;
	} else
#endif
        if (localp == true) {
	  currlink->fwd_tracei = ++*fwd_tracei;
	  fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
	} else {
	  fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
	}

	hiti--;

      } else {
	/* Adjacent position found for hiti */
	currlink = &(links[curr_querypos][hiti + low_hit]);
	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);

	/* Extend the adjacent hit's run, root and score.
	   NOTE(review): fwd_tracei is not copied from prevlink here. */
	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance;
	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition;
	currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos;
	currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit;
	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;

#ifdef DEBUG9
	printf("\tA(3). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
	       fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
	       /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd,
	       /*best_fwd_intronnrev*/prevlink->fwd_intronnrev,
	       /*best_fwd_intronnunk*/prevlink->fwd_intronnunk);
#endif

	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
	hiti--;
      }
    }

    /* The adjacent chain is exhausted; the remaining hits have no adjacent hit */
    while (hiti >= 0) {
      /* Adjacent position not found for hiti */
      currlink = &(links[curr_querypos][hiti + low_hit]);

      currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
      currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
      currlink->fwd_pos = /*best_fwd_prevpos =*/ -1;
      currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;

#ifdef MOVE_TO_STAGE3
      if (anchoredp == true) {
	currlink->fwd_tracei = -1;
	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
      } else
#endif
      if (localp == true) {
	currlink->fwd_tracei = ++*fwd_tracei;
	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
      } else {
	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
      }

      hiti--;
    }

  } else {
    /* General case: consider the adjacent position (A) and, unless
       greedy, the other processed positions (D) */
    adj_querypos = Intlist_head(last_item);
    adj_links = links[adj_querypos];
    adj_mappings = mappings[adj_querypos];
    adj_active = active[adj_querypos];

#ifdef PMAP
    adj_querydistance = (adj_querypos - curr_querypos)*3;
#else
    adj_querydistance = adj_querypos - curr_querypos;
#endif

    nprocessed = Intlist_length(processed);
    frontier = (int *) MALLOCA(nprocessed * sizeof(int));

    /* frontier[k] starts at the first active hit of the k-th processed
       position.  max_adjacent_nseen and max_nonadjacent_nseen end up as
       the largest k that satisfies the respective lookback test.
       max_adjacent_nseen is always set at k == 0 (0 <= 1).
       NOTE(review): max_nonadjacent_nseen is set at k == 0 only if
       nsufflookback >= 0 or the distance test passes -- confirm that
       it cannot stay uninitialized. */
    nseen = 0;
    for (p = processed; p != NULL; p = Intlist_next(p)) {
      prev_querypos = Intlist_head(p);

      querydistance = prev_querypos - curr_querypos;
      if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) {
	max_adjacent_nseen = nseen;
      }
      if (nseen <= /*nlookback*/nsufflookback || querydistance - indexsize_nt <= /*lookback*/sufflookback) {
	max_nonadjacent_nseen = nseen;
      }

      frontier[nseen++] = firstactive[prev_querypos];
    }


    /* Look for overall_fwd_consecutive to see whether we can be greedy */
    /* This first pass writes nothing to links or fwd_scores */
    overall_fwd_consecutive = 0;
    adj_frontier = firstactive[adj_querypos];
    for (hiti = nhits - 1; hiti >= 0; hiti--) {
      position = positions[hiti];

      /* A. Evaluate adjacent position (at last one processed) */
      prevhit = adj_frontier;	/* Get information from last hiti */
      prevposition = position;	/* Prevents prevposition == position + adj_querydistance */
      while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) > position + adj_querydistance) {
	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
      }
      adj_frontier = prevhit;	/* Save information for next hiti */

      if (prevposition == position + adj_querydistance) {
	/* Adjacent found */
	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
	if (prevlink->fwd_consecutive + adj_querydistance > overall_fwd_consecutive) {
	  overall_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
	}
      }
    }
    debug(printf("Overall fwd consecutive is %d\n",overall_fwd_consecutive));


    /* Now process */
    adj_frontier = firstactive[adj_querypos];
    for (hiti = nhits - 1; hiti >= 0; hiti--) {
      position = positions[hiti];

      /* A. Evaluate adjacent position (at last one processed) */
      prevhit = adj_frontier;	/* Get information from last hiti */
      prevposition = position;	/* Prevents prevposition == position + adj_querydistance */
      while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) > position + adj_querydistance) {
	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
      }
      adj_frontier = prevhit;	/* Save information for next hiti */

      if (prevposition == position + adj_querydistance) {
	/* Adjacent found */
	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);

	best_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
	best_fwd_rootposition = prevlink->fwd_rootposition;
	best_fwd_prevpos = adj_querypos;
	best_fwd_prevhit = prevhit;
	best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
	max_nseen = max_adjacent_nseen;	/* Look not so far back */
	best_fwd_tracei = prevlink->fwd_tracei;

#ifdef DEBUG9
	best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
	best_fwd_intronnrev = prevlink->fwd_intronnrev;
	best_fwd_intronnunk = prevlink->fwd_intronnunk;
#endif
	debug9(printf("\tA(4). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
		      best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
      } else {
	/* Adjacent not found */
	best_fwd_consecutive = indexsize*NT_PER_MATCH;
	best_fwd_rootposition = position;
	best_fwd_prevpos = -1;
	best_fwd_prevhit = -1;
	best_fwd_score = 0;
	max_nseen = max_nonadjacent_nseen; /* Look farther back */
	best_fwd_tracei = -1;

#ifdef DEBUG9
	best_fwd_intronnfwd = 0;
	best_fwd_intronnrev = 0;
	best_fwd_intronnunk = 0;
#endif
      }

      /* Greedy shortcut: if some hit at this query position already
	 extends a run of at least GREEDY_NCONSECUTIVE, section D is
	 skipped for every hit */
      if (overall_fwd_consecutive < GREEDY_NCONSECUTIVE) {
	/* D. Evaluate for mismatches (all other previous querypos) */
	nseen = 0;
	last_tracei = -1;
	/* Stops early once the best candidate has reached ENOUGH_CONSECUTIVE */
	for (p = processed; p != NULL && best_fwd_consecutive < ENOUGH_CONSECUTIVE && nseen <= max_nseen;
	     p = Intlist_next(p), nseen++) {

	  /* Making this check helps with efficiency */
	  if ((prevhit = frontier[nseen]) != -1) {	/* Retrieve starting point from last hiti */
	    prev_querypos = Intlist_head(p);
#ifdef PMAP
	    querydistance = (prev_querypos - curr_querypos)*3;
#else
	    querydistance = prev_querypos - curr_querypos;
#endif
	    /* Actually a querydist_penalty */
	    querydist_credit = -querydistance/indexsize_nt;

	    prev_mappings = mappings[prev_querypos];
	    prev_links = links[prev_querypos];
	    prev_active = active[prev_querypos];

	    /* Range 0 */
	    /* Skip hits whose trace id equals the one recorded at the
	       previously examined query position */
	    while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
	      debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
	    }
	    if (prevhit != -1) {
	      last_tracei = prev_links[prevhit].fwd_tracei;
	    }

	    /* Range 1: From Infinity to maxintronlen.  To be skipped.
	       This is equivalent to diffdistance >= maxintronlen, where
	       diffdistance = abs(gendistance - querydistance) and
	       gendistance = (position - prevposition - indexsize_nt) */
	    while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + maxintronlen + querydistance) {
	      /* Accept within range 1 (ignore) */
	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
	    }
	    /* Saved after ranges 0 and 1, so hits skipped there are not
	       revisited for later hiti */
	    frontier[nseen] = prevhit;	/* Store as starting point for next hiti */


	    /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
	    /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) > position + EQUAL_DISTANCE_NOT_SPLICING + querydistance) {
	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);

	      gendistance = prevposition - position;
	      assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
	      diffdistance = gendistance - querydistance; /* No need for abs() */

	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
	      if (splicingp == true) {
		fwd_score -= (diffdistance/TEN_THOUSAND + 1);
	      } else {
		fwd_score -= (diffdistance/ONE + 1);
	      }

	      if (use_canonical_p == true) {
		/* prevpos is higher genomic coordinate than currpos */
		/* need to add to position and prevposition to compensate for greedy matches */
		/* need to subtract from position and prevposition to compensate for missed matches */
		if (plusp == true) {
		  prevpos = chroffset + prevposition;
		  currpos = chroffset + position + querydistance;
		  /* Guard: the left bounds below subtract MISS_BEHIND from unsigned coordinates */
		  if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
		    canonicalp = false;
		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
						     /*donor_leftbound*/currpos - MISS_BEHIND,
						     /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
						     /*acceptor_leftbound*/prevpos - MISS_BEHIND,
						     chroffset) == true) {
		    debug9(printf("lookforward plus: sense canonical\n"));
		    canonicalp = true;
		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
							 /*donor_leftbound*/prevpos - MISS_BEHIND,
							 /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
							 /*acceptor_leftbound*/currpos - MISS_BEHIND,
							 chroffset) == true) {
		    debug9(printf("lookforward plus: antisense canonical\n"));
		    canonicalp = true;
		  } else {
		    debug9(printf("lookforward plus: not canonical\n"));
		    canonicalp = false;
		  }

		} else {
		  prevpos = chrhigh + 1 - prevposition;
		  currpos = chrhigh + 1 - position - querydistance;
		  /* Guard: the left bounds below subtract GREEDY_ADVANCE from unsigned coordinates */
		  if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
		    canonicalp = false;
		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
						     /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
						     /*acceptor_rightbound*/currpos + MISS_BEHIND,
						     /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
						     chroffset) == true) {
		    debug9(printf("lookforward minus: sense canonical\n"));
		    canonicalp = true;
		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
							 /*donor_leftbound*/currpos - GREEDY_ADVANCE,
							 /*acceptor_rightbound*/prevpos + MISS_BEHIND,
							 /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
							 chroffset) == true) {
		    debug9(printf("lookforward minus: antisense canonical\n"));
		    canonicalp = true;
		  } else {
		    debug9(printf("lookforward minus: not canonical\n"));
		    canonicalp = false;
		  }
		}

		if (canonicalp == true) {
		  debug9(canonicalsgn = +1);
		} else {
		  debug9(canonicalsgn = 0);
		  fwd_score -= non_canonical_penalty;
		}
	      }

	      debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));

	      /* Disallow ties, which should favor adjacent */
	      if (fwd_score > best_fwd_score) {
		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
		} else {
		  best_fwd_consecutive = 0;
		}
		best_fwd_rootposition = prevlink->fwd_rootposition;
		best_fwd_score = fwd_score;
		best_fwd_prevpos = prev_querypos;
		best_fwd_prevhit = prevhit;
		/* A range-2 winner gets a new trace id */
		best_fwd_tracei = ++*fwd_tracei;
#ifdef DEBUG9
		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
		best_fwd_intronnrev = prevlink->fwd_intronnrev;
		best_fwd_intronnunk = prevlink->fwd_intronnunk;
		switch (canonicalsgn) {
		case 1: best_fwd_intronnfwd++; break;
		case 0: best_fwd_intronnunk++; break;
		}
#endif
		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
	      } else {
		debug9(printf(" => Loses to %d\n",best_fwd_score));
	      }

	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
	    }


	    /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
	    /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
	    /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */


	    /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + indexsize_nt) {
	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);

	      gendistance = prevposition - position;
	      /* was abs(gendistance - querydistance) */
	      diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);

#ifdef BAD_GMAX
	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
#else
	      /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
	      /* This is how version 2013-08-14 did it */
	      fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
#endif

	      debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));

	      /* Disallow ties, which should favor adjacent */
	      if (fwd_score > best_fwd_score) {
		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
		} else {
		  best_fwd_consecutive = 0;
		}
		best_fwd_rootposition = prevlink->fwd_rootposition;
		best_fwd_score = fwd_score;
		best_fwd_prevpos = prev_querypos;
		best_fwd_prevhit = prevhit;
		/* best_fwd_tracei = ++*fwd_tracei; */
		best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */

#ifdef DEBUG9
		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
		best_fwd_intronnrev = prevlink->fwd_intronnrev;
		best_fwd_intronnunk = prevlink->fwd_intronnunk;
		switch (canonicalsgn) {
		case 1: best_fwd_intronnfwd++; break;
		case 0: best_fwd_intronnunk++; break;
		}
#endif
		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
	      } else {
		debug9(printf(" => Loses to %d\n",best_fwd_score));
	      }

	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
	    }
	  }
	}
      }

      /* Best_score needs to beat something positive to prevent a
	 small local extension from beating a good canonical intron.
	 If querypos is too small, don't insert an intron.  */
      /* linksconsecutive already assigned above */
      currlink = &(links[curr_querypos][hiti + low_hit]);
      currlink->fwd_consecutive = best_fwd_consecutive;
      currlink->fwd_rootposition = best_fwd_rootposition;
      currlink->fwd_pos = best_fwd_prevpos;
      currlink->fwd_hit = best_fwd_prevhit;
      if (currlink->fwd_pos >= 0) {
	/* A predecessor was chosen: keep the trace id and score computed above */
	currlink->fwd_tracei = best_fwd_tracei;
	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
#ifdef MOVE_TO_STAGE3
      } else if (anchoredp == true) {
	currlink->fwd_tracei = -1;
	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
#endif
      } else if (localp == true) {
	/* No predecessor: start a new trace worth one oligomer */
	currlink->fwd_tracei = ++*fwd_tracei;
	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
      } else {
	/* No predecessor: start a new trace with the score computed above */
	currlink->fwd_tracei = ++*fwd_tracei;
	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
      }

#ifdef DEBUG9
      currlink->fwd_intronnfwd = best_fwd_intronnfwd;
      currlink->fwd_intronnrev = best_fwd_intronnrev;
      currlink->fwd_intronnunk = best_fwd_intronnunk;
#endif

      debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
		    currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei));
      debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
    }

    FREEA(frontier);
  }

  return;
}
2931 
2932 
2933 static void
revise_active_lookback(int ** active,int * firstactive,int * nactive,int low_hit,int high_hit,int ** fwd_scores,int querypos)2934 revise_active_lookback (int **active, int *firstactive, int *nactive,
2935 			int low_hit, int high_hit, int **fwd_scores, int querypos) {
2936   int best_score, threshold, score;
2937   int hit, *ptr;
2938 
2939   debug6(printf("Revising querypos %d from low_hit %d to high_hit %d.  Scores:\n",querypos,low_hit,high_hit));
2940   if ((hit = low_hit) >= high_hit) {
2941     debug6(printf("1.  Initializing firstactive for querypos %d to be -1\n",querypos));
2942     firstactive[querypos] = -1;
2943     nactive[querypos] = 0;
2944 
2945   } else {
2946     debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
2947     best_score = fwd_scores[querypos][hit];
2948 #ifdef SEPARATE_FWD_REV
2949     debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
2950     if ((score = rev_scores[querypos][hit]) > best_score) {
2951       best_score = score;
2952     }
2953 #endif
2954     debug6(printf("\n"));
2955 
2956     for (hit++; hit < high_hit; hit++) {
2957       debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
2958       if ((score = fwd_scores[querypos][hit]) > best_score) {
2959 	best_score = score;
2960       }
2961 #ifdef SEPARATE_FWD_REV
2962       debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
2963       if ((score = rev_scores[querypos][hit]) > best_score) {
2964 	best_score = score;
2965       }
2966 #endif
2967       debug6(printf("\n"));
2968     }
2969 
2970     threshold = best_score - SCORE_FOR_RESTRICT;
2971     if (threshold < 0) {
2972       threshold = 0;
2973     }
2974 
2975     nactive[querypos] = 0;
2976     firstactive[querypos] = -1;
2977     ptr = &(firstactive[querypos]);
2978     hit = low_hit;
2979     while (hit < high_hit) {
2980       while (hit < high_hit && fwd_scores[querypos][hit] <= threshold
2981 #ifdef SEPARATE_FWD_REV
2982 	     && rev_scores[querypos][hit] <= threshold
2983 #endif
2984 	     ) {
2985 	hit++;
2986       }
2987       *ptr = hit;
2988       if (hit < high_hit) {
2989 	nactive[querypos] += 1;
2990 	ptr = &(active[querypos][hit]);
2991 	hit++;
2992       }
2993     }
2994     *ptr = -1;
2995   }
2996 
2997   debug6(
2998 	 printf("Valid hits (%d) at querypos %d (firstactive %d):",nactive[querypos],querypos,firstactive[querypos]);
2999 	 hit = firstactive[querypos];
3000 	 while (hit != -1) {
3001 	   printf(" %d",hit);
3002 	   hit = active[querypos][hit];
3003 	 }
3004 	 printf("\n");
3005 	 );
3006 
3007   return;
3008 }
3009 
3010 
3011 static void
revise_active_lookforward(int ** active,int * firstactive,int * nactive,int low_hit,int high_hit,int ** fwd_scores,int querypos)3012 revise_active_lookforward (int **active, int *firstactive, int *nactive,
3013 			   int low_hit, int high_hit, int **fwd_scores, int querypos) {
3014   int best_score, threshold, score;
3015   int hit, *ptr;
3016 
3017   debug6(printf("Revising querypos %d from high_hit %d to low_hit %d.  Scores:\n",querypos,high_hit,low_hit));
3018   if ((hit = high_hit - 1) < low_hit) {
3019     debug6(printf("2.  Initializing firstactive for querypos %d to be -1\n",querypos));
3020     firstactive[querypos] = -1;
3021     nactive[querypos] = 0;
3022   } else {
3023     debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
3024     best_score = fwd_scores[querypos][hit];
3025 #ifdef SEPARATE_FWD_REV
3026     debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
3027     if ((score = rev_scores[querypos][hit]) > best_score) {
3028       best_score = score;
3029     }
3030 #endif
3031     debug6(printf("\n"));
3032 
3033     for (--hit; hit >= low_hit; --hit) {
3034       debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
3035       if ((score = fwd_scores[querypos][hit]) > best_score) {
3036 	best_score = score;
3037       }
3038 #ifdef SEPARATE_FWD_REV
3039       debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
3040       if ((score = rev_scores[querypos][hit]) > best_score) {
3041 	best_score = score;
3042       }
3043 #endif
3044       debug6(printf("\n"));
3045     }
3046 
3047     threshold = best_score - SCORE_FOR_RESTRICT;
3048     if (threshold < 0) {
3049       threshold = 0;
3050     }
3051 
3052     nactive[querypos] = 0;
3053     firstactive[querypos] = -1;
3054     ptr = &(firstactive[querypos]);
3055     hit = high_hit - 1;
3056     while (hit >= low_hit) {
3057       while (hit >= low_hit && fwd_scores[querypos][hit] <= threshold
3058 #ifdef SEPARATE_FWD_REV
3059 	     && rev_scores[querypos][hit] <= threshold
3060 #endif
3061 	     ) {
3062 	--hit;
3063       }
3064       *ptr = hit;
3065       if (hit >= low_hit) {
3066 	nactive[querypos] += 1;
3067 	ptr = &(active[querypos][hit]);
3068 	--hit;
3069       }
3070     }
3071     *ptr = -1;
3072   }
3073 
3074   debug6(
3075 	 printf("Valid hits (%d) at querypos %d (firstactive %d):",nactive[querypos],querypos,firstactive[querypos]);
3076 	 hit = firstactive[querypos];
3077 	 while (hit != -1) {
3078 	   printf(" %d",hit);
3079 	   hit = active[querypos][hit];
3080 	 }
3081 	 printf("\n");
3082 	 );
3083 
3084   return;
3085 }
3086 
3087 
3088 
/* Builds a ragged int matrix with length1 rows backed by one
   contiguous, zeroed buffer of totallength ints.  Row i begins where
   row i-1 ends; a row whose entry in lengths2 is <= 0 occupies no
   space, so the next row shares its start.  Release the result with
   intmatrix_1d_free. */
static int **
intmatrix_1d_new (int length1, int *lengths2, int totallength) {
  int **rows;
  int *cursor;
  int row;

  rows = (int **) CALLOC(length1,sizeof(int *));
  cursor = (int *) CALLOC(totallength,sizeof(int));
  rows[0] = cursor;

  /* Advance the cursor past each non-empty row to find the next start */
  for (row = 1; row < length1; row++) {
    if (lengths2[row-1] > 0) {
      cursor += lengths2[row-1];
    }
    rows[row] = cursor;
  }

  return rows;
}
3105 
/* Releases a matrix built by intmatrix_1d_new: first the shared
   buffer that row 0 points to, then the array of row pointers. */
static void
intmatrix_1d_free (int ***matrix) {
  int **rows = *matrix;

  FREE(rows[0]);		/* The single buffer behind all rows */
  FREE(*matrix);		/* The row-pointer array itself */
  return;
}
3112 
3113 
/* Builds a ragged int matrix with length1 rows, each allocated
   separately and zeroed.  Row i has lengths2[i] entries; a row whose
   length is <= 0 is left as NULL.  Release the result with
   intmatrix_2d_free. */
static int **
intmatrix_2d_new (int length1, int *lengths2) {
  int **rows;
  int row;

  rows = (int **) CALLOC(length1,sizeof(int *));
  row = 0;
  while (row < length1) {
    if (lengths2[row] > 0) {
      rows[row] = (int *) CALLOC(lengths2[row],sizeof(int));
    } else {
      rows[row] = (int *) NULL;
    }
    row++;
  }

  return rows;
}
3129 
/* Releases a matrix built by intmatrix_2d_new: every non-NULL row
   among the first length1, then the array of row pointers. */
static void
intmatrix_2d_free (int ***matrix, int length1) {
  int **rows = *matrix;
  int row = 0;

  while (row < length1) {
    if (rows[row] != NULL) {
      FREE(rows[row]);
    }
    row++;
  }
  FREE(*matrix);
  return;
}
3142 
3143 
3144 /************************************************************************
3145  *   Cells used for ranking hits
3146  ************************************************************************/
3147 
3148 #if 0
3149 typedef struct Cell_T *Cell_T;
3150 struct Cell_T {
3151   int rootposition;
3152   int endposition;
3153   int querypos;
3154   int hit;
3155   bool fwdp;
3156   int score;
3157 };
3158 
3159 /* Replaced by Cellpool_T routines */
3160 static void
3161 Cell_free (Cell_T *old) {
3162   FREE(*old);
3163   return;
3164 }
3165 
3166 
3167 static Cell_T
3168 Cell_new (int rootposition, int endposition, int querypos, int hit, bool fwdp, int score) {
3169   Cell_T new = (Cell_T) MALLOC(sizeof(*new));
3170 
3171   new->rootposition = rootposition;
3172   new->endposition = endposition;
3173   new->querypos = querypos;
3174   new->hit = hit;
3175   new->fwdp = fwdp;
3176   new->score = score;
3177   return new;
3178 }
3179 #endif
3180 
3181 
3182 #ifdef SLOW
3183 /* Used for the final set of cells, to see if we have non-overlapping paths */
3184 static int
Cell_interval_cmp(const void * a,const void * b)3185 Cell_interval_cmp (const void *a, const void *b) {
3186   Cell_T x = * (Cell_T *) a;
3187   Cell_T y = * (Cell_T *) b;
3188 
3189   if (x->rootposition < y->rootposition) {
3190     return -1;
3191   } else if (y->rootposition < x->rootposition) {
3192     return +1;
3193 
3194   } else if (x->endposition > y->endposition) {
3195     return -1;
3196   } else if (y->endposition > x->endposition) {
3197     return +1;
3198 
3199   } else {
3200     return 0;
3201   }
3202 }
3203 #endif
3204 
3205 
3206 /* Used for the initial set of cells, to get the end cell for each rootposition */
3207 static int
Cell_rootposition_left_cmp(const void * a,const void * b)3208 Cell_rootposition_left_cmp (const void *a, const void *b) {
3209   Cell_T x = * (Cell_T *) a;
3210   Cell_T y = * (Cell_T *) b;
3211 
3212   if (x->rootposition < y->rootposition) {
3213     return -1;
3214   } else if (y->rootposition < x->rootposition) {
3215     return +1;
3216 
3217 #if 0
3218     /* Want score ranking, rather than interval ranking here.  Otherwise, we don't get the final endposition */
3219   } else if (x->endposition < y->endposition) {
3220     return -1;
3221   } else if (y->endposition < x->endposition) {
3222     return +1;
3223 #endif
3224 
3225 #if 0
3226   } else if (x->tracei < y->tracei) {
3227     return -1;
3228   } else if (y->tracei < x->tracei) {
3229     return +1;
3230 #endif
3231   } else if (x->score > y->score) {
3232     return -1;
3233   } else if (y->score > x->score) {
3234     return +1;
3235   } else if (x->querypos > y->querypos) {
3236     return -1;
3237   } else if (y->querypos > x->querypos) {
3238     return +1;
3239   } else if (x->hit < y->hit) {
3240     return -1;
3241   } else if (y->hit < x->hit) {
3242     return +1;
3243   } else if (x->fwdp == true && y->fwdp == false) {
3244     return -1;
3245   } else if (y->fwdp == true && x->fwdp == false) {
3246     return +1;
3247   } else {
3248     return 0;
3249   }
3250 }
3251 
3252 
3253 /* Used for the initial set of cells, to get the end cell for each rootposition */
3254 static int
Cell_rootposition_right_cmp(const void * a,const void * b)3255 Cell_rootposition_right_cmp (const void *a, const void *b) {
3256   Cell_T x = * (Cell_T *) a;
3257   Cell_T y = * (Cell_T *) b;
3258 
3259   if (x->rootposition < y->rootposition) {
3260     return -1;
3261   } else if (y->rootposition < x->rootposition) {
3262     return +1;
3263 
3264 #if 0
3265     /* Want score ranking, rather than interval ranking here.  Otherwise, we don't get the final endposition */
3266   } else if (x->endposition < y->endposition) {
3267     return -1;
3268   } else if (y->endposition < x->endposition) {
3269     return +1;
3270 #endif
3271 
3272 #if 0
3273   } else if (x->tracei < y->tracei) {
3274     return -1;
3275   } else if (y->tracei < x->tracei) {
3276     return +1;
3277 #endif
3278   } else if (x->score > y->score) {
3279     return -1;
3280   } else if (y->score > x->score) {
3281     return +1;
3282   } else if (x->querypos > y->querypos) {
3283     return -1;
3284   } else if (y->querypos > x->querypos) {
3285     return +1;
3286   } else if (x->hit > y->hit) {
3287     return -1;
3288   } else if (y->hit > x->hit) {
3289     return +1;
3290   } else if (x->fwdp == true && y->fwdp == false) {
3291     return -1;
3292   } else if (y->fwdp == true && x->fwdp == false) {
3293     return +1;
3294   } else {
3295     return 0;
3296   }
3297 }
3298 
3299 
3300 static int
Cell_score_cmp(const void * a,const void * b)3301 Cell_score_cmp (const void *a, const void *b) {
3302   Cell_T x = * (Cell_T *) a;
3303   Cell_T y = * (Cell_T *) b;
3304 
3305   if (x->score > y->score) {
3306     return -1;
3307   } else if (y->score > x->score) {
3308     return +1;
3309   } else {
3310     return 0;
3311   }
3312 }
3313 
3314 
3315 #ifdef USE_THRESHOLD_SCORE
3316 /* Doesn't work well for short dynamic programming at the ends of a read */
3317 static Cell_T *
Linkmatrix_get_cells_fwd(int * nunique,struct Link_T ** links,int querystart,int queryend,int * npositions,int indexsize,int bestscore,bool favor_right_p,Cellpool_T cellpool)3318 Linkmatrix_get_cells_fwd (int *nunique, struct Link_T **links, int querystart, int queryend, int *npositions,
3319 			  int indexsize, int bestscore, bool favor_right_p, Cellpool_T cellpool) {
3320   Cell_T *sorted, *cells;
3321   List_T celllist = NULL;
3322   int querypos, hit;
3323   int rootposition, last_rootposition;
3324   int threshold_score, best_score_for_root;
3325   int ngood, ncells, i, k;
3326 
3327   if (bestscore > 2*suboptimal_score_end) {
3328     threshold_score = bestscore - suboptimal_score_end;
3329   } else {
3330     threshold_score = bestscore/2;
3331   }
3332   if (threshold_score <= indexsize) {
3333     threshold_score = indexsize + 1;
3334   }
3335 
3336   ncells = 0;
3337   for (querypos = querystart; querypos <= queryend; querypos++) {
3338     ngood = 0;
3339     for (hit = 0; hit < npositions[querypos]; hit++) {
3340       if (links[querypos][hit].fwd_score >= threshold_score) {
3341 	ngood++;
3342       }
3343     }
3344     if (ngood > 0 && ngood <= 10) {
3345       for (hit = 0; hit < npositions[querypos]; hit++) {
3346 	debug11(printf("  At %d,%d, comparing score %d with threshold_score %d\n",
3347 		       querypos,hit,links[querypos][hit].fwd_score,threshold_score));
3348 	if (links[querypos][hit].fwd_score >= threshold_score) {
3349 	  rootposition = links[querypos][hit].fwd_rootposition;
3350 	  /* tracei = links[querypos][hit].fwd_tracei; */
3351 	  celllist = Cellpool_push(celllist,cellpool,rootposition,
3352 				   /*endposition*/(int) mappings[querypos][hit],
3353 				   querypos,hit,/*fwdp*/true,links[querypos][hit].fwd_score);
3354 	  ncells++;
3355 	}
3356       }
3357     }
3358   }
3359 
3360   if (ncells == 0) {
3361     *nunique = 0;
3362     return (Cell_T *) NULL;
3363 
3364   } else {
3365     /* Take best result for each tracei */
3366     /* Using alloca can give a stack overflow */
3367     cells = (Cell_T *) List_to_array(celllist,NULL);
3368     /* List_free(&celllist); -- No need with cellpool */
3369 
3370     if (favor_right_p == true) {
3371       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_right_cmp);
3372     } else {
3373       /* favor_right_p is always false for GMAP */
3374       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_left_cmp);
3375     }
3376 
3377     sorted = (Cell_T *) MALLOC(ncells * sizeof(Cell_T)); /* Return value */
3378     k = 0;
3379 
3380     last_rootposition = -1;
3381     best_score_for_root = -1;
3382     for (i = 0; i < ncells; i++) {
3383       if (cells[i]->rootposition != last_rootposition) {
3384 	debug11(printf("Pushing rootposition %d, trace #%d, score %d, pos %d, hit %d\n",
3385 		       cells[i]->rootposition,cells[i]->tracei,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3386 	sorted[k++] = cells[i];
3387 	last_rootposition = cells[i]->rootposition;
3388 	best_score_for_root = cells[i]->score;
3389 
3390       } else if (cells[i]->querypos == best_score_for_root) {
3391 	debug11(printf("Equivalent cell for rootposition %d, trace #%d, score %d, pos %d, hit %d\n",
3392 		       cells[i]->rootposition,cells[i]->tracei,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3393 	sorted[k++] = cells[i];
3394 	/* last_rootposition = cells[i]->rootposition;*/
3395 	/* best_score_for_root = cells[i]->score; */
3396 
3397       } else {
3398 	/* Cell_free(&(cells[i])); -- no need with cellpool */
3399 
3400       }
3401     }
3402     debug11(printf("\n"));
3403     FREE(cells);
3404 
3405     *nunique = k;
3406     qsort(sorted,*nunique,sizeof(Cell_T),Cell_score_cmp);
3407 
3408     return sorted;
3409   }
3410 }
3411 
3412 #else
3413 
3414 static Cell_T *
get_cells_fwd(int * nunique,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int querystart,int queryend,int * npositions,bool favor_right_p,Cellpool_T cellpool)3415 get_cells_fwd (int *nunique, struct Link_T **links, int **fwd_scores, Chrpos_T **mappings,
3416 	       int querystart, int queryend, int *npositions,
3417 	       bool favor_right_p, Cellpool_T cellpool) {
3418   Cell_T *sorted, *cells;
3419   List_T celllist = NULL;
3420   int querypos, hit;
3421   int rootposition, last_rootposition;
3422   int best_score_for_root;
3423   int ncells, i, k;
3424 
3425   ncells = 0;
3426   for (querypos = querystart; querypos <= queryend; querypos++) {
3427     for (hit = 0; hit < npositions[querypos]; hit++) {
3428       if (fwd_scores[querypos][hit] > 0) {
3429 	rootposition = links[querypos][hit].fwd_rootposition;
3430 	/* tracei = links[querypos][hit].fwd_tracei; */
3431 	celllist = Cellpool_push(celllist,cellpool,rootposition,
3432 				 /*endposition*/(int) mappings[querypos][hit],
3433 				 querypos,hit,/*fwdp*/true,fwd_scores[querypos][hit]);
3434 	ncells++;
3435       }
3436     }
3437   }
3438 
3439   debug12(printf("Have %d cells\n",ncells));
3440   if (ncells == 0) {
3441     *nunique = 0;
3442     return (Cell_T *) NULL;
3443 
3444   } else {
3445     /* Take best result for each tracei */
3446     /* Using alloca can give a stack overflow */
3447     cells = (Cell_T *) List_to_array(celllist,NULL);
3448     /* List_free(&celllist); -- No need with cellpool */
3449 
3450     if (favor_right_p == true) {
3451       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_right_cmp);
3452     } else {
3453       /* favor_right_p is always false for GMAP */
3454       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_left_cmp);
3455     }
3456 
3457     sorted = (Cell_T *) MALLOC(ncells * sizeof(Cell_T)); /* Return value */
3458     k = 0;
3459 
3460     last_rootposition = -1;
3461     best_score_for_root = -1;
3462     for (i = 0; i < ncells; i++) {
3463       if (cells[i]->rootposition != last_rootposition) {
3464 	/* Take best cell at this rootposition */
3465 	debug11(printf("Pushing rootposition %d, score %d, pos %d, hit %d\n",
3466 		       cells[i]->rootposition,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3467 	sorted[k++] = cells[i];
3468 	last_rootposition = cells[i]->rootposition;
3469 	best_score_for_root = cells[i]->score;
3470 
3471       } else if (cells[i]->score == best_score_for_root) {
3472 	/* Take equivalent cell for this rootposition */
3473 	debug11(printf("Pushing equivalent end for rootposition %d, score %d, pos %d, hit %d\n",
3474 		       cells[i]->rootposition,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3475 	sorted[k++] = cells[i];
3476 	/* last_rootposition = cells[i]->rootposition; */
3477 	/* best_score_for_root = cells[i]->score; */
3478 
3479       } else {
3480 	/* Cell_free(&(cells[i])); -- no need with cellpool */
3481       }
3482     }
3483     debug11(printf("\n"));
3484     FREE(cells);
3485 
3486     *nunique = k;
3487     qsort(sorted,*nunique,sizeof(Cell_T),Cell_score_cmp);
3488 
3489     return sorted;
3490   }
3491 }
3492 
3493 #endif
3494 
3495 #if 0
3496 static Cell_T *
3497 Linkmatrix_get_cells_both (int *nunique, struct Link_T **links, int querystart, int queryend, int *npositions,
3498 			   int indexsize, int bestscore, bool favor_right_p, Cellpool_T cellpool) {
3499   Cell_T *sorted, *cells;
3500   List_T celllist = NULL;
3501   int querypos, hit;
3502   int rootposition, last_rootposition;
3503   int threshold_score, best_score_for_root;
3504   int ngood, ncells, i, k;
3505 
3506   if (bestscore > 2*suboptimal_score_end) {
3507     threshold_score = bestscore - suboptimal_score_end;
3508   } else {
3509     threshold_score = bestscore/2;
3510   }
3511   if (threshold_score <= indexsize) {
3512     threshold_score = indexsize + 1;
3513   }
3514 
3515   debug11(printf("Entered Linkmatrix_get_cells_both with querystart %d, queryend %d, threshold score %d\n",
3516 		 querystart,queryend,threshold_score));
3517 
3518   ncells = 0;
3519   for (querypos = querystart; querypos <= queryend; querypos++) {
3520     ngood = 0;
3521     for (hit = 0; hit < npositions[querypos]; hit++) {
3522       if (links[querypos][hit].fwd_score >= threshold_score) {
3523 	ngood++;
3524       }
3525 #ifdef SEPARATE_FWD_REV
3526       if (links[querypos][hit].rev_score >= threshold_score) {
3527 	ngood++;
3528       }
3529 #endif
3530     }
3531     if (ngood > 0 && ngood <= 10) {
3532       for (hit = 0; hit < npositions[querypos]; hit++) {
3533 	if (links[querypos][hit].fwd_score >= threshold_score) {
3534 	  rootposition = links[querypos][hit].fwd_rootposition;
3535 	  /* tracei = links[querypos][hit].fwd_tracei; */
3536 	  celllist = Cellpool_push(celllist,cellpool,rootposition,
3537 				   /*endposition*/(int) mappings[querypos][hit],
3538 				   querypos,hit,/*fwdp*/true,links[querypos][hit].fwd_score);
3539 	  ncells++;
3540 	}
3541 #ifdef SEPARATE_FWD_REV
3542 	if (links[querypos][hit].rev_score >= threshold_score) {
3543 	  rootposition = links[querypos][hit].rev_rootposition;
3544 	  /* tracei = links[querypos][hit].rev_tracei; */
3545 	  celllist = Cellpool_push(celllist,cellpool,rootposition,
3546 				   /*endposition*/(int) mappings[querypos][hit],
3547 				   querypos,hit,/*fwdp*/false,links[querypos][hit].rev_score);
3548 	  ncells++;
3549 	}
3550 #endif
3551       }
3552     }
3553   }
3554 
3555   debug12(printf("Have %d cells\n",ncells));
3556   if (ncells == 0) {
3557     *nunique = 0;
3558     return (Cell_T *) NULL;
3559 
3560   } else {
3561     /* Take best result for each tracei */
3562     /* Using alloca can give a stack overflow */
3563     cells = (Cell_T *) List_to_array(celllist,NULL);
3564     /* List_free(&celllist); -- no need with cellpool */
3565 
3566     if (favor_right_p == true) {
3567       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_right_cmp);
3568     } else {
3569       /* favor_right_p is always false for GMAP */
3570       qsort(cells,ncells,sizeof(Cell_T),Cell_rootposition_left_cmp);
3571     }
3572 
3573     sorted = (Cell_T *) MALLOC(ncells * sizeof(Cell_T)); /* Return value */
3574     k = 0;
3575 
3576     last_rootposition = -1;
3577     best_score_for_root = -1;
3578     for (i = 0; i < ncells; i++) {
3579       if (cells[i]->rootposition != last_rootposition) {
3580 	/* Take best cell at this rootposition */
3581 	debug11(printf("rootposition %d, score %d, pos %d, hit %d\n",
3582 		       cells[i]->rootposition,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3583 	sorted[k++] = cells[i];
3584 	last_rootposition = cells[i]->rootposition;
3585 	best_score_for_root = cells[i]->score;
3586 
3587       } else if (cells[i]->score == best_score_for_root) {
3588 	/* Take equivalent end cell for this rootposition */
3589 	debug11(printf("equivalent end for rootposition %d, score %d, pos %d, hit %d\n",
3590 		       cells[i]->rootposition,cells[i]->score,cells[i]->querypos,cells[i]->hit));
3591 	sorted[k++] = cells[i];
3592 	/* last_rootposition = cells[i]->rootposition; */
3593 	/* best_score_for_root = cells[i]->score; */
3594 
3595       } else {
3596 	/* Cell_free(&(cells[i])); -- no need with cellpool */
3597       }
3598     }
3599     debug11(printf("\n"));
3600     FREE(cells);
3601 
3602     *nunique = k;
3603     qsort(sorted,*nunique,sizeof(Cell_T),Cell_score_cmp);
3604 
3605     return sorted;
3606   }
3607 }
3608 #endif
3609 
3610 
3611 #ifdef MOVE_TO_STAGE3
3612 static int
binary_search(int lowi,int highi,Chrpos_T * mappings,Chrpos_T goal)3613 binary_search (int lowi, int highi, Chrpos_T *mappings, Chrpos_T goal) {
3614   int middlei;
3615 
3616   debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,goal));
3617   if (mappings == NULL) {
3618     return -1;
3619   } else {
3620     while (lowi < highi) {
3621       middlei = lowi + ((highi - lowi) / 2);
3622       debug10(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
3623 		     lowi,mappings[lowi],middlei,mappings[middlei],
3624 		     highi,mappings[highi],goal));
3625       if (goal < mappings[middlei]) {
3626 	highi = middlei;
3627       } else if (goal > mappings[middlei]) {
3628 	lowi = middlei + 1;
3629       } else {
3630 	debug10(printf("binary search returns %d\n",middlei));
3631 	return middlei;
3632       }
3633     }
3634 
3635     debug10(printf("binary search returns %d\n",highi));
3636     return highi;
3637   }
3638 }
3639 #endif
3640 
3641 
3642 /* Returns celllist */
3643 /* For PMAP, indexsize is in aa. */
3644 static Cell_T *
align_compute_scores_lookback(int * ncells,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int * npositions,int totalpositions,bool oned_matrix_p,Chrpos_T * minactive,Chrpos_T * maxactive,int * firstactive,int * nactive,Cellpool_T cellpool,int querystart,int queryend,int querylength,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,char * queryseq_ptr,bool anchoredp,int anchor_querypos,Chrpos_T anchor_position,bool localp,bool skip_repetitive_p,bool use_canonical_p,int non_canonical_penalty,bool debug_graphic_p,bool favor_right_p,bool middlep)3645 align_compute_scores_lookback (int *ncells, struct Link_T **links, int **fwd_scores,
3646 			       Chrpos_T **mappings, int *npositions, int totalpositions,
3647 			       bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
3648 			       int *firstactive, int *nactive, Cellpool_T cellpool,
3649 			       int querystart, int queryend, int querylength,
3650 
3651 			       Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
3652 
3653 			       int indexsize,
3654 #ifdef DEBUG9
3655 			       char *queryseq_ptr,
3656 #endif
3657 #ifdef MOVE_TO_STAGE3
3658 			       bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
3659 #endif
3660 			       bool localp, bool skip_repetitive_p,
3661 			       bool use_canonical_p, int non_canonical_penalty, bool debug_graphic_p,
3662 			       bool favor_right_p, bool middlep) {
3663 #if 0
3664   bool anchoredp = false;
3665   int anchor_querypos = 0;
3666   Chrpos_T anchor_position = 0;
3667 #endif
3668 
3669   Cell_T *cells;
3670   Link_T currlink;
3671   int curr_querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
3672   int nskipped, min_hits, specific_querypos, specific_low_hit, specific_high_hit, next_querypos;
3673   Intlist_T processed = NULL;
3674   int best_overall_score = 0;
3675   int grand_fwd_score, grand_fwd_querypos, grand_fwd_hit, best_fwd_hit, best_fwd_score;
3676 #ifdef SEPARATE_FWD_REV
3677   int grand_rev_score, grand_rev_querypos, grand_rev_hit, best_rev_hit, best_rev_score;
3678 #ifdef DEBUG9
3679   int rev_tracei = 0;
3680 #endif
3681 #endif
3682   int **active;
3683   Chrpos_T position, prevposition;
3684   int fwd_tracei = 0;
3685 #if 0
3686   int *lastGT, *lastAG;
3687 #ifndef PMAP
3688   int *lastCT, *lastAC;
3689 #endif
3690 #endif
3691 #ifdef DEBUG9
3692   Link_T prevlink;
3693   char *oligo;
3694 #endif
3695 #ifdef DEBUG12
3696   Link_T termlink = NULL;
3697 #endif
3698 
3699 #ifdef PMAP
3700   indexsize_nt = indexsize*3;
3701 #else
3702   indexsize_nt = indexsize;
3703 #endif
3704   indexsize_query = indexsize;	/* Use when evaluating across query positions */
3705 
3706 
3707 #ifdef DEBUG9
3708   oligo = (char *) CALLOC(indexsize+1,sizeof(char));
3709 #endif
3710   debug0(printf("Lookback: querystart = %d, queryend = %d, indexsize = %d\n",querystart,queryend,indexsize));
3711 
3712   assert(oned_matrix_p == true);
3713   if (oned_matrix_p == true) {
3714     active = intmatrix_1d_new(querylength,npositions,totalpositions);
3715   } else {
3716     active = intmatrix_2d_new(querylength,npositions);
3717   }
3718 
3719 #if 0
3720   firstactive = (int *) MALLOC(querylength * sizeof(int));
3721   nactive = (int *) MALLOC(querylength * sizeof(int));
3722 #endif
3723 
3724   /* Initialize */
3725   for (curr_querypos = 0; curr_querypos < querystart; curr_querypos++) {
3726     debug6(printf("3.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
3727     firstactive[curr_querypos] = -1;
3728     nactive[curr_querypos] = 0;
3729   }
3730   while (curr_querypos <= queryend && npositions[curr_querypos] <= 0) {
3731     debug6(printf("4.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
3732     debug9(printf("Skipping querypos %d which has no positions\n",curr_querypos));
3733     firstactive[curr_querypos] = -1;
3734     nactive[curr_querypos] = 0;
3735     curr_querypos++;
3736   }
3737 
3738 #ifdef MOVE_TO_STAGE3
3739   if (anchoredp == true) {
3740     /* Guaranteed to find a hit */
3741     hit = binary_search(0,npositions[anchor_querypos],mappings[anchor_querypos],/*goal*/anchor_position);
3742     if (mappings[anchor_querypos] == NULL) {
3743       printf("mappings at anchor_querypos %d is NULL.  mappings = %p\n",anchor_querypos,mappings);
3744       abort();
3745     }
3746 
3747     currlink = &(links[anchor_querypos][hit]);
3748 #ifndef SEPARATE_FWD_REV
3749     currlink->fwd_pos = currlink->fwd_hit = -1;
3750     currlink->fwd_consecutive = EXON_DEFN;
3751     currlink->fwd_tracei = 0;
3752     fwd_scores[anchor_querypos][hit] = indexsize_nt;
3753 #else
3754     fprintf(stderr,"Not implemented yet\n");
3755     abort();
3756 #endif
3757 
3758     debug6(printf("Setting firstactive for anchorpos %d to be %d\n",anchor_querypos,hit));
3759     firstactive[anchor_querypos] = hit;
3760     nactive[anchor_querypos] = 1;
3761     active[anchor_querypos][hit] = -1;
3762 
3763     debug6(printf("Pushing anchorpos %d as processed\n",anchor_querypos));
3764     processed = Intlist_push(processed,anchor_querypos);
3765 
3766   } else
3767 #endif
3768 
3769   if (curr_querypos <= queryend) {
3770     for (hit = 0; hit < npositions[curr_querypos]; hit++) {
3771       currlink = &(links[curr_querypos][hit]);
3772 #ifndef SEPARATE_FWD_REV
3773       currlink->fwd_pos = currlink->fwd_hit = -1;
3774       currlink->fwd_consecutive = indexsize_nt;
3775       currlink->fwd_tracei = -1;
3776       /* currlink->fwd_rootnlinks = 1; */
3777       fwd_scores[curr_querypos][hit] = indexsize_nt;
3778 #else
3779       currlink->fwd_pos = currlink->fwd_hit = -1;
3780       currlink->fwd_consecutive = indexsize_nt;
3781       currlink->fwd_tracei = -1;
3782       /* currlink->fwd_rootnlinks = 1; */
3783       fwd_scores[curr_querypos][hit] = indexsize_nt;
3784       if (splicingp == true) {
3785 	currlink->rev_pos = currlink->rev_hit = -1;
3786 	currlink->rev_consecutive = indexsize_nt;
3787 	currlink->rev_tracei = -1;
3788 	/* currlink->rev_rootnlinks = 1; */
3789 	rev_scores[curr_querypos][hit] = indexsize_nt;
3790       }
3791 #endif
3792     }
3793     revise_active_lookback(active,firstactive,nactive,0,npositions[curr_querypos],fwd_scores,curr_querypos);
3794   }
3795 
3796   grand_fwd_score = 0;
3797   grand_fwd_querypos = -1;
3798   grand_fwd_hit = -1;
3799 #ifdef SEPARATE_FWD_REV
3800   if (splicingp == true) {
3801     grand_rev_score = 0;
3802     grand_rev_querypos = -1;
3803     grand_rev_hit = -1;
3804   }
3805 #endif
3806 
3807   nskipped = 0;
3808   min_hits = 1000000;
3809   specific_querypos = -1;
3810 
3811   /* curr_querypos += 1; -- this causes curr_querypos at querystart to be ignored */
3812   while (curr_querypos <= queryend) {
3813     best_fwd_score = 0;
3814     best_fwd_hit = -1;
3815 #ifdef SEPARATE_FWD_REV
3816     best_rev_score = 0;
3817     best_rev_hit = -1;
3818 #endif
3819 
3820     debug9(printf("Positions at querypos %d (forward order):",curr_querypos);
3821 	   for (hit = 0; hit < npositions[curr_querypos]; hit++) {
3822 	     printf(" %u",mappings[curr_querypos][hit]);
3823 	   }
3824 	   printf("\n");
3825 	   );
3826 
3827     hit = 0;
3828     while (hit < npositions[curr_querypos] && mappings[curr_querypos][hit] < minactive[curr_querypos]) {
3829       hit++;
3830     }
3831     low_hit = hit;
3832     while (hit < npositions[curr_querypos] && mappings[curr_querypos][hit] <= maxactive[curr_querypos]) {
3833       hit++;
3834     }
3835     high_hit = hit;
3836     debug9(printf("Querypos %d has hit %d..%d out of %d (minactive = %u, maxactive = %u)\n",
3837 		  curr_querypos,low_hit,high_hit-1,npositions[curr_querypos],minactive[curr_querypos],maxactive[curr_querypos]));
3838 
3839     /* Can't use nactive yet, so use high_hit - low_hit */
3840     if (skip_repetitive_p && high_hit - low_hit >= MAX_NACTIVE && nskipped <= MAX_SKIPPED) { /* Previously turned off */
3841       debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,curr_querypos));
3842       firstactive[curr_querypos] = -1;
3843       nactive[curr_querypos] = 0;
3844       nskipped++;
3845       debug9(printf("  %d skipped because of %d hits\n",nskipped,high_hit - low_hit + 1));
3846 
3847       /* Store most specific querypos in section of skipped */
3848       if (high_hit - low_hit < min_hits) {
3849 	min_hits = high_hit - low_hit;
3850 	specific_querypos = curr_querypos;
3851 	specific_low_hit = low_hit;
3852 	specific_high_hit = high_hit;
3853       }
3854       curr_querypos++;
3855 
3856     } else {
3857       if (nskipped > MAX_SKIPPED) {
3858 	debug9(printf("Too many skipped.  Going back to specific querypos %d\n",specific_querypos));
3859 	next_querypos = curr_querypos;
3860 	curr_querypos = specific_querypos;
3861 	low_hit = specific_low_hit;
3862 	high_hit = specific_high_hit;
3863       } else {
3864 	next_querypos = curr_querypos + 1;
3865       }
3866 
3867       if ((nhits = high_hit - low_hit) > 0) {
3868 	if (nhits == 1) {
3869 	  currlink = &(links[curr_querypos][low_hit]);
3870 	  position = mappings[curr_querypos][low_hit];
3871 
3872 	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
3873 	  debug9(oligo[indexsize] = '\0');
3874 	  debug9(printf("Finding link looking back from querypos %d,%d at %ux%d (%s).  prev_querypos was %d\n",
3875 			curr_querypos,low_hit,position,active[curr_querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
3876 
3877 	  score_querypos_lookback_one(&fwd_tracei,currlink,curr_querypos,low_hit,position,
3878 				      links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
3879 				      indexsize,processed,localp,splicingp,use_canonical_p,
3880 				      non_canonical_penalty);
3881 
3882 	  if (fwd_scores[curr_querypos][low_hit] > 0) {
3883 	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[curr_querypos][low_hit]));
3884 	    best_fwd_score = fwd_scores[curr_querypos][low_hit];
3885 	    best_fwd_hit = low_hit;
3886 	  }
3887 
3888 	} else {
3889 	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
3890 	  debug9(oligo[indexsize] = '\0');
3891 	  debug9(printf("Finding links looking back from querypos %d,%d..%d at (%u..%u) (%s).  prev_querypos was %d\n",
3892 			curr_querypos,low_hit,high_hit-1,mappings[curr_querypos][low_hit],mappings[curr_querypos][high_hit-1],
3893 			oligo,processed ? Intlist_head(processed) : -1));
3894 
3895 	  score_querypos_lookback_mult(&fwd_tracei,low_hit,high_hit,curr_querypos,
3896 				       /*positions*/&(mappings[curr_querypos][low_hit]),
3897 				       links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
3898 				       indexsize,processed,localp,splicingp,use_canonical_p,
3899 				       non_canonical_penalty);
3900 
3901 	  debug9(printf("Checking hits from low_hit %d to high_hit %d\n",low_hit,high_hit));
3902 	  for (hit = low_hit; hit < high_hit; hit++) {
3903 	    debug9(printf("Hit %d has score %d\n",hit,fwd_scores[curr_querypos][hit]));
3904 	    if (fwd_scores[curr_querypos][hit] > best_fwd_score) {
3905 	      best_fwd_score = fwd_scores[curr_querypos][hit];
3906 	      best_fwd_hit = hit;
3907 	    }
3908 	  }
3909 	}
3910 
3911 	if (best_fwd_score > best_overall_score) {
3912 	  best_overall_score = best_fwd_score;
3913 	}
3914 
3915 	nskipped = 0;
3916 	min_hits = 1000000;
3917 	specific_querypos = -1;
3918 
3919 #ifndef SEPARATE_FWD_REV
3920 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d\n",
3921 		      curr_querypos,best_fwd_hit));
3922 #else
3923 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d and best_rev_hit %d\n",
3924 		      curr_querypos,best_fwd_hit,best_rev_hit));
3925 #endif
3926 
3927 #if 1
3928 	/* Previously, thought that using this code causes misses in
3929 	   some alignments, but not using it causes missing end
3930 	   exons */
3931 	if (middlep == false && best_fwd_hit < 0) {
3932 	  /* Allow for a new start, to test different ends */
3933 	  for (hit = 0; hit < npositions[curr_querypos]; hit++) {
3934 	    currlink = &(links[curr_querypos][hit]);
3935 #ifndef SEPARATE_FWD_REV
3936 	    currlink->fwd_pos = currlink->fwd_hit = -1;
3937 	    currlink->fwd_consecutive = indexsize_nt;
3938 	    currlink->fwd_tracei = -1;
3939 	    /* currlink->fwd_rootnlinks = 1; */
3940 	    fwd_scores[curr_querypos][hit] = indexsize_nt;
3941 #else
3942 	    currlink->fwd_pos = currlink->fwd_hit = -1;
3943 	    currlink->fwd_consecutive = indexsize_nt;
3944 	    currlink->fwd_tracei = -1;
3945 	    /* currlink->fwd_rootnlinks = 1; */
3946 	    fwd_scores[curr_querypos][hit] = indexsize_nt;
3947 	    if (splicingp == true) {
3948 	      currlink->rev_pos = currlink->rev_hit = -1;
3949 	      currlink->rev_consecutive = indexsize_nt;
3950 	      currlink->rev_tracei = -1;
3951 	      /* currlink->rev_rootnlinks = 1; */
3952 	      rev_scores[curr_querypos][hit] = indexsize_nt;
3953 	    }
3954 #endif
3955 	  }
3956 	}
3957 #endif
3958 
3959 	if (splicingp == true && best_fwd_hit >= 0 && links[curr_querypos][best_fwd_hit].fwd_hit < 0 &&
3960 	    grand_fwd_querypos >= 0 && curr_querypos >= grand_fwd_querypos + indexsize_query) {
3961 	  if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (curr_querypos - grand_fwd_querypos)) > 0) {
3962 	    prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
3963 	    debug12(printf("Considering prevposition %u to position %u as a grand fwd lookback\n",prevposition,position));
3964 	    for (hit = low_hit; hit < high_hit; hit++) {
3965 	      if ((position = mappings[curr_querypos][hit]) > prevposition + maxintronlen) {
3966 		debug12(printf("  => Too long\n"));
3967 	      } else if (position >= prevposition + indexsize_nt) {
3968 		currlink = &(links[curr_querypos][hit]);
3969 		currlink->fwd_consecutive = indexsize_nt;
3970 		currlink->fwd_pos = grand_fwd_querypos;
3971 		currlink->fwd_hit = grand_fwd_hit;
3972 		currlink->fwd_tracei = ++fwd_tracei;
3973 		fwd_scores[curr_querypos][hit] = best_fwd_score;
3974 #ifdef DEBUG9
3975 		prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
3976 		currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
3977 		currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
3978 		currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
3979 #endif
3980 	      }
3981 	    }
3982 	    debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
3983 			   curr_querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit]));
3984 	  }
3985 	}
3986 
3987 	/* Use >= to favor longer path in case of ties */
3988 	if (best_fwd_hit >= 0 && best_fwd_score >= grand_fwd_score &&
3989 	    links[curr_querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
3990 	  grand_fwd_score = best_fwd_score;
3991 	  grand_fwd_querypos = curr_querypos;
3992 	  grand_fwd_hit = best_fwd_hit;
3993 	  debug12(termlink = &(links[curr_querypos][best_fwd_hit]));
3994 	  debug12(printf("At querypos %d, revising grand fwd to be hit %d with score of %d (pointing back to %d,%d)\n",
3995 			 curr_querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
3996 	}
3997 
3998 #ifdef SEPARATE_FWD_REV
3999 	if (best_rev_score > best_overall_score) {
4000 	  best_overall_score = best_rev_score;
4001 	}
4002 
4003 	if (splicingp == false || use_canonical_p == false) {
4004 	  /* rev scores should be the same as the fwd scores */
4005 	} else {
4006 	  if (best_rev_hit >= 0 && links[curr_querypos][best_rev_hit].rev_hit < 0 &&
4007 	      grand_rev_querypos >= 0 && curr_querypos >= grand_rev_querypos + indexsize_query) {
4008 	    prevlink = &(links[grand_rev_querypos][grand_rev_hit]);
4009 	    if ((best_rev_score = prevlink->rev_score - (curr_querypos - grand_rev_querypos)) > 0) {
4010 	      prevposition = mappings[grand_rev_querypos][grand_rev_hit];
4011 	      debug12(printf("Considering prevposition %u to position %u as a grand rev lookback\n",prevposition,position));
4012 	      for (hit = low_hit; hit < high_hit; hit++) {
4013 		if ((position = mappings[curr_querypos][hit]) > prevposition + maxintronlen) {
4014 		  debug12(printf("  => Too long\n"));
4015 		} else if (position >= prevposition + indexsize_nt) {
4016 		  currlink = &(links[curr_querypos][hit]);
4017 		  currlink->rev_consecutive = indexsize_nt;
4018 		  /* currlink->rev_rootnlinks = 1; */
4019 		  currlink->rev_pos = grand_rev_querypos;
4020 		  currlink->rev_hit = grand_rev_hit;
4021 		  currlink->rev_score = best_rev_score;
4022 #ifdef DEBUG9
4023 		  currlink->rev_tracei = ++rev_tracei;
4024 		  currlink->rev_intronnrev = prevlink->rev_intronnfwd;
4025 		  currlink->rev_intronnrev = prevlink->rev_intronnrev;
4026 		  currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
4027 #endif
4028 		}
4029 	      }
4030 	      debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
4031 			     curr_querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
4032 	    }
4033 	  }
4034 
4035 	  /* Use >= to favor longer path in case of ties */
4036 	  if (best_rev_hit >= 0 && best_rev_score >= grand_rev_score &&
4037 	      links[curr_querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
4038 	    grand_rev_score = best_rev_score;
4039 	    grand_rev_querypos = curr_querypos;
4040 	    grand_rev_hit = best_rev_hit;
4041 	  }
4042 	}
4043 #endif
4044       }
4045 
4046       revise_active_lookback(active,firstactive,nactive,low_hit,high_hit,fwd_scores,curr_querypos);
4047 
4048       /* Need to push querypos, even if firstactive[curr_querypos] == -1 */
4049       /* Want to skip npositions[curr_querypos] == 0, so we can find adjacent despite mismatch or overabundance */
4050       if (npositions[curr_querypos] > 0) {
4051 	debug6(printf("Pushing querypos %d onto processed\n",curr_querypos));
4052 	processed = Intlist_push(processed,curr_querypos);
4053       }
4054       curr_querypos = next_querypos;
4055     }
4056   }
4057   debug9(printf("End of loop lookback\n"));
4058 
4059   Intlist_free(&processed);
4060 
4061   /* These are the final active oligomers, after pruning by score */
4062   if (debug_graphic_p == true) {
4063     mappings_dump_R(mappings,npositions,querylength,active,firstactive,indexsize,"active.mers");
4064   }
4065 
4066 #if 0
4067   FREE(nactive);
4068   FREE(firstactive);
4069 #endif
4070 
4071   if (oned_matrix_p == true) {
4072     intmatrix_1d_free(&active);
4073   } else {
4074     intmatrix_2d_free(&active,querylength);
4075   }
4076 
4077 
4078   /* Grand winners */
4079   debug12(printf("Finding grand winners, using root position method\n"));
4080 #ifdef SEPARATE_FWD_REV
4081   if (splicingp == false || use_canonical_p == false) {
4082     cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions,
4083 				     favor_right_p,cellpool);
4084   } else {
4085     cells = Linkmatrix_get_cells_both(&(*ncells),links,querystart,queryend,npositions,
4086 				      indexsize,best_overall_score,favor_right_p,cellpool);
4087   }
4088 #else
4089   cells = get_cells_fwd(&(*ncells),links,fwd_scores,mappings,querystart,queryend,npositions,
4090 			favor_right_p,cellpool);
4091 #endif
4092 
4093   debug9(FREE(oligo));
4094 
4095   return cells;
4096 }
4097 
4098 
4099 static char complCode[128] = COMPLEMENT_LC;
4100 
4101 /* genomicstart == chroffset + chrpos */
4102 /* arguments were genomicpos, genomicstart, genomiclength */
4103 
4104 static char
get_genomic_nt(char * g_alt,Chrpos_T chrpos,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)4105 get_genomic_nt (char *g_alt, Chrpos_T chrpos, Univcoord_T chroffset,
4106 		Univcoord_T chrhigh, bool watsonp) {
4107   char c2, c2_alt;
4108 
4109   if (watsonp) {
4110     return Genome_get_char_blocks(&(*g_alt),chroffset + chrpos);
4111 
4112   } else {
4113     c2 = Genome_get_char_blocks(&c2_alt,chrhigh - chrpos);
4114     *g_alt = complCode[(int) c2_alt];
4115     return complCode[(int) c2];
4116   }
4117 }
4118 
4119 
4120 static List_T
traceback_one(int curr_querypos,int hit,struct Link_T ** links,Chrpos_T ** mappings,char * queryseq_ptr,char * queryuc_ptr,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,bool lookbackp,int ** fwd_scores,int indexsize,Pairpool_T pairpool,bool fwdp)4121 traceback_one (int curr_querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
4122 	       char *queryseq_ptr, char *queryuc_ptr,
4123 #ifdef PMAP
4124 	       Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool lookbackp,
4125 #endif
4126 #ifdef DEBUG0
4127 	       int **fwd_scores, int indexsize,
4128 #endif
4129 	       Pairpool_T pairpool, bool fwdp) {
4130   List_T path = NULL;
4131   Chrpos_T position;
4132   int prev_querypos, prevhit;
4133   char c2;
4134 #ifdef PMAP
4135   char c2_alt;
4136 #endif
4137 
4138 #ifdef DEBUG0
4139   char *oligo;
4140 #endif
4141 
4142 
4143   while (curr_querypos >= 0) {
4144     position = mappings[curr_querypos][hit];
4145 
4146 #ifdef PMAP
4147     /* Change querypos positions from protein to nucleotide */
4148     if (lookbackp == true) {
4149       c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
4150       path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4151 			   /*dynprogindex*/0);
4152       c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
4153       path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4154 			   /*dynprogindex*/0);
4155       c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
4156       path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4157 			   /*dynprogindex*/0);
4158     } else {
4159       c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
4160       path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4161 			   /*dynprogindex*/0);
4162       c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
4163       path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4164 			   /*dynprogindex*/0);
4165       c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
4166       path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4167 			   /*dynprogindex*/0);
4168     }
4169 #else
4170     /* genomic nucleotide same as queryseq */
4171     c2 = queryuc_ptr[curr_querypos];
4172     path = Pairpool_push(path,pairpool,curr_querypos,position,queryseq_ptr[curr_querypos],MATCH_COMP,
4173 			 c2,/*genomealt*/c2,/*dynprogindex*/0);
4174 #endif
4175 
4176 
4177 #ifdef DEBUG0
4178     debug0(oligo = (char *) MALLOC((indexsize+1)*sizeof(char)));
4179     debug0(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
4180     debug0(oligo[indexsize] = '\0');
4181     if (fwdp == true) {
4182       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
4183 		    curr_querypos,hit,oligo,position,
4184 		    fwd_scores[curr_querypos][hit],links[curr_querypos][hit].fwd_consecutive));
4185       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
4186 		    links[curr_querypos][hit].fwd_tracei,links[curr_querypos][hit].fwd_intronnfwd,links[curr_querypos][hit].fwd_intronnrev,
4187 		    links[curr_querypos][hit].fwd_intronnunk));
4188       debug0(printf("\n"));
4189 
4190 #ifdef SEPARATE_FWD_REV
4191     } else {
4192       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
4193 		    curr_querypos,hit,oligo,position,
4194 		    links[curr_querypos][hit].rev_score,links[curr_querypos][hit].rev_consecutive));
4195       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
4196 		    links[curr_querypos][hit].rev_tracei,links[curr_querypos][hit].rev_intronnfwd,links[curr_querypos][hit].rev_intronnrev,
4197 		    links[curr_querypos][hit].rev_intronnunk));
4198       debug0(printf("\n"));
4199 
4200 #endif
4201     }
4202 #endif
4203     debug0(FREE(oligo));
4204 
4205     /* prevposition = position; */
4206     prev_querypos = curr_querypos;
4207     prevhit = hit;
4208     if (fwdp == true) {
4209       curr_querypos = links[prev_querypos][prevhit].fwd_pos;
4210       hit = links[prev_querypos][prevhit].fwd_hit;
4211 #ifdef SEPARATE_FWD_REV
4212     } else {
4213       curr_querypos = links[prev_querypos][prevhit].rev_pos;
4214       hit = links[prev_querypos][prevhit].rev_hit;
4215 #endif
4216     }
4217     debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,curr_querypos,hit));
4218   }
4219   debug0(printf("Done\n\n"));
4220 
4221   return path;
4222 }
4223 
4224 
4225 static List_T
traceback_one_snps(int curr_querypos,int hit,struct Link_T ** links,Chrpos_T ** mappings,char * queryseq_ptr,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int ** fwd_scores,int indexsize,Pairpool_T pairpool,bool fwdp)4226 traceback_one_snps (int curr_querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
4227 		    char *queryseq_ptr,
4228 
4229 		    Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
4230 #ifdef DEBUG0
4231 		    int **fwd_scores, int indexsize,
4232 #endif
4233 		    Pairpool_T pairpool, bool fwdp) {
4234   List_T path = NULL;
4235   Chrpos_T position;
4236   int prev_querypos, prevhit;
4237   char c2, c2_alt;
4238 
4239 #ifdef DEBUG0
4240   char *oligo;
4241 #endif
4242 
4243 
4244   while (curr_querypos >= 0) {
4245     position = mappings[curr_querypos][hit];
4246 
4247 #ifdef PMAP
4248     /* Change querypos positions from protein to nucleotide */
4249     c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
4250     path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4251 			 /*dynprogindex*/0);
4252     c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
4253     path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4254 			 /*dynprogindex*/0);
4255     c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
4256     path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
4257 			 /*dynprogindex*/0);
4258 #else
4259     /* genomic nucleotide or SNP same as queryseq */
4260     c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
4261     path = Pairpool_push(path,pairpool,curr_querypos,position,queryseq_ptr[curr_querypos],MATCH_COMP,c2,c2_alt,
4262 			 /*dynprogindex*/0);
4263 #endif
4264 
4265 
4266 #ifdef DEBUG0
4267     debug0(oligo = (char *) MALLOC((indexsize+1)*sizeof(char)));
4268     debug0(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
4269     debug0(oligo[indexsize] = '\0');
4270     if (fwdp == true) {
4271       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
4272 		    curr_querypos,hit,oligo,position,
4273 		    fwd_scores[curr_querypos][hit],links[curr_querypos][hit].fwd_consecutive));
4274       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
4275 		    links[curr_querypos][hit].fwd_tracei,links[curr_querypos][hit].fwd_intronnfwd,links[curr_querypos][hit].fwd_intronnrev,
4276 		    links[curr_querypos][hit].fwd_intronnunk));
4277       debug0(printf("\n"));
4278 
4279 #ifdef SEPARATE_FWD_REV
4280     } else {
4281       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
4282 		    curr_querypos,hit,oligo,position,
4283 		    links[curr_querypos][hit].rev_score,links[curr_querypos][hit].rev_consecutive));
4284       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
4285 		    links[curr_querypos][hit].rev_tracei,links[curr_querypos][hit].rev_intronnfwd,links[curr_querypos][hit].rev_intronnrev,
4286 		    links[curr_querypos][hit].rev_intronnunk));
4287       debug0(printf("\n"));
4288 #endif
4289     }
4290 #endif
4291     debug0(FREE(oligo));
4292 
4293     /* prevposition = position; */
4294     prev_querypos = curr_querypos;
4295     prevhit = hit;
4296     if (fwdp == true) {
4297       curr_querypos = links[prev_querypos][prevhit].fwd_pos;
4298       hit = links[prev_querypos][prevhit].fwd_hit;
4299 #ifdef SEPARATE_FWD_REV
4300     } else {
4301       curr_querypos = links[prev_querypos][prevhit].rev_pos;
4302       hit = links[prev_querypos][prevhit].rev_hit;
4303 #endif
4304     }
4305     debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,curr_querypos,hit));
4306   }
4307   debug0(printf("Done\n\n"));
4308 
4309   return path;
4310 }
4311 
4312 
4313 /* Performs dynamic programming.  For PMAP, indexsize is in aa. */
4314 static List_T
align_compute_lookback(Chrpos_T ** mappings,int * npositions,int totalpositions,bool oned_matrix_p,Chrpos_T * minactive,Chrpos_T * maxactive,int * firstactive,int * nactive,Cellpool_T cellpool,char * queryseq_ptr,char * queryuc_ptr,int querylength,int querystart,int queryend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,Pairpool_T pairpool,bool anchoredp,int anchor_querypos,Chrpos_T anchor_position,bool localp,bool skip_repetitive_p,bool use_canonical_p,int non_canonical_penalty,bool favor_right_p,bool middlep,int max_nalignments,bool debug_graphic_p)4315 align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions,
4316 			bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
4317 			int *firstactive, int *nactive, Cellpool_T cellpool,
4318 			char *queryseq_ptr, char *queryuc_ptr, int querylength, int querystart, int queryend,
4319 			Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
4320 			int indexsize, Pairpool_T pairpool,
4321 #ifdef MOVE_TO_STAGE3
4322 			bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
4323 #endif
4324 			bool localp, bool skip_repetitive_p, bool use_canonical_p, int non_canonical_penalty,
4325 			bool favor_right_p, bool middlep, int max_nalignments, bool debug_graphic_p) {
4326   List_T all_paths = NULL;
4327   int npaths = 0;
4328   struct Link_T **links;
4329   int **fwd_scores;
4330 
4331 #if 0
4332   bool anchoredp = false;
4333   int anchor_querypos = 0;
4334   Chrpos_T anchor_position = 0;
4335 #endif
4336 
4337   Cell_T *cells, cell;
4338   int ncells, i;
4339 
4340   bool fwdp;
4341   int querypos, hit;
4342   int bestscore;
4343 #ifdef SLOW
4344   int last_endposition;
4345 #endif
4346 
4347 
4348   if (oned_matrix_p == true) {
4349     links = Linkmatrix_1d_new(querylength,npositions,totalpositions);
4350     fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions);
4351   } else {
4352     links = Linkmatrix_2d_new(querylength,npositions);
4353     fwd_scores = intmatrix_2d_new(querylength,npositions);
4354   }
4355 
4356   /* These are all oligomers */
4357   if (debug_graphic_p == true) {
4358     mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers");
4359   }
4360 
4361   cells = align_compute_scores_lookback(&ncells,links,fwd_scores,
4362 					mappings,npositions,totalpositions,
4363 					oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
4364 					querystart,queryend,querylength,
4365 
4366 					chroffset,chrhigh,plusp,
4367 
4368 					indexsize,
4369 #ifdef DEBUG9
4370 					queryseq_ptr,
4371 #endif
4372 					localp,skip_repetitive_p,use_canonical_p,non_canonical_penalty,
4373 					debug_graphic_p,favor_right_p,middlep);
4374   /* cells are currently sorted by Cell_score_cmp in get_cells_fwd */
4375 
4376 
4377 #ifdef SEPARATE_FWD_REV
4378   debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
4379 #else
4380   debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize));
4381 #endif
4382 
4383   if (ncells == 0) {
4384     all_paths = (List_T) NULL;
4385 
4386   } else {
4387     /* High-scoring paths */
4388     bestscore = cells[0]->score;
4389     debug11(printf("** Looping on %d cells, allowing up to %d alignments, plus any with best score %d\n",
4390 		   ncells,max_nalignments,bestscore));
4391 
4392     if (snps_p == true) {
4393       for (i = 0; i < ncells && (i < max_nalignments || cells[i]->score == bestscore)
4394 	     && cells[i]->score > bestscore - FINAL_SCORE_TOLERANCE; i++) {
4395 	cell = cells[i];
4396 	querypos = cell->querypos;
4397 	hit = cell->hit;
4398 	fwdp = cell->fwdp;
4399 	debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
4400 		       i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
4401 
4402 	all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
4403 								    chroffset,chrhigh,/*watsonp*/plusp,
4404 #ifdef DEBUG0
4405 								    fwd_scores,indexsize,
4406 #endif
4407 								    pairpool,fwdp));
4408 	npaths++;
4409 	cell->pushedp = true;
4410       }
4411 
4412     } else {
4413       for (i = 0; i < ncells && (i < max_nalignments || cells[i]->score == bestscore)
4414 	     && cells[i]->score > bestscore - FINAL_SCORE_TOLERANCE; i++) {
4415 	cell = cells[i];
4416 	querypos = cell->querypos;
4417 	hit = cell->hit;
4418 	fwdp = cell->fwdp;
4419 	debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
4420 		       i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
4421 
4422 #if 0
4423 	if (debug_graphic_p == true) {
4424 	  best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path");
4425 	  printf("plot(all.mers,col=\"black\",pch=\".\",xlab=\"Query\",ylab=\"Genomic\")\n");
4426 	  printf("points(active.mers,col=\"red\",pch=\".\")\n");
4427 	  printf("points(best.path,col=\"green\",pch=\".\")\n");
4428 	  printf("lines(querypos,minactive,col=\"blue\")\n");
4429 	  printf("lines(querypos,maxactive,col=\"blue\")\n");
4430 	}
4431 #endif
4432 
4433 	all_paths = List_push(all_paths,(void *) traceback_one(querypos,hit,links,mappings,queryseq_ptr,queryuc_ptr,
4434 #ifdef PMAP
4435 							       chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/true,
4436 #endif
4437 #ifdef DEBUG0
4438 							       fwd_scores,indexsize,
4439 #endif
4440 							       pairpool,fwdp));
4441 	npaths++;
4442 	cell->pushedp = true;
4443       }
4444     }
4445 
4446 
4447 #ifdef SLOW
4448     if (npaths < max_nalignments) {
4449       /* Non-overlapping paths */
4450       debug11(printf("** Looping on %d cells, looking for non-overlapping paths.  Total paths so far: %d\n",
4451 		     ncells,npaths));
4452       qsort(cells,ncells,sizeof(Cell_T),Cell_interval_cmp);
4453       last_endposition = 0;
4454       if (snps_p == true) {
4455 	for (i = 0; i < ncells && npaths < max_nalignments; i++) {
4456 	  cell = cells[i];
4457 	  if (cell->score > bestscore * NONOVERLAPPING_SCORE_TOLERANCE &&
4458 	      cell->rootposition > last_endposition && cell->pushedp == false) {
4459 	    debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
4460 			   i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
4461 	    querypos = cell->querypos;
4462 	    hit = cell->hit;
4463 	    fwdp = cell->fwdp;
4464 	    all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
4465 									chroffset,chrhigh,/*watsonp*/plusp,
4466 #ifdef DEBUG0
4467 									fwd_scores,indexsize,
4468 #endif
4469 									pairpool,fwdp));
4470 	    npaths++;
4471 	    cell->pushedp = true;
4472 	    last_endposition = cell->endposition;
4473 	  }
4474 	}
4475 
4476       } else {
4477 	for (i = 0; i < ncells && npaths < max_nalignments; i++) {
4478 	  cell = cells[i];
4479 	  if (cell->score > bestscore * NONOVERLAPPING_SCORE_TOLERANCE &&
4480 	      cell->rootposition > last_endposition && cell->pushedp == false) {
4481 	    debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
4482 			   i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
4483 	    querypos = cell->querypos;
4484 	    hit = cell->hit;
4485 	    fwdp = cell->fwdp;
4486 	    all_paths = List_push(all_paths,(void *) traceback_one(querypos,hit,links,mappings,queryseq_ptr,queryuc_ptr,
4487 #ifdef PMAP
4488 								   chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/true,
4489 #endif
4490 #ifdef DEBUG0
4491 								   fwd_scores,indexsize,
4492 #endif
4493 								   pairpool,fwdp));
4494 	    npaths++;
4495 	    cell->pushedp = true;
4496 	    last_endposition = cell->endposition;
4497 	  }
4498 	}
4499       }
4500     }
4501 #endif
4502 
4503     debug11(printf("\n"));
4504 
4505 #if 0
4506     /* No need with cellpool */
4507     for (i = 0; i < ncells; i++) {
4508       cell = cells[i];
4509       Cell_free(&cell);
4510     }
4511 #endif
4512     FREE(cells);
4513   }
4514 
4515 
4516   if (oned_matrix_p == true) {
4517     Linkmatrix_1d_free(&links);
4518     intmatrix_1d_free(&fwd_scores);
4519   } else {
4520     Linkmatrix_2d_free(&links,querylength);
4521     intmatrix_2d_free(&fwd_scores,querylength);
4522   }
4523 
4524 #if 0
4525   for (p = all_paths; p != NULL; p = List_next(p)) {
4526     Pair_dump_list(List_head(p),/*zerobasedp*/true);
4527     printf("\n");
4528   }
4529 #endif
4530 
4531   return all_paths;
4532 }
4533 
4534 
4535 
4536 /* Returns celllist */
4537 /* For PMAP, indexsize is in aa. */
4538 static Cell_T *
align_compute_scores_lookforward(int * ncells,struct Link_T ** links,int ** fwd_scores,Chrpos_T ** mappings,int * npositions,int totalpositions,bool oned_matrix_p,Chrpos_T * minactive,Chrpos_T * maxactive,int * firstactive,int * nactive,Cellpool_T cellpool,int querystart,int queryend,int querylength,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,char * queryseq_ptr,bool anchoredp,int anchor_querypos,Chrpos_T anchor_position,bool localp,bool skip_repetitive_p,bool use_canonical_p,int non_canonical_penalty,bool debug_graphic_p,bool favor_right_p,bool middlep)4539 align_compute_scores_lookforward (int *ncells, struct Link_T **links, int **fwd_scores,
4540 				  Chrpos_T **mappings, int *npositions, int totalpositions,
4541 				  bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
4542 				  int *firstactive, int *nactive, Cellpool_T cellpool,
4543 				  int querystart, int queryend, int querylength,
4544 				  Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
4545 				  int indexsize,
4546 #ifdef DEBUG9
4547 				  char *queryseq_ptr,
4548 #endif
4549 #ifdef MOVE_TO_STAGE3
4550 				  bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
4551 #endif
4552 				  bool localp, bool skip_repetitive_p,
4553 				  bool use_canonical_p, int non_canonical_penalty,
4554 				  bool debug_graphic_p, bool favor_right_p, bool middlep) {
4555 #if 0
4556   bool anchoredp = false;
4557   int anchor_querypos = 0;
4558   Chrpos_T anchor_position = 0;
4559 #endif
4560 
4561   Cell_T *cells;
4562   Link_T currlink;
4563   int curr_querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
4564   int nskipped, min_hits, specific_querypos, specific_low_hit, specific_high_hit, next_querypos;
4565   Intlist_T processed = NULL;
4566   int best_overall_score = 0;
4567   int grand_fwd_score, grand_fwd_querypos, grand_fwd_hit, best_fwd_hit, best_fwd_score;
4568 #ifdef SEPARATE_FWD_REV
4569   int grand_rev_score, grand_rev_querypos, grand_rev_hit, best_rev_hit, best_rev_score;
4570 #ifdef DEBUG9
4571   int rev_tracei = 0;
4572 #endif
4573 #endif
4574   int **active;
4575   Chrpos_T position, prevposition;
4576   int fwd_tracei = 0;
4577 #if 0
4578   int *lastGT, *lastAG;
4579 #ifndef PMAP
4580   int *lastCT, *lastAC;
4581 #endif
4582 #endif
4583 #ifdef DEBUG9
4584   Link_T prevlink;
4585   char *oligo;
4586 #endif
4587 #ifdef DEBUG12
4588   Link_T termlink = NULL;
4589 #endif
4590 
4591 #ifdef PMAP
4592   indexsize_nt = indexsize*3;
4593 #else
4594   indexsize_nt = indexsize;
4595 #endif
4596   indexsize_query = indexsize;	/* Use when evaluating across query positions */
4597 
4598 
4599 #ifdef DEBUG9
4600   oligo = (char *) CALLOC(indexsize+1,sizeof(char));
4601 #endif
4602   debug0(printf("Lookforward: querystart = %d, queryend = %d, indexsize = %d\n",querystart,queryend,indexsize));
4603 
4604   if (oned_matrix_p == true) {
4605     active = intmatrix_1d_new(querylength,npositions,totalpositions);
4606   } else {
4607     active = intmatrix_2d_new(querylength,npositions);
4608   }
4609 
4610 #if 0
4611   firstactive = (int *) MALLOC(querylength * sizeof(int));
4612   nactive = (int *) MALLOC(querylength * sizeof(int));
4613 #endif
4614 
4615   /* Initialize */
4616   for (curr_querypos = querylength - 1; curr_querypos > queryend; curr_querypos--) {
4617     debug6(printf("5.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
4618     firstactive[curr_querypos] = -1;
4619     nactive[curr_querypos] = 0;
4620   }
4621   while (curr_querypos >= querystart && npositions[curr_querypos] <= 0) {
4622     debug6(printf("6.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
4623     debug9(printf("Skipping querypos %d which has no positions\n",curr_querypos));
4624     firstactive[curr_querypos] = -1;
4625     nactive[curr_querypos] = 0;
4626     curr_querypos--;
4627   }
4628 
4629 #ifdef MOVE_TO_STAGE3
4630   if (anchoredp == true) {
4631     /* Guaranteed to find a hit */
4632     hit = binary_search(0,npositions[anchor_querypos],mappings[anchor_querypos],/*goal*/anchor_position);
4633     if (mappings[anchor_querypos] == NULL) {
4634       printf("mappings at anchor_querypos %d is NULL.  mappings = %p\n",anchor_querypos,mappings);
4635       abort();
4636     }
4637 
4638     currlink = &(links[anchor_querypos][hit]);
4639 #ifndef SEPARATE_FWD_REV
4640     currlink->fwd_pos = currlink->fwd_hit = -1;
4641     currlink->fwd_consecutive = EXON_DEFN;
4642     currlink->fwd_tracei = 0;
4643     fwd_scores[anchor_querypos][hit] = indexsize_nt;
4644 #else
4645     fprintf(stderr,"Not implemented yet\n");
4646     abort();
4647 #endif
4648 
4649     debug6(printf("Setting firstactive for anchorpos %d to be %d\n",anchor_querypos,hit));
4650     firstactive[anchor_querypos] = hit;
4651     nactive[anchor_querypos] = 1;
4652     active[anchor_querypos][hit] = -1;
4653 
4654     debug6(printf("Pushing anchorpos %d as processed\n",anchor_querypos));
4655     processed = Intlist_push(processed,anchor_querypos);
4656 
4657   } else
4658 #endif
4659 
4660   if (curr_querypos >= querystart) {
4661     for (hit = npositions[curr_querypos] - 1; hit >= 0; --hit) {
4662       currlink = &(links[curr_querypos][hit]);
4663 #ifndef SEPARATE_FWD_REV
4664       currlink->fwd_pos = currlink->fwd_hit = -1;
4665       currlink->fwd_consecutive = indexsize_nt;
4666       currlink->fwd_tracei = -1;
4667       /* currlink->fwd_rootnlinks = 1; */
4668       fwd_scores[curr_querypos][hit] = indexsize_nt;
4669 #else
4670       currlink->fwd_pos = currlink->fwd_hit = -1;
4671       currlink->fwd_score = indexsize_nt;
4672       currlink->fwd_consecutive = indexsize_nt;
4673       currlink->fwd_tracei = -1;
4674       /* currlink->fwd_rootnlinks = 1; */
4675       if (splicingp == true) {
4676 	currlink->rev_pos = currlink->rev_hit = -1;
4677 	currlink->rev_consecutive = indexsize_nt;
4678 	currlink->rev_tracei = -1;
4679 	/* currlink->rev_rootnlinks = 1; */
4680 	rev_scores[curr_querypos][hit] = indexsize_nt;
4681       }
4682 #endif
4683     }
4684     revise_active_lookforward(active,firstactive,nactive,0,npositions[curr_querypos],fwd_scores,curr_querypos);
4685   }
4686 
4687 
4688   grand_fwd_score = 0;
4689   grand_fwd_querypos = -1;
4690   grand_fwd_hit = -1;
4691 #ifdef SEPARATE_FWD_REV
4692   if (splicingp == true) {
4693     grand_rev_score = 0;
4694     grand_rev_querypos = -1;
4695     grand_rev_hit = -1;
4696   }
4697 #endif
4698 
4699   nskipped = 0;
4700   min_hits = 1000000;
4701   specific_querypos = -1;
4702 
4703   /* curr_querypos -= 1; -- this causes curr_querypos at queryend to be ignored */
4704   while (curr_querypos >= querystart) {
4705     best_fwd_score = 0;
4706     best_fwd_hit = -1;
4707 #ifdef SEPARATE_FWD_REV
4708     best_rev_score = 0;
4709     best_rev_hit = -1;
4710 #endif
4711 
4712     debug9(printf("Positions at querypos %d (reverse order):",curr_querypos);
4713 	   for (hit = npositions[curr_querypos] - 1; hit >= 0; --hit) {
4714 	     printf(" %u",mappings[curr_querypos][hit]);
4715 	   }
4716 	   printf("\n");
4717 	   );
4718 
4719     hit = npositions[curr_querypos] - 1;
4720     while (hit >= 0 && mappings[curr_querypos][hit] > maxactive[curr_querypos]) {
4721       --hit;
4722     }
4723     high_hit = hit + 1;
4724     while (hit >= 0 && mappings[curr_querypos][hit] >= minactive[curr_querypos]) {
4725       --hit;
4726     }
4727     low_hit = hit + 1;
4728     debug9(printf("Querypos %d has hit %d..%d out of %d (minactive = %u, maxactive = %u)\n",
4729 		  curr_querypos,high_hit-1,low_hit,npositions[curr_querypos],minactive[curr_querypos],maxactive[curr_querypos]));
4730 
4731     /* Can't use nactive yet, so use high_hit - low_hit */
4732     if (skip_repetitive_p && high_hit - low_hit >= MAX_NACTIVE && nskipped <= MAX_SKIPPED) { /* Previously turned off */
4733       debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,curr_querypos));
4734       firstactive[curr_querypos] = -1;
4735       nactive[curr_querypos] = 0;
4736       nskipped++;
4737       debug9(printf("  %d skipped because of %d hits\n",nskipped,high_hit - low_hit + 1));
4738 
4739       /* Store most specific querypos in section of skipped */
4740       if (high_hit - low_hit < min_hits) {
4741 	min_hits = high_hit - low_hit;
4742 	specific_querypos = curr_querypos;
4743 	specific_low_hit = low_hit;
4744 	specific_high_hit = high_hit;
4745       }
4746       curr_querypos--;
4747 
4748     } else {
4749       if (nskipped > MAX_SKIPPED) {
4750 	debug9(printf("Too many skipped.  Going back to specific querypos %d\n",specific_querypos));
4751 	next_querypos = curr_querypos;
4752 	curr_querypos = specific_querypos;
4753 	low_hit = specific_low_hit;
4754 	high_hit = specific_high_hit;
4755       } else {
4756 	next_querypos = curr_querypos - 1;
4757       }
4758 
4759       if ((nhits = high_hit - low_hit) > 0) {
4760 	if (nhits == 1) {
4761 	  currlink = &(links[curr_querypos][low_hit]);
4762 	  position = mappings[curr_querypos][low_hit];
4763 
4764 	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
4765 	  debug9(oligo[indexsize] = '\0');
4766 	  debug9(printf("Finding link looking forward from querypos %d,%d at %ux%d (%s).  prev_querypos was %d\n",
4767 			curr_querypos,low_hit,position,active[curr_querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
4768 	  score_querypos_lookforward_one(&fwd_tracei,currlink,curr_querypos,low_hit,position,
4769 					 links,fwd_scores,mappings,active,firstactive,
4770 					 chroffset,chrhigh,plusp,
4771 					 indexsize,processed,localp,splicingp,use_canonical_p,
4772 					 non_canonical_penalty);
4773 	  if (fwd_scores[curr_querypos][low_hit] > 0) {
4774 	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[curr_querypos][low_hit]));
4775 	    best_fwd_score = fwd_scores[curr_querypos][low_hit];
4776 	    best_fwd_hit = low_hit;
4777 	  }
4778 
4779 	} else {
4780 	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
4781 	  debug9(oligo[indexsize] = '\0');
4782 	  debug9(printf("Finding links looking forward from querypos %d,%d..%d at (%u..%u) (%s).  prev_querypos was %d\n",
4783 			curr_querypos,high_hit-1,low_hit,mappings[curr_querypos][high_hit-1],mappings[curr_querypos][low_hit],
4784 			oligo,processed ? Intlist_head(processed) : -1));
4785 
4786 	  score_querypos_lookforward_mult(&fwd_tracei,low_hit,high_hit,curr_querypos,
4787 					  /*positions*/&(mappings[curr_querypos][low_hit]),
4788 					  links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
4789 					  indexsize,processed,localp,splicingp,use_canonical_p,
4790 					  non_canonical_penalty);
4791 
4792 	  debug9(printf("Checking hits from high_hit %d to low_hit %d\n",high_hit,low_hit));
4793 	  for (hit = high_hit - 1; hit >= low_hit; hit--) {
4794 	    debug9(printf("Hit %d has score %d\n",hit,fwd_scores[curr_querypos][hit]));
4795 	    if (fwd_scores[curr_querypos][hit] > best_fwd_score) {
4796 	      best_fwd_score = fwd_scores[curr_querypos][hit];
4797 	      best_fwd_hit = hit;
4798 	    }
4799 	  }
4800 	}
4801 
4802 	if (best_fwd_score > best_overall_score) {
4803 	  best_overall_score = best_fwd_score;
4804 	}
4805 
4806 	nskipped = 0;
4807 	min_hits = 1000000;
4808 	specific_querypos = -1;
4809 
4810 #ifndef SEPARATE_FWD_REV
4811 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d\n",
4812 		      curr_querypos,best_fwd_hit));
4813 #else
4814 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d and best_rev_hit %d\n",
4815 		      curr_querypos,best_fwd_hit,best_rev_hit));
4816 #endif
4817 
4818 #if 1
4819 	/* Previously, thought that using this code causes misses in
4820 	   some alignments, but not using it causes missing end
4821 	   exons */
4822 	if (middlep == false && best_fwd_hit < 0) {
4823 	  /* Allow for a new start */
4824 	  for (hit = 0; hit < npositions[curr_querypos]; hit++) {
4825 	    currlink = &(links[curr_querypos][hit]);
4826 #ifndef SEPARATE_FWD_REV
4827 	    currlink->fwd_pos = currlink->fwd_hit = -1;
4828 	    currlink->fwd_consecutive = indexsize_nt;
4829 	    currlink->fwd_tracei = -1;
4830 	    /* currlink->fwd_rootnlinks = 1; */
4831 	    fwd_scores[curr_querypos][hit] = indexsize_nt;
4832 #else
4833 	    currlink->fwd_pos = currlink->fwd_hit = -1;
4834 	    currlink->fwd_consecutive = indexsize_nt;
4835 	    currlink->fwd_tracei = -1;
4836 	    /* currlink->fwd_rootnlinks = 1; */
4837 	    fwd_scores[curr_querypos][hit] = indexsize_nt;
4838 	    if (splicingp == true) {
4839 	      currlink->rev_pos = currlink->rev_hit = -1;
4840 	      currlink->rev_consecutive = indexsize_nt;
4841 	      currlink->rev_tracei = -1;
4842 	      /* currlink->rev_rootnlinks = 1; */
4843 	      rev_scores[curr_querypos][hit] = indexsize_nt;
4844 	    }
4845 #endif
4846 	  }
4847 	}
4848 #endif
4849 
4850 	if (splicingp == true && best_fwd_hit >= 0 && links[curr_querypos][best_fwd_hit].fwd_hit < 0 &&
4851 	    grand_fwd_querypos <= querylength - indexsize_query && curr_querypos + indexsize_query <= grand_fwd_querypos) {
4852 	  if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (grand_fwd_querypos - curr_querypos)) > 0) {
4853 	    prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
4854 	    debug12(printf("Considering prevposition %u to position %u as a grand fwd lookforward\n",prevposition,position));
4855 	    for (hit = high_hit - 1; hit >= low_hit; --hit) {
4856 	      if ((position = mappings[curr_querypos][hit]) + maxintronlen < prevposition) {
4857 		debug12(printf("  => Too long\n"));
4858 	      } else if (position + indexsize_nt <= prevposition) {
4859 		currlink = &(links[curr_querypos][hit]);
4860 		currlink->fwd_consecutive = indexsize_nt;
4861 		currlink->fwd_pos = grand_fwd_querypos;
4862 		currlink->fwd_hit = grand_fwd_hit;
4863 		currlink->fwd_tracei = ++fwd_tracei;
4864 		/* currlink->fwd_rootnlinks = 1; */
4865 		fwd_scores[curr_querypos][hit] = best_fwd_score;
4866 #ifdef DEBUG9
4867 		prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
4868 		currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
4869 		currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
4870 		currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
4871 #endif
4872 	      }
4873 	    }
4874 	    debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
4875 			   curr_querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit]));
4876 	  }
4877 	}
4878 
4879 	/* Use >= to favor longer path in case of ties */
4880 	if (best_fwd_hit >= 0 && best_fwd_score >= grand_fwd_score &&
4881 	    links[curr_querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
4882 	  grand_fwd_score = best_fwd_score;
4883 	  grand_fwd_querypos = curr_querypos;
4884 	  grand_fwd_hit = best_fwd_hit;
4885 	  debug12(termlink = &(links[curr_querypos][best_fwd_hit]));
4886 	  debug12(printf("At querypos %d, revising grand fwd to be hit %d with score of %d (pointing back to %d,%d)\n",
4887 			 curr_querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
4888 	}
4889 
4890 #ifdef SEPARATE_FWD_REV
4891 	if (best_rev_score > best_overall_score) {
4892 	  best_overall_score = best_rev_score;
4893 	}
4894 
4895 	if (splicingp == false || use_canonical_p == false) {
4896 	  /* rev scores should be the same as the fwd scores */
4897 	} else {
4898 	  if (best_rev_hit >= 0 && links[curr_querypos][best_rev_hit].rev_hit < 0 &&
4899 	      grand_rev_querypos <= querylength - indexsize_query && curr_querypos + indexsize_query <= grand_rev_querypos) {
4900 	    prevlink = &(links[grand_rev_querypos][grand_rev_hit]);
4901 	    if ((best_rev_score = prevlink->rev_score - (grand_rev_querypos - curr_querypos)) > 0) {
4902 	      prevposition = mappings[grand_rev_querypos][grand_rev_hit];
4903 	      debug12(printf("Considering prevposition %u to position %u as a grand rev lookforward\n",prevposition,position));
4904 	      for (hit = high_hit - 1; hit >= low_hit; --hit) {
4905 		if ((position = mappings[curr_querypos][hit]) + maxintronlen < prevposition) {
4906 		  debug12(printf("  => Too long\n"));
4907 		} else if (position + indexsize_nt <= prevposition) {
4908 		  currlink = &(links[curr_querypos][hit]);
4909 		  currlink->rev_consecutive = indexsize_nt;
4910 		  /* currlink->rev_rootnlinks = 1; */
4911 		  currlink->rev_pos = grand_rev_querypos;
4912 		  currlink->rev_hit = grand_rev_hit;
4913 		  currlink->rev_score = best_rev_score;
4914 #ifdef DEBUG9
4915 		  currlink->rev_tracei = ++rev_tracei;
4916 		  currlink->rev_intronnrev = prevlink->rev_intronnfwd;
4917 		  currlink->rev_intronnrev = prevlink->rev_intronnrev;
4918 		  currlink->rev_intronnunk = prevlink->rev_intronnunk + 1;
4919 #endif
4920 		}
4921 	      }
4922 	      debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
4923 			     curr_querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
4924 	    }
4925 	  }
4926 
4927 	  /* Use >= to favor longer path in case of ties */
4928 	  if (best_rev_hit >= 0 && best_rev_score >= grand_rev_score &&
4929 	      links[curr_querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
4930 	    grand_rev_score = best_rev_score;
4931 	    grand_rev_querypos = curr_querypos;
4932 	    grand_rev_hit = best_rev_hit;
4933 	  }
4934 	}
4935 #endif
4936       }
4937 
4938       revise_active_lookforward(active,firstactive,nactive,low_hit,high_hit,fwd_scores,curr_querypos);
4939 
4940       /* Need to push curr_querypos, even if firstactive[curr_querypos] == -1 */
4941       /* Want to skip npositions[curr_querypos] == 0, so we can find adjacent despite mismatch or overabundance */
4942       if (npositions[curr_querypos] > 0) {
4943 	debug6(printf("Pushing querypos %d onto processed\n",curr_querypos));
4944 	processed = Intlist_push(processed,curr_querypos);
4945       }
4946       curr_querypos = next_querypos;
4947     }
4948   }
4949   debug9(printf("End of loop lookforward\n"));
4950 
4951 
4952   Intlist_free(&processed);
4953 
4954   /* These are the final active oligomers, after pruning by score */
4955   if (debug_graphic_p == true) {
4956     mappings_dump_R(mappings,npositions,querylength,active,firstactive,indexsize,"active.mers");
4957   }
4958 
4959 #if 0
4960   FREE(nactive);
4961   FREE(firstactive);
4962 #endif
4963 
4964   if (oned_matrix_p == true) {
4965     intmatrix_1d_free(&active);
4966   } else {
4967     intmatrix_2d_free(&active,querylength);
4968   }
4969 
4970 
4971   /* Grand winners */
4972   debug12(printf("Finding grand winners, using root position method\n"));
4973 #ifdef SEPARATE_FWD_REV
4974   if (splicingp == false || use_canonical_p == false) {
4975     cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions,
4976 				     favor_right_p,cellpool);
4977   } else {
4978     cells = Linkmatrix_get_cells_both(&(*ncells),links,querystart,queryend,npositions,
4979 				      indexsize,best_overall_score,favor_right_p,cellpool);
4980   }
4981 #else
4982   cells = get_cells_fwd(&(*ncells),links,fwd_scores,mappings,querystart,queryend,npositions,
4983 			favor_right_p,cellpool);
4984 #endif
4985 
4986   debug9(FREE(oligo));
4987 
4988   return cells;
4989 }
4990 
4991 
4992 /* Performs dynamic programming.  For PMAP, indexsize is in aa. */
4993 static List_T
align_compute_lookforward(Chrpos_T ** mappings,int * npositions,int totalpositions,bool oned_matrix_p,Chrpos_T * minactive,Chrpos_T * maxactive,int * firstactive,int * nactive,Cellpool_T cellpool,char * queryseq_ptr,char * queryuc_ptr,int querylength,int querystart,int queryend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int indexsize,Pairpool_T pairpool,bool anchoredp,int anchor_querypos,Chrpos_T anchor_position,bool localp,bool skip_repetitive_p,bool use_canonical_p,int non_canonical_penalty,bool favor_right_p,bool middlep,int max_nalignments,bool debug_graphic_p)4994 align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositions,
4995 			   bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
4996 			   int *firstactive, int *nactive, Cellpool_T cellpool,
4997 			   char *queryseq_ptr, char *queryuc_ptr, int querylength, int querystart, int queryend,
4998 
4999 			   Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
5000 			   int indexsize, Pairpool_T pairpool,
5001 #ifdef MOVE_TO_STAGE3
5002 			   bool anchoredp, int anchor_querypos, Chrpos_T anchor_position,
5003 #endif
5004 			   bool localp, bool skip_repetitive_p, bool use_canonical_p, int non_canonical_penalty,
5005 			   bool favor_right_p, bool middlep, int max_nalignments, bool debug_graphic_p) {
5006   List_T all_paths = NULL;
5007   int npaths = 0;
5008   struct Link_T **links;
5009   int **fwd_scores;
5010 
5011 #if 0
5012   bool anchoredp = false;
5013   int anchor_querypos = 0;
5014   Chrpos_T anchor_position = 0;
5015 #endif
5016 
5017   Cell_T *cells, cell;
5018   int ncells, i;
5019 
5020   bool fwdp;
5021   int querypos, hit;
5022   int bestscore;
5023 #ifdef SLOW
5024   int last_endposition;
5025 #endif
5026 
5027 
5028   if (oned_matrix_p == true) {
5029     links = Linkmatrix_1d_new(querylength,npositions,totalpositions);
5030     fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions);
5031   } else {
5032     links = Linkmatrix_2d_new(querylength,npositions);
5033     fwd_scores = intmatrix_2d_new(querylength,npositions);
5034   }
5035 
5036   /* These are all oligomers */
5037   if (debug_graphic_p == true) {
5038     mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers");
5039   }
5040 
5041   cells = align_compute_scores_lookforward(&ncells,links,fwd_scores,
5042 					   mappings,npositions,totalpositions,
5043 					   oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
5044 					   querystart,queryend,querylength,
5045 
5046 					   chroffset,chrhigh,plusp,
5047 
5048 					   indexsize,
5049 #ifdef DEBUG9
5050 					   queryseq_ptr,
5051 #endif
5052 					   localp,skip_repetitive_p,use_canonical_p,non_canonical_penalty,
5053 					   debug_graphic_p,favor_right_p,middlep);
5054   /* cells are currently sorted by Cell_score_cmp in get_cells_fwd */
5055 
5056 #ifdef SEPARATE_FWD_REV
5057   debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
5058 #else
5059   debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize));
5060 #endif
5061 
5062   if (ncells == 0) {
5063     all_paths = (List_T) NULL;
5064 
5065   } else {
5066     /* High-scoring paths */
5067     bestscore = cells[0]->score;
5068     debug11(printf("** Looping on %d cells, allowing up to %d alignments, plus any with best score %d\n",
5069 		   ncells,max_nalignments,bestscore));
5070 
5071     if (snps_p == true) {
5072       for (i = 0; i < ncells && (i < max_nalignments || cells[i]->score == bestscore)
5073 	     && cells[i]->score > bestscore - FINAL_SCORE_TOLERANCE; i++) {
5074 	cell = cells[i];
5075 	if (cell->pushedp == false) {
5076 	  querypos = cell->querypos;
5077 	  hit = cell->hit;
5078 	  fwdp = cell->fwdp;
5079 	  debug11(printf("Starting subpath %d at rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
5080 			 i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
5081 	  all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
5082 								      chroffset,chrhigh,/*watsonp*/plusp,
5083 #ifdef DEBUG0
5084 								      fwd_scores,indexsize,
5085 #endif
5086 								      pairpool,fwdp));
5087 	  npaths++;
5088 	  cell->pushedp = true;
5089 	}
5090       }
5091 
5092     } else {
5093       for (i = 0; i < ncells && (i < max_nalignments || cells[i]->score == bestscore)
5094 	     && cells[i]->score > bestscore - FINAL_SCORE_TOLERANCE; i++) {
5095 	cell = cells[i];
5096 	if (cell->pushedp == false) {
5097 	  querypos = cell->querypos;
5098 	  hit = cell->hit;
5099 	  fwdp = cell->fwdp;
5100 	  debug11(printf("Starting subpath %d at rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
5101 			 i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
5102 
5103 #if 0
5104 	  if (debug_graphic_p == true) {
5105 	    /* best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path"); */
5106 	    printf("plot(all.mers,col=\"black\",pch=\".\",xlab=\"Query\",ylab=\"Genomic\")\n");
5107 	    printf("points(active.mers,col=\"red\",pch=\".\")\n");
5108 	    printf("points(best.path,col=\"green\",pch=\".\")\n");
5109 	    printf("lines(querypos,minactive,col=\"blue\")\n");
5110 	    printf("lines(querypos,maxactive,col=\"blue\")\n");
5111 	  }
5112 #endif
5113 
5114 	  all_paths = List_push(all_paths,(void *) traceback_one(querypos,hit,links,mappings,queryseq_ptr,queryuc_ptr,
5115 #ifdef PMAP
5116 								 chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/false,
5117 #endif
5118 #ifdef DEBUG0
5119 								 fwd_scores,indexsize,
5120 #endif
5121 								 pairpool,fwdp));
5122 	  npaths++;
5123 	  cell->pushedp = true;
5124 	}
5125       }
5126 
5127     }
5128 
5129 #ifdef SLOW
5130     if (npaths < max_nalignments) {
5131       /* Non-overlapping paths */
5132       debug11(printf("** Looping on %d cells, looking for non-overlapping paths.  Total paths so far: %d\n",
5133 		     ncells,npaths));
5134       qsort(cells,ncells,sizeof(Cell_T),Cell_interval_cmp);
5135       last_endposition = 0;
5136       if (snps_p == true) {
5137 	for (i = 0; i < ncells && npaths < max_nalignments; i++) {
5138 	  cell = cells[i];
5139 	  if (cell->score > bestscore * NONOVERLAPPING_SCORE_TOLERANCE &&
5140 	      cell->rootposition > last_endposition && cell->pushedp == false) {
5141 	    debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
5142 			   i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
5143 	    querypos = cell->querypos;
5144 	    hit = cell->hit;
5145 	    fwdp = cell->fwdp;
5146 	    all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
5147 									chroffset,chrhigh,/*watsonp*/plusp,
5148 #ifdef DEBUG0
5149 									fwd_scores,indexsize,
5150 #endif
5151 									pairpool,fwdp));
5152 	    npaths++;
5153 	    cell->pushedp = true;
5154 	    last_endposition = cell->endposition;
5155 	  }
5156 	}
5157 
5158       } else {
5159 	for (i = 0; i < ncells && npaths < max_nalignments; i++) {
5160 	  cell = cells[i];
5161 	  if (cell->score > bestscore * NONOVERLAPPING_SCORE_TOLERANCE &&
5162 	      cell->rootposition > last_endposition && cell->pushedp == false) {
5163 	    debug11(printf("Starting subpath %d for rootposition %d with score %d, querypos %d, hit %d, endposition %d\n",
5164 			   i,cell->rootposition,cell->score,querypos,hit,cell->endposition));
5165 	    querypos = cell->querypos;
5166 	    hit = cell->hit;
5167 	    fwdp = cell->fwdp;
5168 	    all_paths = List_push(all_paths,(void *) traceback_one(querypos,hit,links,mappings,queryseq_ptr,queryuc_ptr,
5169 #ifdef PMAP
5170 								   chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/false,
5171 #endif
5172 #ifdef DEBUG0
5173 								   fwd_scores,indexsize,
5174 #endif
5175 								   pairpool,fwdp));
5176 	    npaths++;
5177 	    cell->pushedp = true;
5178 	    last_endposition = cell->endposition;
5179 	  }
5180 	}
5181       }
5182     }
5183 #endif
5184 
5185     debug11(printf("\n"));
5186 
5187 #if 0
5188     /* No need with cellpool */
5189     for (i = 0; i < ncells; i++) {
5190       cell = cells[i];
5191       Cell_free(&cell);
5192     }
5193 #endif
5194     FREE(cells);
5195   }
5196 
5197 
5198   if (oned_matrix_p == true) {
5199     Linkmatrix_1d_free(&links);
5200     intmatrix_1d_free(&fwd_scores);
5201   } else {
5202     Linkmatrix_2d_free(&links,querylength);
5203     intmatrix_2d_free(&fwd_scores,querylength);
5204   }
5205 
5206 #if 0
5207   for (p = all_paths; p != NULL; p = List_next(p)) {
5208     Pair_dump_list(List_head(p),/*zerobasedp*/true);
5209     printf("\n");
5210   }
5211 #endif
5212 
5213   return all_paths;
5214 }
5215 
5216 
#if 0
/* Modified from stage3.c */
/* Stores in *left_prob and *right_prob the Maxent_hr probabilities
   for the splice sites at left_genomepos and right_genomepos.  The
   donor/acceptor (genestrand > 0) or antiacceptor/antidonor
   (otherwise) model used on each side depends on watsonp.  The left
   probability is always computed before the right one. */
static void
get_splicesite_probs (double *left_prob, double *right_prob,
                      Chrpos_T left_genomepos, Chrpos_T right_genomepos,
                      Univcoord_T chroffset, Univcoord_T chrhigh, int genestrand, bool watsonp) {
  const bool sensep = (genestrand > 0);
  Univcoord_T left_site, right_site;

  if (watsonp == true) {
    left_site = chroffset + left_genomepos;
    right_site = chroffset + right_genomepos + 1;

    /* Left side */
    if (sensep) {
      *left_prob = Maxent_hr_donor_prob(left_site /*?*/+ 1,chroffset);
      debug5(printf("1. donor splicesitepos is %u (%u), prob %f\n",
                    left_site,left_site-chroffset,*left_prob));
    } else {
      *left_prob = Maxent_hr_antiacceptor_prob(left_site /**/+ 1,chroffset);
      debug5(printf("2. antiacceptor splicesitepos is %u (%u), prob %f\n",
                    left_site,left_site-chroffset,*left_prob));
    }

    /* Right side */
    if (sensep) {
      *right_prob = Maxent_hr_acceptor_prob(right_site /*?*/- 1,chroffset);
      debug5(printf("5. acceptor splicesitepos is %u (%u), prob %f\n",
                    right_site,right_site-chroffset,*right_prob));
    } else {
      *right_prob = Maxent_hr_antidonor_prob(right_site /**/- 1,chroffset);
      debug5(printf("6. antidonor splicesitepos is %u (%u), prob %f\n",
                    right_site,right_site-chroffset,*right_prob));
    }

  } else {
    left_site = chrhigh - left_genomepos + 1;
    right_site = chrhigh - right_genomepos;

    /* Left side */
    if (sensep) {
      *left_prob = Maxent_hr_acceptor_prob(left_site /*?*/- 1,chroffset);
      debug5(printf("4. acceptor splicesitepos is %u (%u), prob %f\n",
                    left_site,left_site-chroffset,*left_prob));
    } else {
      *left_prob = Maxent_hr_antidonor_prob(left_site /**/- 1,chroffset);
      debug5(printf("3. antidonor splicesitepos is %u (%u), prob %f\n",
                    left_site,left_site-chroffset,*left_prob));
    }

    /* Right side */
    if (sensep) {
      *right_prob = Maxent_hr_donor_prob(right_site /*?*/+ 1,chroffset);
      debug5(printf("8. donor splicesitepos is %u (%u), prob %f\n",
                    right_site,right_site-chroffset,*right_prob));
    } else {
      *right_prob = Maxent_hr_antiacceptor_prob(right_site /**/+ 1,chroffset);
      debug5(printf("7. antiacceptor splicesitepos is %u (%u), prob %f\n",
                    right_site,right_site-chroffset,*right_prob));
    }
  }

  return;
}
#endif
5279 
5280 
5281 /* queryseq_ptr is NULL for PMAP.  querypos here is in nt. */
5282 static List_T
convert_to_nucleotides(List_T path,char * queryseq_ptr,char * queryuc_ptr,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int query_offset,Pairpool_T pairpool,int indexsize_nt,bool include_gapholders_p)5283 convert_to_nucleotides (List_T path,
5284 #ifndef PMAP
5285 			char *queryseq_ptr, char *queryuc_ptr,
5286 #endif
5287 			Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
5288 			int query_offset, Pairpool_T pairpool, int indexsize_nt,
5289 			bool include_gapholders_p) {
5290   List_T pairs = NULL;
5291   Pair_T pair;
5292   int querypos, lastquerypos, queryjump, genomejump, fill, default_fill;
5293   int genomepos, lastgenomepos;
5294   char c, c_alt;
5295 
5296   debug5(printf("Beginning convert_to_nucleotides with %d pairs.  query_offset = %d, indexsize_nt = %d\n",
5297 		List_length(path),query_offset,indexsize_nt));
5298 
5299   if (path == NULL) {
5300     return (List_T) NULL;
5301   } else {
5302     /* pairptr = path; */
5303     /* path = Pairpool_pop(path,&pair); */
5304     pair = (Pair_T) path->first;
5305     querypos = pair->querypos;
5306     genomepos = pair->genomepos;
5307   }
5308 
5309 #ifdef PMAP
5310   default_fill = indexsize_nt - 3;
5311 #else
5312   default_fill = indexsize_nt - 1;
5313 #endif
5314 
5315   lastquerypos = querypos + default_fill;
5316   lastgenomepos = genomepos + default_fill;
5317   while (lastquerypos > querypos) {
5318     debug5(printf("querypos %d vs lastquerypos %d, lastgenomepos %d\n",querypos,lastquerypos,lastgenomepos));
5319 
5320 #ifdef PMAP
5321     c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5322     pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,/*cdna*/c,MATCH_COMP,c,c_alt,
5323 			  /*dynprogindex*/0);
5324     debug5(printf("Pushing %c | %c at %d,%d\n",c,c,lastquerypos,lastgenomepos));
5325 #elif defined(EXTRACT_GENOMICSEG)
5326     if (queryuc_ptr[lastquerypos] == genomicuc_ptr[lastgenomepos]) {
5327       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5328 			    queryseq_ptr[lastquerypos],MISMATCH_COMP,
5329 			    genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5330 			    /*dynprogindex*/0);
5331       debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5332 		    lastquerypos+query_offset,lastgenomepos));
5333     } else {
5334       abort();
5335       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5336 			    queryseq_ptr[lastquerypos],MISMATCH_COMP,
5337 			    genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5338 			    /*dynprogindex*/0);
5339       debug5(printf("Pushing %c   %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5340 		    lastquerypos+query_offset,lastgenomepos));
5341     }
5342 #else
5343     if (mode == STANDARD) {
5344       c = queryuc_ptr[lastquerypos];
5345       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5346 			    queryseq_ptr[lastquerypos],MATCH_COMP,c,/*genomealt*/c,
5347 			    /*dynprogindex*/0);
5348       debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5349 		    lastquerypos+query_offset,lastgenomepos));
5350     } else {
5351       c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5352       if (queryuc_ptr[lastquerypos] == c) {
5353 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5354 			      queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,/*dynprogindex*/0);
5355 	debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5356 		      lastquerypos+query_offset,lastgenomepos));
5357       } else {
5358 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5359 			      queryseq_ptr[lastquerypos],AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5360 	debug5(printf("Pushing %c : %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5361 		      lastquerypos+query_offset,lastgenomepos));
5362       }
5363     }
5364 #endif
5365     --lastquerypos;
5366     --lastgenomepos;
5367   }
5368 
5369   /* Take care of observed first pair in oligomer */
5370   if (mode == STANDARD) {
5371     pair->querypos += query_offset; /* Revise coordinates */
5372     /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5373 #ifdef WASTE
5374     pairs = Pairpool_push_existing(pairs,pairpool,pair);
5375 #else
5376     pairs = List_transfer_one(pairs,&path);
5377 #endif
5378     debug5(printf("Transferring %c : %c at %d,%d (first pair)\n",pair->cdna,c,
5379 		    pair->querypos+query_offset,pair->genomepos));
5380   } else {
5381     c = get_genomic_nt(&c_alt,pair->genomepos,chroffset,chrhigh,watsonp);
5382     if (pair->cdna == c) {
5383       pair->querypos += query_offset; /* Revise coordinates */
5384       /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5385 #ifdef WASTE
5386       pairs = Pairpool_push_existing(pairs,pairpool,pair);
5387 #else
5388       pairs = List_transfer_one(pairs,&path);
5389 #endif
5390       debug5(printf("Transferring %c : %c at %d,%d (first pair)\n",pair->cdna,c,
5391 		    pair->querypos+query_offset,pair->genomepos));
5392     } else {
5393       path = Pairpool_pop(path,&pair);
5394       pairs = Pairpool_push(pairs,pairpool,pair->querypos+query_offset,pair->genomepos,
5395 			    pair->cdna,AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5396       debug5(printf("Pushing %c : %c at %d,%d (first pair)\n",pair->cdna,c,
5397 		    pair->querypos+query_offset,pair->genomepos));
5398     }
5399   }
5400 
5401   lastquerypos = querypos;
5402   lastgenomepos = genomepos;
5403 
5404   while (path != NULL) {
5405     /* pairptr = path; */
5406     /* path = Pairpool_pop(path,&pair); */
5407     pair = (Pair_T) path->first;
5408     querypos = pair->querypos;
5409     genomepos = pair->genomepos;
5410 
5411     queryjump = lastquerypos - 1 - querypos;
5412     genomejump = lastgenomepos - 1 - genomepos;
5413 
5414     if (queryjump == 0 && genomejump == 0) {
5415       /* Do nothing */
5416     } else {
5417       debug5(printf("At querypos %d, saw queryjump of %d and genomejump of %d\n",querypos,queryjump,genomejump));
5418 
5419       if (querypos + default_fill >= lastquerypos || genomepos + default_fill >= lastgenomepos) {
5420 	if (lastquerypos - querypos < (int) (lastgenomepos - genomepos)) {
5421 #if 0
5422 	  /* This can occur with wobble mask */
5423 	  fprintf(stderr,"Partial fill from querypos %d to %d (genomepos goes from %u to %u)\n",
5424 		  querypos,lastquerypos,genomepos,lastgenomepos);
5425 	  abort();
5426 #endif
5427 	  fill = lastquerypos - querypos - 1;
5428 	} else {
5429 #if 0
5430 	  /* This can occur with wobble mask */
5431 	  fprintf(stderr,"Partial fill from genomepos %u to %u (querypos goes from %d to %d)\n",
5432 		  genomepos,lastgenomepos,querypos,lastquerypos);
5433 	  abort();
5434 #endif
5435 	  fill = lastgenomepos - genomepos - 1;
5436 	}
5437       } else {
5438 	fill = default_fill;
5439       }
5440 
5441       /* Recompute queryjump and genomejump */
5442       queryjump -= fill;
5443       genomejump -= fill;
5444       debug5(printf("  Revised queryjump of %d and genomejump of %d\n",queryjump,genomejump));
5445       if (include_gapholders_p == true && (genomejump > 0 || queryjump > 0)) {
5446 	debug5(printf("  Pushing gapholder\n"));
5447 	pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
5448 					/*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
5449 #if 0
5450 	/* Need to run on both genestrands, and save both results in pair */
5451 	if (queryjump == 0) {
5452 	  get_splicesite_probs(&left_prob,&right_prob,genomepos+fill,lastgenomepos,
5453 			       chroffset,chrhigh,/*genestrand*/+1,watsonp);
5454 	}
5455 #endif
5456       }
5457 
5458       /* Fill rest of oligomer */
5459       lastquerypos = querypos + fill;
5460       lastgenomepos = genomepos + fill;
5461       debug5(printf("  Fill from querypos %d down to %d\n",lastquerypos,querypos));
5462       while (lastquerypos > querypos) {
5463 #ifdef PMAP
5464 	c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5465 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,/*cdna*/c,MATCH_COMP,c,c_alt,
5466 			      /*dynprogindex*/0);
5467 	debug5(printf("Pushing %c | %c at %d,%d\n",c,c,lastquerypos+query_offset,lastgenomepos));
5468 #elif defined(EXTRACT_GENOMICSEG)
5469 	if (queryuc_ptr[lastquerypos] == genomicuc_ptr[lastgenomepos]) {
5470 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5471 				queryseq_ptr[lastquerypos],MATCH_COMP,
5472 				queryuc_ptr[lastquerypos],/*genomealt*/GENOMEALT_DEFERRED,
5473 				/*dynprogindex*/0);
5474 	  debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5475 			lastquerypos+query_offset,lastgenomepos));
5476 	} else {
5477 	  abort();
5478 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5479 				queryseq_ptr[lastquerypos],MISMATCH_COMP,
5480 				genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5481 				/*dynprogindex*/0);
5482 	  debug5(printf("Pushing %c   %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5483 			lastquerypos+query_offset,lastgenomepos));
5484 	}
5485 #else
5486 	if (mode == STANDARD) {
5487 	  /* assert(queryuc_ptr[lastquerypos] == get_genomic_nt(&c_alt,lastgenomepos,genomicstart,genomiclength,watsonp)); */
5488 	  c = queryuc_ptr[lastquerypos];
5489 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5490 				queryseq_ptr[lastquerypos],MATCH_COMP,c,/*genomealt*/c,
5491 				/*dynprogindex*/0);
5492 	  debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5493 			lastquerypos+query_offset,lastgenomepos));
5494 	} else {
5495 	  c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5496 	  if (queryuc_ptr[lastquerypos] == c) {
5497 	    pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5498 				  queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,/*dynprogindex*/0);
5499 	    debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5500 			  lastquerypos+query_offset,lastgenomepos));
5501 	  } else {
5502 	    pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5503 				  queryseq_ptr[lastquerypos],AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5504 	    debug5(printf("Pushing %c : %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5505 			  lastquerypos+query_offset,lastgenomepos));
5506 	  }
5507 	}
5508 #endif
5509 	--lastquerypos;
5510 	--lastgenomepos;
5511       }
5512     }
5513 
5514     /* Take care of observed match */
5515     if (mode == STANDARD) {
5516       pair->querypos += query_offset; /* Revise coordinates */
5517       /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5518 #ifdef WASTE
5519       pairs = Pairpool_push_existing(pairs,pairpool,pair);
5520 #else
5521       pairs = List_transfer_one(pairs,&path);
5522 #endif
5523       debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5524 		    pair->querypos+query_offset,pair->genomepos));
5525     } else {
5526       c = get_genomic_nt(&c_alt,pair->genomepos,chroffset,chrhigh,watsonp);
5527       if (pair->cdna == c) {
5528 	pair->querypos += query_offset; /* Revise coordinates */
5529 	/*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5530 #ifdef WASTE
5531 	pairs = Pairpool_push_existing(pairs,pairpool,pair);
5532 #else
5533 	pairs = List_transfer_one(pairs,&path);
5534 #endif
5535 	debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5536 		      pair->querypos+query_offset,pair->genomepos));
5537       } else {
5538 	path = Pairpool_pop(path,&pair);
5539 	pairs = Pairpool_push(pairs,pairpool,pair->querypos+query_offset,pair->genomepos,
5540 			      pair->cdna,AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5541 	debug5(printf("Pushing %c : %c at %d,%d (observed)\n",pair->cdna,c,
5542 		      pair->querypos+query_offset,pair->genomepos));
5543       }
5544     }
5545 
5546     lastquerypos = querypos;
5547     lastgenomepos = genomepos;
5548   }
5549 
5550   debug5(Pair_dump_list(pairs,true));
5551   /* pairs is in ascending querypos order */
5552   return pairs;		      /* Used to return List_reverse(pairs) */
5553 }
5554 
5555 
5556 /* queryseq_ptr is NULL for PMAP.  querypos here is in nt. */
5557 static List_T
convert_to_nucleotides_snps(List_T path,char * queryseq_ptr,char * queryuc_ptr,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int query_offset,Pairpool_T pairpool,int indexsize_nt,bool include_gapholders_p)5558 convert_to_nucleotides_snps (List_T path,
5559 #ifndef PMAP
5560 			     char *queryseq_ptr, char *queryuc_ptr,
5561 #endif
5562 			     Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
5563 			     int query_offset, Pairpool_T pairpool, int indexsize_nt,
5564 			     bool include_gapholders_p) {
5565   List_T pairs = NULL;
5566   Pair_T pair;
5567   int querypos, genomepos, lastquerypos, lastgenomepos, queryjump, genomejump, fill, default_fill;
5568   char c, c_alt;
5569 
5570   debug5(printf("Beginning convert_to_nucleotides_snps with %d pairs\n",List_length(path)));
5571 
5572   if (path == NULL) {
5573     return (List_T) NULL;
5574   } else {
5575     /* pairptr = path; */
5576     /* path = Pairpool_pop(path,&pair); */
5577     pair = (Pair_T) path->first;
5578     querypos = pair->querypos;
5579     genomepos = pair->genomepos;
5580   }
5581 
5582 #ifdef PMAP
5583   default_fill = indexsize_nt - 3;
5584 #else
5585   default_fill = indexsize_nt - 1;
5586 #endif
5587 
5588   lastquerypos = querypos + default_fill;
5589   lastgenomepos = genomepos + default_fill;
5590   while (lastquerypos > querypos) {
5591     debug5(printf("lastquerypos %d, lastgenomepos %d\n",
5592 		  lastquerypos,lastgenomepos));
5593 
5594 #ifdef PMAP
5595     c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5596     pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,/*cdna*/c,MATCH_COMP,c,c_alt,
5597 			  /*dynprogindex*/0);
5598     debug5(printf("Pushing %c | %c at %d,%d\n",c,c,lastquerypos,lastgenomepos));
5599 #elif defined(EXTRACT_GENOMICSEG)
5600     if (queryuc_ptr[lastquerypos] == genomicuc_ptr[lastgenomepos]) {
5601       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5602 			    queryseq_ptr[lastquerypos],MISMATCH_COMP,
5603 			    genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5604 			    /*dynprogindex*/0);
5605       debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5606 		    lastquerypos+query_offset,lastgenomepos));
5607     } else {
5608       abort();
5609       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5610 			    queryseq_ptr[lastquerypos],MISMATCH_COMP,
5611 			    genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5612 			    /*dynprogindex*/0);
5613       debug5(printf("Pushing %c   %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5614 		    lastquerypos+query_offset,lastgenomepos));
5615     }
5616 #else
5617     if (mode == STANDARD) {
5618       /* assert(queryuc_ptr[lastquerypos] == get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp)); */
5619       c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5620       pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5621 			    queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,
5622 			    /*dynprogindex*/0);
5623       debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5624 		    lastquerypos+query_offset,lastgenomepos));
5625     } else {
5626       c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5627       if (queryuc_ptr[lastquerypos] == c) {
5628 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5629 			      queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,/*dynprogindex*/0);
5630 	debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5631 		      lastquerypos+query_offset,lastgenomepos));
5632       } else {
5633 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5634 			      queryseq_ptr[lastquerypos],AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5635 	debug5(printf("Pushing %c : %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5636 		      lastquerypos+query_offset,lastgenomepos));
5637       }
5638     }
5639 #endif
5640     --lastquerypos;
5641     --lastgenomepos;
5642   }
5643 
5644   /* Take care of observed first pair in oligomer */
5645   if (mode == STANDARD) {
5646     pair->querypos += query_offset; /* Revise coordinates */
5647     /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5648 #ifdef WASTE
5649     pairs = Pairpool_push_existing(pairs,pairpool,pair);
5650 #else
5651     pairs = List_transfer_one(pairs,&path);
5652 #endif
5653     debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5654 		  pair->querypos+query_offset,pair->genomepos));
5655   } else {
5656     c = get_genomic_nt(&c_alt,pair->genomepos,chroffset,chrhigh,watsonp);
5657     if (pair->cdna == c) {
5658       pair->querypos += query_offset; /* Revise coordinates */
5659       /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5660 #ifdef WASTE
5661       pairs = Pairpool_push_existing(pairs,pairpool,pair);
5662 #else
5663       pairs = List_transfer_one(pairs,&path);
5664 #endif
5665       debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5666 		    pair->querypos+query_offset,pair->genomepos));
5667     } else {
5668       path = Pairpool_pop(path,&pair);
5669       pairs = Pairpool_push(pairs,pairpool,pair->querypos+query_offset,pair->genomepos,
5670 			    pair->cdna,AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5671       debug5(printf("Pushing %c : %c at %d,%d (first pair)\n",pair->cdna,c,
5672 		    pair->querypos+query_offset,pair->genomepos));
5673     }
5674   }
5675 
5676   lastquerypos = querypos;
5677   lastgenomepos = genomepos;
5678 
5679   while (path != NULL) {
5680     /* pairptr = path; */
5681     /* path = Pairpool_pop(path,&pair); */
5682     pair = (Pair_T) path->first;
5683     querypos = pair->querypos;
5684     genomepos = pair->genomepos;
5685 
5686     queryjump = lastquerypos - 1 - querypos;
5687     genomejump = lastgenomepos - 1 - genomepos;
5688 
5689     if (queryjump == 0 && genomejump == 0) {
5690       /* Do nothing */
5691     } else {
5692       debug5(printf("At querypos %d, saw queryjump of %d and genomejump of %d\n",querypos,queryjump,genomejump));
5693 
5694       if (querypos + default_fill >= lastquerypos || genomepos + default_fill >= lastgenomepos) {
5695 	if (lastquerypos - querypos < lastgenomepos - genomepos) {
5696 #if 0
5697 	  /* This can occur with wobble mask */
5698 	  fprintf(stderr,"Partial fill from querypos %d to %d (genomepos goes from %u to %u)\n",
5699 		  querypos,lastquerypos,genomepos,lastgenomepos);
5700 	  abort();
5701 #endif
5702 	  fill = lastquerypos - querypos - 1;
5703 	} else {
5704 #if 0
5705 	  /* This can occur with wobble mask */
5706 	  fprintf(stderr,"Partial fill from genomepos %u to %u (querypos goes from %d to %d)\n",
5707 		  genomepos,lastgenomepos,querypos,lastquerypos);
5708 	  abort();
5709 #endif
5710 	  fill = lastgenomepos - genomepos - 1;
5711 	}
5712       } else {
5713 	fill = default_fill;
5714       }
5715 
5716       /* Recompute queryjump and genomejump */
5717       queryjump -= fill;
5718       genomejump -= fill;
5719       debug5(printf("  Revised queryjump of %d and genomejump of %d\n",queryjump,genomejump));
5720       if (include_gapholders_p == true && (genomejump > 0 || queryjump > 0)) {
5721 	debug5(printf("  Pushing gapholder\n"));
5722 	pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
5723 					/*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
5724       }
5725 
5726       /* Fill rest of oligomer */
5727       lastquerypos = querypos + fill;
5728       lastgenomepos = genomepos + fill;
5729       debug5(printf("  Fill from querypos %d down to %d\n",lastquerypos,querypos));
5730       while (lastquerypos > querypos) {
5731 #ifdef PMAP
5732 	c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5733 	pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,/*cdna*/c,MATCH_COMP,c,c_alt,
5734 			      /*dynprogindex*/0);
5735 	debug5(printf("Pushing %c | %c at %d,%d\n",c,c,lastquerypos+query_offset,lastgenomepos));
5736 #elif defined(EXTRACT_GENOMICSEG)
5737 	if (queryuc_ptr[lastquerypos] == genomicuc_ptr[lastgenomepos]) {
5738 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5739 				queryseq_ptr[lastquerypos],MATCH_COMP,
5740 				queryuc_ptr[lastquerypos],/*genomealt*/GENOMEALT_DEFERRED,
5741 				/*dynprogindex*/0);
5742 	  debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5743 			lastquerypos+query_offset,lastgenomepos));
5744 	} else {
5745 	  abort();
5746 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5747 				queryseq_ptr[lastquerypos],MISMATCH_COMP,
5748 				genomicseg_ptr[lastgenomepos],/*genomealt*/GENOMEALT_DEFERRED,
5749 				/*dynprogindex*/0);
5750 	  debug5(printf("Pushing %c   %c at %d,%d\n",queryseq_ptr[lastquerypos],genomicseg_ptr[lastgenomepos],
5751 			lastquerypos+query_offset,lastgenomepos));
5752 	}
5753 #else
5754 	if (mode == STANDARD) {
5755 	  c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5756 	  pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5757 				queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,
5758 				/*dynprogindex*/0);
5759 	  debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],queryuc_ptr[lastquerypos],
5760 			lastquerypos+query_offset,lastgenomepos));
5761 	} else {
5762 	  c = get_genomic_nt(&c_alt,lastgenomepos,chroffset,chrhigh,watsonp);
5763 	  if (queryuc_ptr[lastquerypos] == c) {
5764 	    pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5765 				  queryseq_ptr[lastquerypos],MATCH_COMP,c,c_alt,/*dynprogindex*/0);
5766 	    debug5(printf("Pushing %c | %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5767 			  lastquerypos+query_offset,lastgenomepos));
5768 	  } else {
5769 	    pairs = Pairpool_push(pairs,pairpool,lastquerypos+query_offset,lastgenomepos,
5770 				  queryseq_ptr[lastquerypos],AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5771 	    debug5(printf("Pushing %c : %c at %d,%d\n",queryseq_ptr[lastquerypos],c,
5772 			  lastquerypos+query_offset,lastgenomepos));
5773 	  }
5774 	}
5775 #endif
5776 	--lastquerypos;
5777 	--lastgenomepos;
5778       }
5779     }
5780 
5781     /* Take care of observed match */
5782     if (mode == STANDARD) {
5783       pair->querypos += query_offset; /* Revise coordinates */
5784       /*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5785 #ifdef WASTE
5786       pairs = Pairpool_push_existing(pairs,pairpool,pair);
5787 #else
5788       pairs = List_transfer_one(pairs,&path);
5789 #endif
5790       debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5791 		    pair->querypos+query_offset,pair->genomepos));
5792     } else {
5793       c = get_genomic_nt(&c_alt,pair->genomepos,chroffset,chrhigh,watsonp);
5794       if (pair->cdna == c) {
5795 	pair->querypos += query_offset; /* Revise coordinates */
5796 	/*pair->genomepos += genomic_offset;*/ /* Revise coordinates */
5797 #ifdef WASTE
5798 	pairs = Pairpool_push_existing(pairs,pairpool,pair);
5799 #else
5800 	pairs = List_transfer_one(pairs,&path);
5801 #endif
5802 	debug5(printf("Transferring %c : %c at %d,%d\n",pair->cdna,c,
5803 		      pair->querypos+query_offset,pair->genomepos));
5804       } else {
5805 	path = Pairpool_pop(path,&pair);
5806 	pairs = Pairpool_push(pairs,pairpool,pair->querypos+query_offset,pair->genomepos,
5807 			      pair->cdna,AMBIGUOUS_COMP,c,c_alt,/*dynprogindex*/0);
5808 	debug5(printf("Pushing %c : %c at %d,%d (observed)\n",pair->cdna,c,
5809 		      pair->querypos+query_offset,pair->genomepos));
5810       }
5811     }
5812 
5813     lastquerypos = querypos;
5814     lastgenomepos = genomepos;
5815   }
5816 
5817   debug5(Pair_dump_list(pairs,true));
5818   /* pairs is in ascending querypos order */
5819   return pairs;		      /* Used to return List_reverse(pairs) */
5820 }
5821 
5822 
5823 
5824 /* Returns ncovered */
5825 int
Stage2_scan(int * stage2_source,char * queryuc_ptr,int querylength,Chrpos_T chrstart,Chrpos_T chrend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int genestrand,Stage2_alloc_T stage2_alloc,Oligoindex_array_T oligoindices,Diagpool_T diagpool,bool debug_graphic_p)5826 Stage2_scan (int *stage2_source, char *queryuc_ptr, int querylength,
5827 	     Chrpos_T chrstart, Chrpos_T chrend,
5828 	     Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
5829 	     int genestrand, Stage2_alloc_T stage2_alloc, Oligoindex_array_T oligoindices,
5830 	     Diagpool_T diagpool, bool debug_graphic_p) {
5831   int ncovered;
5832   int source;
5833   /* int indexsize; */
5834   Oligoindex_T oligoindex;
5835   Chrpos_T **mappings;
5836   bool *coveredp, oned_matrix_p;
5837   int *npositions, totalpositions;
5838   double pct_coverage;
5839   int maxnconsecutive;
5840   /* double diag_runtime; */
5841   List_T diagonals;
5842 #ifndef USE_DIAGPOOL
5843   List_p;
5844   Diag_T diag;
5845 #endif
5846 #ifdef EXTRACT_GENOMICSEG
5847   Count_T *counts;
5848 #endif
5849 
5850   if (debug_graphic_p == true) {
5851     /* printf("par(mfrow=c(1,2),cex=0.2)\n"); */
5852     printf("par(cex=0.3)\n");
5853     printf("layout(matrix(c(1,2),1,2),widths=c(0.5,0.5),heights=c(1))\n");
5854   }
5855 
5856   if (querylength > stage2_alloc->max_querylength_alloc) {
5857     coveredp = (bool *) CALLOC(querylength,sizeof(bool));
5858     mappings = (Chrpos_T **) MALLOC(querylength * sizeof(Chrpos_T *));
5859     npositions = (int *) CALLOC(querylength,sizeof(int));
5860   } else {
5861     coveredp = stage2_alloc->coveredp;
5862     mappings = stage2_alloc->mappings;
5863     npositions = stage2_alloc->npositions;
5864 
5865     memset(coveredp,0,querylength * sizeof(bool));
5866     memset(npositions,0,querylength * sizeof(int));
5867   }
5868 
5869   totalpositions = 0;
5870   maxnconsecutive = 0;
5871 
5872   source = 0;
5873   pct_coverage = 0.0;
5874   Diagpool_reset(diagpool);
5875   diagonals = (List_T) NULL;
5876   while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
5877     oligoindex = Oligoindex_array_elt(oligoindices,source);
5878     /* indexsize = Oligoindex_indexsize(oligoindex); */ /* Different sources can have different indexsizes */
5879 #ifdef PMAP
5880     if (plusp == true) {
5881       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
5882 			    /*mappingend*/chroffset+chrend,/*plusp*/true,
5883 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
5884 			    /*chrpos*/chrstart);
5885     } else {
5886       /* Need to add 1 to mappingend to cover same range as plusp */
5887       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
5888 			    /*mappingend*/chroffset+chrend+1,/*plusp*/false,
5889 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
5890 			    /*chrpos*/(chrhigh-chroffset)-chrend);
5891     }
5892 
5893 #else
5894 
5895 #ifdef EXTRACT_GENOMICSEG
5896     Oligoindex_hr_tally(oligoindex,genomicuc_ptr,/*genomiclength*/chrend-chrstart,queryuc_ptr,querylength,
5897 			/*sequencepos*/0);
5898     counts = Oligoindex_counts_copy(oligoindex);
5899 #endif
5900 
5901     if (plusp == true) {
5902       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
5903 			  /*mappingend*/chroffset+chrend,/*plusp*/true,
5904 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
5905 			  /*chrpos*/chrstart,genestrand);
5906     } else {
5907       /* Need to add 1 to mappingend to cover same range as plusp */
5908       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
5909 			  /*mappingend*/chroffset+chrend+1,/*plusp*/false,
5910 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
5911 			  /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
5912     }
5913 
5914 #ifdef EXTRACT_GENOMICSEG
5915     assert(Oligoindex_counts_equal(oligoindex,counts));
5916     /* Oligoindex_counts_dump(oligoindex,counts); */
5917     FREE(counts);
5918 #endif
5919 
5920 #endif
5921 
5922     diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
5923 					&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
5924 					/*querystart*/0,/*queryend*/querylength,querylength,
5925 					chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
5926     pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
5927     debug(printf("Stage2_scan: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
5928 
5929     source++;
5930   }
5931   *stage2_source = source;
5932 
5933 #ifdef USE_DIAGPOOL
5934   /* No need to free diagonals */
5935 #else
5936   for (p = diagonals; p != NULL; p = List_next(p)) {
5937     diag = (Diag_T) List_head(p);
5938     Diag_free(&diag);
5939   }
5940   List_free(&diagonals);
5941 #endif
5942 
5943   if (querylength > stage2_alloc->max_querylength_alloc) {
5944     FREE(npositions);
5945     FREE(coveredp);
5946     FREE(mappings);		/* Don't need to free contents of mappings */
5947   }
5948 
5949 #if 1
5950   for (source = 0; source < Oligoindex_array_length(oligoindices); source++) {
5951     oligoindex = Oligoindex_array_elt(oligoindices,source);
5952     Oligoindex_untally(oligoindex);
5953   }
5954 #endif
5955 
5956   return ncovered;
5957 }
5958 
5959 
5960 
5961 static int
stage2_cmp(const void * a,const void * b)5962 stage2_cmp (const void *a, const void *b) {
5963   Stage2_T xx = * (Stage2_T *) a;
5964   Stage2_T yy = * (Stage2_T *) b;
5965   List_T x = xx->middle, y = yy->middle;
5966   Chrpos_T x_chrstart, x_chrend, y_chrstart, y_chrend;
5967 
5968   x_chrstart = ((Pair_T) x->first)->genomepos;
5969   x_chrend = ((Pair_T) List_last_value(x))->genomepos;
5970   assert(x_chrstart <= x_chrend); /* Equal if there is only one pair in the list */
5971 
5972   y_chrstart = ((Pair_T) y->first)->genomepos;
5973   y_chrend = ((Pair_T) List_last_value(y))->genomepos;
5974   assert(y_chrstart <= y_chrend); /* Equal if there is only one pair in the list */
5975 
5976   if (x_chrstart < y_chrstart) {
5977     return -1;
5978   } else if (y_chrstart < x_chrstart) {
5979     return +1;
5980 
5981     /* Want most compact representation */
5982   } else if (x_chrend < y_chrend) {
5983     return -1;
5984   } else if (y_chrend < x_chrend) {
5985     return +1;
5986 
5987   } else {
5988     return 0;
5989   }
5990 }
5991 
5992 
5993 /* paths, so chrend is first */
5994 static int
stage2pairs_start_cmp(const void * a,const void * b)5995 stage2pairs_start_cmp (const void *a, const void *b) {
5996   List_T x = * (List_T *) a;
5997   List_T y = * (List_T *) b;
5998   Chrpos_T x_chrstart, x_chrend, y_chrstart, y_chrend;
5999 
6000   x_chrend = ((Pair_T) x->first)->genomepos;
6001   x_chrstart = ((Pair_T) List_last_value(x))->genomepos;
6002   assert(x_chrstart <= x_chrend); /* Equal if there is only one pair in the list */
6003 
6004   y_chrend = ((Pair_T) y->first)->genomepos;
6005   y_chrstart = ((Pair_T) List_last_value(y))->genomepos;
6006   assert(y_chrstart <= y_chrend); /* Equal if there is only one pair in the list */
6007 
6008   if (x_chrend > y_chrend) {
6009     return -1;
6010   } else if (y_chrend > x_chrend) {
6011     return +1;
6012 
6013     /* Want most compact representation */
6014   } else if (x_chrstart > y_chrstart) {
6015     return -1;
6016   } else if (y_chrstart > x_chrstart) {
6017     return +1;
6018 
6019   } else {
6020     return 0;
6021   }
6022 }
6023 
6024 
6025 static int
stage2pairs_end_cmp(const void * a,const void * b)6026 stage2pairs_end_cmp (const void *a, const void *b) {
6027   List_T x = * (List_T *) a;
6028   List_T y = * (List_T *) b;
6029 
6030   Chrpos_T x_chrstart, x_chrend, y_chrstart, y_chrend;
6031 
6032   x_chrstart = ((Pair_T) x->first)->genomepos;
6033   x_chrend = ((Pair_T) List_last_value(x))->genomepos;
6034   assert(x_chrstart <= x_chrend); /* Equal if there is only one pair in the list */
6035 
6036   y_chrstart = ((Pair_T) y->first)->genomepos;
6037   y_chrend = ((Pair_T) List_last_value(y))->genomepos;
6038   assert(y_chrstart <= y_chrend); /* Equal if there is only one pair in the list */
6039 
6040   if (x_chrstart < y_chrstart) {
6041     return -1;
6042   } else if (y_chrstart < x_chrstart) {
6043     return +1;
6044 
6045     /* Want most compact representation */
6046   } else if (x_chrend < y_chrend) {
6047     return -1;
6048   } else if (y_chrend < x_chrend) {
6049     return +1;
6050 
6051   } else {
6052     return 0;
6053   }
6054 }
6055 
6056 
6057 /* Modified from gregion_overlap_p */
6058 static bool
stage2path_overlap_p(List_T x,List_T y)6059 stage2path_overlap_p (List_T x, List_T y) {
6060   Chrpos_T x_chrstart, x_chrend, y_chrstart, y_chrend;
6061   Chrpos_T overlap;
6062   double fraction;
6063 
6064   x_chrend = ((Pair_T) x->first)->genomepos;
6065   x_chrstart = ((Pair_T) List_last_value(x))->genomepos;
6066   assert(x_chrstart <= x_chrend); /* Equal if there is only one pair in the list */
6067 
6068   y_chrend = ((Pair_T) y->first)->genomepos;
6069   y_chrstart = ((Pair_T) List_last_value(y))->genomepos;
6070   assert(y_chrstart <= y_chrend); /* Equal if there is only one pair in the list */
6071 
6072   if (y_chrstart > x_chrend || x_chrstart > y_chrend) {
6073     debug13a(printf("x %u..%u, y %u..%u => no overlap\n",x_chrstart,x_chrend,y_chrstart,y_chrend));
6074     /*
6075       /-- x --/ /-- y --/ or /-- y --/ /-- x --/
6076     */
6077     return false;		/* No overlap */
6078 
6079   } else if (y_chrstart < x_chrstart) {
6080     debug13a(printf("x %u..%u, y %u..%u",x_chrstart,x_chrend,y_chrstart,y_chrend));
6081     if (y_chrend < x_chrend) {
6082       /*
6083 	/-- x --/
6084 	/-- y --/
6085       */
6086       overlap = y_chrend - x_chrstart;
6087       if (y_chrend - y_chrstart < x_chrend - x_chrstart) {
6088 	fraction = (double) overlap/(double) (y_chrend - y_chrstart);
6089       } else {
6090 	fraction = (double) overlap/(double) (x_chrend - x_chrstart);
6091       }
6092       debug13a(printf(" => fraction %f",fraction));
6093       if (fraction > 0.5) {
6094 	debug13a(printf(" => overlap\n",fraction));
6095 	return true;
6096       } else {
6097 	debug13a(printf(" => no overlap\n",fraction));
6098 	return false;
6099       }
6100 
6101     } else {
6102       /*
6103 	/-- x --/
6104 	/----- y -----/
6105       */
6106       debug13a(printf(" => subsumption\n"));
6107       return true;
6108     }
6109 
6110   } else {
6111     debug13a(printf("x %u..%u, y %u..%u\n",x_chrstart,x_chrend,y_chrstart,y_chrend));
6112     if (y_chrend < x_chrend) {
6113       /*
6114 	/----- x -----/
6115 	/-- y --/
6116       */
6117       debug13a(printf(" => subsumption\n"));
6118       return true;
6119 
6120     } else {
6121       /*
6122 	/-- x --/
6123 	/-- y --/
6124       */
6125       overlap = x_chrend - y_chrstart;
6126       if (y_chrend - y_chrstart < x_chrend - x_chrstart) {
6127 	fraction = (double) overlap/(double) (y_chrend - y_chrstart);
6128       } else {
6129 	fraction = (double) overlap/(double) (x_chrend - x_chrstart);
6130       }
6131       debug13a(printf(" => fraction %f",fraction));
6132       if (fraction > 0.5) {
6133 	debug13a(printf(" => overlap\n",fraction));
6134 	return true;
6135       } else {
6136 	debug13a(printf(" => no overlap\n",fraction));
6137 	return false;
6138       }
6139 
6140     }
6141   }
6142 }
6143 
6144 
6145 /* Modified from gregion_overlap_p */
6146 static bool
stage2pairs_overlap_p(List_T x,List_T y)6147 stage2pairs_overlap_p (List_T x, List_T y) {
6148   Chrpos_T x_chrstart, x_chrend, y_chrstart, y_chrend;
6149   Chrpos_T overlap;
6150   double fraction;
6151 
6152   x_chrstart = ((Pair_T) x->first)->genomepos;
6153   x_chrend = ((Pair_T) List_last_value(x))->genomepos;
6154   assert(x_chrstart <= x_chrend); /* Equal if there is only one pair in the list */
6155 
6156   y_chrstart = ((Pair_T) y->first)->genomepos;
6157   y_chrend = ((Pair_T) List_last_value(y))->genomepos;
6158   assert(y_chrstart <= y_chrend); /* Equal if there is only one pair in the list */
6159 
6160   if (y_chrstart > x_chrend || x_chrstart > y_chrend) {
6161     debug13a(printf("x %u..%u, y %u..%u => no overlap\n",x_chrstart,x_chrend,y_chrstart,y_chrend));
6162     /*
6163       /-- x --/ /-- y --/ or /-- y --/ /-- x --/
6164     */
6165     return false;		/* No overlap */
6166 
6167   } else if (y_chrstart < x_chrstart) {
6168     debug13a(printf("x %u..%u, y %u..%u",x_chrstart,x_chrend,y_chrstart,y_chrend));
6169     if (y_chrend < x_chrend) {
6170       /*
6171 	/-- x --/
6172 	/-- y --/
6173       */
6174       overlap = y_chrend - x_chrstart;
6175       if (y_chrend - y_chrstart < x_chrend - x_chrstart) {
6176 	fraction = (double) overlap/(double) (y_chrend - y_chrstart);
6177       } else {
6178 	fraction = (double) overlap/(double) (x_chrend - x_chrstart);
6179       }
6180       debug13a(printf(" => fraction %f",fraction));
6181       if (fraction > 0.5) {
6182 	debug13a(printf(" => overlap\n",fraction));
6183 	return true;
6184       } else {
6185 	debug13a(printf(" => no overlap\n",fraction));
6186 	return false;
6187       }
6188 
6189     } else {
6190       /*
6191 	/-- x --/
6192 	/----- y -----/
6193       */
6194       debug13a(printf(" => subsumption\n"));
6195       return true;
6196     }
6197 
6198   } else {
6199     debug13a(printf("x %u..%u, y %u..%u\n",x_chrstart,x_chrend,y_chrstart,y_chrend));
6200     if (y_chrend < x_chrend) {
6201       /*
6202 	/----- x -----/
6203 	/-- y --/
6204       */
6205       debug13a(printf(" => subsumption\n"));
6206       return true;
6207 
6208     } else {
6209       /*
6210 	/-- x --/
6211 	/-- y --/
6212       */
6213       overlap = x_chrend - y_chrstart;
6214       if (y_chrend - y_chrstart < x_chrend - x_chrstart) {
6215 	fraction = (double) overlap/(double) (y_chrend - y_chrstart);
6216       } else {
6217 	fraction = (double) overlap/(double) (x_chrend - x_chrstart);
6218       }
6219       debug13a(printf(" => fraction %f",fraction));
6220       if (fraction > 0.5) {
6221 	debug13a(printf(" => overlap\n",fraction));
6222 	return true;
6223       } else {
6224 	debug13a(printf(" => no overlap\n",fraction));
6225 	return false;
6226       }
6227 
6228     }
6229   }
6230 }
6231 
6232 
6233 
6234 static List_T
Stage2_filter_unique(List_T all_stage2results)6235 Stage2_filter_unique (List_T all_stage2results) {
6236   List_T unique = NULL;
6237   Stage2_T *array, stage2, xx, yy;
6238   int n, i, j;
6239   bool *eliminate;
6240 #ifdef DEBUG
6241   List_T p, q;
6242 #endif
6243 
6244   n = List_length(all_stage2results);
6245   debug13(printf("Entering Stage2_filter_unique with %d results\n",n));
6246 
6247   if (n == 0) {
6248     return NULL;
6249   }
6250 
6251 #ifdef DEBUG13
6252   for (p = all_stage2results; p != NULL; p = List_next(p)) {
6253     stage2 = (Stage2_T) List_head(p);
6254     stage2pairs = stage2->middle;
6255     printf("Stage 2 list at chrstart %u, chrend %u)\n",
6256 	   ((Pair_T) stage2pairs->first)->genomepos,
6257 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6258   }
6259 #endif
6260 
6261   eliminate = (bool *) CALLOC(n,sizeof(bool));
6262   array = (Stage2_T *) List_to_array(all_stage2results,NULL);
6263   List_free(&all_stage2results);
6264   qsort(array,n,sizeof(Stage2_T),stage2_cmp);
6265 
6266 #ifdef DEBUG13
6267   for (i = 0; i < n; i++) {
6268     stage2 = array[i];
6269     stage2pairs = stage2->middle;
6270     printf("%d: Stage 2 list at chrstart %u, chrend %u)\n",
6271 	   i,((Pair_T) stage2pairs->first)->genomepos,
6272 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6273   }
6274 #endif
6275 
6276 
6277   for (i = 0; i < n; i++) {
6278     xx = array[i];
6279     for (j = i+1; j < n; j++) {
6280       yy = array[j];
6281       if (stage2pairs_overlap_p(xx->middle,yy->middle) == true) {
6282 #if 0
6283 	printf("Found overlap between these regions:\n");
6284 	printf("   ");
6285 	printf("chrstart %u, chrend %u",
6286 	       ((Pair_T) xx->middle->first)->genomepos,
6287 	       ((Pair_T) List_last_value(xx->middle))->genomepos);
6288 	printf("   ");
6289 	printf("chrstart %u, chrend %u",
6290 	       ((Pair_T) yy->middle->first)->genomepos,
6291 	       ((Pair_T) List_last_value(yy->middle))->genomepos);
6292 	printf("\n");
6293 #endif
6294 	eliminate[j] = true;
6295       }
6296     }
6297   }
6298 
6299   for (i = n-1; i >= 0; i--) {
6300     stage2 = array[i];
6301     if (eliminate[i] == false) {
6302 #if 0
6303       debug13(printf("Keeping chrstart %u, chrend %u",
6304 		     ((Pair_T) stage2pairs->first)->genomepos,
6305 		     ((Pair_T) List_last_value(stage2pairs))->genomepos));
6306 #endif
6307       unique = List_push(unique,(void *) stage2);
6308     } else {
6309 #if 0
6310       debug13(printf("Eliminating chrstart %u, chrend %u",
6311 		     ((Pair_T) stage2pairs->first)->genomepos,
6312 		     ((Pair_T) List_last_value(stage2pairs))->genomepos));
6313 #endif
6314       Stage2_free(&stage2);
6315     }
6316   }
6317 
6318   FREE(eliminate);
6319   FREE(array);
6320 
6321 #ifdef DEBUG13
6322   for (p = unique, i = 0; p != NULL; p = p->rest, i++) {
6323     stage2 = (Stage2_T) p->first;
6324     stage2pairs = stage2->middle;
6325     printf("Final: chrstart %u, chrend %u\n",
6326 	   ((Pair_T) stage2pairs->first)->genomepos,
6327 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6328   }
6329 #endif
6330 
6331   return unique;
6332 }
6333 
6334 
6335 static List_T
Stage2pairs_filter_unique_starts(List_T all_results)6336 Stage2pairs_filter_unique_starts (List_T all_results) {
6337   List_T unique = NULL;
6338   List_T *array, stage2pairs, x, y;
6339   int n, i, j;
6340   bool *eliminate, eliminatep = false;
6341 #ifdef DEBUG
6342   List_T p, q;
6343 #endif
6344 
6345   n = List_length(all_results);
6346   debug13(printf("Entering Stage2_filter_unique_starts with %d results\n",n));
6347 
6348   if (n == 0) {
6349     return NULL;
6350   }
6351 
6352 #ifdef DEBUG13
6353   for (p = all_results; p != NULL; p = List_next(p)) {
6354     stage2pairs = (List_T) List_head(p);
6355     printf("Stage 2 list at chrstart %u, chrend %u)\n",
6356 	   ((Pair_T) stage2pairs->first)->genomepos,
6357 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6358   }
6359 #endif
6360 
6361   eliminate = (bool *) CALLOC(n,sizeof(bool));
6362   array = (List_T *) List_to_array(all_results,NULL);
6363   List_free(&all_results);
6364   qsort(array,n,sizeof(List_T),stage2pairs_start_cmp);
6365 
6366 #ifdef DEBUG13
6367   for (i = 0; i < n; i++) {
6368     stage2pairs = array[i];
6369     printf("%d: Stage 2 list at chrstart %u, chrend %u)\n",
6370 	   i,((Pair_T) stage2pairs->first)->genomepos,
6371 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6372   }
6373 #endif
6374 
6375 
6376   for (i = 0; i < n; i++) {
6377     x = array[i];
6378     for (j = i+1; j < n; j++) {
6379       y = array[j];
6380       if (stage2path_overlap_p(x,y) == true) {
6381 #if 0
6382 	printf("Found overlap between these regions:\n");
6383 	printf("   ");
6384 	printf("chrstart %u, chrend %u",
6385 	       ((Pair_T) x->first)->genomepos,
6386 	       ((Pair_T) List_last_value(x))->genomepos);
6387 	printf("   ");
6388 	printf("chrstart %u, chrend %u",
6389 	       ((Pair_T) y->first)->genomepos,
6390 	       ((Pair_T) List_last_value(y))->genomepos);
6391 	printf("\n");
6392 #endif
6393 	eliminate[j] = true;
6394 	eliminatep = true;
6395       }
6396     }
6397   }
6398 
6399   if (eliminatep == false) {
6400     /* All are identical, so take the first one only */
6401     unique = List_push(unique,(void *) array[0]);
6402   } else {
6403     for (i = n-1; i >= 0; i--) {
6404       stage2pairs = array[i];
6405       if (eliminate[i] == false) {
6406 #if 0
6407 	debug13(printf("Keeping chrstart %u, chrend %u",
6408 		       ((Pair_T) stage2pairs->first)->genomepos,
6409 		       ((Pair_T) List_last_value(stage2pairs))->genomepos));
6410 #endif
6411 	unique = List_push(unique,(void *) stage2pairs);
6412       } else {
6413 #if 0
6414 	debug13(printf("Eliminating chrstart %u, chrend %u",
6415 		       ((Pair_T) stage2pairs->first)->genomepos,
6416 		       ((Pair_T) List_last_value(stage2pairs))->genomepos));
6417 #endif
6418 	/* List_free(&stage2pairs); */
6419       }
6420     }
6421   }
6422 
6423   FREE(eliminate);
6424   FREE(array);
6425 
6426 #ifdef DEBUG13
6427   for (p = unique, i = 0; p != NULL; p = p->rest, i++) {
6428     stage2pairs = (List_T) p->first;
6429     printf("Final: chrstart %u, chrend %u\n",
6430 	   ((Pair_T) stage2pairs->first)->genomepos,
6431 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6432   }
6433 #endif
6434 
6435   return unique;
6436 }
6437 
6438 
6439 static List_T
Stage2pairs_filter_unique_ends(List_T all_results)6440 Stage2pairs_filter_unique_ends (List_T all_results) {
6441   List_T unique = NULL;
6442   List_T *array, stage2pairs, x, y;
6443   int n, i, j;
6444   bool *eliminate, eliminatep = false;
6445 #ifdef DEBUG
6446   List_T p, q;
6447 #endif
6448 
6449   n = List_length(all_results);
6450   debug13(printf("Entering Stage2_filter_unique_ends with %d results\n",n));
6451 
6452   if (n == 0) {
6453     return NULL;
6454   }
6455 
6456 #ifdef DEBUG13
6457   for (p = all_results; p != NULL; p = List_next(p)) {
6458     stage2pairs = (List_T) List_head(p);
6459     printf("Stage 2 list at chrstart %u, chrend %u)\n",
6460 	   ((Pair_T) stage2pairs->first)->genomepos,
6461 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6462   }
6463 #endif
6464 
6465   eliminate = (bool *) CALLOC(n,sizeof(bool));
6466   array = (List_T *) List_to_array(all_results,NULL);
6467   List_free(&all_results);
6468   qsort(array,n,sizeof(List_T),stage2pairs_end_cmp);
6469 
6470 #ifdef DEBUG13
6471   for (i = 0; i < n; i++) {
6472     stage2pairs = array[i];
6473     printf("%d: Stage 2 list at chrstart %u, chrend %u)\n",
6474 	   i,((Pair_T) stage2pairs->first)->genomepos,
6475 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6476   }
6477 #endif
6478 
6479 
6480   for (i = 0; i < n; i++) {
6481     x = array[i];
6482     for (j = i+1; j < n; j++) {
6483       y = array[j];
6484       if (stage2pairs_overlap_p(x,y) == true) {
6485 #if 0
6486 	printf("Found overlap between these regions:\n");
6487 	printf("   ");
6488 	printf("chrstart %u, chrend %u",
6489 	       ((Pair_T) x->first)->genomepos,
6490 	       ((Pair_T) List_last_value(x))->genomepos);
6491 	printf("   ");
6492 	printf("chrstart %u, chrend %u",
6493 	       ((Pair_T) y->first)->genomepos,
6494 	       ((Pair_T) List_last_value(y))->genomepos);
6495 	printf("\n");
6496 #endif
6497 	eliminate[j] = true;
6498 	eliminatep = true;
6499       }
6500     }
6501   }
6502 
6503   if (eliminatep == false) {
6504     /* All are identical, so take the first one only */
6505     unique = List_push(unique,(void *) array[0]);
6506   } else {
6507     for (i = n-1; i >= 0; i--) {
6508       stage2pairs = array[i];
6509       if (eliminate[i] == false) {
6510 #if 0
6511 	debug13(printf("Keeping chrstart %u, chrend %u",
6512 		       ((Pair_T) stage2pairs->first)->genomepos,
6513 		       ((Pair_T) List_last_value(stage2pairs))->genomepos));
6514 #endif
6515 	unique = List_push(unique,(void *) stage2pairs);
6516       } else {
6517 #if 0
6518 	debug13(printf("Eliminating chrstart %u, chrend %u",
6519 		       ((Pair_T) stage2pairs->first)->genomepos,
6520 		       ((Pair_T) List_last_value(stage2pairs))->genomepos));
6521 #endif
6522 	/* List_free(&stage2pairs); */
6523       }
6524     }
6525   }
6526 
6527   FREE(eliminate);
6528   FREE(array);
6529 
6530 #ifdef DEBUG13
6531   for (p = unique, i = 0; p != NULL; p = p->rest, i++) {
6532     stage2pairs = (List_T) p->first;
6533     printf("Final: chrstart %u, chrend %u\n",
6534 	   ((Pair_T) stage2pairs->first)->genomepos,
6535 	   ((Pair_T) List_last_value(stage2pairs))->genomepos);
6536   }
6537 #endif
6538 
6539   return unique;
6540 }
6541 
6542 
6543 
6544 
6545 
6546 List_T
Stage2_compute(char * queryseq_ptr,char * queryuc_ptr,int querylength,int query_offset,Chrpos_T chrstart,Chrpos_T chrend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int genestrand,Stage2_alloc_T stage2_alloc,double proceed_pctcoverage,Oligoindex_array_T oligoindices,Pairpool_T pairpool,Diagpool_T diagpool,Cellpool_T cellpool,bool localp,bool skip_repetitive_p,bool favor_right_p,int max_nalignments,bool debug_graphic_p,Stopwatch_T stopwatch,bool diag_debug)6547 Stage2_compute (char *queryseq_ptr, char *queryuc_ptr, int querylength, int query_offset,
6548 		Chrpos_T chrstart, Chrpos_T chrend,
6549 		Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
6550 #ifndef GSNAP
6551 		Stage2_alloc_T stage2_alloc, double proceed_pctcoverage,
6552 #endif
6553 		Oligoindex_array_T oligoindices,
6554 		Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
6555 		bool localp, bool skip_repetitive_p,
6556 		bool favor_right_p, int max_nalignments, bool debug_graphic_p,
6557 		Stopwatch_T stopwatch, bool diag_debug) {
6558   List_T all_stage2results = NULL, all_paths, all_ends, all_starts, path, pairs, p;
6559   List_T middle;
6560   /* Pair_T firstpair, lastpair; */
6561   int diag_querystart, diag_queryend;
6562   int indexsize, indexsize_nt;
6563   Oligoindex_T oligoindex;
6564   Chrpos_T **mappings;
6565   bool *coveredp, oned_matrix_p;
6566   int source;
6567   int *npositions, totalpositions;
6568   Chrpos_T *minactive, *maxactive;
6569   int *firstactive, *nactive;
6570   int maxnconsecutive;
6571   /* double diag_runtime; */
6572   List_T diagonals;
6573   /* int anchor_querypos, querystart, queryend; */
6574   /* Chrpos_T anchor_position; */
6575 #ifdef GSNAP
6576   Univcoord_T mappingstart, mappingend;
6577   Chrpos_T chrpos, mappinglength;
6578 #else
6579   double pct_coverage;
6580   int ncovered;
6581 #endif
6582 
6583 
6584 #ifndef USE_DIAGPOOL
6585   List_T p;
6586   Diag_T diag;
6587 #endif
6588 #ifdef DEBUG
6589   int nunique;
6590 #endif
6591 #ifdef DEBUG0
6592   int i;
6593 #endif
6594 
6595 #ifdef EXTRACT_GENOMICSEG
6596   Count_T *counts;
6597 #endif
6598 
6599   debug(printf("Entered Stage2_compute with chrstart %u and chrend %u\n",chrstart,chrend));
6600 
6601   Stopwatch_start(stopwatch);
6602 
6603   if (debug_graphic_p == true) {
6604     /* printf("par(mfrow=c(1,2),cex=0.2)\n"); */
6605     printf("par(cex=0.3)\n");
6606     printf("layout(matrix(c(1,2),1,2),widths=c(0.5,0.5),heights=c(1))\n");
6607   }
6608 
6609 #ifdef GSNAP
6610   coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
6611   mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
6612   npositions = (int *) CALLOCA(querylength,sizeof(int));
6613   minactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
6614   maxactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
6615   firstactive = (int *) MALLOCA(querylength * sizeof(int));
6616   nactive = (int *) MALLOCA(querylength * sizeof(int));
6617 #else
6618   if (querylength > stage2_alloc->max_querylength_alloc) {
6619     coveredp = (bool *) CALLOC(querylength,sizeof(bool));
6620     mappings = (Chrpos_T **) MALLOC(querylength * sizeof(Chrpos_T *));
6621     npositions = (int *) CALLOC(querylength,sizeof(int));
6622     minactive = (unsigned int *) MALLOC(querylength * sizeof(unsigned int));
6623     maxactive = (unsigned int *) MALLOC(querylength * sizeof(unsigned int));
6624     firstactive = (int *) MALLOC(querylength * sizeof(int));
6625     nactive = (int *) MALLOC(querylength * sizeof(int));
6626   } else {
6627     coveredp = stage2_alloc->coveredp;
6628     mappings = stage2_alloc->mappings;
6629     npositions = stage2_alloc->npositions;
6630     minactive = stage2_alloc->minactive;
6631     maxactive = stage2_alloc->maxactive;
6632     firstactive = stage2_alloc->firstactive;
6633     nactive = stage2_alloc->nactive;
6634 
6635     memset(coveredp,0,querylength * sizeof(bool));
6636     memset(npositions,0,querylength * sizeof(int));
6637   }
6638 #endif
6639 
6640   totalpositions = 0;
6641   maxnconsecutive = 0;
6642 
6643   source = 0;
6644 #ifdef USE_DIAGPOOL
6645   Diagpool_reset(diagpool);
6646 #endif
6647   Cellpool_reset(cellpool);
6648   diagonals = (List_T) NULL;
6649 
6650 
6651 #ifdef GSNAP
6652   mappingstart = chroffset + chrstart;
6653   if (plusp == true) {
6654     mappingend = chroffset + chrend;
6655     chrpos = chrstart;
6656   } else {
6657     mappingend = chroffset + chrend + 1;
6658     chrpos = (chrhigh - chroffset) - chrend;
6659   }
6660   mappinglength = (Chrpos_T) (mappingend - mappingstart);
6661 
6662   if (mappinglength > 100000) {
6663     /* 9-mers */
6664     source = 0;
6665   } else if (mappinglength > 10000) {
6666     /* 8-mers */
6667     source = 1;
6668   } else {
6669     /* 7-mers */
6670     source = 2;
6671   }
6672 
6673   oligoindex = Oligoindex_array_elt(oligoindices,source);
6674   indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
6675   /* printf("indexsize = %d\n",indexsize); */
6676 
6677 
6678   Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,plusp,
6679 		      queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
6680 		      chrpos,genestrand);
6681 
6682   diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
6683 				      &oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
6684 				      /*querystart*/0,/*queryend*/querylength,querylength,
6685 				      chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
6686 #else
6687   /* GMAP */
6688   pct_coverage = 0.0;
6689   while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
6690     oligoindex = Oligoindex_array_elt(oligoindices,source);
6691     indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
6692 
6693 #ifdef PMAP
6694     if (plusp == true) {
6695       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
6696 			    /*mappingend*/chroffset+chrend,/*plusp*/true,
6697 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
6698 			    /*chrpos*/chrstart);
6699     } else {
6700       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
6701 			    /*mappingend*/chroffset+chrend+1,/*plusp*/false,
6702 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
6703 			    /*chrpos*/(chrhigh-chroffset)-chrend);
6704     }
6705 #else
6706 
6707 #if 0
6708     /* Previously used this for user_genomicseg, but now creating genome_blocks on the fly */
6709     Oligoindex_hr_tally(oligoindex,genomicuc_ptr,/*genomiclength*/chrend-chrstart,queryuc_ptr,querylength,
6710 			/*sequencepos*/0);
6711 #endif
6712 
6713 #ifdef EXTRACT_GENOMICSEG
6714     /* printf("indexsize = %d\n",indexsize); */
6715     /* printf("Query:  %.*s\n",querylength,queryuc_ptr); */
6716     /* printf("Genome: %s\n",genomicuc_ptr); */
6717     Oligoindex_hr_tally(oligoindex,genomicuc_ptr,/*genomiclength*/mappingend-mappingstart,
6718 			queryuc_ptr,querylength,sequencepos);
6719     counts = Oligoindex_counts_copy(oligoindex);
6720 
6721     /* printf("plusp %d\n",plusp); */
6722     /* printf("genomicstart %u, genomicend %u, genomiclength %d\n",genomicstart,genomicend,genomiclength); */
6723     /* printf("mappingstart %u, mappingend %u\n",mappingstart,mappingend); */
6724 #endif
6725 
6726     if (plusp == true) {
6727       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
6728 			  /*mappingend*/chroffset+chrend,/*plusp*/true,
6729 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
6730 			  /*chrpos*/chrstart,genestrand);
6731     } else {
6732       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
6733 			  /*mappingend*/chroffset+chrend+1,/*plusp*/false,
6734 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
6735 			  /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
6736     }
6737 
6738 #ifdef EXTRACT_GENOMICSEG
6739     assert(Oligoindex_counts_equal(oligoindex,counts));
6740     /* Oligoindex_counts_dump(oligoindex,counts); */
6741 
6742     FREE(counts);
6743 #endif
6744 
6745 #endif
6746 
6747     diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
6748 					&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
6749 					/*querystart*/0,/*queryend*/querylength,querylength,
6750 					chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
6751     pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
6752     debug(printf("Stage2_compute: source = %d, ndiagonals = %d, ncovered = %d, pct_coverage = %f\n",
6753 		 source,List_length(diagonals),ncovered,pct_coverage));
6754 
6755     source++;
6756   }
6757 
6758 #endif
6759 
6760   /* *stage2_source = source; */
6761   /* *stage2_indexsize = indexsize; */
6762 #ifdef PMAP
6763   indexsize_nt = 3*indexsize;
6764 #else
6765   indexsize_nt = indexsize;
6766 #endif
6767 
6768   /* diag_runtime = */ Stopwatch_stop(stopwatch);
6769 
6770   Stopwatch_start(stopwatch);
6771 
6772   if (diag_debug == true) {
6773     /* Do nothing */
6774     middle = (List_T) NULL;
6775 
6776   } else if (totalpositions == 0) {
6777     debug(printf("Quitting because totalpositions is zero\n"));
6778     middle = (List_T) NULL;
6779 
6780 #ifndef GSNAP
6781   } else if (querylength > 150 && pct_coverage < proceed_pctcoverage && ncovered < SUFF_NCOVERED) {
6782     /* Filter only on long queries */
6783     debug(printf("Quitting because querylength %d > 150, and pct_coverage is only %f < %f, and ncovered is only %d < %d, maxnconsecutive = %d\n",
6784 		 querylength,pct_coverage,proceed_pctcoverage,ncovered,SUFF_NCOVERED,maxnconsecutive));
6785     middle = (List_T) NULL;
6786 #endif
6787 
6788   } else {
6789     debug(printf("Proceeding because maxnconsecutive is %d and pct_coverage is %f > %f or ncovered = %d > %d\n",
6790 		 maxnconsecutive,pct_coverage,proceed_pctcoverage,ncovered,SUFF_NCOVERED));
6791 
6792     debug(printf("Performing diag on genomiclength %u\n",chrend-chrstart));
6793     Diag_compute_bounds(&diag_querystart,&diag_queryend,minactive,maxactive,diagonals,querylength,
6794 			debug_graphic_p,chrstart,chrend,chroffset,chrhigh,plusp);
6795 
6796     debug(
6797 	  nunique = Diag_compute_bounds(&diag_querystart,&diag_queryend,minactive,maxactive,diagonals,querylength,
6798 					debug_graphic_p,chrstart,chrend,chroffset,chrhigh,plusp);
6799 	  fprintf(stderr,"%d diagonals (%d not dominated), maxnconsecutive = %d\n",
6800 		  List_length(diagonals),nunique,maxnconsecutive);
6801 	  );
6802 
6803     if (debug_graphic_p == true) {
6804       active_bounds_dump_R(minactive,maxactive,querylength);
6805       printf("lines(querypos,minactive,col=\"blue\")\n");
6806       printf("lines(querypos,maxactive,col=\"blue\")\n");
6807     }
6808 
6809     all_paths = align_compute_lookback(mappings,npositions,totalpositions,
6810 				       oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
6811 				       queryseq_ptr,queryuc_ptr,querylength,
6812 				       /*querystart*/diag_querystart,/*queryend*/diag_queryend,
6813 				       chroffset,chrhigh,plusp,indexsize,pairpool,
6814 				       localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
6815 				       favor_right_p,/*middlep*/true,max_nalignments,debug_graphic_p);
6816     for (p = all_paths; p != NULL; p = List_next(p)) {
6817       path = (List_T) p->first;
6818 #ifdef MOVE_TO_STAGE3
6819       firstpair = path->first;
6820 #endif
6821       pairs = List_reverse(path);
6822 #ifdef MOVE_TO_STAGE3
6823       lastpair = pairs->first;
6824 #endif
6825 
6826       debug5(printf("Converting middle\n"));
6827       if (snps_p == true) {
6828 	middle = convert_to_nucleotides_snps(pairs,
6829 #ifndef PMAP
6830 					     queryseq_ptr,queryuc_ptr,
6831 #endif
6832 					     chroffset,chrhigh,/*watsonp*/plusp,
6833 					     query_offset,pairpool,indexsize_nt,
6834 					     /*include_gapholders_p*/true);
6835       } else {
6836 	middle = convert_to_nucleotides(pairs,
6837 #ifndef PMAP
6838 					queryseq_ptr,queryuc_ptr,
6839 #endif
6840 					chroffset,chrhigh,/*watsonp*/plusp,
6841 					query_offset,pairpool,indexsize_nt,
6842 					/*include_gapholders_p*/true);
6843       }
6844 
6845 
6846       all_ends = (List_T) NULL;
6847 
6848 #ifdef MOVE_TO_STAGE3
6849 #ifdef PMAP
6850       anchor_querypos = lastpair->querypos/3;
6851       /* anchor_position = lastpair->genomepos - 2; */
6852 #else
6853       anchor_querypos = lastpair->querypos;
6854       /* anchor_position = lastpair->genomepos; */
6855 #endif
6856       querystart = anchor_querypos + 1;
6857       queryend = querylength - 1;
6858       debug0(printf("For end, anchor querypos %d\n",anchor_querypos));
6859 
6860       end_paths = align_compute_lookback(mappings,npositions,totalpositions,
6861 					 oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
6862 					 queryseq_ptr,queryuc_ptr,querylength,querystart,queryend,
6863 					 chroffset,chrhigh,plusp,
6864 					 indexsize,pairpool,
6865 					 /*anchoredp*/true,anchor_querypos,anchor_position,
6866 					 localp,skip_repetitive_p,use_canonical_ends_p,NON_CANONICAL_PENALTY_ENDS,
6867 					 favor_right_p,/*middlep*/false,max_nalignments,debug_graphic_p);
6868 
6869       /* fprintf(stderr,"%d ends\n",List_length(end_paths)); */
6870       if (List_length(end_paths) == 1) {
6871 	pairs = (List_T) List_head(end_paths);
6872 	path = List_reverse(pairs);
6873 	debug5(printf("Converting single end\n"));
6874 	if (snps_p == true) {
6875 	  pairs = convert_to_nucleotides_snps(path,
6876 #ifndef PMAP
6877 					      queryseq_ptr,queryuc_ptr,
6878 #endif
6879 					      chroffset,chrhigh,/*watsonp*/plusp,
6880 					      query_offset,pairpool,indexsize_nt,
6881 					      /*include_gapholders_p*/false);
6882 	} else {
6883 	  pairs = convert_to_nucleotides(path,
6884 #ifndef PMAP
6885 					 queryseq_ptr,queryuc_ptr,
6886 #endif
6887 					 chroffset,chrhigh,/*watsonp*/plusp,
6888 					 query_offset,pairpool,indexsize_nt,
6889 					 /*include_gapholders_p*/false);
6890 	}
6891 	middle = Pairpool_remove_gapholders(middle);
6892 	middle = List_reverse(Pairpool_join_end3(List_reverse(middle),pairs,pairpool,/*copy_end_p*/false));
6893 	debug0(printf("ATTACHING SINGLE END TO MIDDLE\n"));
6894 	debug0(Pair_dump_list(middle,true));
6895 
6896       } else {
6897 	debug0(i = 0);
6898 	for (q = end_paths; q != NULL; q = List_next(q)) {
6899 	  pairs = (List_T) List_head(q);
6900 	  path = List_reverse(pairs);
6901 	  debug5(printf("Converting one end\n"));
6902 	  if (snps_p == true) {
6903 	    pairs = convert_to_nucleotides_snps(path,
6904 #ifndef PMAP
6905 						queryseq_ptr,queryuc_ptr,
6906 #endif
6907 						chroffset,chrhigh,/*watsonp*/plusp,
6908 						query_offset,pairpool,indexsize_nt,
6909 						/*include_gapholders_p*/false);
6910 	  } else {
6911 	    pairs = convert_to_nucleotides(path,
6912 #ifndef PMAP
6913 					   queryseq_ptr,queryuc_ptr,
6914 #endif
6915 					   chroffset,chrhigh,/*watsonp*/plusp,
6916 					   query_offset,pairpool,indexsize_nt,
6917 					   /*include_gapholders_p*/false);
6918 	  }
6919 	  debug0(printf("END %d/%d\n",i++,List_length(end_paths)));
6920 	  debug0(Pair_dump_list(pairs,true));
6921 	  all_ends = List_push(all_ends,(void *) pairs);
6922 	}
6923       }
6924       List_free(&end_paths);
6925 #endif
6926 
6927 
6928       all_starts = (List_T) NULL;
6929 
6930 #ifdef MOVE_TO_STAGE3
6931 #ifdef PMAP
6932       anchor_querypos = firstpair->querypos/3;
6933       anchor_position = firstpair->genomepos;
6934 #else
6935       anchor_querypos = firstpair->querypos;
6936       anchor_position = firstpair->genomepos;
6937 #endif
6938       debug0(printf("For start, anchor querypos %d\n",anchor_querypos));
6939 
6940       querystart = 0;
6941       queryend = anchor_querypos - 1;
6942       start_paths = align_compute_lookforward(mappings,npositions,totalpositions,
6943 					      oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
6944 					      queryseq_ptr,queryuc_ptr,querylength,querystart,queryend,
6945 					      chroffset,chrhigh,plusp,
6946 					      indexsize,pairpool,
6947 					      /*anchoredp*/true,anchor_querypos,anchor_position,
6948 					      localp,skip_repetitive_p,use_canonical_ends_p,NON_CANONICAL_PENALTY_ENDS,
6949 					      favor_right_p,/*middlep*/false,max_nalignments,debug_graphic_p);
6950 
6951       /* fprintf(stderr,"%d starts\n",List_length(start_paths)); */
6952       if (List_length(start_paths) == 1) {
6953 	path = (List_T) List_head(start_paths);
6954 	debug5(printf("Converting single start\n"));
6955 	if (snps_p == true) {
6956 	  pairs = convert_to_nucleotides_snps(path,
6957 #ifndef PMAP
6958 					      queryseq_ptr,queryuc_ptr,
6959 #endif
6960 					      chroffset,chrhigh,/*watsonp*/plusp,
6961 					      query_offset,pairpool,indexsize_nt,
6962 					      /*include_gapholders_p*/false);
6963 	} else {
6964 	  pairs = convert_to_nucleotides(path,
6965 #ifndef PMAP
6966 					 queryseq_ptr,queryuc_ptr,
6967 #endif
6968 					 chroffset,chrhigh,/*watsonp*/plusp,
6969 					 query_offset,pairpool,indexsize_nt,
6970 					 /*include_gapholders_p*/false);
6971 	}
6972 	path = List_reverse(pairs);
6973 	middle = Pairpool_remove_gapholders(middle);
6974 	middle = Pairpool_join_end5(middle,path,pairpool,/*copy_end_p*/false);
6975 	debug0(printf("ATTACHING SINGLE START TO MIDDLE\n"));
6976 	debug0(Pair_dump_list(middle,true));
6977 
6978       } else {
6979 	debug0(i = 0);
6980 	for (q = start_paths; q != NULL; q = List_next(q)) {
6981 	  path = (List_T) List_head(q);
6982 	  debug5(printf("Converting one start\n"));
6983 	  if (snps_p == true) {
6984 	    pairs = convert_to_nucleotides_snps(path,
6985 #ifndef PMAP
6986 						queryseq_ptr,queryuc_ptr,
6987 #endif
6988 						chroffset,chrhigh,/*watsonp*/plusp,
6989 						query_offset,pairpool,indexsize_nt,
6990 						/*include_gapholders_p*/false);
6991 	  } else {
6992 	    pairs = convert_to_nucleotides(path,
6993 #ifndef PMAP
6994 					   queryseq_ptr,queryuc_ptr,
6995 #endif
6996 					   chroffset,chrhigh,/*watsonp*/plusp,
6997 					   query_offset,pairpool,indexsize_nt,
6998 					   /*include_gapholders_p*/false);
6999 	  }
7000 	  path = List_reverse(pairs);
7001 	  debug0(printf("START %d/%d\n",i++,List_length(start_paths)));
7002 	  debug0(Pair_dump_list(path,true));
7003 	  all_starts = List_push(all_starts,(void *) path);
7004 	}
7005       }
7006       List_free(&start_paths);
7007 #endif
7008 
7009       all_stage2results = List_push(all_stage2results,(void *) Stage2_new(middle,all_starts,all_ends));
7010     }
7011 
7012     List_free(&all_paths);
7013   }
7014 
7015 #ifdef GSNAP
7016   FREEA(nactive);
7017   FREEA(firstactive);
7018   FREEA(maxactive);
7019   FREEA(minactive);
7020   FREEA(npositions);
7021   FREEA(coveredp);
7022   FREEA(mappings);		/* Don't need to free contents of mappings */
7023 #else
7024   if (querylength > stage2_alloc->max_querylength_alloc) {
7025     FREE(nactive);
7026     FREE(firstactive);
7027     FREE(maxactive);
7028     FREE(minactive);
7029     FREE(npositions);
7030     FREE(coveredp);
7031     FREE(mappings);		/* Don't need to free contents of mappings */
7032   }
7033 #endif
7034 
7035 #if 1
7036   for (source = 0; source < Oligoindex_array_length(oligoindices); source++) {
7037     oligoindex = Oligoindex_array_elt(oligoindices,source);
7038     Oligoindex_untally(oligoindex);
7039   }
7040 #endif
7041 
7042   Stopwatch_stop(stopwatch);
7043 
7044   if (diag_debug == true) {
7045     return diagonals;
7046   } else {
7047 
7048 #ifdef USE_DIAGPOOL
7049   /* No need to free diagonals */
7050 #else
7051     for (p = diagonals; p != NULL; p = List_next(p)) {
7052       diag = (Diag_T) List_head(p);
7053       Diag_free(&diag);
7054     }
7055     List_free(&diagonals);
7056 #endif
7057   }
7058 
7059   all_stage2results = Stage2_filter_unique(all_stage2results);
7060   debug0(printf("Done with stage2.  Returning %d results\n",List_length(all_stage2results)));
7061   return all_stage2results;
7062 }
7063 
7064 
7065 
7066 /* Since this stage2 is called from stage3 with a small segment of the
7067    query, we can use alloca instead of stage2_alloc */
7068 List_T
Stage2_compute_one(char * queryseq_ptr,char * queryuc_ptr,int querylength,int query_offset,Chrpos_T chrstart,Chrpos_T chrend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int genestrand,Oligoindex_array_T oligoindices,Pairpool_T pairpool,Diagpool_T diagpool,Cellpool_T cellpool,bool localp,bool skip_repetitive_p,bool favor_right_p,bool debug_graphic_p)7069 Stage2_compute_one (char *queryseq_ptr, char *queryuc_ptr, int querylength, int query_offset,
7070 		    Chrpos_T chrstart, Chrpos_T chrend,
7071 		    Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
7072 		    Oligoindex_array_T oligoindices,
7073 		    Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
7074 		    bool localp, bool skip_repetitive_p,
7075 		    bool favor_right_p, bool debug_graphic_p) {
7076   List_T pairs, all_paths;
7077   List_T middle, path;
7078   int indexsize, indexsize_nt;
7079   Oligoindex_T oligoindex;
7080   Chrpos_T **mappings;
7081   bool *coveredp, oned_matrix_p;
7082   int source;
7083   int *npositions, totalpositions;
7084   Chrpos_T *minactive, *maxactive;
7085   int *firstactive, *nactive;
7086   int ncovered;
7087   double pct_coverage;
7088   int maxnconsecutive;
7089   /* double diag_runtime; */
7090   List_T diagonals;
7091 
7092 
7093   debug(printf("Entered Stage2_compute_one with chrstart %u and chrend %u\n",chrstart,chrend));
7094 
7095   coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
7096   mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
7097   npositions = (int *) CALLOCA(querylength,sizeof(int));
7098   minactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7099   maxactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7100   firstactive = (int *) MALLOCA(querylength * sizeof(int));
7101   nactive = (int *) MALLOCA(querylength * sizeof(int));
7102 
7103   totalpositions = 0;
7104   maxnconsecutive = 0;
7105 
7106   source = 0;
7107   pct_coverage = 0.0;
7108 #ifdef USE_DIAGPOOL
7109   Diagpool_reset(diagpool);
7110 #endif
7111   Cellpool_reset(cellpool);
7112   diagonals = (List_T) NULL;
7113   while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
7114     oligoindex = Oligoindex_array_elt(oligoindices,source);
7115     indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
7116 
7117 #ifdef PMAP
7118     if (plusp == true) {
7119       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7120 			    /*mappingend*/chroffset+chrend,/*plusp*/true,
7121 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7122 			    /*chrpos*/chrstart);
7123     } else {
7124       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7125 			    /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7126 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7127 			    /*chrpos*/(chrhigh-chroffset)-chrend);
7128     }
7129 
7130 #else
7131 
7132     if (plusp == true) {
7133       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7134 			  /*mappingend*/chroffset+chrend,/*plusp*/true,
7135 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7136 			  /*chrpos*/chrstart,genestrand);
7137     } else {
7138       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7139 			  /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7140 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7141 			  /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
7142     }
7143 
7144 #endif
7145 
7146     diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
7147 					&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
7148 					/*querystart*/0,/*queryend*/querylength,querylength,
7149 					chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
7150     pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
7151     debug(printf("Stage2_compute: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
7152 
7153     source++;
7154   }
7155   /* *stage2_source = source; */
7156   /* *stage2_indexsize = indexsize; */
7157 #ifdef PMAP
7158   indexsize_nt = 3*indexsize;
7159 #else
7160   indexsize_nt = indexsize;
7161 #endif
7162 
7163 
7164   if (totalpositions == 0) {
7165     debug(printf("Quitting because totalpositions is zero\n"));
7166     middle = (List_T) NULL;
7167 
7168   } else {
7169     debug(printf("Proceeding because pct_coverage is %f > %f or ncovered = %d > %d\n",
7170 		 maxnconsecutive,pct_coverage,ncovered,SUFF_NCOVERED));
7171 
7172     debug(printf("Performing diag on genomiclength %u\n",chrend-chrstart));
7173     Diag_max_bounds(minactive,maxactive,querylength,chrstart,chrend,chroffset,chrhigh,plusp);
7174 
7175     if ((all_paths = align_compute_lookback(mappings,npositions,totalpositions,
7176 					    oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
7177 					    queryseq_ptr,queryuc_ptr,querylength,
7178 					    /*querystart*/0,/*queryend*/querylength-1,
7179 					    chroffset,chrhigh,plusp,indexsize,pairpool,
7180 					    localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
7181 					    favor_right_p,/*middlep*/true,/*max_nalignments*/1,debug_graphic_p)) == NULL) {
7182       middle = (List_T) NULL;
7183     } else if ((path = (List_T) List_head(all_paths)) == NULL) {
7184       middle = (List_T) NULL;
7185     } else if (snps_p == true) {
7186       pairs = List_reverse(path);
7187       middle = convert_to_nucleotides_snps(pairs,
7188 #ifndef PMAP
7189 					   queryseq_ptr,queryuc_ptr,
7190 #endif
7191 					   chroffset,chrhigh,/*watsonp*/plusp,
7192 					   query_offset,pairpool,indexsize_nt,
7193 					   /*include_gapholders_p*/true);
7194     } else {
7195       pairs = List_reverse(path);
7196       middle = convert_to_nucleotides(pairs,
7197 #ifndef PMAP
7198 				      queryseq_ptr,queryuc_ptr,
7199 #endif
7200 				      chroffset,chrhigh,/*watsonp*/plusp,
7201 				      query_offset,pairpool,indexsize_nt,
7202 				      /*include_gapholders_p*/true);
7203     }
7204 
7205     List_free(&all_paths);
7206   }
7207 
7208   FREEA(nactive);
7209   FREEA(firstactive);
7210   FREEA(maxactive);
7211   FREEA(minactive);
7212   FREEA(npositions);
7213   FREEA(coveredp);
7214   FREEA(mappings);		/* Don't need to free contents of mappings */
7215 
7216 #if 1
7217   for (source = 0; source < Oligoindex_array_length(oligoindices); source++) {
7218     oligoindex = Oligoindex_array_elt(oligoindices,source);
7219     Oligoindex_untally(oligoindex);
7220   }
7221 #endif
7222 
7223 #ifdef USE_DIAGPOOL
7224   /* No need to free diagonals */
7225 #else
7226   for (p = diagonals; p != NULL; p = List_next(p)) {
7227     diag = (Diag_T) List_head(p);
7228     Diag_free(&diag);
7229   }
7230   List_free(&diagonals);
7231 #endif
7232 
7233   return List_reverse(middle);
7234 }
7235 
7236 
7237 
7238 /* Called by GSNAP for ends of substring alignments */
7239 List_T
Stage2_compute_starts(char * queryseq_ptr,char * queryuc_ptr,int querylength,int query_offset,Chrpos_T chrstart,Chrpos_T chrend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int genestrand,Oligoindex_array_T oligoindices,Pairpool_T pairpool,Diagpool_T diagpool,Cellpool_T cellpool,bool localp,bool skip_repetitive_p,bool favor_right_p,int max_nalignments,bool debug_graphic_p)7240 Stage2_compute_starts (char *queryseq_ptr, char *queryuc_ptr, int querylength, int query_offset,
7241 		       Chrpos_T chrstart, Chrpos_T chrend,
7242 		       Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
7243 		       Oligoindex_array_T oligoindices,
7244 		       Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
7245 		       bool localp, bool skip_repetitive_p,
7246 		       bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
7247   List_T all_results;
7248   List_T pairs, all_paths, p;
7249   List_T path;
7250   int indexsize, indexsize_nt;
7251   Oligoindex_T oligoindex;
7252   Chrpos_T **mappings;
7253   bool *coveredp, oned_matrix_p;
7254   int source;
7255   int *npositions, totalpositions;
7256   Chrpos_T *minactive, *maxactive;
7257   int *firstactive, *nactive;
7258   int ncovered;
7259   double pct_coverage;
7260   int maxnconsecutive;
7261   /* double diag_runtime; */
7262   List_T diagonals;
7263 
7264 
7265   debug(printf("Entered Stage2_compute_starts with chrstart %u and chrend %u\n",chrstart,chrend));
7266 
7267   coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
7268   mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
7269   npositions = (int *) CALLOCA(querylength,sizeof(int));
7270   minactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7271   maxactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7272   firstactive = (int *) MALLOCA(querylength * sizeof(int));
7273   nactive = (int *) MALLOCA(querylength * sizeof(int));
7274 
7275   totalpositions = 0;
7276   maxnconsecutive = 0;
7277 
7278   source = 0;
7279   pct_coverage = 0.0;
7280 #ifdef USE_DIAGPOOL
7281   Diagpool_reset(diagpool);
7282 #endif
7283   Cellpool_reset(cellpool);
7284   diagonals = (List_T) NULL;
7285   while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
7286     oligoindex = Oligoindex_array_elt(oligoindices,source);
7287     indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
7288 
7289 #ifdef PMAP
7290     if (plusp == true) {
7291       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7292 			    /*mappingend*/chroffset+chrend,/*plusp*/true,
7293 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7294 			    /*chrpos*/chrstart);
7295     } else {
7296       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7297 			    /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7298 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7299 			    /*chrpos*/(chrhigh-chroffset)-chrend);
7300     }
7301 
7302 #else
7303 
7304     if (plusp == true) {
7305       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7306 			  /*mappingend*/chroffset+chrend,/*plusp*/true,
7307 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7308 			  /*chrpos*/chrstart,genestrand);
7309     } else {
7310       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7311 			  /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7312 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7313 			  /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
7314     }
7315 
7316 #endif
7317 
7318     diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
7319 					&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
7320 					/*querystart*/0,/*queryend*/querylength,querylength,
7321 					chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
7322     pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
7323     debug(printf("Stage2_compute: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
7324 
7325     source++;
7326   }
7327   /* *stage2_source = source; */
7328   /* *stage2_indexsize = indexsize; */
7329 #ifdef PMAP
7330   indexsize_nt = 3*indexsize;
7331 #else
7332   indexsize_nt = indexsize;
7333 #endif
7334 
7335 
7336   if (totalpositions == 0) {
7337     debug(printf("Quitting because totalpositions is zero\n"));
7338     all_results = (List_T) NULL;
7339 
7340   } else {
7341     debug(printf("Proceeding because maxnconsecutive is %d and pct_coverage is %f or ncovered = %d > %d\n",
7342 		 maxnconsecutive,pct_coverage,ncovered,SUFF_NCOVERED));
7343 
7344     debug(printf("Performing diag on genomiclength %u\n",chrend-chrstart));
7345     Diag_max_bounds(minactive,maxactive,querylength,chrstart,chrend,chroffset,chrhigh,plusp);
7346 
7347     if ((all_paths = align_compute_lookforward(mappings,npositions,totalpositions,
7348 					       oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
7349 					       queryseq_ptr,queryuc_ptr,querylength,
7350 					       /*querystart*/0,/*queryend*/querylength-1,
7351 					       chroffset,chrhigh,plusp,indexsize,pairpool,
7352 					       localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
7353 					       favor_right_p,/*middlep*/false,max_nalignments,debug_graphic_p)) == NULL) {
7354       all_results = (List_T) NULL;
7355 
7356     } else if (snps_p == true) {
7357       all_results = (List_T) NULL;
7358       for (p = all_paths; p != NULL; p = List_next(p)) {
7359 	path = List_head(p);
7360 	pairs = convert_to_nucleotides_snps(path,
7361 #ifndef PMAP
7362 					    queryseq_ptr,queryuc_ptr,
7363 #endif
7364 					    chroffset,chrhigh,/*watsonp*/plusp,
7365 					    query_offset,pairpool,indexsize_nt,
7366 					    /*include_gapholders_p*/false);
7367 	path = List_reverse(pairs);
7368 	debug0(printf("START\n"));
7369 	debug0(Pair_dump_list(path,true));
7370 	if (path != NULL) {
7371 	  all_results = List_push(all_results,(void *) path);
7372 	}
7373       }
7374 
7375     } else {
7376       all_results = (List_T) NULL;
7377       for (p = all_paths; p != NULL; p = List_next(p)) {
7378 	path = List_head(p);
7379 	pairs = convert_to_nucleotides(path,
7380 #ifndef PMAP
7381 				       queryseq_ptr,queryuc_ptr,
7382 #endif
7383 				       chroffset,chrhigh,/*watsonp*/plusp,
7384 				       query_offset,pairpool,indexsize_nt,
7385 				       /*include_gapholders_p*/false);
7386 	path = List_reverse(pairs);
7387 	debug0(printf("START\n"));
7388 	debug0(Pair_dump_list(path,true));
7389 	if (path != NULL) {
7390 	  all_results = List_push(all_results,(void *) path);
7391 	}
7392       }
7393     }
7394 
7395     List_free(&all_paths);
7396   }
7397 
7398   FREEA(nactive);
7399   FREEA(firstactive);
7400   FREEA(maxactive);
7401   FREEA(minactive);
7402   FREEA(npositions);
7403   FREEA(coveredp);
7404   FREEA(mappings);		/* Don't need to free contents of mappings */
7405 
7406 #if 1
7407   for (source = 0; source < Oligoindex_array_length(oligoindices); source++) {
7408     oligoindex = Oligoindex_array_elt(oligoindices,source);
7409     Oligoindex_untally(oligoindex);
7410   }
7411 #endif
7412 
7413 #ifdef USE_DIAGPOOL
7414   /* No need to free diagonals */
7415 #else
7416   for (p = diagonals; p != NULL; p = List_next(p)) {
7417     diag = (Diag_T) List_head(p);
7418     Diag_free(&diag);
7419   }
7420   List_free(&diagonals);
7421 #endif
7422 
7423   debug0(printf("Before filtering starts, %d\n",List_length(all_results)));
7424   all_results = Stage2pairs_filter_unique_starts(all_results);
7425   debug0(printf("After filtering starts, %d\n",List_length(all_results)));
7426 
7427   return all_results;
7428 }
7429 
7430 
7431 /* Called by GSNAP for ends of substring alignments */
7432 List_T
Stage2_compute_ends(char * queryseq_ptr,char * queryuc_ptr,int querylength,int query_offset,Chrpos_T chrstart,Chrpos_T chrend,Univcoord_T chroffset,Univcoord_T chrhigh,bool plusp,int genestrand,Oligoindex_array_T oligoindices,Pairpool_T pairpool,Diagpool_T diagpool,Cellpool_T cellpool,bool localp,bool skip_repetitive_p,bool favor_right_p,int max_nalignments,bool debug_graphic_p)7433 Stage2_compute_ends (char *queryseq_ptr, char *queryuc_ptr, int querylength, int query_offset,
7434 		     Chrpos_T chrstart, Chrpos_T chrend,
7435 		     Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int genestrand,
7436 		     Oligoindex_array_T oligoindices,
7437 		     Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
7438 		     bool localp,
7439 		     bool skip_repetitive_p,
7440 		     bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
7441   List_T all_results;
7442   List_T pairs, all_paths, p;
7443   List_T path;
7444   int indexsize, indexsize_nt;
7445   Oligoindex_T oligoindex;
7446   Chrpos_T **mappings;
7447   bool *coveredp, oned_matrix_p;
7448   int source;
7449   int *npositions, totalpositions;
7450   Chrpos_T *minactive, *maxactive;
7451   int *firstactive, *nactive;
7452   int ncovered;
7453   double pct_coverage;
7454   int maxnconsecutive;
7455   /* double diag_runtime; */
7456   List_T diagonals;
7457 
7458 
7459   debug(printf("Entered Stage2_compute_ends with chrstart %u and chrend %u\n",chrstart,chrend));
7460 
7461   coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
7462   mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
7463   npositions = (int *) CALLOCA(querylength,sizeof(int));
7464   minactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7465   maxactive = (unsigned int *) MALLOCA(querylength * sizeof(unsigned int));
7466   firstactive = (int *) MALLOCA(querylength * sizeof(int));
7467   nactive = (int *) MALLOCA(querylength * sizeof(int));
7468 
7469   totalpositions = 0;
7470   maxnconsecutive = 0;
7471 
7472   source = 0;
7473   pct_coverage = 0.0;
7474 #ifdef USE_DIAGPOOL
7475   Diagpool_reset(diagpool);
7476 #endif
7477   Cellpool_reset(cellpool);
7478   diagonals = (List_T) NULL;
7479   while (source < Oligoindex_array_length(oligoindices) && pct_coverage < SUFF_PCTCOVERAGE_OLIGOINDEX) {
7480     oligoindex = Oligoindex_array_elt(oligoindices,source);
7481     indexsize = Oligoindex_indexsize(oligoindex); /* Different sources can have different indexsizes */
7482 
7483 #ifdef PMAP
7484     if (plusp == true) {
7485       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7486 			    /*mappingend*/chroffset+chrend,/*plusp*/true,
7487 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7488 			    /*chrpos*/chrstart);
7489     } else {
7490       Oligoindex_pmap_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7491 			    /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7492 			    queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7493 			    /*chrpos*/(chrhigh-chroffset)-chrend);
7494     }
7495 
7496 #else
7497 
7498     if (plusp == true) {
7499       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7500 			  /*mappingend*/chroffset+chrend,/*plusp*/true,
7501 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7502 			  /*chrpos*/chrstart,genestrand);
7503     } else {
7504       Oligoindex_hr_tally(oligoindex,/*mappingstart*/chroffset+chrstart,
7505 			  /*mappingend*/chroffset+chrend+1,/*plusp*/false,
7506 			  queryuc_ptr,/*querystart*/0,/*queryend*/querylength,
7507 			  /*chrpos*/(chrhigh-chroffset)-chrend,genestrand);
7508     }
7509 
7510 #endif
7511 
7512     diagonals = Oligoindex_get_mappings(diagonals,coveredp,mappings,npositions,&totalpositions,
7513 					&oned_matrix_p,&maxnconsecutive,oligoindices,oligoindex,queryuc_ptr,
7514 					/*querystart*/0,/*queryend*/querylength,querylength,
7515 					chrstart,chrend,chroffset,chrhigh,plusp,diagpool);
7516     pct_coverage = Diag_update_coverage(coveredp,&ncovered,diagonals,querylength);
7517     debug(printf("Stage2_compute: source = %d, ncovered = %d, pct_coverage = %f\n",source,ncovered,pct_coverage));
7518 
7519     source++;
7520   }
7521   /* *stage2_source = source; */
7522   /* *stage2_indexsize = indexsize; */
7523 #ifdef PMAP
7524   indexsize_nt = 3*indexsize;
7525 #else
7526   indexsize_nt = indexsize;
7527 #endif
7528 
7529 
7530   if (totalpositions <= 0) {
7531     debug(printf("Quitting because totalpositions is zero\n"));
7532     all_results = (List_T) NULL;
7533 
7534   } else {
7535     debug(printf("Proceeding because maxnconsecutive is %d and pct_coverage is %f or ncovered = %d > %d\n",
7536 		 maxnconsecutive,pct_coverage,ncovered,SUFF_NCOVERED));
7537 
7538     debug(printf("Performing diag on genomiclength %u\n",chrend-chrstart));
7539     Diag_max_bounds(minactive,maxactive,querylength,chrstart,chrend,chroffset,chrhigh,plusp);
7540 
7541     if ((all_paths = align_compute_lookback(mappings,npositions,totalpositions,
7542 					    oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
7543 					    queryseq_ptr,queryuc_ptr,querylength,
7544 					    /*querystart*/0,/*queryend*/querylength-1,
7545 					    chroffset,chrhigh,plusp,indexsize,pairpool,
7546 					    localp,skip_repetitive_p,use_canonical_middle_p,NON_CANONICAL_PENALTY_MIDDLE,
7547 					    favor_right_p,/*middlep*/false,max_nalignments,debug_graphic_p)) == NULL) {
7548       all_results = (List_T) NULL;
7549 
7550     } else if (snps_p == true) {
7551       all_results = (List_T) NULL;
7552       for (p = all_paths; p != NULL; p = List_next(p)) {
7553 	pairs = List_head(p);
7554 	path = List_reverse(pairs);
7555 	pairs = convert_to_nucleotides_snps(path,
7556 #ifndef PMAP
7557 					    queryseq_ptr,queryuc_ptr,
7558 #endif
7559 					    chroffset,chrhigh,/*watsonp*/plusp,
7560 					    query_offset,pairpool,indexsize_nt,
7561 					    /*include_gapholders_p*/false);
7562 	debug0(printf("END\n"));
7563 	debug0(Pair_dump_list(pairs,true));
7564 	if (pairs != NULL) {
7565 	  all_results = List_push(all_results,(void *) pairs);
7566 	}
7567       }
7568 
7569     } else {
7570       all_results = (List_T) NULL;
7571       for (p = all_paths; p != NULL; p = List_next(p)) {
7572 	pairs = List_head(p);
7573 	path = List_reverse(pairs);
7574 	pairs = convert_to_nucleotides(path,
7575 #ifndef PMAP
7576 				       queryseq_ptr,queryuc_ptr,
7577 #endif
7578 				       chroffset,chrhigh,/*watsonp*/plusp,
7579 				       query_offset,pairpool,indexsize_nt,
7580 				       /*include_gapholders_p*/false);
7581 	debug0(printf("END\n"));
7582 	debug0(Pair_dump_list(pairs,true));
7583 	if (pairs != NULL) {
7584 	  all_results = List_push(all_results,(void *) pairs);
7585 	}
7586       }
7587     }
7588 
7589     List_free(&all_paths);
7590   }
7591 
7592   FREEA(nactive);
7593   FREEA(firstactive);
7594   FREEA(maxactive);
7595   FREEA(minactive);
7596   FREEA(npositions);
7597   FREEA(coveredp);
7598   FREEA(mappings);		/* Don't need to free contents of mappings */
7599 
7600 #if 1
7601   for (source = 0; source < Oligoindex_array_length(oligoindices); source++) {
7602     oligoindex = Oligoindex_array_elt(oligoindices,source);
7603     Oligoindex_untally(oligoindex);
7604   }
7605 #endif
7606 
7607 #ifdef USE_DIAGPOOL
7608   /* No need to free diagonals */
7609 #else
7610   for (p = diagonals; p != NULL; p = List_next(p)) {
7611     diag = (Diag_T) List_head(p);
7612     Diag_free(&diag);
7613   }
7614   List_free(&diagonals);
7615 #endif
7616 
7617   debug0(printf("Before filtering ends, %d\n",List_length(all_results)));
7618   all_results = Stage2pairs_filter_unique_ends(all_results);
7619   debug0(printf("After filtering ends, %d\n",List_length(all_results)));
7620 
7621   return all_results;
7622 }
7623 
7624