1 static char rcsid[] = "$Id: dynprog_genome.c 218161 2019-01-17 05:57:37Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "dynprog_genome.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h>		/* For ceil, log, pow */
11 #include <ctype.h>		/* For tolower */
12 
13 
14 #include "bool.h"
15 #include "except.h"
16 #include "assert.h"
17 #include "mem.h"
18 #include "comp.h"
19 #include "pair.h"
20 #include "pairdef.h"
21 #include "intron.h"
22 #include "complement.h"
23 #include "maxent.h"
24 #include "maxent_hr.h"
25 #include "dynprog.h"		/* For parameters */
26 #include "dynprog_simd.h"
27 
28 
29 #ifdef DEBUG
30 #define debug(x) x
31 #else
32 #define debug(x)
33 #endif
34 
35 /* Prints all winning bridge scores */
36 #ifdef DEBUG3
37 #define debug3(x) x
38 #else
39 #define debug3(x)
40 #endif
41 
42 /* Prints all losing bridge scores */
43 #ifdef DEBUG3A
44 #define debug3a(x) x
45 #else
46 #define debug3a(x)
47 #endif
48 
49 /* Known splicing */
50 #ifdef DEBUG5
51 #define debug5(x) x
52 #else
53 #define debug5(x)
54 #endif
55 
56 /* Getting genomic nt */
57 #ifdef DEBUG8
58 #define debug8(x) x
59 #else
60 #define debug8(x)
61 #endif
62 
63 /* Splice site probabilities */
64 #ifdef DEBUG9
65 #define debug9(x) x
66 #else
67 #define debug9(x)
68 #endif
69 
70 /* print_vector */
71 #ifdef DEBUG15
72 #define debug15(x) x
73 #else
74 #define debug15(x)
75 #endif
76 
77 
78 #define USE_SCOREI 1
79 /* #define USE_WEAK_SCOREI 1 */
80 
81 #define PROB_CEILING 0.85
82 #define PROB_FLOOR 0.50
83 /* #define PROB_BAD 0.50 */
84 
85 /* Prefer alternate intron to other non-canonicals, but don't
86    introduce mismatches or gaps to identify */
87 #ifdef USE_WEAK_SCOREI
88 #define CANONICAL_INTRON 6
89 #define GCAG_INTRON 4
90 #define ATAC_INTRON 2
91 #define FINAL_GCAG_INTRON 4    /* Amount above regular should approximately
92 				   match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
93 #define FINAL_ATAC_INTRON 2
94 
95 #else
96 /* Values were 15, 12, 20, and 12 */
97 #define GCAG_INTRON 8
98 #define ATAC_INTRON 4
99 #define FINAL_GCAG_INTRON 10    /* Amount above regular should approximately
100 				   match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
101 #define FINAL_ATAC_INTRON 8
102 #endif
103 
104 
105 /* Don't want to make too high, otherwise we will harm evaluation of
106    dual introns vs. single intron */
107 /* Values were 10, 16, 22 */
108 #define CANONICAL_INTRON_HIGHQ 14 /* GT-AG */
109 #define CANONICAL_INTRON_MEDQ  18
110 #define CANONICAL_INTRON_LOWQ  22
111 
112 /* Values were 30, 36, 42 */
113 #define FINAL_CANONICAL_INTRON_HIGHQ 16 /* GT-AG */
114 #define FINAL_CANONICAL_INTRON_MEDQ  20
115 #define FINAL_CANONICAL_INTRON_LOWQ  24
116 
117 #define KNOWN_SPLICESITE_REWARD 20
118 
119 
120 #define T Dynprog_T
121 
122 static bool novelsplicingp;
123 
124 static IIT_T splicing_iit;
125 static int *splicing_divint_crosstable;
126 static int donor_typeint;
127 static int acceptor_typeint;
128 
129 static int intron_score_array_sense_final[256];
130 static int intron_score_array_antisense_final[256];
131 static int intron_score_array_either_final[256];
132 static int intron_score_array_sense_prelim[256];
133 static int intron_score_array_antisense_prelim[256];
134 static int intron_score_array_either_prelim[256];
135 
136 
137 static void
intron_score_setup()138 intron_score_setup () {
139   memset(intron_score_array_sense_final,0,256*sizeof(int));
140   memset(intron_score_array_antisense_final,0,256*sizeof(int));
141   memset(intron_score_array_either_final,0,256*sizeof(int));
142   memset(intron_score_array_sense_prelim,0,256*sizeof(int));
143   memset(intron_score_array_antisense_prelim,0,256*sizeof(int));
144   memset(intron_score_array_either_prelim,0,256*sizeof(int));
145 
146   /* Not handling PMAP yet */
147 
148   intron_score_array_sense_final[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
149   intron_score_array_sense_final[GCAG_FWD] = FINAL_GCAG_INTRON;
150   intron_score_array_sense_final[ATAC_FWD] = FINAL_ATAC_INTRON;
151 
152   intron_score_array_sense_prelim[GTAG_FWD] = CANONICAL_INTRON_HIGHQ;
153   intron_score_array_sense_prelim[GCAG_FWD] = GCAG_INTRON;
154   intron_score_array_sense_prelim[ATAC_FWD] = ATAC_INTRON;
155 
156 
157   intron_score_array_antisense_final[GTAG_REV] = FINAL_CANONICAL_INTRON_HIGHQ;
158   intron_score_array_antisense_final[GCAG_REV] = FINAL_GCAG_INTRON;
159   intron_score_array_antisense_final[ATAC_REV] = FINAL_ATAC_INTRON;
160 
161   intron_score_array_antisense_prelim[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
162   intron_score_array_antisense_prelim[GCAG_REV] = GCAG_INTRON;
163   intron_score_array_antisense_prelim[ATAC_REV] = ATAC_INTRON;
164 
165 
166   intron_score_array_either_final[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
167   intron_score_array_either_final[GCAG_FWD] = FINAL_GCAG_INTRON;
168   intron_score_array_either_final[ATAC_FWD] = FINAL_ATAC_INTRON;
169   intron_score_array_either_final[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
170   intron_score_array_either_final[GCAG_REV] = FINAL_GCAG_INTRON;
171   intron_score_array_either_final[ATAC_REV] = FINAL_ATAC_INTRON;
172 
173   intron_score_array_either_prelim[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
174   intron_score_array_either_prelim[GCAG_FWD] = GCAG_INTRON;
175   intron_score_array_either_prelim[ATAC_FWD] = ATAC_INTRON;
176   intron_score_array_either_prelim[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
177   intron_score_array_either_prelim[GCAG_REV] = GCAG_INTRON;
178   intron_score_array_either_prelim[ATAC_REV] = ATAC_INTRON;
179 
180   return;
181 }
182 
183 
184 
185 void
Dynprog_genome_setup(bool novelsplicingp_in,IIT_T splicing_iit_in,int * splicing_divint_crosstable_in,int donor_typeint_in,int acceptor_typeint_in)186 Dynprog_genome_setup (bool novelsplicingp_in,
187 		      IIT_T splicing_iit_in, int *splicing_divint_crosstable_in,
188 		      int donor_typeint_in, int acceptor_typeint_in) {
189   novelsplicingp = novelsplicingp_in;
190 
191   splicing_iit = splicing_iit_in;
192   splicing_divint_crosstable = splicing_divint_crosstable_in;
193   donor_typeint = donor_typeint_in;
194   acceptor_typeint = acceptor_typeint_in;
195 
196   intron_score_setup();
197 
198   return;
199 }
200 
201 
202 #if 0
203 /************************************************************************
204  * get_genomic_nt
205  ************************************************************************/
206 
207 static char complCode[128] = COMPLEMENT_LC;
208 
209 static char
210 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
211 		bool watsonp) {
212   char c2, c2_alt;
213   Univcoord_T pos;
214 
215 #if 0
216   /* If the read has a deletion, then we will extend beyond 0 or genomiclength, so do not restrict. */
217   if (genomicpos < 0) {
218     return '*';
219 
220   } else if (genomicpos >= genomiclength) {
221     return '*';
222 
223   }
224 #endif
225 
226   if (watsonp) {
227     if ((pos = chroffset + genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
228       *g_alt = '*';
229       return '*';
230 
231     } else if (pos >= chrhigh) {
232       *g_alt = '*';
233       return '*';
234 
235 #if 0
236     } else if (genome) {
237       /* Not necessary, because Genome_get_char_blocks should work */
238       debug8(printf("At %u, genomicnt is %c\n",
239 		    genomicpos,Genome_get_char(genome,pos)));
240       return Genome_get_char(genome,pos);
241 #endif
242 
243     } else {
244       /* GMAP with user-supplied genomic segment */
245       debug8(printf("At %u, genomicnt is %c\n",
246 		    genomicpos,Genome_get_char_blocks(pos)));
247       return Genome_get_char_blocks(&(*g_alt),pos);
248     }
249 
250   } else {
251     if ((pos = chrhigh - genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
252       *g_alt = '*';
253       return '*';
254 
255     } else if (pos >= chrhigh) {
256       *g_alt = '*';
257       return '*';
258 
259 #if 0
260     } else if (genome) {
261       /* Not necessary, because Genome_get_char_blocks should work */
262       c2 = Genome_get_char(genome,pos);
263 #endif
264 
265     } else {
266       /* GMAP with user-supplied genomic segment */
267       c2 = Genome_get_char_blocks(&c2_alt,pos);
268     }
269     debug8(printf("At %u, genomicnt is %c\n",genomicpos,complCode[(int) c2]));
270     *g_alt = complCode[(int) c2_alt];
271     return complCode[(int) c2];
272   }
273 }
274 #endif
275 
276 
277 #if 0
278 /* Too slow for numerous calls.  Using arrays above instead */
279 static int
280 intron_score (int *introntype, int leftdi, int rightdi, int cdna_direction, int canonical_reward,
281 	      bool finalp) {
282   int scoreI;
283 
284 #ifdef USE_WEAK_SCOREI
285   canonical_reward = CANONICAL_INTRON;
286 #endif
287 
288 #ifdef PMAP
289   if ((*introntype = leftdi & rightdi) == NONINTRON) {
290     scoreI = 0.0;
291   } else {
292     switch (*introntype) {
293     case GTAG_FWD: scoreI = canonical_reward; break;
294     case GCAG_FWD: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
295     case ATAC_FWD: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
296     default: *introntype = NONINTRON; scoreI = 0.0;
297     }
298   }
299 #else
300   if ((*introntype = leftdi & rightdi) == NONINTRON) {
301     scoreI = 0.0;
302   } else if (cdna_direction > 0) {
303     switch (*introntype) {
304     case GTAG_FWD: scoreI = canonical_reward; break;
305     case GCAG_FWD: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
306     case ATAC_FWD: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
307     default: *introntype = NONINTRON; scoreI = 0.0;
308     }
309   } else if (cdna_direction < 0) {
310     switch (*introntype) {
311     case GTAG_REV: scoreI = canonical_reward; break;
312     case GCAG_REV: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
313     case ATAC_REV: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
314     default: *introntype = NONINTRON; scoreI = 0.0;
315     }
316   } else {
317     switch (*introntype) {
318     case GTAG_FWD: case GTAG_REV: scoreI = canonical_reward; break;
319     case GCAG_FWD: case GCAG_REV: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
320     case ATAC_FWD: case ATAC_REV: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
321     default: *introntype = NONINTRON; scoreI = 0.0;
322     }
323   }
324 #endif
325 
326   return scoreI;
327 }
328 #endif
329 
330 
331 
332 static void
get_splicesite_probs(double * left_prob,double * right_prob,int cL,int cR,int * left_known,int * right_known,Univcoord_T leftoffset,Univcoord_T rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,int cdna_direction,bool watsonp)333 get_splicesite_probs (double *left_prob, double *right_prob, int cL, int cR,
334 		      int *left_known, int *right_known, Univcoord_T leftoffset, Univcoord_T rightoffset,
335 		      Univcoord_T chroffset, Univcoord_T chrhigh, int cdna_direction, bool watsonp) {
336   Univcoord_T splicesitepos;
337 
338   if (left_known[cL] > 0) {
339     debug9(printf("left position is known, so prob is 1.0\n"));
340     *left_prob = 1.0;
341 
342   } else if (watsonp == true) {
343     splicesitepos = chroffset + leftoffset + cL;
344     if (cdna_direction > 0) {
345       *left_prob = Maxent_hr_donor_prob(splicesitepos,chroffset);
346       debug9(printf("1. donor splicesitepos is %u, prob %f, known %d\n",
347 		    splicesitepos,*left_prob,left_known[cL]));
348 
349     } else {
350       *left_prob = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
351       debug9(printf("2. antiacceptor splicesitepos is %u, prob %f, known %d\n",
352 		    splicesitepos,*left_prob,left_known[cL]));
353 
354     }
355   } else {
356     splicesitepos = chrhigh - leftoffset - cL + 1;
357     if (cdna_direction > 0) {
358       *left_prob = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
359       debug9(printf("3. antidonor splicesitepos is %u, prob %f, known %d\n",
360 		    splicesitepos,*left_prob,left_known[cL]));
361 
362     } else {
363       *left_prob = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
364       debug9(printf("4. acceptor splicesitepos is %u, prob %f, known %d\n",
365 		    splicesitepos,*left_prob,left_known[cL]));
366     }
367   }
368 
369   if (right_known[cR] > 0) {
370     debug9(printf("right position is known, so prob is 1.0\n"));
371     *right_prob = 1.0;
372 
373   } else if (watsonp == true) {
374     splicesitepos = chroffset + rightoffset - cR + 1;
375     if (cdna_direction > 0) {
376       *right_prob = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
377       debug9(printf("5. acceptor splicesitepos is %u, prob %f, known %d\n",
378 		    splicesitepos,*right_prob,right_known[cR]));
379     } else {
380       *right_prob = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
381       debug9(printf("6. antidonor splicesitepos is %u, prob %f, known %d\n",
382 		    splicesitepos,*right_prob,right_known[cR]));
383 
384     }
385   } else {
386     splicesitepos = chrhigh - rightoffset + cR;
387     if (cdna_direction > 0) {
388       *right_prob = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
389       debug9(printf("7. antiacceptor splicesitepos is %u, prob %f, known %d\n",
390 		    splicesitepos,*right_prob,right_known[cR]));
391 
392     } else {
393       *right_prob = Maxent_hr_donor_prob(splicesitepos,chroffset);
394       debug9(printf("8. donor splicesitepos is %u, prob %f, known %d\n",
395 		    splicesitepos,*right_prob,right_known[cR]));
396     }
397   }
398 
399   return;
400 }
401 
402 
403 static void
get_known_splicesites(int * left_known,int * right_known,int glengthL,int glengthR,int leftoffset,int rightoffset,int cdna_direction,bool watsonp,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh)404 get_known_splicesites (int *left_known, int *right_known, int glengthL, int glengthR,
405 		       int leftoffset, int rightoffset, int cdna_direction, bool watsonp,
406 		       Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh) {
407 
408   int *matches, nmatches, i;
409   Univcoord_T splicesitepos;
410 
411   if (splicing_iit != NULL && donor_typeint >= 0 && acceptor_typeint >= 0) {
412     /* Handle known splicing, splice site level */
413     if (watsonp == true) {
414       if (cdna_direction > 0) {
415 	/* splicesitepos = leftoffset + cL;  cL = 0 to < glengthL - 1 */
416 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
417 						  leftoffset+1,leftoffset+glengthL-2,donor_typeint,/*sign*/+1,/*sortp*/false);
418 	for (i = 0; i < nmatches; i++) {
419 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
420 	  debug5(printf("1. Found known donor at %u\n",splicesitepos));
421 	  left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
422 	}
423 	FREE(matches);
424 
425 	/* splicesitepos = rightoffset - cR + 1; cR = 0 to < glengthR - 1 */
426 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
427 						  rightoffset-glengthR+4,rightoffset+1,acceptor_typeint,/*sign*/+1,/*sortp*/false);
428 	for (i = 0; i < nmatches; i++) {
429 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
430 	  debug5(printf("2. Found known acceptor at %u\n",splicesitepos));
431 	  right_known[rightoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
432 	}
433 	FREE(matches);
434 
435       } else {
436 	/* splicesitepos = leftoffset + cL;  cL = 0 to < glengthL - 1 */
437 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
438 						  leftoffset+1,leftoffset+glengthL-2,acceptor_typeint,/*sign*/-1,/*sortp*/false);
439 	for (i = 0; i < nmatches; i++) {
440 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
441 	  debug5(printf("3. Found known antiacceptor at %u\n",splicesitepos));
442 	  left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
443 	}
444 	FREE(matches);
445 
446 	/* splicesitepos = rightoffset - cR + 1; cR = 0 to < glengthR - 1 */
447 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
448 						  rightoffset-glengthR+4,rightoffset+1,donor_typeint,/*sign*/-1,/*sortp*/false);
449 	for (i = 0; i < nmatches; i++) {
450 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
451 	  debug5(printf("4. Found known antidonor at %u\n",splicesitepos));
452 	  right_known[rightoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
453 	}
454 	FREE(matches);
455 
456       }
457 
458     } else {
459       if (cdna_direction > 0) {
460 	/* splicesitepos = (chrhigh - chroffset) - leftoffset - cL + 1; cL = 0 to < glengthL - 1 */
461 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
462 						  (chrhigh - chroffset) - leftoffset - glengthL + 4,
463 						  (chrhigh - chroffset) - leftoffset + 1,
464 						  donor_typeint,/*sign*/-1,/*sortp*/false);
465 	for (i = 0; i < nmatches; i++) {
466 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
467 	  debug5(printf("5. Found known antidonor at %u\n",splicesitepos));
468 	  left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
469 	}
470 	FREE(matches);
471 
472 	/* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
473 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
474 						  (chrhigh - chroffset) - rightoffset + 1,
475 						  (chrhigh - chroffset) - rightoffset + glengthR - 2,
476 						  acceptor_typeint,/*sign*/-1,/*sortp*/false);
477 	for (i = 0; i < nmatches; i++) {
478 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
479 	  debug5(printf("6. Found known antiacceptor at %u\n",splicesitepos));
480 	  right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
481 	}
482 	FREE(matches);
483 
484       } else {
485 	/* splicesitepos = (chrhigh - chroffset) - leftoffset - cL + 1; cL = 0 to < glengthL - 1 */
486 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
487 						  (chrhigh - chroffset) - leftoffset - glengthL + 4,
488 						  (chrhigh - chroffset) - leftoffset + 1,
489 						  acceptor_typeint,/*sign*/+1,/*sortp*/false);
490 	for (i = 0; i < nmatches; i++) {
491 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
492 	  debug5(printf("7. Found known acceptor at %u\n",splicesitepos));
493 	  left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
494 	}
495 	FREE(matches);
496 
497 	/* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
498 	matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
499 						  (chrhigh - chroffset) - rightoffset + 1,
500 						  (chrhigh - chroffset) - rightoffset + glengthR - 2,
501 						  donor_typeint,/*sign*/+1,/*sortp*/false);
502 	for (i = 0; i < nmatches; i++) {
503 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
504 	  debug5(printf("8. Found known donor at %u\n",splicesitepos));
505 	  right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
506 	}
507 	FREE(matches);
508 
509       }
510     }
511 
512   } else if (splicing_iit != NULL) {
513     /* Handle known splicing, intron level */
514     if (watsonp == true) {
515       if (cdna_direction > 0) {
516 	/* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
517 	matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
518 				      leftoffset,leftoffset+glengthL-2,/*sign*/+1);
519 	for (i = 0; i < nmatches; i++) {
520 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
521 	  debug5(printf("1. Found known donor at %u\n",splicesitepos));
522 	  left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
523 	}
524 	FREE(matches);
525 
526 	/* splicesitepos+1U = rightoffset - cR + 2; cR = 0 to < glengthR - 1 */
527 	matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
528 				       rightoffset-glengthR+4,rightoffset+2,/*sign*/+1);
529 	for (i = 0; i < nmatches; i++) {
530 	  splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
531 	  debug5(printf("2. Found known acceptor at %u\n",splicesitepos));
532 	  right_known[rightoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
533 	}
534 	FREE(matches);
535 
536       } else {
537 	/* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
538 	matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
539 				      leftoffset,leftoffset+glengthL-2,/*sign*/-1);
540 	for (i = 0; i < nmatches; i++) {
541 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
542 	  debug5(printf("3. Found known antiacceptor at %u\n",splicesitepos));
543 	  left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
544 	}
545 	FREE(matches);
546 
547 	/* splicesitepos+1U = rightoffset - cR + 2; cR = 0 to < glengthR - 1 */
548 	matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
549 				       rightoffset-glengthR+4,rightoffset+2,/*sign*/-1);
550 	for (i = 0; i < nmatches; i++) {
551 	  splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
552 	  debug5(printf("4. Found known antidonor at %u\n",splicesitepos));
553 	  right_known[rightoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
554 	}
555 	FREE(matches);
556 
557       }
558     } else {
559       if (cdna_direction > 0) {
560 	/* splicesitepos+1U = (chrhigh - chroffset) - leftoffset - cL + 2; cL = 0 to < glengthL - 1 */
561 	matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
562 				       (chrhigh - chroffset) - leftoffset - glengthL + 4,
563 				       (chrhigh - chroffset) - leftoffset + 2,/*sign*/-1);
564 	for (i = 0; i < nmatches; i++) {
565 	  splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
566 	  debug5(printf("5. Found known antidonor at %u\n",splicesitepos));
567 	  left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
568 	}
569 	FREE(matches);
570 
571 	/* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
572 	matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
573 				      (chrhigh - chroffset) - rightoffset,
574 				      (chrhigh - chroffset) - rightoffset + glengthR - 2,/*sign*/-1);
575 	for (i = 0; i < nmatches; i++) {
576 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
577 	  debug5(printf("6. Found known antiacceptor at %u\n",splicesitepos));
578 	  right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
579 	}
580 	FREE(matches);
581 
582       } else {
583 	/* splicesitepos+1U = (chrhigh - chroffset) - leftoffset - cL + 2; cL = 0 to < glengthL - 1 */
584 	matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
585 				       (chrhigh - chroffset) - leftoffset - glengthL + 4,
586 				       (chrhigh - chroffset) - leftoffset + 2,/*sign*/+1);
587 	for (i = 0; i < nmatches; i++) {
588 	  splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
589 	  debug5(printf("7. Found known acceptor at %u\n",splicesitepos));
590 	  left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
591 	}
592 	FREE(matches);
593 
594 	/* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
595 	matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
596 				      (chrhigh - chroffset) - rightoffset,
597 				      (chrhigh - chroffset) - rightoffset + glengthR - 2,/*sign*/+1);
598 	for (i = 0; i < nmatches; i++) {
599 	  splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
600 	  debug5(printf("8. Found known donor at %u\n",splicesitepos));
601 	  right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
602 	}
603 	FREE(matches);
604       }
605     }
606   }
607 
608   return;
609 }
610 
611 
612 #if defined(HAVE_SSE2)
613 /* Returns finalscore */
614 static int
bridge_intron_gap_8_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,Direction8_T ** directionsL_upper_nogap,Direction8_T ** directionsL_lower_nogap,Direction8_T ** directionsR_upper_nogap,Direction8_T ** directionsR_lower_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)615 bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR, int *best_introntype,
616 				  Score8_T **matrixL_upper, Score8_T **matrixL_lower,
617 				  Score8_T **matrixR_upper, Score8_T **matrixR_lower,
618 				  Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
619 				  Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
620 				  int *left_known, int *right_known,
621 				  int rlength, int glengthL, int glengthR,
622 				  int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
623 				  int leftoffset, int rightoffset,
624 				  Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
625 				  bool jump_late_p) {
626   int rL, rR, cL, cR;
627   int cloL, chighL;
628   int cloR, chighR;
629   int bestscore = NEG_INFINITY_8, score, scoreL, scoreI, scoreR;
630   Univcoord_T splicesitepos1, splicesitepos2;
631   bool bestp;
632 
633   scoreI = 0;			/* Because we constrain splices to given introns */
634 
635   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
636     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
637     if ((cloL = rL - lbandL) < 1) {
638       cloL = 1;
639     }
640     if ((chighL = rL + ubandL) > glengthL-1) {
641       chighL = glengthL-1;
642     }
643 
644     if ((cloR = rR - lbandR) < 1) {
645       cloR = 1;
646     }
647     if ((chighR = rR + ubandR) > glengthR-1) {
648       chighR = glengthR-1;
649     }
650 
651     /* Test indels on left and right */
652     for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
653       /* The following check limits genomic inserts (horizontal) and
654 	 multiple cDNA inserts (vertical). */
655       if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
656 	scoreL = (int) matrixL_lower[rL][cL];
657 #if 0
658 	if (directionsL_lower_nogap[rL][cL] != DIAG) {
659 	  /* Favor gaps away from intron if possible */
660 	  scoreL -= 100;
661 	}
662 #endif
663 
664 	/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
665 	for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
666 	  if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
667 	    scoreR = (int) matrixR_lower[rR][cR];
668 #if 0
669 	    if (directionsR_lower_nogap[rR][cR] != DIAG) {
670 	      /* Favor gaps away from intron if possible */
671 	      scoreR -= 100;
672 	    }
673 #endif
674 
675 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
676 		(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
677 	      bestp = false;
678 	      if (watsonp == true) {
679 		splicesitepos1 = leftoffset + cL;
680 		splicesitepos2 = rightoffset - cR + 1;
681 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
682 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
683 		  bestp = true;
684 		}
685 	      } else {
686 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
687 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
688 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
689 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
690 		  bestp = true;
691 		}
692 	      }
693 	      if (bestp == true) {
694 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
695 			      cL,cR,scoreL,scoreR,score));
696 		bestscore = score;
697 		*bestrL = rL;
698 		*bestrR = rR;
699 		*bestcL = cL;
700 		*bestcR = cR;
701 	      } else {
702 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
703 			       cL,cR,scoreL,scoreR,score));
704 	      }
705 	    }
706 	  }
707 	}
708 
709 	for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
710 	  if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
711 	    scoreR = (int) matrixR_upper[cR][rR];
712 #if 0
713 	    if (directionsR_upper_nogap[cR][rR] != DIAG) {
714 	      /* Favor gaps away from intron if possible */
715 	      scoreR -= 100;
716 	    }
717 #endif
718 
719 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
720 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
721 	      bestp = false;
722 	      if (watsonp == true) {
723 		splicesitepos1 = leftoffset + cL;
724 		splicesitepos2 = rightoffset - cR + 1;
725 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
726 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
727 		  bestp = true;
728 		}
729 	      } else {
730 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
731 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
732 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
733 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
734 		  bestp = true;
735 		}
736 	      }
737 	      if (bestp == true) {
738 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
739 			      cL,cR,scoreL,scoreR,score));
740 		bestscore = score;
741 		*bestrL = rL;
742 		*bestrR = rR;
743 		*bestcL = cL;
744 		*bestcR = cR;
745 	      } else {
746 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
747 			       cL,cR,scoreL,scoreR,score));
748 	      }
749 	    }
750 	  }
751 	}
752       }
753     }
754 
755     for (/* at main diagonal*/; cL < chighL; cL++) {
756       /* The following check limits genomic inserts (horizontal) and
757 	 multiple cDNA inserts (vertical). */
758       if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
759 	scoreL = (int) matrixL_upper[cL][rL];
760 #if 0
761 	if (directionsL_upper_nogap[cL][rL] != DIAG) {
762 	  /* Favor gaps away from intron if possible */
763 	  scoreL -= 100;
764 	}
765 #endif
766 
767 	/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
768 	for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
769 	  if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG)  {
770 	    scoreR = (int) matrixR_lower[rR][cR];
771 #if 0
772 	    if (directionsR_lower_nogap[rR][cR] != DIAG) {
773 	      /* Favor gaps away from intron if possible */
774 	      scoreR -= 100;
775 	    }
776 #endif
777 
778 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
779 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
780 	      bestp = false;
781 	      if (watsonp == true) {
782 		splicesitepos1 = leftoffset + cL;
783 		splicesitepos2 = rightoffset - cR + 1;
784 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
785 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
786 		  bestp = true;
787 		}
788 	      } else {
789 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
790 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
791 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
792 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
793 		  bestp = true;
794 		}
795 	      }
796 	      if (bestp == true) {
797 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
798 			      cL,cR,scoreL,scoreR,score));
799 		bestscore = score;
800 		*bestrL = rL;
801 		*bestrR = rR;
802 		*bestcL = cL;
803 		*bestcR = cR;
804 	      } else {
805 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
806 			       cL,cR,scoreL,scoreR,score));
807 	      }
808 	    }
809 	  }
810 	}
811 
812 	for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
813 	  if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
814 	    scoreR = (int) matrixR_upper[cR][rR];
815 #if 0
816 	    if (directionsR_upper_nogap[cR][rR] != DIAG) {
817 	      /* Favor gaps away from intron if possible */
818 	      scoreR -= 100;
819 	    }
820 #endif
821 
822 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
823 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
824 	      bestp = false;
825 	      if (watsonp == true) {
826 		splicesitepos1 = leftoffset + cL;
827 		splicesitepos2 = rightoffset - cR + 1;
828 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
829 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
830 		  bestp = true;
831 		}
832 	      } else {
833 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
834 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
835 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
836 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
837 		  bestp = true;
838 		}
839 	      }
840 	      if (bestp == true) {
841 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
842 			      cL,cR,scoreL,scoreR,score));
843 		bestscore = score;
844 		*bestrL = rL;
845 		*bestrR = rR;
846 		*bestcL = cL;
847 		*bestcR = cR;
848 	      } else {
849 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
850 			       cL,cR,scoreL,scoreR,score));
851 	      }
852 	    }
853 	  }
854 	}
855       }
856     }
857   }
858 
859   *best_introntype = NONINTRON;
860   return bestscore;
861 }
862 
863 
864 /* Returns finalscore */
865 static int
bridge_intron_gap_8_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)866 bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
867 				Score8_T **matrixL_upper, Score8_T **matrixL_lower,
868 				Score8_T **matrixR_upper, Score8_T **matrixR_lower,
869 #if 0
870 				Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
871 				Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
872 				int goffsetL, int rev_goffsetR, int canonical_reward,
873 #endif
874 				char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
875 				int *left_known, int *right_known, int rlength, int glengthL, int glengthR,
876 				int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
877 				int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
878 				bool halfp, bool finalp) {
879   int rL, rR, cL, cR;
880   int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
881   int cloL, chighL;
882   int cloR, chighR;
883   /* int introntype; */
884   int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
885   int bestscore_with_dinucl = NEG_INFINITY_8;
886   double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
887   Univcoord_T splicesitepos;
888   char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
889   int *leftdi, *rightdi;
890   bool use_dinucl_p;
891 
892   int *intron_score_array;
893 
894   if (cdna_direction > 0) {
895     if (finalp == true) {
896       intron_score_array = intron_score_array_sense_final;
897     } else {
898       intron_score_array = intron_score_array_sense_prelim;
899     }
900   } else if (cdna_direction < 0) {
901     if (finalp == true) {
902       intron_score_array = intron_score_array_antisense_final;
903     } else {
904       intron_score_array = intron_score_array_antisense_prelim;
905     }
906   } else {
907     if (finalp == true) {
908       intron_score_array = intron_score_array_either_final;
909     } else {
910       intron_score_array = intron_score_array_either_prelim;
911     }
912   }
913 
914   /* Read dinucleotides */
915   leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
916   rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
917 
918   for (cL = 0; cL < glengthL - 1; cL++) {
919     left1 = gsequenceL[cL];
920     left1_alt = gsequenceL_alt[cL];
921     left2 = gsequenceL[cL+1];
922     left2_alt = gsequenceL_alt[cL+1];
923     /* Assertions may not hold for transcriptome alignment */
924     /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
925     /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
926 
927     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
928       leftdi[cL] = LEFT_GT;
929     } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
930       leftdi[cL] = LEFT_GC;
931     } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
932       leftdi[cL] = LEFT_AT;
933 #ifndef PMAP
934     } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
935       leftdi[cL] = LEFT_CT;
936 #endif
937     } else {
938       leftdi[cL] = 0x00;
939     }
940   }
941   leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
942 
943   for (cR = 0; cR < glengthR - 1; cR++) {
944     right2 = rev_gsequenceR[-cR-1];
945     right2_alt = rev_gsequenceR_alt[-cR-1];
946     right1 = rev_gsequenceR[-cR];
947     right1_alt = rev_gsequenceR_alt[-cR];
948     /* Assertions may not hold for transcriptome alignment */
949     /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
950     /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
951 
952     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
953       rightdi[cR] = RIGHT_AG;
954     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
955       rightdi[cR] = RIGHT_AC;
956 #ifndef PMAP
957     } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
958       rightdi[cR] = RIGHT_GC;
959     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
960       rightdi[cR] = RIGHT_AT;
961 #endif
962     } else {
963       rightdi[cR] = 0x00;
964     }
965   }
966   rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
967 
968 
969   left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
970   right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
971 
972   debug3(printf("watsonp is %d.  cdna_direction is %d\n",watsonp,cdna_direction));
973   if (watsonp == true) {
974     if (cdna_direction > 0) {
975       for (cL = 0; cL < glengthL - 1; cL++) {
976 	splicesitepos = chroffset + leftoffset + cL;
977 	if (left_known[cL]) {
978 	  left_probabilities[cL] = 1.0;
979 	} else {
980 	  left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
981 	  debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
982 	}
983       }
984 
985       for (cR = 0; cR < glengthR - 1; cR++) {
986 	splicesitepos = chroffset + rightoffset - cR + 1;
987 	if (right_known[cR]) {
988 	  right_probabilities[cR] = 1.0;
989 	} else {
990 	  right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
991 	  debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
992 	}
993       }
994 
995     } else {
996       for (cL = 0; cL < glengthL - 1; cL++) {
997 	splicesitepos = chroffset + leftoffset + cL;
998 	if (left_known[cL]) {
999 	  left_probabilities[cL] = 1.0;
1000 	} else {
1001 	  left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1002 	  debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1003 	}
1004       }
1005 
1006       for (cR = 0; cR < glengthR - 1; cR++) {
1007 	splicesitepos = chroffset + rightoffset - cR + 1;
1008 	if (right_known[cR]) {
1009 	  right_probabilities[cR] = 1.0;
1010 	} else {
1011 	  right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1012 	  debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1013 	}
1014       }
1015     }
1016 
1017   } else {
1018     if (cdna_direction > 0) {
1019       for (cL = 0; cL < glengthL - 1; cL++) {
1020 	splicesitepos = chrhigh - leftoffset - cL + 1;
1021 	if (left_known[cL]) {
1022 	  left_probabilities[cL] = 1.0;
1023 	} else {
1024 	  left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1025 	  debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1026 	}
1027       }
1028 
1029       for (cR = 0; cR < glengthR - 1; cR++) {
1030 	splicesitepos = chrhigh - rightoffset + cR;
1031 	if (right_known[cR]) {
1032 	  right_probabilities[cR] = 1.0;
1033 	} else {
1034 	  right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1035 	  debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1036 	}
1037       }
1038 
1039     } else {
1040       for (cL = 0; cL < glengthL - 1; cL++) {
1041 	splicesitepos = chrhigh - leftoffset - cL + 1;
1042 	if (left_known[cL]) {
1043 	  left_probabilities[cL] = 1.0;
1044 	} else {
1045 	  left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1046 	  debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1047 	}
1048       }
1049 
1050       for (cR = 0; cR < glengthR - 1; cR++) {
1051 	splicesitepos = chrhigh - rightoffset + cR;
1052 	if (right_known[cR]) {
1053 	  right_probabilities[cR] = 1.0;
1054 	} else {
1055 	  right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1056 	  debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1057 	}
1058       }
1059     }
1060   }
1061 
1062   /* Search using probs and without simultaneously */
1063   bestscore = NEG_INFINITY_8;
1064   bestprob_with_score = bestprob_with_dinucl = 0.0;
1065   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1066     debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
1067     if ((cloL = rL - lbandL) < 1) {
1068       cloL = 1;
1069     }
1070     if ((chighL = rL + ubandL) > glengthL-1) {
1071       chighL = glengthL-1;
1072     }
1073 
1074     if ((cloR = rR - lbandR) < 1) {
1075       cloR = 1;
1076     }
1077     if ((chighR = rR + ubandR) > glengthR-1) {
1078       chighR = glengthR-1;
1079     }
1080 
1081     debug3(printf("A. Test no indels\n"));
1082     cL = rL;
1083     probL = left_probabilities[cL];
1084     scoreL = (int) matrixL_upper[cL][rL];
1085 
1086     cR = rR;
1087     probR = right_probabilities[cR];
1088     scoreR = (int) matrixR_upper[cR][rR];
1089 
1090 #ifdef USE_SCOREI
1091     /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1092     scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1093 #else
1094     scoreI = 0;
1095 #endif
1096 
1097     if ((score = scoreL + scoreI + scoreR) > bestscore) {
1098       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1099 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1100       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1101       bestscore = score;
1102       *bestrL = rL;
1103       *bestrR = rR;
1104       *bestcL = cL;
1105       *bestcR = cR;
1106       bestprob_with_score = probL + probR;
1107     } else if (score == bestscore && probL + probR > bestprob_with_score) {
1108       debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1109 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1110       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1111       *bestrL = rL;
1112       *bestrR = rR;
1113       *bestcL = cL;
1114       *bestcR = cR;
1115       bestprob_with_score = probL + probR;
1116     } else {
1117       debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1118 		     cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1119     }
1120 
1121     /* Perform only without indels */
1122     if (scoreI > 0) {
1123       debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
1124 		     cL,cR,scoreI,probL,probR,probL+probR));
1125       if (probL + probR > bestprob_with_dinucl) {
1126 	bestscore_with_dinucl = scoreL + scoreI + scoreR;
1127 	bestcL_with_dinucl = cL;
1128 	bestcR_with_dinucl = cR;
1129 	bestrL_with_dinucl = rL;
1130 	bestrR_with_dinucl = rR;
1131 	bestprob_with_dinucl = probL + probR;
1132       }
1133     }
1134 
1135 
1136     debug3(printf("B. Test indel on right (1)\n"));
1137     /* Test indel on right */
1138     cL = rL;
1139     probL = left_probabilities[cL];
1140     scoreL = (int) matrixL_upper[cL][rL];
1141 #if 0
1142     if (directionsL_upper_nogap[cL][rL] != DIAG) {
1143       /* Favor gaps away from intron if possible */
1144       scoreL -= 100;
1145     }
1146 #endif
1147 
1148     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1149     for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1150       probR = right_probabilities[cR];
1151       scoreR = (int) matrixR_lower[rR][cR];
1152 #if 0
1153       if (directionsR_lower_nogap[rR][cR] != DIAG) {
1154 	/* Favor gaps away from intron if possible */
1155 	scoreR -= 100;
1156       }
1157 #endif
1158 
1159 #ifdef USE_SCOREI
1160       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1161       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1162 #else
1163       scoreI = 0;
1164 #endif
1165 
1166       if ((score = scoreL + scoreI + scoreR) > bestscore) {
1167 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1168 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1169 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1170 	bestscore = score;
1171 	*bestrL = rL;
1172 	*bestrR = rR;
1173 	*bestcL = cL;
1174 	*bestcR = cR;
1175 	bestprob_with_score = probL + probR;
1176       } else if (score == bestscore && probL + probR > bestprob_with_score) {
1177 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1178 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1179 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1180 	*bestrL = rL;
1181 	*bestrR = rR;
1182 	*bestcL = cL;
1183 	*bestcR = cR;
1184 	bestprob_with_score = probL + probR;
1185       } else {
1186 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1187 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1188 	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1189       }
1190     }
1191 
1192     debug3(printf("Skip main diagonal\n"));
1193     for (/*skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1194       probR = right_probabilities[cR];
1195       scoreR = (int) matrixR_upper[cR][rR];
1196 #if 0
1197       if (directionsR_upper_nogap[cR][rR] != DIAG) {
1198 	/* Favor gaps away from intron if possible */
1199 	scoreR -= 100;
1200       }
1201 #endif
1202 
1203 #ifdef USE_SCOREI
1204       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1205       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1206 #else
1207       scoreI = 0;
1208 #endif
1209 
1210       if ((score = scoreL + scoreI + scoreR) > bestscore) {
1211 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1212 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1213 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1214 	bestscore = score;
1215 	*bestrL = rL;
1216 	*bestrR = rR;
1217 	*bestcL = cL;
1218 	*bestcR = cR;
1219 	bestprob_with_score = probL + probR;
1220       } else if (score == bestscore && probL + probR > bestprob_with_score) {
1221 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1222 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1223 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1224 	*bestrL = rL;
1225 	*bestrR = rR;
1226 	*bestcL = cL;
1227 	*bestcR = cR;
1228 	bestprob_with_score = probL + probR;
1229       } else {
1230 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1231 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1232 	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1233       }
1234     }
1235 
1236     debug3(printf("C. Test indel on left (1)\n"));
1237     /* Test indel on left */
1238     cR = rR;
1239     probR = right_probabilities[cR];
1240     scoreR = (int) matrixR_upper[cR][rR];
1241 #if 0
1242     if (directionsR_upper_nogap[cR][rR] != DIAG) {
1243       /* Favor gaps away from intron if possible */
1244       scoreR -= 100;
1245     }
1246 #endif
1247 
1248     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1249     for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
1250       probL = left_probabilities[cL];
1251       scoreL = (int) matrixL_lower[rL][cL];
1252 #if 0
1253       if (directionsL_lower_nogap[rL][cL] != DIAG) {
1254 	/* Favor gaps away from intron if possible */
1255 	scoreL -= 100;
1256       }
1257 #endif
1258 
1259 #ifdef USE_SCOREI
1260       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1261       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1262 #else
1263       scoreI = 0;
1264 #endif
1265 
1266       if ((score = scoreL + scoreI + scoreR) > bestscore) {
1267 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1268 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1269 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1270 	bestscore = score;
1271 	*bestrL = rL;
1272 	*bestrR = rR;
1273 	*bestcL = cL;
1274 	*bestcR = cR;
1275 	bestprob_with_score = probL + probR;
1276       } else if (score == bestscore && probL + probR > bestprob_with_score) {
1277 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1278 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1279 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1280 	*bestrL = rL;
1281 	*bestrR = rR;
1282 	*bestcL = cL;
1283 	*bestcR = cR;
1284 	bestprob_with_score = probL + probR;
1285       } else {
1286 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1287 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1288 	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1289       }
1290     }
1291 
1292     debug3(printf("Skip main diagonal\n"));
1293     for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
1294       probL = left_probabilities[cL];
1295       scoreL = (int) matrixL_upper[cL][rL];
1296 #if 0
1297       if (directionsL_upper_nogap[cL][rL] != DIAG) {
1298 	/* Favor gaps away from intron if possible */
1299 	scoreL -= 100;
1300       }
1301 #endif
1302 
1303 #ifdef USE_SCOREI
1304       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1305       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1306 #else
1307       scoreI = 0;
1308 #endif
1309 
1310       if ((score = scoreL + scoreI + scoreR) > bestscore) {
1311 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1312 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1313 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1314 	bestscore = score;
1315 	*bestrL = rL;
1316 	*bestrR = rR;
1317 	*bestcL = cL;
1318 	*bestcR = cR;
1319 	bestprob_with_score = probL + probR;
1320       } else if (score == bestscore && probL + probR > bestprob_with_score) {
1321 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1322 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1323 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1324 	*bestrL = rL;
1325 	*bestrR = rR;
1326 	*bestcL = cL;
1327 	*bestcR = cR;
1328 	bestprob_with_score = probL + probR;
1329       } else {
1330 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1331 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1332 	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1333       }
1334     }
1335   }
1336 
1337   if (bestprob_with_score > 2*PROB_CEILING) {
1338     /* Probability is good with best alignment, so take that */
1339     debug(printf("Best alignment based on score alone has good probability\n"));
1340     use_dinucl_p = false;		/* was previously true (bug) */
1341   } else if (bestprob_with_dinucl == 0.0) {
1342     debug(printf("No dinucleotides found\n"));
1343     use_dinucl_p = false;
1344   } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
1345     /* Dinucleotide-based solution is bad, so use alignment */
1346     debug(printf("Dinucleotide-based solution is bad on both sites\n"));
1347     use_dinucl_p = false;
1348   } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
1349     debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
1350 		  bestscore_with_dinucl,bestscore));
1351     use_dinucl_p = false;
1352   } else {
1353     use_dinucl_p = true;
1354   }
1355 
1356   debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
1357   if (use_dinucl_p == true) {
1358     debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
1359 		 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
1360     /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
1361     debug(printf("Switch to dinucleotide-based solution\n"));
1362     *bestcL = bestcL_with_dinucl;
1363     *bestcR = bestcR_with_dinucl;
1364     *bestrL = bestrL_with_dinucl;
1365     *bestrR = bestrR_with_dinucl;
1366     bestscore = bestscore_with_dinucl;
1367   }
1368 
1369   FREEA(rightdi);
1370   FREEA(leftdi);
1371   FREEA(left_probabilities);
1372   FREEA(right_probabilities);
1373 
1374   if (bestscore < 0) {
1375     return bestscore;
1376   } else if (halfp == true) {
1377     /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
1378     scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
1379     return (bestscore - scoreI/2);
1380   } else {
1381     return bestscore;
1382   }
1383 }
1384 
1385 
1386 static int
bridge_intron_gap_8_ud(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,Direction8_T ** directionsL_upper_nogap,Direction8_T ** directionsL_lower_nogap,Direction8_T ** directionsR_upper_nogap,Direction8_T ** directionsR_lower_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)1387 bridge_intron_gap_8_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1388 			int *best_introntype, double *left_prob, double *right_prob,
1389 			Score8_T **matrixL_upper, Score8_T **matrixL_lower,
1390 			Score8_T **matrixR_upper, Score8_T **matrixR_lower,
1391 			Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
1392 			Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
1393 			char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
1394 			int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
1395 			int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1396 			int leftoffset, int rightoffset,
1397 			Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
1398 			bool halfp, bool finalp, bool jump_late_p) {
1399   int finalscore;
1400   int *left_known, *right_known;
1401 
1402   debug(printf("Running bridge_intron_gap_8_ud\n"));
1403 
1404   if (glengthL+1 <= 0) {
1405     fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
1406     abort();
1407   }
1408 
1409   if (glengthR+1 <= 0) {
1410     fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
1411     abort();
1412   }
1413 
1414   left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
1415   right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
1416   get_known_splicesites(left_known,right_known,glengthL,glengthR,
1417 			/*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
1418 			cdna_direction,watsonp,chrnum,chroffset,chrhigh);
1419 
1420   if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
1421     /* Constrain to given introns */
1422     finalscore = bridge_intron_gap_8_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
1423 						  matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
1424 						  directionsL_upper_nogap,directionsL_lower_nogap,
1425 						  directionsR_upper_nogap,directionsR_lower_nogap,
1426 						  left_known,right_known,rlength,glengthL,glengthR,
1427 						  cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
1428 						  leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
1429   } else {
1430     finalscore = bridge_intron_gap_8_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
1431 						matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
1432 #if 0
1433 						directionsL_upper_nogap,directionsL_lower_nogap,
1434 						directionsR_upper_nogap,directionsR_lower_nogap,
1435 						goffsetL,rev_goffsetR,canonical_reward,
1436 #endif
1437 						gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,
1438 						left_known,right_known,rlength,glengthL,glengthR,
1439 						cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
1440 						leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
1441   }
1442 
1443 
1444 #if 0
1445   /* Determine if result meets given constraints */
1446   if (*finalscore < 0) {
1447     result = false;
1448   } else if (novelsplicingp == true) {
1449     result = true;
1450   } else if (splicing_iit == NULL) {
1451     result = true;
1452   } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
1453     /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
1454     if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
1455       debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
1456       result = false;
1457     } else {
1458       result = true;
1459     }
1460   } else {
1461     /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
1462     result = true;
1463   }
1464 #endif
1465 
1466   if (finalscore >= 0) {
1467     get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
1468 			 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
1469 			 cdna_direction,watsonp);
1470   }
1471 
1472   FREEA(right_known);
1473   FREEA(left_known);
1474 
1475 #if defined(DEBUG) || defined(DEBUG3)
1476   printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
1477 	 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
1478 #endif
1479 
1480   return finalscore;
1481 }
1482 #endif
1483 
1484 
1485 #if defined(HAVE_SSE2)
1486 static int
bridge_intron_gap_16_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,Direction16_T ** directionsL_upper_nogap,Direction16_T ** directionsL_lower_nogap,Direction16_T ** directionsR_upper_nogap,Direction16_T ** directionsR_lower_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)1487 bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1488 				  int *best_introntype,
1489 				  Score16_T **matrixL_upper, Score16_T **matrixL_lower,
1490 				  Score16_T **matrixR_upper, Score16_T **matrixR_lower,
1491 				  Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
1492 				  Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
1493 				  int *left_known, int *right_known,
1494 				  int rlength, int glengthL, int glengthR,
1495 				  int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1496 				  int leftoffset, int rightoffset,
1497 				  Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
1498 				  bool jump_late_p) {
1499   int rL, rR, cL, cR;
1500   int cloL, chighL;
1501   int cloR, chighR;
1502   int bestscore = NEG_INFINITY_16, score, scoreL, scoreI, scoreR;
1503   Univcoord_T splicesitepos1, splicesitepos2;
1504   bool bestp;
1505 
1506   scoreI = 0;			/* Because we constrain splices to given introns */
1507 
1508   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1509     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
1510     if ((cloL = rL - lbandL) < 1) {
1511       cloL = 1;
1512     }
1513     if ((chighL = rL + ubandL) > glengthL-1) {
1514       chighL = glengthL-1;
1515     }
1516 
1517     if ((cloR = rR - lbandR) < 1) {
1518       cloR = 1;
1519     }
1520     if ((chighR = rR + ubandR) > glengthR-1) {
1521       chighR = glengthR-1;
1522     }
1523 
1524     /* Test indels on left and right */
1525     for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
1526       /* The following check limits genomic inserts (horizontal) and
1527 	 multiple cDNA inserts (vertical). */
1528       if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
1529 	scoreL = (int) matrixL_lower[rL][cL];
1530 #if 0
1531 	if (directionsL_lower_nogap[rL][cL] != DIAG) {
1532 	  /* Favor gaps away from intron if possible */
1533 	  scoreL -= 100;
1534 	}
1535 #endif
1536 
1537 	/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1538 	for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1539 	  if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
1540 	    scoreR = (int) matrixR_lower[rR][cR];
1541 #if 0
1542 	    if (directionsR_lower_nogap[rR][cR] != DIAG) {
1543 	      /* Favor gaps away from intron if possible */
1544 	      scoreR -= 100;
1545 	    }
1546 #endif
1547 
1548 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
1549 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
1550 	      bestp = false;
1551 	      if (watsonp == true) {
1552 		splicesitepos1 = leftoffset + cL;
1553 		splicesitepos2 = rightoffset - cR + 1;
1554 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1555 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1556 		  bestp = true;
1557 		}
1558 	      } else {
1559 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1560 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1561 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1562 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1563 		  bestp = true;
1564 		}
1565 	      }
1566 	      if (bestp == true) {
1567 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1568 			      cL,cR,scoreL,scoreR,score));
1569 		bestscore = score;
1570 		*bestrL = rL;
1571 		*bestrR = rR;
1572 		*bestcL = cL;
1573 		*bestcR = cR;
1574 	      } else {
1575 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1576 			       cL,cR,scoreL,scoreR,score));
1577 	      }
1578 	    }
1579 	  }
1580 	}
1581 
1582 	for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1583 	  if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
1584 	    scoreR = (int) matrixR_upper[cR][rR];
1585 #if 0
1586 	    if (directionsR_upper_nogap[cR][rR] != DIAG) {
1587 	      /* Favor gaps away from intron if possible */
1588 	      scoreR -= 100;
1589 	    }
1590 #endif
1591 
1592 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
1593 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
1594 	      bestp = false;
1595 	      if (watsonp == true) {
1596 		splicesitepos1 = leftoffset + cL;
1597 		splicesitepos2 = rightoffset - cR + 1;
1598 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1599 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1600 		  bestp = true;
1601 		}
1602 	      } else {
1603 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1604 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1605 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1606 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1607 		  bestp = true;
1608 		}
1609 	      }
1610 	      if (bestp == true) {
1611 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1612 			      cL,cR,scoreL,scoreR,score));
1613 		bestscore = score;
1614 		*bestrL = rL;
1615 		*bestrR = rR;
1616 		*bestcL = cL;
1617 		*bestcR = cR;
1618 	      } else {
1619 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1620 			       cL,cR,scoreL,scoreR,score));
1621 	      }
1622 	    }
1623 	  }
1624 	}
1625       }
1626     }
1627 
1628     for (/* at main diagonal*/; cL < chighL; cL++) {
1629       /* The following check limits genomic inserts (horizontal) and
1630 	 multiple cDNA inserts (vertical). */
1631       if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
1632 	scoreL = (int) matrixL_upper[cL][rL];
1633 #if 0
1634 	if (directionsL_upper_nogap[cL][rL] != DIAG) {
1635 	  /* Favor gaps away from intron if possible */
1636 	  scoreL -= 100;
1637 	}
1638 #endif
1639 
1640 	/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1641 	for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1642 	  if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
1643 	    scoreR = (int) matrixR_lower[rR][cR];
1644 #if 0
1645 	    if (directionsR_lower_nogap[rR][cR] != DIAG) {
1646 	      /* Favor gaps away from intron if possible */
1647 	      scoreR -= 100;
1648 	    }
1649 #endif
1650 
1651 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
1652 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
1653 	      bestp = false;
1654 	      if (watsonp == true) {
1655 		splicesitepos1 = leftoffset + cL;
1656 		splicesitepos2 = rightoffset - cR + 1;
1657 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1658 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1659 		  bestp = true;
1660 		}
1661 	      } else {
1662 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1663 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1664 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1665 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1666 		  bestp = true;
1667 		}
1668 	      }
1669 	      if (bestp == true) {
1670 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1671 			      cL,cR,scoreL,scoreR,score));
1672 		bestscore = score;
1673 		*bestrL = rL;
1674 		*bestrR = rR;
1675 		*bestcL = cL;
1676 		*bestcR = cR;
1677 	      } else {
1678 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1679 			       cL,cR,scoreL,scoreR,score));
1680 	      }
1681 	    }
1682 	  }
1683 	}
1684 
1685 	for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1686 	  if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
1687 	    scoreR = (int) matrixR_upper[cR][rR];
1688 #if 0
1689 	    if (directionsR_upper_nogap[cR][rR] != DIAG) {
1690 	      /* Favor gaps away from intron if possible */
1691 	      scoreR -= 100;
1692 	    }
1693 #endif
1694 
1695 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
1696 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
1697 	      bestp = false;
1698 	      if (watsonp == true) {
1699 		splicesitepos1 = leftoffset + cL;
1700 		splicesitepos2 = rightoffset - cR + 1;
1701 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1702 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1703 		  bestp = true;
1704 		}
1705 	      } else {
1706 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1707 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1708 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1709 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1710 		  bestp = true;
1711 		}
1712 	      }
1713 	      if (bestp == true) {
1714 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1715 			      cL,cR,scoreL,scoreR,score));
1716 		bestscore = score;
1717 		*bestrL = rL;
1718 		*bestrR = rR;
1719 		*bestcL = cL;
1720 		*bestcR = cR;
1721 	      } else {
1722 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1723 			       cL,cR,scoreL,scoreR,score));
1724 	      }
1725 	    }
1726 	  }
1727 	}
1728       }
1729     }
1730   }
1731 
1732   *best_introntype = NONINTRON;
1733   return bestscore;
1734 }
1735 
1736 
1737 /* Returns finalscore */
1738 static int
bridge_intron_gap_16_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)1739 bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1740 				 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
1741 				 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
1742 #if 0
1743 				 Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
1744 				 Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
1745 				 int goffsetL, int rev_goffsetR, int canonical_reward,
1746 #endif
1747 				 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
1748 				 int *left_known, int *right_known, int rlength, int glengthL, int glengthR,
1749 				 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1750 				 int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
1751 				 bool halfp, bool finalp) {
1752   int rL, rR, cL, cR;
1753   int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
1754   int cloL, chighL;
1755   int cloR, chighR;
1756   /* int introntype; */
1757   int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
1758   int bestscore_with_dinucl = NEG_INFINITY_16;
1759   double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
1760   Univcoord_T splicesitepos;
1761   char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
1762   int *leftdi, *rightdi;
1763   bool use_dinucl_p;
1764 
1765   int *intron_score_array;
1766 
1767   if (cdna_direction > 0) {
1768     if (finalp == true) {
1769       intron_score_array = intron_score_array_sense_final;
1770     } else {
1771       intron_score_array = intron_score_array_sense_prelim;
1772     }
1773   } else if (cdna_direction < 0) {
1774     if (finalp == true) {
1775       intron_score_array = intron_score_array_antisense_final;
1776     } else {
1777       intron_score_array = intron_score_array_antisense_prelim;
1778     }
1779   } else {
1780     if (finalp == true) {
1781       intron_score_array = intron_score_array_either_final;
1782     } else {
1783       intron_score_array = intron_score_array_either_prelim;
1784     }
1785   }
1786 
1787 
1788   /* Read dinucleotides */
1789   leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
1790   rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
1791 
1792   for (cL = 0; cL < glengthL - 1; cL++) {
1793     left1 = gsequenceL[cL];
1794     left1_alt = gsequenceL_alt[cL];
1795     left2 = gsequenceL[cL+1];
1796     left2_alt = gsequenceL_alt[cL+1];
1797     /* Assertions may not hold for transcriptome alignment */
1798     /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
1799     /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
1800 
1801     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
1802       leftdi[cL] = LEFT_GT;
1803     } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
1804       leftdi[cL] = LEFT_GC;
1805     } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
1806       leftdi[cL] = LEFT_AT;
1807 #ifndef PMAP
1808     } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
1809       leftdi[cL] = LEFT_CT;
1810 #endif
1811     } else {
1812       leftdi[cL] = 0x00;
1813     }
1814   }
1815   leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
1816 
1817   for (cR = 0; cR < glengthR - 1; cR++) {
1818     right2 = rev_gsequenceR[-cR-1];
1819     right2_alt = rev_gsequenceR_alt[-cR-1];
1820     right1 = rev_gsequenceR[-cR];
1821     right1_alt = rev_gsequenceR_alt[-cR];
1822     /* Assertions may not hold for transcriptome alignment */
1823     /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
1824     /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
1825 
1826     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
1827       rightdi[cR] = RIGHT_AG;
1828     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
1829       rightdi[cR] = RIGHT_AC;
1830 #ifndef PMAP
1831     } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
1832       rightdi[cR] = RIGHT_GC;
1833     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
1834       rightdi[cR] = RIGHT_AT;
1835 #endif
1836     } else {
1837       rightdi[cR] = 0x00;
1838     }
1839   }
1840   rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
1841 
1842 
1843   left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
1844   right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
1845 
1846   debug3(printf("watsonp is %d.  cdna_direction is %d\n",watsonp,cdna_direction));
1847   if (watsonp == true) {
1848     if (cdna_direction > 0) {
1849       for (cL = 0; cL < glengthL - 1; cL++) {
1850 	splicesitepos = chroffset + leftoffset + cL;
1851 	if (left_known[cL]) {
1852 	  left_probabilities[cL] = 1.0;
1853 	} else {
1854 	  left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1855 	  debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1856 	}
1857       }
1858 
1859       for (cR = 0; cR < glengthR - 1; cR++) {
1860 	splicesitepos = chroffset + rightoffset - cR + 1;
1861 	if (right_known[cR]) {
1862 	  right_probabilities[cR] = 1.0;
1863 	} else {
1864 	  right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1865 	  debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1866 	}
1867       }
1868 
1869     } else {
1870       for (cL = 0; cL < glengthL - 1; cL++) {
1871 	splicesitepos = chroffset + leftoffset + cL;
1872 	if (left_known[cL]) {
1873 	  left_probabilities[cL] = 1.0;
1874 	} else {
1875 	  left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1876 	  debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1877 	}
1878       }
1879 
1880       for (cR = 0; cR < glengthR - 1; cR++) {
1881 	splicesitepos = chroffset + rightoffset - cR + 1;
1882 	if (right_known[cR]) {
1883 	  right_probabilities[cR] = 1.0;
1884 	} else {
1885 	  right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1886 	  debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1887 	}
1888       }
1889     }
1890 
1891   } else {
1892     if (cdna_direction > 0) {
1893       for (cL = 0; cL < glengthL - 1; cL++) {
1894 	splicesitepos = chrhigh - leftoffset - cL + 1;
1895 	if (left_known[cL]) {
1896 	  left_probabilities[cL] = 1.0;
1897 	} else {
1898 	  left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1899 	  debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1900 	}
1901       }
1902 
1903       for (cR = 0; cR < glengthR - 1; cR++) {
1904 	splicesitepos = chrhigh - rightoffset + cR;
1905 	if (right_known[cR]) {
1906 	  right_probabilities[cR] = 1.0;
1907 	} else {
1908 	  right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1909 	  debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1910 	}
1911       }
1912 
1913     } else {
1914       for (cL = 0; cL < glengthL - 1; cL++) {
1915 	splicesitepos = chrhigh - leftoffset - cL + 1;
1916 	if (left_known[cL]) {
1917 	  left_probabilities[cL] = 1.0;
1918 	} else {
1919 	  left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1920 	  debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1921 	}
1922       }
1923 
1924       for (cR = 0; cR < glengthR - 1; cR++) {
1925 	splicesitepos = chrhigh - rightoffset + cR;
1926 	if (right_known[cR]) {
1927 	  right_probabilities[cR] = 1.0;
1928 	} else {
1929 	  right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1930 	  debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1931 	}
1932       }
1933     }
1934   }
1935 
1936   /* Search using probs and without simultaneously */
1937   bestscore = NEG_INFINITY_16;
1938   bestprob_with_score = bestprob_with_dinucl = 0.0;
1939   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1940     debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
1941     if ((cloL = rL - lbandL) < 1) {
1942       cloL = 1;
1943     }
1944     if ((chighL = rL + ubandL) > glengthL-1) {
1945       chighL = glengthL-1;
1946     }
1947 
1948     if ((cloR = rR - lbandR) < 1) {
1949       cloR = 1;
1950     }
1951     if ((chighR = rR + ubandR) > glengthR-1) {
1952       chighR = glengthR-1;
1953     }
1954 
1955     debug3(printf("A. Test no indels\n"));
1956     cL = rL;
1957     probL = left_probabilities[cL];
1958     scoreL = (int) matrixL_upper[cL][rL];
1959 
1960     cR = rR;
1961     probR = right_probabilities[cR];
1962     scoreR = (int) matrixR_upper[cR][rR];
1963 
1964 #ifdef USE_SCOREI
1965     /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1966     scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1967 #else
1968     scoreI = 0;
1969 #endif
1970 
1971     if ((score = scoreL + scoreI + scoreR) > bestscore) {
1972       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1973 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1974       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1975       bestscore = score;
1976       *bestrL = rL;
1977       *bestrR = rR;
1978       *bestcL = cL;
1979       *bestcR = cR;
1980       bestprob_with_score = probL + probR;
1981     } else if (score == bestscore && probL + probR > bestprob_with_score) {
1982       debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1983 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1984       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1985       *bestrL = rL;
1986       *bestrR = rR;
1987       *bestcL = cL;
1988       *bestcR = cR;
1989       bestprob_with_score = probL + probR;
1990     } else {
1991       debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1992 		     cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1993     }
1994 
1995     /* Perform only without indels */
1996     if (scoreI > 0) {
1997       debug3(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
1998 		    cL,cR,scoreI,probL,probR,probL+probR));
1999       if (probL + probR > bestprob_with_dinucl) {
2000 	bestscore_with_dinucl = scoreL + scoreI + scoreR;
2001 	bestcL_with_dinucl = cL;
2002 	bestcR_with_dinucl = cR;
2003 	bestrL_with_dinucl = rL;
2004 	bestrR_with_dinucl = rR;
2005 	bestprob_with_dinucl = probL + probR;
2006       }
2007     }
2008 
2009 
2010     debug3(printf("B. Test indel on right\n"));
2011     /* Test indel on right */
2012     cL = rL;
2013     probL = left_probabilities[cL];
2014     scoreL = (int) matrixL_upper[cL][rL];
2015 #if 0
2016     if (directionsL_upper_nogap[cL][rL] != DIAG) {
2017       /* Favor gaps away from intron if possible */
2018       scoreL -= 100;
2019     }
2020 #endif
2021 
2022     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2023     for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
2024       probR = right_probabilities[cR];
2025       scoreR = (int) matrixR_lower[rR][cR];
2026 #if 0
2027       if (directionsR_lower_nogap[rR][cR] != DIAG) {
2028 	/* Favor gaps away from intron if possible */
2029 	scoreR -= 100;
2030       }
2031 #endif
2032 
2033 #ifdef USE_SCOREI
2034       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2035       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2036 #else
2037       scoreI = 0;
2038 #endif
2039 
2040       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2041 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2042 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2043 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2044 	bestscore = score;
2045 	*bestrL = rL;
2046 	*bestrR = rR;
2047 	*bestcL = cL;
2048 	*bestcR = cR;
2049 	bestprob_with_score = probL + probR;
2050       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2051 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2052 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2053 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2054 	*bestrL = rL;
2055 	*bestrR = rR;
2056 	*bestcL = cL;
2057 	*bestcR = cR;
2058 	bestprob_with_score = probL + probR;
2059       } else {
2060 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2061 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2062       }
2063     }
2064 
2065     debug3(printf("Skip main diagonal\n"));
2066     for (/*Skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2067       probR = right_probabilities[cR];
2068       scoreR = (int) matrixR_upper[cR][rR];
2069 #if 0
2070       if (directionsR_upper_nogap[cR][rR] != DIAG) {
2071 	/* Favor gaps away from intron if possible */
2072 	scoreR -= 100;
2073       }
2074 #endif
2075 
2076 #ifdef USE_SCOREI
2077       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2078       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2079 #else
2080       scoreI = 0;
2081 #endif
2082 
2083       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2084 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2085 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2086 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2087 	bestscore = score;
2088 	*bestrL = rL;
2089 	*bestrR = rR;
2090 	*bestcL = cL;
2091 	*bestcR = cR;
2092 	bestprob_with_score = probL + probR;
2093       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2094 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2095 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2096 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2097 	*bestrL = rL;
2098 	*bestrR = rR;
2099 	*bestcL = cL;
2100 	*bestcR = cR;
2101 	bestprob_with_score = probL + probR;
2102       } else {
2103 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2104 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2105       }
2106     }
2107 
2108 
2109     debug3(printf("C. Test indel on left (2)\n"));
2110     /* Test indel on left */
2111     cR = rR;
2112     probR = right_probabilities[cR];
2113     scoreR = (int) matrixR_upper[cR][rR];
2114 #if 0
2115     if (directionsR_upper_nogap[cR][rR] != DIAG) {
2116       /* Favor gaps away from intron if possible */
2117       scoreR -= 100;
2118     }
2119 #endif
2120 
2121     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2122     for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
2123       probL = left_probabilities[cL];
2124       scoreL = (int) matrixL_lower[rL][cL];
2125 #if 0
2126       if (directionsL_lower_nogap[rL][cL] != DIAG) {
2127 	/* Favor gaps away from intron if possible */
2128 	scoreL -= 100;
2129       }
2130 #endif
2131 
2132 #ifdef USE_SCOREI
2133       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2134       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2135 #else
2136       scoreI = 0;
2137 #endif
2138 
2139       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2140 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2141 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2142 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2143 	bestscore = score;
2144 	*bestrL = rL;
2145 	*bestrR = rR;
2146 	*bestcL = cL;
2147 	*bestcR = cR;
2148 	bestprob_with_score = probL + probR;
2149       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2150 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2151 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2152 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2153 	*bestrL = rL;
2154 	*bestrR = rR;
2155 	*bestcL = cL;
2156 	*bestcR = cR;
2157 	bestprob_with_score = probL + probR;
2158       } else {
2159 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2160 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2161       }
2162     }
2163 
2164     debug3(printf("Skip main diagonal\n"));
2165     for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
2166       probL = left_probabilities[cL];
2167       scoreL = (int) matrixL_upper[cL][rL];
2168 #if 0
2169       if (directionsL_upper_nogap[cL][rL] != DIAG) {
2170 	/* Favor gaps away from intron if possible */
2171 	scoreL -= 100;
2172       }
2173 #endif
2174 
2175 #ifdef USE_SCOREI
2176       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2177       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2178 #else
2179       scoreI = 0;
2180 #endif
2181 
2182       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2183 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2184 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2185 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2186 	bestscore = score;
2187 	*bestrL = rL;
2188 	*bestrR = rR;
2189 	*bestcL = cL;
2190 	*bestcR = cR;
2191 	bestprob_with_score = probL + probR;
2192       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2193 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2194 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2195 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2196 	*bestrL = rL;
2197 	*bestrR = rR;
2198 	*bestcL = cL;
2199 	*bestcR = cR;
2200 	bestprob_with_score = probL + probR;
2201       } else {
2202 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2203 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2204       }
2205     }
2206   }
2207 
2208   if (bestprob_with_score > 2*PROB_CEILING) {
2209     /* Probability is good with best alignment, so take that */
2210     debug(printf("Best alignment based on score alone has good probability\n"));
2211     use_dinucl_p = false;		/* was previously true (bug) */
2212   } else if (bestprob_with_dinucl == 0.0) {
2213     debug(printf("No dinucleotides found\n"));
2214     use_dinucl_p = false;
2215   } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
2216     /* Dinucleotide-based solution is bad, so use alignment */
2217     debug(printf("Dinucleotide-based solution is bad on both sites\n"));
2218     use_dinucl_p = false;
2219   } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
2220     debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
2221 		  bestscore_with_dinucl,bestscore));
2222     use_dinucl_p = false;
2223   } else {
2224     use_dinucl_p = true;
2225   }
2226 
2227   debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
2228   if (use_dinucl_p == true) {
2229     debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
2230 		 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
2231 
2232     /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
2233     debug(printf("Switch to dinucleotide-based solution\n"));
2234     *bestcL = bestcL_with_dinucl;
2235     *bestcR = bestcR_with_dinucl;
2236     *bestrL = bestrL_with_dinucl;
2237     *bestrR = bestrR_with_dinucl;
2238     bestscore = bestscore_with_dinucl;
2239   }
2240 
2241   FREEA(rightdi);
2242   FREEA(leftdi);
2243   FREEA(left_probabilities);
2244   FREEA(right_probabilities);
2245 
2246   if (bestscore < 0) {
2247     return bestscore;
2248   } else if (halfp == true) {
2249     /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
2250     scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
2251     return (bestscore - scoreI/2);
2252   } else {
2253     return bestscore;
2254   }
2255 }
2256 
2257 
2258 
2259 static int
bridge_intron_gap_16_ud(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,Direction16_T ** directionsL_upper_nogap,Direction16_T ** directionsL_lower_nogap,Direction16_T ** directionsR_upper_nogap,Direction16_T ** directionsR_lower_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)2260 bridge_intron_gap_16_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2261 			 int *best_introntype, double *left_prob, double *right_prob,
2262 			 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
2263 			 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
2264 			 Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
2265 			 Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
2266 			 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2267 			 int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
2268 			 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2269 			 int leftoffset, int rightoffset,
2270 			 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2271 			 bool halfp, bool finalp, bool jump_late_p) {
2272   int finalscore;
2273   int *left_known, *right_known;
2274 
2275   debug(printf("Running bridge_intron_gap_16_ud\n"));
2276 
2277   if (glengthL+1 <= 0) {
2278     fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
2279     abort();
2280   }
2281 
2282   if (glengthR+1 <= 0) {
2283     fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
2284     abort();
2285   }
2286 
2287   left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
2288   right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
2289   get_known_splicesites(left_known,right_known,glengthL,glengthR,
2290 			/*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
2291 			cdna_direction,watsonp,chrnum,chroffset,chrhigh);
2292 
2293   if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
2294     /* Constrain to given introns */
2295     finalscore = bridge_intron_gap_16_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
2296 						   matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
2297 						   directionsL_upper_nogap,directionsL_lower_nogap,
2298 						   directionsR_upper_nogap,directionsR_lower_nogap,
2299 						   left_known,right_known,rlength,glengthL,glengthR,
2300 						   cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2301 						   leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
2302 
2303   } else {
2304     finalscore = bridge_intron_gap_16_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
2305 						 matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
2306 #if 0
2307 						 directionsL_upper_nogap,directionsL_lower_nogap,
2308 						 directionsR_upper_nogap,directionsR_lower_nogap,
2309 						 goffsetL,rev_goffsetR,canonical_reward,
2310 #endif
2311 						 gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,
2312 						 left_known,right_known,rlength,glengthL,glengthR,
2313 						 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2314 						 leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
2315   }
2316 
2317 
2318 #if 0
2319   /* Determine if result meets given constraints */
2320   if (*finalscore < 0) {
2321     result = false;
2322   } else if (novelsplicingp == true) {
2323     result = true;
2324   } else if (splicing_iit == NULL) {
2325     result = true;
2326   } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
2327     /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
2328     if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
2329       debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
2330       result = false;
2331     } else {
2332       result = true;
2333     }
2334   } else {
2335     /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
2336     result = true;
2337   }
2338 #endif
2339 
2340   if (finalscore >= 0) {
2341     get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
2342 			 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
2343 			 cdna_direction,watsonp);
2344   }
2345 
2346   FREEA(right_known);
2347   FREEA(left_known);
2348 
2349 #if defined(DEBUG) || defined(DEBUG3)
2350   printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
2351 	 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
2352 #endif
2353 
2354   return finalscore;
2355 }
2356 #endif
2357 
2358 
2359 #ifndef HAVE_SSE2
2360 static int
bridge_intron_gap_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)2361 bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2362 				int *best_introntype,
2363 				Score32_T **matrixL, Score32_T **matrixR,
2364 				Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2365 				int *left_known, int *right_known,
2366 				int rlength, int glengthL, int glengthR,
2367 				int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2368 				int leftoffset, int rightoffset,
2369 				Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2370 				bool jump_late_p) {
2371   int rL, rR, cL, cR;
2372   int cloL, chighL;
2373   int cloR, chighR;
2374   int bestscore = NEG_INFINITY_32, score, scoreL, scoreI, scoreR;
2375   Univcoord_T splicesitepos1, splicesitepos2;
2376   bool bestp;
2377 
2378   scoreI = 0;			/* Because we constrain splices to given introns */
2379 
2380   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
2381     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
2382     if ((cloL = rL - lbandL) < 1) {
2383       cloL = 1;
2384     }
2385     if ((chighL = rL + ubandL) > glengthL-1) {
2386       chighL = glengthL-1;
2387     }
2388 
2389     if ((cloR = rR - lbandR) < 1) {
2390       cloR = 1;
2391     }
2392     if ((chighR = rR + ubandR) > glengthR-1) {
2393       chighR = glengthR-1;
2394     }
2395 
2396     /* Test indels on left and right */
2397     for (cL = cloL; cL < chighL; cL++) {
2398       /* The following check limits genomic inserts (horizontal) and
2399 	 multiple cDNA inserts (vertical). */
2400       if (left_known[cL] > 0 && directionsL_nogap[cL][rL] == DIAG) {
2401 	scoreL = (int) matrixL[cL][rL];
2402 #if 0
2403 	if (directionsL_nogap[cL][rL] != DIAG) {
2404 	  /* Favor gaps away from intron if possible */
2405 	  scoreL -= 100;
2406 	}
2407 #endif
2408 
2409 	/* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2410 	for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2411 	  if (right_known[cR] > 0 && directionsR_nogap[cR][rR] == DIAG) {
2412 	    scoreR = (int) matrixR[cR][rR];
2413 #if 0
2414 	    if (directionsR_nogap[cR][rR] != DIAG) {
2415 	      /* Favor gaps away from intron if possible */
2416 	      scoreR -= 100;
2417 	    }
2418 #endif
2419 
2420 	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
2421 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
2422 	      bestp = false;
2423 	      if (watsonp == true) {
2424 		splicesitepos1 = leftoffset + cL;
2425 		splicesitepos2 = rightoffset - cR + 1;
2426 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
2427 						 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
2428 		  bestp = true;
2429 		}
2430 	      } else {
2431 		splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
2432 		splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
2433 		if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
2434 						 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
2435 		  bestp = true;
2436 		}
2437 	      }
2438 	      if (bestp == true) {
2439 		debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
2440 			      cL,cR,scoreL,scoreR,score));
2441 		bestscore = score;
2442 		*bestrL = rL;
2443 		*bestrR = rR;
2444 		*bestcL = cL;
2445 		*bestcR = cR;
2446 	      } else {
2447 		debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
2448 			       cL,cR,scoreL,scoreR,score));
2449 	      }
2450 	    }
2451 	  }
2452 	}
2453       }
2454     }
2455   }
2456 
2457   *best_introntype = NONINTRON;
2458   return bestscore;
2459 }
2460 
2461 
2462 /* Returns finalscore */
2463 static int
bridge_intron_gap_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)2464 bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2465 			      Score32_T **matrixL, Score32_T **matrixR,
2466 			      Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2467 			      char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2468 			      int goffsetL, int rev_goffsetR, int *left_known, int *right_known,
2469 			      int rlength, int glengthL, int glengthR,
2470 			      int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2471 			      int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
2472 			      bool halfp, bool finalp) {
2473   int rL, rR, cL, cR;
2474   int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
2475   int cloL, chighL;
2476   int cloR, chighR;
2477   int introntype;
2478   int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
2479   int bestscore_with_dinucl = NEG_INFINITY_32;
2480   double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
2481   Univcoord_T splicesitepos;
2482   char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
2483   int *leftdi, *rightdi;
2484   bool use_dinucl_p;
2485 
2486   int *intron_score_array;
2487 
2488   if (cdna_direction > 0) {
2489     if (finalp == true) {
2490       intron_score_array = intron_score_array_sense_final;
2491     } else {
2492       intron_score_array = intron_score_array_sense_prelim;
2493     }
2494   } else if (cdna_direction < 0) {
2495     if (finalp == true) {
2496       intron_score_array = intron_score_array_antisense_final;
2497     } else {
2498       intron_score_array = intron_score_array_antisense_prelim;
2499     }
2500   } else {
2501     if (finalp == true) {
2502       intron_score_array = intron_score_array_either_final;
2503     } else {
2504       intron_score_array = intron_score_array_either_prelim;
2505     }
2506   }
2507 
2508 
2509   /* Read dinucleotides */
2510   leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
2511   rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
2512 
2513   for (cL = 0; cL < glengthL - 1; cL++) {
2514     left1 = gsequenceL[cL];
2515     left1_alt = gsequenceL_alt[cL];
2516     left2 = gsequenceL[cL+1];
2517     left2_alt = gsequenceL_alt[cL+1];
2518     /* Assertions may not hold for transcriptome alignment */
2519     /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
2520     /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
2521 
2522     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
2523       leftdi[cL] = LEFT_GT;
2524     } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
2525       leftdi[cL] = LEFT_GC;
2526     } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
2527       leftdi[cL] = LEFT_AT;
2528 #ifndef PMAP
2529     } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
2530       leftdi[cL] = LEFT_CT;
2531 #endif
2532     } else {
2533       leftdi[cL] = 0x00;
2534     }
2535   }
2536   leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
2537 
2538   for (cR = 0; cR < glengthR - 1; cR++) {
2539     right2 = rev_gsequenceR[-cR-1];
2540     right2_alt = rev_gsequenceR_alt[-cR-1];
2541     right1 = rev_gsequenceR[-cR];
2542     right1_alt = rev_gsequenceR_alt[-cR];
2543     /* Assertions may not hold for transcriptome alignment */
2544     /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
2545     /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
2546 
2547     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
2548       rightdi[cR] = RIGHT_AG;
2549     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
2550       rightdi[cR] = RIGHT_AC;
2551 #ifndef PMAP
2552     } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
2553       rightdi[cR] = RIGHT_GC;
2554     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
2555       rightdi[cR] = RIGHT_AT;
2556 #endif
2557     } else {
2558       rightdi[cR] = 0x00;
2559     }
2560   }
2561   rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
2562 
2563 
2564   left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
2565   right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
2566 
2567   debug3(printf("watsonp is %d.  cdna_direction is %d\n",watsonp,cdna_direction));
2568   if (watsonp == true) {
2569     if (cdna_direction > 0) {
2570       for (cL = 0; cL < glengthL - 1; cL++) {
2571 	splicesitepos = chroffset + leftoffset + cL;
2572 	if (left_known[cL]) {
2573 	  left_probabilities[cL] = 1.0;
2574 	} else {
2575 	  left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
2576 	  debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2577 	}
2578       }
2579 
2580       for (cR = 0; cR < glengthR - 1; cR++) {
2581 	splicesitepos = chroffset + rightoffset - cR + 1;
2582 	if (right_known[cR]) {
2583 	  right_probabilities[cR] = 1.0;
2584 	} else {
2585 	  right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
2586 	  debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2587 	}
2588       }
2589 
2590     } else {
2591       for (cL = 0; cL < glengthL - 1; cL++) {
2592 	splicesitepos = chroffset + leftoffset + cL;
2593 	if (left_known[cL]) {
2594 	  left_probabilities[cL] = 1.0;
2595 	} else {
2596 	  left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
2597 	  debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2598 	}
2599       }
2600 
2601       for (cR = 0; cR < glengthR - 1; cR++) {
2602 	splicesitepos = chroffset + rightoffset - cR + 1;
2603 	if (right_known[cR]) {
2604 	  right_probabilities[cR] = 1.0;
2605 	} else {
2606 	  right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
2607 	  debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2608 	}
2609       }
2610     }
2611 
2612   } else {
2613     if (cdna_direction > 0) {
2614       for (cL = 0; cL < glengthL - 1; cL++) {
2615 	splicesitepos = chrhigh - leftoffset - cL + 1;
2616 	if (left_known[cL]) {
2617 	  left_probabilities[cL] = 1.0;
2618 	} else {
2619 	  left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
2620 	  debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2621 	}
2622       }
2623 
2624       for (cR = 0; cR < glengthR - 1; cR++) {
2625 	splicesitepos = chrhigh - rightoffset + cR;
2626 	if (right_known[cR]) {
2627 	  right_probabilities[cR] = 1.0;
2628 	} else {
2629 	  right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
2630 	  debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2631 	}
2632       }
2633 
2634     } else {
2635       for (cL = 0; cL < glengthL - 1; cL++) {
2636 	splicesitepos = chrhigh - leftoffset - cL + 1;
2637 	if (left_known[cL]) {
2638 	  left_probabilities[cL] = 1.0;
2639 	} else {
2640 	  left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
2641 	  debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2642 	}
2643       }
2644 
2645       for (cR = 0; cR < glengthR - 1; cR++) {
2646 	splicesitepos = chrhigh - rightoffset + cR;
2647 	if (right_known[cR]) {
2648 	  right_probabilities[cR] = 1.0;
2649 	} else {
2650 	  right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
2651 	  debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2652 	}
2653       }
2654     }
2655   }
2656 
2657   /* Search using probs and without simultaneously */
2658   bestscore = NEG_INFINITY_32;
2659   bestprob_with_score = bestprob_with_dinucl = 0.0;
2660   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
2661     debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
2662     if ((cloL = rL - lbandL) < 1) {
2663       cloL = 1;
2664     }
2665     if ((chighL = rL + ubandL) > glengthL-1) {
2666       chighL = glengthL-1;
2667     }
2668 
2669     if ((cloR = rR - lbandR) < 1) {
2670       cloR = 1;
2671     }
2672     if ((chighR = rR + ubandR) > glengthR-1) {
2673       chighR = glengthR-1;
2674     }
2675 
2676     debug3(printf("A. Test no indels\n"));
2677     cL = rL;
2678     probL = left_probabilities[cL];
2679     scoreL = (int) matrixL[cL][rL];
2680 
2681     cR = rR;
2682     probR = right_probabilities[cR];
2683     scoreR = (int) matrixR[cR][rR];
2684 
2685 #ifdef USE_SCOREI
2686     /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2687     scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2688 #else
2689     scoreI = 0;
2690 #endif
2691 
2692     if ((score = scoreL + scoreI + scoreR) > bestscore) {
2693       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2694 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2695       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2696       bestscore = score;
2697       *bestrL = rL;
2698       *bestrR = rR;
2699       *bestcL = cL;
2700       *bestcR = cR;
2701       bestprob_with_score = probL + probR;
2702     } else if (score == bestscore && probL + probR > bestprob_with_score) {
2703       debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2704 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2705       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2706       *bestrL = rL;
2707       *bestrR = rR;
2708       *bestcL = cL;
2709       *bestcR = cR;
2710       bestprob_with_score = probL + probR;
2711     } else {
2712       debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2713 		     cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2714     }
2715 
2716     /* Perform only without indels */
2717     if (scoreI > 0) {
2718       debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
2719 		     cL,cR,scoreI,probL,probR,probL+probR));
2720       if (probL + probR > bestprob_with_dinucl) {
2721 	bestscore_with_dinucl = scoreL + scoreI + scoreR;
2722 	bestcL_with_dinucl = cL;
2723 	bestcR_with_dinucl = cR;
2724 	bestrL_with_dinucl = rL;
2725 	bestrR_with_dinucl = rR;
2726 	bestprob_with_dinucl = probL + probR;
2727       }
2728     }
2729 
2730 
2731     debug3(printf("B. Test indel on right\n"));
2732     /* Test indel on right */
2733     cL = rL;
2734     probL = left_probabilities[cL];
2735     scoreL = (int) matrixL[cL][rL];
2736 #if 0
2737     if (directionsL_nogap[cL][rL] != DIAG) {
2738       /* Favor gaps away from intron if possible */
2739       scoreL -= 100;
2740     }
2741 #endif
2742 
2743     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2744     for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2745       probR = right_probabilities[cR];
2746       scoreR = (int) matrixR[cR][rR];
2747 #if 0
2748       if (directionsR_nogap[cR][rR] != DIAG) {
2749 	/* Favor gaps away from intron if possible */
2750 	scoreR -= 100;
2751       }
2752 #endif
2753 
2754 #ifdef USE_SCOREI
2755       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2756       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2757 #else
2758       scoreI = 0;
2759 #endif
2760 
2761       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2762 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2763 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2764 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2765 	bestscore = score;
2766 	*bestrL = rL;
2767 	*bestrR = rR;
2768 	*bestcL = cL;
2769 	*bestcR = cR;
2770 	bestprob_with_score = probL + probR;
2771       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2772 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2773 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2774 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2775 	*bestrL = rL;
2776 	*bestrR = rR;
2777 	*bestcL = cL;
2778 	*bestcR = cR;
2779 	bestprob_with_score = probL + probR;
2780       } else {
2781 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2782 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2783       }
2784     }
2785 
2786     debug3(printf("C. Test indel on left (3)\n"));
2787     /* Test indel on left */
2788     cR = rR;
2789     probR = right_probabilities[cR];
2790     scoreR = (int) matrixR[cR][rR];
2791 #if 0
2792     if (directionsR_nogap[cR][rR] != DIAG) {
2793       /* Favor gaps away from intron if possible */
2794       scoreR -= 100;
2795     }
2796 #endif
2797 
2798     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2799     for (cL = cloL; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
2800       probL = left_probabilities[cL];
2801       scoreL = (int) matrixL[cL][rL];
2802 #if 0
2803       if (directionsL_nogap[cL][rL] != DIAG) {
2804 	/* Favor gaps away from intron if possible */
2805 	scoreL -= 100;
2806       }
2807 #endif
2808 
2809 #ifdef USE_SCOREI
2810       /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2811       scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2812 #else
2813       scoreI = 0;
2814 #endif
2815 
2816       if ((score = scoreL + scoreI + scoreR) > bestscore) {
2817 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2818 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2819 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2820 	bestscore = score;
2821 	*bestrL = rL;
2822 	*bestrR = rR;
2823 	*bestcL = cL;
2824 	*bestcR = cR;
2825 	bestprob_with_score = probL + probR;
2826       } else if (score == bestscore && probL + probR > bestprob_with_score) {
2827 	debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2828 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2829 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2830 	*bestrL = rL;
2831 	*bestrR = rR;
2832 	*bestcL = cL;
2833 	*bestcR = cR;
2834 	bestprob_with_score = probL + probR;
2835       } else {
2836 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2837 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2838       }
2839     }
2840   }
2841 
2842   if (bestprob_with_score > 2*PROB_CEILING) {
2843     /* Probability is good with best alignment, so take that */
2844     debug(printf("Best alignment based on score alone has good probability\n"));
2845     use_dinucl_p = false;		/* was previously true (bug) */
2846   } else if (bestprob_with_dinucl == 0.0) {
2847     debug(printf("No dinucleotides found\n"));
2848     use_dinucl_p = false;		/* was previously true (bug) */
2849   } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
2850     /* Dinucleotide-based solution is bad, so use alignment */
2851     debug(printf("Dinucleotide-based solution is bad on both sites\n"));
2852     use_dinucl_p = false;
2853   } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
2854     debug(printf("Dinucleotide-based solution requires very bad alignment\n"));
2855     use_dinucl_p = false;
2856   } else {
2857     use_dinucl_p = true;
2858   }
2859 
2860   debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
2861   if (use_dinucl_p == true) {
2862     debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
2863 		 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
2864     /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
2865     debug(printf("Switch to dinucleotide-based solution\n"));
2866     *bestcL = bestcL_with_dinucl;
2867     *bestcR = bestcR_with_dinucl;
2868     *bestrL = bestrL_with_dinucl;
2869     *bestrR = bestrR_with_dinucl;
2870     bestscore = bestscore_with_dinucl;
2871   }
2872 
2873 
2874   FREEA(rightdi);
2875   FREEA(leftdi);
2876   FREEA(left_probabilities);
2877   FREEA(right_probabilities);
2878 
2879   if (bestscore < 0) {
2880     return bestscore;
2881   } else if (halfp == true) {
2882     /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
2883     scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
2884     return (bestscore - scoreI/2);
2885   } else {
2886     return bestscore;
2887   }
2888 }
2889 
2890 
2891 static int
bridge_intron_gap(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int extraband_paired,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)2892 bridge_intron_gap (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2893 		   int *best_introntype, double *left_prob, double *right_prob,
2894 		   Score32_T **matrixL, Score32_T **matrixR,
2895 		   Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2896 		   char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2897 		   int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
2898 		   int cdna_direction, bool watsonp, int extraband_paired,
2899 		   int leftoffset, int rightoffset, Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2900 		   bool halfp, bool finalp, bool jump_late_p) {
2901   int finalscore;
2902   int *left_known, *right_known;
2903   int ubandL, lbandL, ubandR, lbandR;
2904 
2905 
2906   if (glengthL+1 <= 0) {
2907     fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
2908     abort();
2909   }
2910 
2911   if (glengthR+1 <= 0) {
2912     fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
2913     abort();
2914   }
2915 
2916 #if 1
2917   /* Allows unlimited indel lengths */
2918   ubandL = glengthL - rlength + extraband_paired;
2919   lbandL = extraband_paired;
2920 
2921   ubandR = glengthR - rlength + extraband_paired;
2922   lbandR = extraband_paired;
2923 #else
2924   /* Limit indels to 3 bp around splice sites.  Doesn't work on PacBio reads. */
2925   ubandL = 3;
2926   lbandL = 3;
2927 
2928   ubandR = 3;
2929   lbandR = 3;
2930 #endif
2931 
2932   left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
2933   right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
2934   get_known_splicesites(left_known,right_known,glengthL,glengthR,
2935 			/*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
2936 			cdna_direction,watsonp,chrnum,chroffset,chrhigh);
2937 
2938   if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
2939     /* Constrain to given introns */
2940     finalscore = bridge_intron_gap_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
2941 						matrixL,matrixR,directionsL_nogap,directionsR_nogap,
2942 						left_known,right_known,rlength,glengthL,glengthR,
2943 						cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2944 						leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
2945 
2946   } else {
2947     finalscore = bridge_intron_gap_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
2948 					      matrixL,matrixR,directionsL_nogap,directionsR_nogap,
2949 					      gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,goffsetL,rev_goffsetR,
2950 					      left_known,right_known,rlength,glengthL,glengthR,
2951 					      cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2952 					      leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
2953   }
2954 
2955 
2956 #if 0
2957   /* Determine if result meets given constraints */
2958   if (*finalscore < 0) {
2959     result = false;
2960   } else if (novelsplicingp == true) {
2961     result = true;
2962   } else if (splicing_iit == NULL) {
2963     result = true;
2964   } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
2965     /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
2966     if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
2967       debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
2968       result = false;
2969     } else {
2970       result = true;
2971     }
2972   } else {
2973     /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
2974     result = true;
2975   }
2976 #endif
2977 
2978   if (finalscore >= 0) {
2979     get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
2980 			 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
2981 			 cdna_direction,watsonp);
2982   }
2983 
2984   FREEA(right_known);
2985   FREEA(left_known);
2986 
2987 #if defined(DEBUG) || defined(DEBUG3)
2988   printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
2989 	 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
2990 #endif
2991 
2992   return finalscore;
2993 }
2994 #endif
2995 
2996 
2997 static List_T
genome_gap_simple(int * finalscore,int * best_introntype,int * new_leftgenomepos,int * new_rightgenomepos,double * left_prob,double * right_prob,int * exonhead,int * nmatches,int * nmismatches,char * rsequence,char * rsequenceuc,char * rev_rsequence,char * rev_rsequenceuc,int rlength,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int roffset,int rev_roffset,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,Pairpool_T pairpool,int mismatchtype,int cdna_direction,bool watsonp,int genestrand,int dynprogindex,bool halfp)2998 genome_gap_simple (int *finalscore, int *best_introntype, int *new_leftgenomepos, int *new_rightgenomepos,
2999 		   double *left_prob, double *right_prob, int *exonhead, int *nmatches, int *nmismatches,
3000 		   char *rsequence, char *rsequenceuc, char *rev_rsequence, char *rev_rsequenceuc, int rlength,
3001 		   char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
3002 		   int roffset, int rev_roffset, int leftoffset, int rightoffset,
3003 		   Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Pairpool_T pairpool,
3004 		   int mismatchtype, int cdna_direction, bool watsonp, int genestrand, int dynprogindex, bool halfp) {
3005   List_T pairs = NULL;
3006   Pair_T gappair;
3007   bool result;
3008   int bestrL, bestrR, rL, rR, r;
3009   int querycoord, genomecoord;
3010   int na1, na2, na2_alt, c1, c1_uc, c2, c2_alt;
3011   char left1, left1_alt, left2, left2_alt, right2, right2_alt, right1, right1_alt;
3012   int bestscore = 0, bestscoreI = 0, scoreL, scoreI, scoreR, pairscore, score;
3013   int leftdi, rightdi;
3014   int *left_known, *right_known;
3015   int introntype;
3016   Pairdistance_T **pairdistance_array_type;
3017 
3018   int *intron_score_array;
3019 
3020   /* Assuming finalp is false */
3021   if (cdna_direction > 0) {
3022     intron_score_array = intron_score_array_sense_prelim;
3023   } else if (cdna_direction < 0) {
3024     intron_score_array = intron_score_array_antisense_prelim;
3025   } else {
3026     intron_score_array = intron_score_array_either_prelim;
3027   }
3028 
3029 
3030   debug(printf("Starting genome_gap_simple with cdna_direction %d and watsonp %d\n",cdna_direction,watsonp));
3031   pairdistance_array_type = pairdistance_array[mismatchtype];
3032 
3033   left_known = (int *) CALLOCA(rlength+1,sizeof(int));
3034   right_known = (int *) CALLOCA(rlength+1,sizeof(int));
3035   get_known_splicesites(left_known,right_known,/*glengthL*/rlength,/*glengthR*/rlength,
3036 			leftoffset,rightoffset,
3037 			cdna_direction,watsonp,chrnum,chroffset,chrhigh);
3038 
3039   scoreR = 0;
3040   for (rR = 1; rR < rlength; rR++) {
3041     na1 = rev_rsequenceuc[1-rR];
3042     na2 = rev_gsequenceR[1-rR];
3043     na2_alt = rev_gsequenceR_alt[1-rR];
3044     pairscore = pairdistance_array_type[na1][na2];
3045     if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3046       pairscore = score;
3047     }
3048     scoreR += pairscore;
3049   }
3050 
3051   scoreL = 0;
3052   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
3053     na1 = rsequenceuc[rL-1];
3054     na2 = gsequenceL[rL-1];
3055     na2_alt = gsequenceL_alt[rL-1];
3056     pairscore = pairdistance_array_type[na1][na2];
3057     if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3058       pairscore = score;
3059     }
3060     scoreL += pairscore;
3061 
3062     /* Read dinucleotides */
3063     left1 = gsequenceL[rL];
3064     left1_alt = gsequenceL_alt[rL];
3065     left2 = gsequenceL[rL+1];
3066     left2_alt = gsequenceL_alt[rL+1];
3067 
3068     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
3069       leftdi = LEFT_GT;
3070     } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
3071       leftdi = LEFT_GC;
3072     } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
3073       leftdi = LEFT_AT;
3074 #ifndef PMAP
3075     } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
3076       leftdi = LEFT_CT;
3077 #endif
3078     } else {
3079       leftdi = 0x00;
3080     }
3081 
3082     right2 = rev_gsequenceR[-rR-1];
3083     right2_alt = rev_gsequenceR_alt[-rR-1];
3084     right1 = rev_gsequenceR[-rR];
3085     right1_alt = rev_gsequenceR_alt[-rR];
3086     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
3087       rightdi = RIGHT_AG;
3088     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
3089       rightdi = RIGHT_AC;
3090 #ifndef PMAP
3091     } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
3092       rightdi = RIGHT_GC;
3093     } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
3094       rightdi = RIGHT_AT;
3095 #endif
3096     } else {
3097       rightdi = 0x00;
3098     }
3099 
3100 #if 0
3101     scoreI = intron_score(&introntype,leftdi,rightdi,cdna_direction,canonical_reward,/*finalp*/false);
3102 #endif
3103     introntype = leftdi & rightdi;
3104     scoreI = intron_score_array[introntype];
3105 
3106     if ((introntype != NONINTRON || left_known[rL] > 0 || right_known[rR] > 0) &&
3107 	(score = scoreL + left_known[rL] + scoreI + right_known[rR] + scoreR) >= bestscore) {  /* Use >= for jump late */
3108       debug(printf("At %d left (%c%c) to %d right (%c%c), score is (%d)+(%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
3109 		   rL,left1,left2,rR,right2,right1,scoreL,left_known[rL],scoreI,right_known[rR],scoreR,
3110 		   scoreL+left_known[rL]+scoreI+right_known[rR]+scoreR));
3111       bestscore = score;
3112       bestscoreI = scoreI;
3113       bestrL = /* *bestcL = */ rL;
3114       bestrR = /* *bestcR = */ rR;
3115       *best_introntype = introntype;
3116     } else {
3117       debug(printf("At %d left (%c%c) to %d right (%c%c), score is (%d)+(%d)+(%d)+(%d)+(%d) = %d\n",
3118 		   rL,left1,left2,rR,right2,right1,scoreL,left_known[rL],scoreI,right_known[rR],scoreR,
3119 		   scoreL+left_known[rL]+scoreI+right_known[rR]+scoreR));
3120     }
3121 
3122     /* Subtract pairscore from cumulative scoreR */
3123     na1 = rev_rsequenceuc[1-rR];
3124     na2 = rev_gsequenceR[1-rR];
3125     na2_alt = rev_gsequenceR_alt[1-rR];
3126     pairscore = pairdistance_array_type[na1][na2];
3127     if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3128       pairscore = score;
3129     }
3130     scoreR -= pairscore;
3131   }
3132 
3133   if (halfp == true) {
3134     *finalscore = (int) (bestscore - bestscoreI/2);
3135   } else {
3136     *finalscore = (int) bestscore;
3137   }
3138 
3139   if (*finalscore <= 0) {
3140     result = false;
3141   } else if (novelsplicingp == true) {
3142     result = true;
3143   } else if (splicing_iit == NULL) {
3144     result = true;
3145   } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
3146     /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
3147     if (left_known[bestrL] == 0 || right_known[bestrR] == 0) {
3148       debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
3149       result = false;
3150     } else {
3151       result = true;
3152     }
3153   } else {
3154     /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
3155     result = true;
3156   }
3157 
3158   if (result == true) {
3159     get_splicesite_probs(&(*left_prob),&(*right_prob),bestrL,bestrR,
3160 			 left_known,right_known,leftoffset,rightoffset,
3161 			 chroffset,chrhigh,cdna_direction,watsonp);
3162     debug(printf("Probabilities are %f and %f\n",*left_prob,*right_prob));
3163     if (*left_prob < 0.90 || *right_prob < 0.90) {
3164       result = false;
3165     }
3166   }
3167 
3168   if (result == true) {
3169     *nmatches = *nmismatches = 0;
3170 
3171     /* Push from left to right, so we don't need to do List_reverse() later */
3172     for (r = 1; r <= bestrL; r++) {
3173       querycoord = genomecoord = r-1;
3174 
3175       c1 = rsequence[querycoord];
3176       c1_uc = rsequenceuc[querycoord];
3177       c2 = gsequenceL[genomecoord];
3178       c2_alt = gsequenceL_alt[genomecoord];
3179 
3180       if (c2 == '*') {
3181 	/* Don't push pairs past end of chromosome */
3182 	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
3183 		     leftoffset,genomecoord,chroffset,chrhigh,watsonp));
3184 
3185       } else if (c1_uc == c2 || c1_uc == c2_alt) {
3186 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - match\n",
3187 		     r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3188 	*nmatches += 1;
3189 	pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3190 			      c1,DYNPROG_MATCH_COMP,c2,c2_alt,dynprogindex);
3191 
3192       } else if (Dynprog_consistent_p(c1_uc,/*g*/c2,/*g_alt*/c2_alt,genestrand) == true) {
3193 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - ambiguous\n",
3194 		     r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3195 	*nmatches += 1;
3196 	pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3197 			      c1,AMBIGUOUS_COMP,c2,c2_alt,dynprogindex);
3198 
3199       } else {
3200 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - mismatch\n",
3201 		     r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3202 	*nmismatches += 1;
3203 	pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3204 			      c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
3205       }
3206     }
3207 
3208     debug(printf("Pushing a gap\n"));
3209     *new_leftgenomepos = leftoffset+(bestrL-1);
3210     *new_rightgenomepos = *exonhead = rightoffset-(bestrR-1);
3211 #ifndef NOGAPHOLDER
3212     pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3213 				    /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3214     gappair = (Pair_T) pairs->first;
3215     gappair->introntype = introntype;
3216     gappair->donor_prob = *left_prob;
3217     gappair->acceptor_prob = *right_prob;
3218 #endif
3219 
3220     for (r = bestrR; r > 0; r--) {
3221       querycoord = genomecoord = 1-r;
3222 
3223       c1 = rev_rsequence[querycoord];
3224       c1_uc = rev_rsequenceuc[querycoord];
3225       c2 = rev_gsequenceR[genomecoord];
3226       c2_alt = rev_gsequenceR_alt[genomecoord];
3227 
3228       if (c2 == '*') {
3229 	/* Don't push pairs past end of chromosome */
3230 	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
3231 		     rightoffset,genomecoord,chroffset,chrhigh,watsonp));
3232 
3233       } else if (c1_uc == c2 || c1_uc == c2_alt) {
3234 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - match\n",
3235 		     r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3236 	*nmatches += 1;
3237 	pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3238 			      c1,DYNPROG_MATCH_COMP,c2,c2_alt,dynprogindex);
3239 
3240       } else if (Dynprog_consistent_p(c1_uc,/*g*/c2,/*g_alt*/c2_alt,genestrand) == true) {
3241 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - ambiguous\n",
3242 		     r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3243 	*nmatches += 1;
3244 	pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3245 			      c1,AMBIGUOUS_COMP,c2,c2_alt,dynprogindex);
3246 
3247       } else {
3248 	debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - mismatch\n",
3249 		     r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3250 	*nmismatches += 1;
3251 	pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3252 			      c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
3253       }
3254     }
3255 
3256   }
3257 
3258   FREEA(right_known);
3259   FREEA(left_known);
3260 
3261   return pairs;
3262 }
3263 
3264 
3265 
3266 
3267 /* A genome gap is usually an intron.  Sequence 2L and 2R represent
3268    the two genomic ends of the intron. */
3269 List_T
Dynprog_genome_gap(int * dynprogindex,int * finalscore,int * new_leftgenomepos,int * new_rightgenomepos,double * left_prob,double * right_prob,int * nmatches,int * nmismatches,int * nopens,int * nindels,int * exonhead,int * introntype,T dynprogL,T dynprogR,char * rsequence,char * rsequenceuc,int rlength,int glengthL,int glengthR,int roffset,int goffsetL,int rev_goffsetR,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,int cdna_direction,bool watsonp,int genestrand,bool jump_late_p,Pairpool_T pairpool,int extraband_paired,double defect_rate,int maxpeelback,bool halfp,bool finalp)3270 Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos, int *new_rightgenomepos,
3271 		    double *left_prob, double *right_prob,
3272 		    int *nmatches, int *nmismatches, int *nopens, int *nindels, int *exonhead, int *introntype,
3273 		    T dynprogL, T dynprogR,
3274 		    char *rsequence, char *rsequenceuc, int rlength, int glengthL, int glengthR,
3275 		    int roffset, int goffsetL, int rev_goffsetR,
3276 		    Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
3277 		    int cdna_direction, bool watsonp, int genestrand,
3278 		    bool jump_late_p, Pairpool_T pairpool, int extraband_paired,
3279 		    double defect_rate, int maxpeelback, bool halfp, bool finalp) {
3280   List_T pairs = NULL;
3281   Pair_T gappair;
3282   char *gsequenceL, *gsequenceL_alt, *rev_gsequenceR, *rev_gsequenceR_alt;
3283   char *rev_rsequence, *rev_rsequenceuc;
3284   Mismatchtype_T mismatchtype;
3285   /* int canonical_reward; */
3286   int rev_roffset, bestrL = -1, bestrR, bestcL, bestcR;
3287   int lbandL, ubandL, lbandR, ubandR;
3288   int open, extend;
3289 #if defined(HAVE_SSE2)
3290   Score8_T **matrix8L_upper, **matrix8L_lower, **matrix8R_upper, **matrix8R_lower;
3291   Direction8_T **directions8L_upper_nogap, **directions8L_upper_Egap,
3292     **directions8L_lower_nogap, **directions8L_lower_Egap,
3293     **directions8R_upper_nogap, **directions8R_upper_Egap,
3294     **directions8R_lower_nogap, **directions8R_lower_Egap;
3295   bool use8p;
3296 
3297   Score16_T **matrix16L_upper, **matrix16L_lower, **matrix16R_upper, **matrix16R_lower;
3298   Direction16_T **directions16L_upper_nogap, **directions16L_upper_Egap,
3299     **directions16L_lower_nogap, **directions16L_lower_Egap,
3300     **directions16R_upper_nogap, **directions16R_upper_Egap,
3301     **directions16R_lower_nogap, **directions16R_lower_Egap;
3302 #else
3303   Score32_T **matrixL, **matrixR;
3304   Direction32_T **directionsL_nogap, **directionsL_Egap, **directionsL_Fgap,
3305     **directionsR_nogap, **directionsR_Egap, **directionsR_Fgap;
3306 #endif
3307   /* int queryjump, genomejump; */
3308 
3309   debug(printf("\n"));
3310   debug(printf("%c:  ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'));
3311   debug(printf("Aligning genome gap with cdna_direction %d, defect_rate %f\n",cdna_direction,defect_rate));
3312 #ifdef EXTRACT_GENOMICSEG
3313   debug(printf("At genomic offset %d-%d, %.*s\n",goffsetL,goffsetL+glengthL-1,glengthL,gsequenceL));
3314   debug(printf("At genomic offset %d-%d, %.*s\n",rev_goffsetR-glengthR+1,rev_goffsetR,glengthR,&(rev_gsequenceR[-glengthR+1])));
3315 #endif
3316   debug(printf("\n"));
3317 
3318   /* ?check if offsets are too close.  But this eliminates a segment
3319      of the cDNA.  Should check in stage 3, and do single gap instead. */
3320   /*
3321   if (goffsetL+glengthL-1 >= rev_goffsetR-glengthR+1) {
3322     debug(printf("Bounds don't make sense\n"));
3323     *finalscore = NEG_INFINITY_16;
3324     return NULL;
3325   }
3326   */
3327 
3328   *nmatches = *nmismatches = *nopens = *nindels = 0;
3329   *left_prob = *right_prob = 0.0;
3330   *introntype = NONINTRON;
3331   if (rlength <= 1) {
3332     *finalscore = NEG_INFINITY_32;
3333     return (List_T) NULL;
3334   }
3335 
3336   if (defect_rate < DEFECT_HIGHQ) {
3337     mismatchtype = HIGHQ;
3338     if (rlength > maxpeelback * 4) {
3339       debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3340 		   rlength,maxpeelback));
3341       open = SINGLE_OPEN_HIGHQ;
3342       extend = SINGLE_EXTEND_HIGHQ;
3343     } else {
3344       open = PAIRED_OPEN_HIGHQ;
3345       extend = PAIRED_EXTEND_HIGHQ;
3346     }
3347 #if 0
3348     if (splicingp == false) {
3349       canonical_reward = 0;
3350     } else if (finalp == true) {
3351       canonical_reward = FINAL_CANONICAL_INTRON_HIGHQ;
3352     } else {
3353       canonical_reward = CANONICAL_INTRON_HIGHQ;
3354     }
3355 #endif
3356 
3357   } else if (defect_rate < DEFECT_MEDQ) {
3358     mismatchtype = MEDQ;
3359     if (rlength > maxpeelback * 4) {
3360       debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3361 		   rlength,maxpeelback));
3362       open = SINGLE_OPEN_MEDQ;
3363       extend = SINGLE_EXTEND_MEDQ;
3364     } else {
3365       open = PAIRED_OPEN_MEDQ;
3366       extend = PAIRED_EXTEND_MEDQ;
3367     }
3368 #if 0
3369     if (splicingp == false) {
3370       canonical_reward = 0;
3371     } else if (finalp == true) {
3372       canonical_reward = FINAL_CANONICAL_INTRON_MEDQ;
3373     } else {
3374       canonical_reward = CANONICAL_INTRON_MEDQ;
3375     }
3376 #endif
3377 
3378   } else {
3379     mismatchtype = LOWQ;
3380     if (rlength > maxpeelback * 4) {
3381       debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3382 		   rlength,maxpeelback));
3383       open = SINGLE_OPEN_LOWQ;
3384       extend = SINGLE_EXTEND_LOWQ;
3385     } else {
3386       open = PAIRED_OPEN_LOWQ;
3387       extend = PAIRED_EXTEND_LOWQ;
3388     }
3389 #if 0
3390     if (splicingp == false) {
3391       canonical_reward = 0;
3392     } else if (finalp == true) {
3393       canonical_reward = FINAL_CANONICAL_INTRON_LOWQ;
3394     } else {
3395       canonical_reward = CANONICAL_INTRON_LOWQ;
3396     }
3397 #endif
3398   }
3399 
3400   if (rlength > dynprogL->max_rlength || glengthL > dynprogL->max_glength) {
3401     debug(printf("rlength %d or glengthL %d is too long.  Returning NULL\n",rlength,glengthL));
3402     *new_leftgenomepos = goffsetL-1;
3403     *new_rightgenomepos = rev_goffsetR+1;
3404     *exonhead = rev_roffset = roffset+rlength-1;
3405 #ifndef NOGAPHOLDER
3406     /*
3407     queryjump = rev_roffset - roffset + 1;
3408     genomejump = rev_goffsetR - goffsetL + 1;
3409     pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,false);
3410     */
3411 #endif
3412     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3413     *finalscore = NEG_INFINITY_32;
3414     *introntype = NONINTRON;
3415     return (List_T) NULL;
3416   }
3417 
3418   if (rlength > dynprogR->max_rlength || glengthR > dynprogR->max_glength) {
3419     debug(printf("rlength %d or glengthR %d is too long.  Returning NULL\n",rlength,glengthR));
3420     *new_leftgenomepos = goffsetL-1;
3421     *new_rightgenomepos = rev_goffsetR+1;
3422     *exonhead = rev_roffset = roffset+rlength-1;
3423 #ifndef NOGAPHOLDER
3424     /*
3425     queryjump = rev_roffset - roffset + 1;
3426     genomejump = rev_goffsetR - goffsetL + 1;
3427     pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,false);
3428     */
3429 #endif
3430     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3431     *finalscore = NEG_INFINITY_32;
3432     *introntype = NONINTRON;
3433     return (List_T) NULL;
3434   }
3435 
3436   rev_rsequence = &(rsequence[rlength-1]);
3437   rev_rsequenceuc = &(rsequenceuc[rlength-1]);
3438   debug(printf("At query offset %d-%d, %.*s\n",roffset,roffset+rlength-1,rlength,rsequence));
3439 
3440   rev_roffset = roffset+rlength-1;
3441 
3442   gsequenceL = (char *) MALLOCA((glengthL+1) * sizeof(char));
3443   gsequenceL_alt = (char *) MALLOCA((glengthL+1) * sizeof(char));
3444   rev_gsequenceR = (char *) MALLOCA((glengthR+1) * sizeof(char));
3445   rev_gsequenceR_alt = (char *) MALLOCA((glengthR+1) * sizeof(char));
3446 
3447   if (watsonp) {
3448     Genome_get_segment_blocks_right(gsequenceL,gsequenceL_alt,/*left*/chroffset+goffsetL,
3449                                     glengthL,chrhigh,/*revcomp*/false);
3450     Genome_get_segment_blocks_left(rev_gsequenceR,rev_gsequenceR_alt,/*right*/chroffset+rev_goffsetR+1,
3451                                    glengthR,chroffset,/*revcomp*/false);
3452   } else {
3453     Genome_get_segment_blocks_left(gsequenceL,gsequenceL_alt,/*right*/chrhigh-goffsetL+1,
3454                                    glengthL,chroffset,/*revcomp*/true);
3455     Genome_get_segment_blocks_right(rev_gsequenceR,rev_gsequenceR_alt,/*left*/chrhigh-rev_goffsetR,
3456                                     glengthR,chrhigh,/*revcomp*/true);
3457   }
3458   if (gsequenceL[0] == '\0' || rev_gsequenceR[0] == '\0') {
3459     FREEA(rev_gsequenceR_alt);
3460     FREEA(rev_gsequenceR);
3461     FREEA(gsequenceL_alt);
3462     FREEA(gsequenceL);
3463     *finalscore = NEG_INFINITY_32;
3464     return (List_T) NULL;
3465   }
3466 
3467 
3468   debug(printf("At genomic offset %d-%d, %.*s\n",goffsetL,goffsetL+glengthL-1,glengthL,gsequenceL));
3469   debug(printf("At genomic offset %d-%d, %.*s\n",rev_goffsetR-glengthR+1,rev_goffsetR,glengthR,rev_gsequenceR));
3470 
3471   /* In low-identity alignments, the simple procedure can
3472      lead to multiple mismatches, which will invalidate the intron
3473      because of its neighborhood */
3474   if (finalp == false && defect_rate < DEFECT_MEDQ &&
3475       (pairs = genome_gap_simple(&(*finalscore),&(*introntype),&(*new_leftgenomepos),&(*new_rightgenomepos),
3476 				 &(*left_prob),&(*right_prob),&(*exonhead),&(*nmatches),&(*nmismatches),
3477 				 rsequence,rsequenceuc,rev_rsequence,rev_rsequenceuc,
3478 				 rlength,gsequenceL,gsequenceL_alt,
3479 				 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3480 				 roffset,rev_roffset,goffsetL,rev_goffsetR,
3481 				 chrnum,chroffset,chrhigh,pairpool,mismatchtype,
3482 				 cdna_direction,watsonp,genestrand,*dynprogindex,halfp)) != NULL) {
3483     debug(printf("simple procedure worked\n"));
3484 
3485     FREEA(rev_gsequenceR_alt);
3486     FREEA(rev_gsequenceR);
3487     FREEA(gsequenceL_alt);
3488     FREEA(gsequenceL);
3489 
3490     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3491     return pairs;		/* Already reversed */
3492   }
3493 
3494 
3495 #if defined(HAVE_SSE2)
3496   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
3497   if (rlength < use8p_size[mismatchtype] || (glengthL < use8p_size[mismatchtype] && glengthR < use8p_size[mismatchtype])) {
3498     use8p = true;
3499   } else {
3500     use8p = false;
3501   }
3502 #endif
3503 
3504 #if defined(HAVE_SSE2)
3505   if (use8p == true) {
3506     Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3507     debug3(printf("Computing matrix8L_upper\n"));
3508     matrix8L_upper = Dynprog_simd_8_upper(&directions8L_upper_nogap,&directions8L_upper_Egap,dynprogL,
3509 					  rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3510 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3511 					  goffsetL,chroffset,chrhigh,watsonp,
3512 #endif
3513 					  mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
3514 
3515     debug3(printf("Computing matrix8L_lower\n"));
3516     matrix8L_lower = Dynprog_simd_8_lower(&directions8L_lower_nogap,&directions8L_lower_Egap,dynprogL,
3517 					  rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3518 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3519 					  goffsetL,chroffset,chrhigh,watsonp,
3520 #endif
3521 					  mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
3522 
3523 
3524     Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3525     debug3(printf("Computing matrix8R_upper\n"));
3526     matrix8R_upper = Dynprog_simd_8_upper(&directions8R_upper_nogap,&directions8R_upper_Egap,dynprogR,
3527 					  rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3528 					  rlength,glengthR,
3529 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3530 					  rev_goffsetR,chroffset,chrhigh,watsonp,
3531 #endif
3532 					  mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
3533 
3534     debug3(printf("Computing matrix8R_lower\n"));
3535     matrix8R_lower = Dynprog_simd_8_lower(&directions8R_lower_nogap,&directions8R_lower_Egap,dynprogR,
3536 					  rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3537 					  rlength,glengthR,
3538 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3539 					  rev_goffsetR,chroffset,chrhigh,watsonp,
3540 #endif
3541 					  mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
3542 
3543     if ((*finalscore = bridge_intron_gap_8_ud(&bestrL,&bestrR,&bestcL,&bestcR,
3544 					      &(*introntype),&(*left_prob),&(*right_prob),
3545 					      matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
3546 					      directions8L_upper_nogap,directions8L_lower_nogap,
3547 					      directions8R_upper_nogap,directions8R_lower_nogap,
3548 					      gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3549 					      goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3550 					      cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
3551 					      goffsetL,rev_goffsetR,
3552 					      chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3553       FREEA(rev_gsequenceR_alt);
3554       FREEA(rev_gsequenceR);
3555       FREEA(gsequenceL_alt);
3556       FREEA(gsequenceL);
3557 
3558       return (List_T) NULL;
3559 
3560     } else {
3561       *new_leftgenomepos = goffsetL+(bestcL-1);
3562       *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3563       debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3564 
3565       *exonhead = rev_roffset-(bestrR-1);
3566 
3567       if (bestcR >= bestrR) {
3568 	pairs = Dynprog_traceback_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3569 					  directions8R_upper_nogap,directions8R_upper_Egap,
3570 					  bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3571 					  &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3572 					  rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3573 					  chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3574       } else {
3575 	pairs = Dynprog_traceback_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3576 					  directions8R_lower_nogap,directions8R_lower_Egap,
3577 					  bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3578 					  &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3579 					  rev_roffset,rev_goffsetR,pairpool,genestrand,/*revp*/true,
3580 					  *dynprogindex);
3581       }
3582       pairs = List_reverse(pairs);
3583 
3584       /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3585       /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3586       /* No need to revise queryjump or genomejump, because the above
3587 	 coordinates are internal to the gap. */
3588 
3589       debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3590 		   (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3591 		   Intron_type_string(*introntype),*left_prob,*right_prob));
3592 
3593 #ifndef NOGAPHOLDER
3594       pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3595 				      /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3596 				      /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3597       gappair = (Pair_T) pairs->first;
3598       gappair->introntype = *introntype;
3599       gappair->donor_prob = *left_prob;
3600       gappair->acceptor_prob = *right_prob;
3601 #endif
3602 
3603       if (bestcL >= bestrL) {
3604 	pairs = Dynprog_traceback_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3605 					  directions8L_upper_nogap,directions8L_upper_Egap,
3606 					  bestrL,bestcL,rsequence,rsequenceuc,gsequenceL,gsequenceL_alt,
3607 					  roffset,goffsetL,pairpool,/*revp*/false,
3608 					  chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3609       } else {
3610 	pairs = Dynprog_traceback_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3611 					  directions8L_lower_nogap,directions8L_lower_Egap,
3612 					  bestrL,bestcL,rsequence,rsequenceuc,gsequenceL,gsequenceL_alt,
3613 					  roffset,goffsetL,pairpool,genestrand,/*revp*/false,
3614 					  *dynprogindex);
3615       }
3616 
3617       if (List_length(pairs) == 1) {
3618 	/* Only a gap inserted */
3619 	pairs = (List_T) NULL;
3620       }
3621 
3622       FREEA(rev_gsequenceR_alt);
3623       FREEA(rev_gsequenceR);
3624       FREEA(gsequenceL_alt);
3625       FREEA(gsequenceL);
3626 
3627       debug(printf("End of dynprog genome gap\n"));
3628 
3629       *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3630       debug3(Pair_dump_list(pairs,true));
3631       debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3632 
3633       if (Pair_maxnegscore(pairs) < -10) {
3634 	*finalscore = -100;	/* Otherwise calling procedure will act on finalscore */
3635 	return (List_T) NULL;
3636       } else {
3637 	return List_reverse(pairs);
3638       }
3639     }
3640 
3641   } else {
3642     /* Use 16-mers */
3643     Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3644     debug3(printf("Computing matrix16L_upper\n"));
3645     matrix16L_upper = Dynprog_simd_16_upper(&directions16L_upper_nogap,&directions16L_upper_Egap,dynprogL,
3646 					    rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3647 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3648 					    goffsetL,chroffset,chrhigh,watsonp,
3649 #endif
3650 					    mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
3651 
3652     debug3(printf("Computing matrix16L_lower\n"));
3653     matrix16L_lower = Dynprog_simd_16_lower(&directions16L_lower_nogap,&directions16L_lower_Egap,dynprogL,
3654 					    rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3655 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3656 					    goffsetL,chroffset,chrhigh,watsonp,
3657 #endif
3658 					    mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
3659 
3660     Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3661     debug3(printf("Computing matrix16R_upper\n"));
3662     matrix16R_upper = Dynprog_simd_16_upper(&directions16R_upper_nogap,&directions16R_upper_Egap,dynprogR,
3663 					    rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3664 					    rlength,glengthR,
3665 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3666 					    rev_goffsetR,chroffset,chrhigh,watsonp,
3667 #endif
3668 					    mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
3669 
3670     debug3(printf("Computing matrix16R_lower\n"));
3671     matrix16R_lower = Dynprog_simd_16_lower(&directions16R_lower_nogap,&directions16R_lower_Egap,dynprogR,
3672 					    rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3673 					    rlength,glengthR,
3674 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3675 					    rev_goffsetR,chroffset,chrhigh,watsonp,
3676 #endif
3677 					    mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
3678 
3679     if ((*finalscore = bridge_intron_gap_16_ud(&bestrL,&bestrR,&bestcL,&bestcR,
3680 					       &(*introntype),&(*left_prob),&(*right_prob),
3681 					       matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
3682 					       directions16L_upper_nogap,directions16L_lower_nogap,
3683 					       directions16R_upper_nogap,directions16R_lower_nogap,
3684 					       gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3685 					       goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3686 					       cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
3687 					       goffsetL,rev_goffsetR,
3688 					       chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3689 
3690       FREEA(rev_gsequenceR_alt);
3691       FREEA(rev_gsequenceR);
3692       FREEA(gsequenceL_alt);
3693       FREEA(gsequenceL);
3694 
3695       return (List_T) NULL;
3696 
3697     } else {
3698       *new_leftgenomepos = goffsetL+(bestcL-1);
3699       *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3700       debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3701 
3702       *exonhead = rev_roffset-(bestrR-1);
3703 
3704       if (bestcR >= bestrR) {
3705 	pairs = Dynprog_traceback_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3706 					   directions16R_upper_nogap,directions16R_upper_Egap,
3707 					   bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3708 					   &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3709 					   rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3710 					   chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3711       } else {
3712 	pairs = Dynprog_traceback_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3713 					   directions16R_lower_nogap,directions16R_lower_Egap,
3714 					   bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3715 					   &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3716 					   rev_roffset,rev_goffsetR,pairpool,genestrand,/*revp*/true,
3717 					   *dynprogindex);
3718       }
3719       pairs = List_reverse(pairs);
3720 
3721       /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3722       /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3723       /* No need to revise queryjump or genomejump, because the above
3724 	 coordinates are internal to the gap. */
3725 
3726       debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3727 		   (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3728 		   Intron_type_string(*introntype),*left_prob,*right_prob));
3729 #ifndef NOGAPHOLDER
3730       pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3731 				      /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3732 				      /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3733       gappair = (Pair_T) pairs->first;
3734       gappair->introntype = *introntype;
3735       gappair->donor_prob = *left_prob;
3736       gappair->acceptor_prob = *right_prob;
3737 #endif
3738 
3739       if (bestcL >= bestrL) {
3740 	pairs = Dynprog_traceback_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3741 					   directions16L_upper_nogap,directions16L_upper_Egap,
3742 					   bestrL,bestcL,rsequence,rsequenceuc,
3743 					   gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,/*revp*/false,
3744 					   chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3745       } else {
3746 	pairs = Dynprog_traceback_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3747 					   directions16L_lower_nogap,directions16L_lower_Egap,
3748 					   bestrL,bestcL,rsequence,rsequenceuc,
3749 					   gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,
3750 					   genestrand,/*revp*/false,*dynprogindex);
3751       }
3752 
3753       if (List_length(pairs) == 1) {
3754 	/* Only a gap inserted */
3755 	pairs = (List_T) NULL;
3756       }
3757 
3758       FREEA(rev_gsequenceR_alt);
3759       FREEA(rev_gsequenceR);
3760       FREEA(gsequenceL_alt);
3761       FREEA(gsequenceL);
3762 
3763       debug(printf("End of dynprog genome gap\n"));
3764 
3765       *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3766       debug3(Pair_dump_list(pairs,true));
3767       debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3768       if (Pair_maxnegscore(pairs) < -10) {
3769 	*finalscore = -100;	/* Otherwise calling procedure will act on finalscore */
3770 	return (List_T) NULL;
3771       } else {
3772 	return List_reverse(pairs);
3773       }
3774     }
3775 
3776   }
3777 
3778 #else
3779   /* Non-SIMD methods */
3780   Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3781   debug3(printf("Computing matrixL\n"));
3782   matrixL = Dynprog_standard(&directionsL_nogap,&directionsL_Egap,&directionsL_Fgap,dynprogL,
3783 			     rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3784 			     goffsetL,chroffset,chrhigh,watsonp,
3785 			     mismatchtype,open,extend,lbandL,ubandL,
3786 			     jump_late_p,/*revp*/false,/*saturation*/NEG_INFINITY_INT,
3787 			     /*upperp*/true,/*lowerp*/true);
3788 
3789   Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3790   debug3(printf("Computing matrixR\n"));
3791   matrixR = Dynprog_standard(&directionsR_nogap,&directionsR_Egap,&directionsR_Fgap,dynprogR,
3792 			     rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3793 			     rlength,glengthR,rev_goffsetR,chroffset,chrhigh,watsonp,
3794 			     mismatchtype,open,extend,lbandL,ubandR,
3795 			     /*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT,
3796 			     /*upperp*/true,/*lowerp*/true);
3797 
3798   if ((*finalscore = bridge_intron_gap(&bestrL,&bestrR,&bestcL,&bestcR,
3799 				       &(*introntype),&(*left_prob),&(*right_prob),
3800 				       matrixL,matrixR,directionsL_nogap,directionsR_nogap,
3801 				       gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3802 				       goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3803 				       cdna_direction,watsonp,extraband_paired,goffsetL,rev_goffsetR,
3804 				       chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3805 
3806     FREEA(gsequenceL_alt);
3807     FREEA(rev_gsequenceR_alt);
3808     FREEA(gsequenceL);
3809     FREEA(rev_gsequenceR);
3810 
3811     return (List_T) NULL;
3812 
3813   } else {
3814     *new_leftgenomepos = goffsetL+(bestcL-1);
3815     *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3816     debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3817 
3818     *exonhead = rev_roffset-(bestrR-1);
3819 
3820     pairs = Dynprog_traceback_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3821 				  directionsR_nogap,directionsR_Egap,directionsR_Fgap,bestrR,bestcR,
3822 				  rev_rsequence,rev_rsequenceuc,
3823 				  &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3824 				  rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3825 				  chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3826     pairs = List_reverse(pairs);
3827 
3828     /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3829     /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3830     /* No need to revise queryjump or genomejump, because the above
3831        coordinates are internal to the gap. */
3832 
3833     debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3834 		 (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3835 		 Intron_type_string(*introntype),*left_prob,*right_prob));
3836 #ifndef NOGAPHOLDER
3837     pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3838 				    /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3839 				    /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3840     gappair = (Pair_T) pairs->first;
3841     gappair->introntype = *introntype;
3842     gappair->donor_prob = *left_prob;
3843     gappair->acceptor_prob = *right_prob;
3844 #endif
3845 
3846     pairs = Dynprog_traceback_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3847 				  directionsL_nogap,directionsL_Egap,directionsL_Fgap,bestrL,bestcL,
3848 				  rsequence,rsequenceuc,
3849 				  gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,/*revp*/false,
3850 				  chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3851 
3852     if (List_length(pairs) == 1) {
3853       /* Only a gap inserted */
3854       pairs = (List_T) NULL;
3855     }
3856 
3857     FREEA(gsequenceL_alt);
3858     FREEA(rev_gsequenceR_alt);
3859     FREEA(gsequenceL);
3860     FREEA(rev_gsequenceR);
3861 
3862     debug(printf("End of dynprog genome gap\n"));
3863 
3864     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3865     debug3(Pair_dump_list(pairs,true));
3866     debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3867     if (Pair_maxnegscore(pairs) < -10) {
3868       *finalscore = -100;	/* Otherwise calling procedure will act on finalscore */
3869       return (List_T) NULL;
3870     } else {
3871       return List_reverse(pairs);
3872     }
3873   }
3874 #endif
3875 
3876 }
3877 
3878 
3879