1 static char rcsid[] = "$Id: dynprog_genome.c 218161 2019-01-17 05:57:37Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "dynprog_genome.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h> /* For ceil, log, pow */
11 #include <ctype.h> /* For tolower */
12
13
14 #include "bool.h"
15 #include "except.h"
16 #include "assert.h"
17 #include "mem.h"
18 #include "comp.h"
19 #include "pair.h"
20 #include "pairdef.h"
21 #include "intron.h"
22 #include "complement.h"
23 #include "maxent.h"
24 #include "maxent_hr.h"
25 #include "dynprog.h" /* For parameters */
26 #include "dynprog_simd.h"
27
28
29 #ifdef DEBUG
30 #define debug(x) x
31 #else
32 #define debug(x)
33 #endif
34
35 /* Prints all winning bridge scores */
36 #ifdef DEBUG3
37 #define debug3(x) x
38 #else
39 #define debug3(x)
40 #endif
41
42 /* Prints all losing bridge scores */
43 #ifdef DEBUG3A
44 #define debug3a(x) x
45 #else
46 #define debug3a(x)
47 #endif
48
49 /* Known splicing */
50 #ifdef DEBUG5
51 #define debug5(x) x
52 #else
53 #define debug5(x)
54 #endif
55
56 /* Getting genomic nt */
57 #ifdef DEBUG8
58 #define debug8(x) x
59 #else
60 #define debug8(x)
61 #endif
62
63 /* Splice site probabilities */
64 #ifdef DEBUG9
65 #define debug9(x) x
66 #else
67 #define debug9(x)
68 #endif
69
70 /* print_vector */
71 #ifdef DEBUG15
72 #define debug15(x) x
73 #else
74 #define debug15(x)
75 #endif
76
77
78 #define USE_SCOREI 1
79 /* #define USE_WEAK_SCOREI 1 */
80
81 #define PROB_CEILING 0.85
82 #define PROB_FLOOR 0.50
83 /* #define PROB_BAD 0.50 */
84
85 /* Prefer alternate intron to other non-canonicals, but don't
86 introduce mismatches or gaps to identify */
87 #ifdef USE_WEAK_SCOREI
88 #define CANONICAL_INTRON 6
89 #define GCAG_INTRON 4
90 #define ATAC_INTRON 2
91 #define FINAL_GCAG_INTRON 4 /* Amount above regular should approximately
92 match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
93 #define FINAL_ATAC_INTRON 2
94
95 #else
96 /* Values were 15, 12, 20, and 12 */
97 #define GCAG_INTRON 8
98 #define ATAC_INTRON 4
99 #define FINAL_GCAG_INTRON 10 /* Amount above regular should approximately
100 match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
101 #define FINAL_ATAC_INTRON 8
102 #endif
103
104
105 /* Don't want to make too high, otherwise we will harm evaluation of
106 dual introns vs. single intron */
107 /* Values were 10, 16, 22 */
108 #define CANONICAL_INTRON_HIGHQ 14 /* GT-AG */
109 #define CANONICAL_INTRON_MEDQ 18
110 #define CANONICAL_INTRON_LOWQ 22
111
112 /* Values were 30, 36, 42 */
113 #define FINAL_CANONICAL_INTRON_HIGHQ 16 /* GT-AG */
114 #define FINAL_CANONICAL_INTRON_MEDQ 20
115 #define FINAL_CANONICAL_INTRON_LOWQ 24
116
117 #define KNOWN_SPLICESITE_REWARD 20
118
119
120 #define T Dynprog_T
121
122 static bool novelsplicingp;
123
124 static IIT_T splicing_iit;
125 static int *splicing_divint_crosstable;
126 static int donor_typeint;
127 static int acceptor_typeint;
128
129 static int intron_score_array_sense_final[256];
130 static int intron_score_array_antisense_final[256];
131 static int intron_score_array_either_final[256];
132 static int intron_score_array_sense_prelim[256];
133 static int intron_score_array_antisense_prelim[256];
134 static int intron_score_array_either_prelim[256];
135
136
137 static void
intron_score_setup()138 intron_score_setup () {
139 memset(intron_score_array_sense_final,0,256*sizeof(int));
140 memset(intron_score_array_antisense_final,0,256*sizeof(int));
141 memset(intron_score_array_either_final,0,256*sizeof(int));
142 memset(intron_score_array_sense_prelim,0,256*sizeof(int));
143 memset(intron_score_array_antisense_prelim,0,256*sizeof(int));
144 memset(intron_score_array_either_prelim,0,256*sizeof(int));
145
146 /* Not handling PMAP yet */
147
148 intron_score_array_sense_final[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
149 intron_score_array_sense_final[GCAG_FWD] = FINAL_GCAG_INTRON;
150 intron_score_array_sense_final[ATAC_FWD] = FINAL_ATAC_INTRON;
151
152 intron_score_array_sense_prelim[GTAG_FWD] = CANONICAL_INTRON_HIGHQ;
153 intron_score_array_sense_prelim[GCAG_FWD] = GCAG_INTRON;
154 intron_score_array_sense_prelim[ATAC_FWD] = ATAC_INTRON;
155
156
157 intron_score_array_antisense_final[GTAG_REV] = FINAL_CANONICAL_INTRON_HIGHQ;
158 intron_score_array_antisense_final[GCAG_REV] = FINAL_GCAG_INTRON;
159 intron_score_array_antisense_final[ATAC_REV] = FINAL_ATAC_INTRON;
160
161 intron_score_array_antisense_prelim[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
162 intron_score_array_antisense_prelim[GCAG_REV] = GCAG_INTRON;
163 intron_score_array_antisense_prelim[ATAC_REV] = ATAC_INTRON;
164
165
166 intron_score_array_either_final[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
167 intron_score_array_either_final[GCAG_FWD] = FINAL_GCAG_INTRON;
168 intron_score_array_either_final[ATAC_FWD] = FINAL_ATAC_INTRON;
169 intron_score_array_either_final[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
170 intron_score_array_either_final[GCAG_REV] = FINAL_GCAG_INTRON;
171 intron_score_array_either_final[ATAC_REV] = FINAL_ATAC_INTRON;
172
173 intron_score_array_either_prelim[GTAG_FWD] = FINAL_CANONICAL_INTRON_HIGHQ;
174 intron_score_array_either_prelim[GCAG_FWD] = GCAG_INTRON;
175 intron_score_array_either_prelim[ATAC_FWD] = ATAC_INTRON;
176 intron_score_array_either_prelim[GTAG_REV] = CANONICAL_INTRON_HIGHQ;
177 intron_score_array_either_prelim[GCAG_REV] = GCAG_INTRON;
178 intron_score_array_either_prelim[ATAC_REV] = ATAC_INTRON;
179
180 return;
181 }
182
183
184
185 void
Dynprog_genome_setup(bool novelsplicingp_in,IIT_T splicing_iit_in,int * splicing_divint_crosstable_in,int donor_typeint_in,int acceptor_typeint_in)186 Dynprog_genome_setup (bool novelsplicingp_in,
187 IIT_T splicing_iit_in, int *splicing_divint_crosstable_in,
188 int donor_typeint_in, int acceptor_typeint_in) {
189 novelsplicingp = novelsplicingp_in;
190
191 splicing_iit = splicing_iit_in;
192 splicing_divint_crosstable = splicing_divint_crosstable_in;
193 donor_typeint = donor_typeint_in;
194 acceptor_typeint = acceptor_typeint_in;
195
196 intron_score_setup();
197
198 return;
199 }
200
201
202 #if 0
203 /************************************************************************
204 * get_genomic_nt
205 ************************************************************************/
206
207 static char complCode[128] = COMPLEMENT_LC;
208
209 static char
210 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
211 bool watsonp) {
212 char c2, c2_alt;
213 Univcoord_T pos;
214
215 #if 0
216 /* If the read has a deletion, then we will extend beyond 0 or genomiclength, so do not restrict. */
217 if (genomicpos < 0) {
218 return '*';
219
220 } else if (genomicpos >= genomiclength) {
221 return '*';
222
223 }
224 #endif
225
226 if (watsonp) {
227 if ((pos = chroffset + genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
228 *g_alt = '*';
229 return '*';
230
231 } else if (pos >= chrhigh) {
232 *g_alt = '*';
233 return '*';
234
235 #if 0
236 } else if (genome) {
237 /* Not necessary, because Genome_get_char_blocks should work */
238 debug8(printf("At %u, genomicnt is %c\n",
239 genomicpos,Genome_get_char(genome,pos)));
240 return Genome_get_char(genome,pos);
241 #endif
242
243 } else {
244 /* GMAP with user-supplied genomic segment */
245 debug8(printf("At %u, genomicnt is %c\n",
246 genomicpos,Genome_get_char_blocks(pos)));
247 return Genome_get_char_blocks(&(*g_alt),pos);
248 }
249
250 } else {
251 if ((pos = chrhigh - genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
252 *g_alt = '*';
253 return '*';
254
255 } else if (pos >= chrhigh) {
256 *g_alt = '*';
257 return '*';
258
259 #if 0
260 } else if (genome) {
261 /* Not necessary, because Genome_get_char_blocks should work */
262 c2 = Genome_get_char(genome,pos);
263 #endif
264
265 } else {
266 /* GMAP with user-supplied genomic segment */
267 c2 = Genome_get_char_blocks(&c2_alt,pos);
268 }
269 debug8(printf("At %u, genomicnt is %c\n",genomicpos,complCode[(int) c2]));
270 *g_alt = complCode[(int) c2_alt];
271 return complCode[(int) c2];
272 }
273 }
274 #endif
275
276
277 #if 0
278 /* Too slow for numerous calls. Using arrays above instead */
279 static int
280 intron_score (int *introntype, int leftdi, int rightdi, int cdna_direction, int canonical_reward,
281 bool finalp) {
282 int scoreI;
283
284 #ifdef USE_WEAK_SCOREI
285 canonical_reward = CANONICAL_INTRON;
286 #endif
287
288 #ifdef PMAP
289 if ((*introntype = leftdi & rightdi) == NONINTRON) {
290 scoreI = 0.0;
291 } else {
292 switch (*introntype) {
293 case GTAG_FWD: scoreI = canonical_reward; break;
294 case GCAG_FWD: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
295 case ATAC_FWD: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
296 default: *introntype = NONINTRON; scoreI = 0.0;
297 }
298 }
299 #else
300 if ((*introntype = leftdi & rightdi) == NONINTRON) {
301 scoreI = 0.0;
302 } else if (cdna_direction > 0) {
303 switch (*introntype) {
304 case GTAG_FWD: scoreI = canonical_reward; break;
305 case GCAG_FWD: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
306 case ATAC_FWD: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
307 default: *introntype = NONINTRON; scoreI = 0.0;
308 }
309 } else if (cdna_direction < 0) {
310 switch (*introntype) {
311 case GTAG_REV: scoreI = canonical_reward; break;
312 case GCAG_REV: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
313 case ATAC_REV: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
314 default: *introntype = NONINTRON; scoreI = 0.0;
315 }
316 } else {
317 switch (*introntype) {
318 case GTAG_FWD: case GTAG_REV: scoreI = canonical_reward; break;
319 case GCAG_FWD: case GCAG_REV: scoreI = finalp == true ? FINAL_GCAG_INTRON : GCAG_INTRON; break;
320 case ATAC_FWD: case ATAC_REV: scoreI = finalp == true ? FINAL_ATAC_INTRON : ATAC_INTRON; break;
321 default: *introntype = NONINTRON; scoreI = 0.0;
322 }
323 }
324 #endif
325
326 return scoreI;
327 }
328 #endif
329
330
331
332 static void
get_splicesite_probs(double * left_prob,double * right_prob,int cL,int cR,int * left_known,int * right_known,Univcoord_T leftoffset,Univcoord_T rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,int cdna_direction,bool watsonp)333 get_splicesite_probs (double *left_prob, double *right_prob, int cL, int cR,
334 int *left_known, int *right_known, Univcoord_T leftoffset, Univcoord_T rightoffset,
335 Univcoord_T chroffset, Univcoord_T chrhigh, int cdna_direction, bool watsonp) {
336 Univcoord_T splicesitepos;
337
338 if (left_known[cL] > 0) {
339 debug9(printf("left position is known, so prob is 1.0\n"));
340 *left_prob = 1.0;
341
342 } else if (watsonp == true) {
343 splicesitepos = chroffset + leftoffset + cL;
344 if (cdna_direction > 0) {
345 *left_prob = Maxent_hr_donor_prob(splicesitepos,chroffset);
346 debug9(printf("1. donor splicesitepos is %u, prob %f, known %d\n",
347 splicesitepos,*left_prob,left_known[cL]));
348
349 } else {
350 *left_prob = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
351 debug9(printf("2. antiacceptor splicesitepos is %u, prob %f, known %d\n",
352 splicesitepos,*left_prob,left_known[cL]));
353
354 }
355 } else {
356 splicesitepos = chrhigh - leftoffset - cL + 1;
357 if (cdna_direction > 0) {
358 *left_prob = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
359 debug9(printf("3. antidonor splicesitepos is %u, prob %f, known %d\n",
360 splicesitepos,*left_prob,left_known[cL]));
361
362 } else {
363 *left_prob = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
364 debug9(printf("4. acceptor splicesitepos is %u, prob %f, known %d\n",
365 splicesitepos,*left_prob,left_known[cL]));
366 }
367 }
368
369 if (right_known[cR] > 0) {
370 debug9(printf("right position is known, so prob is 1.0\n"));
371 *right_prob = 1.0;
372
373 } else if (watsonp == true) {
374 splicesitepos = chroffset + rightoffset - cR + 1;
375 if (cdna_direction > 0) {
376 *right_prob = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
377 debug9(printf("5. acceptor splicesitepos is %u, prob %f, known %d\n",
378 splicesitepos,*right_prob,right_known[cR]));
379 } else {
380 *right_prob = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
381 debug9(printf("6. antidonor splicesitepos is %u, prob %f, known %d\n",
382 splicesitepos,*right_prob,right_known[cR]));
383
384 }
385 } else {
386 splicesitepos = chrhigh - rightoffset + cR;
387 if (cdna_direction > 0) {
388 *right_prob = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
389 debug9(printf("7. antiacceptor splicesitepos is %u, prob %f, known %d\n",
390 splicesitepos,*right_prob,right_known[cR]));
391
392 } else {
393 *right_prob = Maxent_hr_donor_prob(splicesitepos,chroffset);
394 debug9(printf("8. donor splicesitepos is %u, prob %f, known %d\n",
395 splicesitepos,*right_prob,right_known[cR]));
396 }
397 }
398
399 return;
400 }
401
402
403 static void
get_known_splicesites(int * left_known,int * right_known,int glengthL,int glengthR,int leftoffset,int rightoffset,int cdna_direction,bool watsonp,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh)404 get_known_splicesites (int *left_known, int *right_known, int glengthL, int glengthR,
405 int leftoffset, int rightoffset, int cdna_direction, bool watsonp,
406 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh) {
407
408 int *matches, nmatches, i;
409 Univcoord_T splicesitepos;
410
411 if (splicing_iit != NULL && donor_typeint >= 0 && acceptor_typeint >= 0) {
412 /* Handle known splicing, splice site level */
413 if (watsonp == true) {
414 if (cdna_direction > 0) {
415 /* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
416 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
417 leftoffset+1,leftoffset+glengthL-2,donor_typeint,/*sign*/+1,/*sortp*/false);
418 for (i = 0; i < nmatches; i++) {
419 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
420 debug5(printf("1. Found known donor at %u\n",splicesitepos));
421 left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
422 }
423 FREE(matches);
424
425 /* splicesitepos = rightoffset - cR + 1; cR = 0 to < glengthR - 1 */
426 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
427 rightoffset-glengthR+4,rightoffset+1,acceptor_typeint,/*sign*/+1,/*sortp*/false);
428 for (i = 0; i < nmatches; i++) {
429 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
430 debug5(printf("2. Found known acceptor at %u\n",splicesitepos));
431 right_known[rightoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
432 }
433 FREE(matches);
434
435 } else {
436 /* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
437 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
438 leftoffset+1,leftoffset+glengthL-2,acceptor_typeint,/*sign*/-1,/*sortp*/false);
439 for (i = 0; i < nmatches; i++) {
440 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
441 debug5(printf("3. Found known antiacceptor at %u\n",splicesitepos));
442 left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
443 }
444 FREE(matches);
445
446 /* splicesitepos = rightoffset - cR + 1; cR = 0 to < glengthR - 1 */
447 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
448 rightoffset-glengthR+4,rightoffset+1,donor_typeint,/*sign*/-1,/*sortp*/false);
449 for (i = 0; i < nmatches; i++) {
450 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
451 debug5(printf("4. Found known antidonor at %u\n",splicesitepos));
452 right_known[rightoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
453 }
454 FREE(matches);
455
456 }
457
458 } else {
459 if (cdna_direction > 0) {
460 /* splicesitepos = (chrhigh - chroffset) - leftoffset - cL + 1; cL = 0 to < glengthL - 1 */
461 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
462 (chrhigh - chroffset) - leftoffset - glengthL + 4,
463 (chrhigh - chroffset) - leftoffset + 1,
464 donor_typeint,/*sign*/-1,/*sortp*/false);
465 for (i = 0; i < nmatches; i++) {
466 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
467 debug5(printf("5. Found known antidonor at %u\n",splicesitepos));
468 left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
469 }
470 FREE(matches);
471
472 /* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
473 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
474 (chrhigh - chroffset) - rightoffset + 1,
475 (chrhigh - chroffset) - rightoffset + glengthR - 2,
476 acceptor_typeint,/*sign*/-1,/*sortp*/false);
477 for (i = 0; i < nmatches; i++) {
478 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
479 debug5(printf("6. Found known antiacceptor at %u\n",splicesitepos));
480 right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
481 }
482 FREE(matches);
483
484 } else {
485 /* splicesitepos = (chrhigh - chroffset) - leftoffset - cL + 1; cL = 0 to < glengthL - 1 */
486 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
487 (chrhigh - chroffset) - leftoffset - glengthL + 4,
488 (chrhigh - chroffset) - leftoffset + 1,
489 acceptor_typeint,/*sign*/+1,/*sortp*/false);
490 for (i = 0; i < nmatches; i++) {
491 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
492 debug5(printf("7. Found known acceptor at %u\n",splicesitepos));
493 left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 1] = KNOWN_SPLICESITE_REWARD;
494 }
495 FREE(matches);
496
497 /* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
498 matches = IIT_get_typed_signed_with_divno(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
499 (chrhigh - chroffset) - rightoffset + 1,
500 (chrhigh - chroffset) - rightoffset + glengthR - 2,
501 donor_typeint,/*sign*/+1,/*sortp*/false);
502 for (i = 0; i < nmatches; i++) {
503 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
504 debug5(printf("8. Found known donor at %u\n",splicesitepos));
505 right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
506 }
507 FREE(matches);
508
509 }
510 }
511
512 } else if (splicing_iit != NULL) {
513 /* Handle known splicing, intron level */
514 if (watsonp == true) {
515 if (cdna_direction > 0) {
516 /* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
517 matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
518 leftoffset,leftoffset+glengthL-2,/*sign*/+1);
519 for (i = 0; i < nmatches; i++) {
520 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
521 debug5(printf("1. Found known donor at %u\n",splicesitepos));
522 left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
523 }
524 FREE(matches);
525
526 /* splicesitepos+1U = rightoffset - cR + 2; cR = 0 to < glengthR - 1 */
527 matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
528 rightoffset-glengthR+4,rightoffset+2,/*sign*/+1);
529 for (i = 0; i < nmatches; i++) {
530 splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
531 debug5(printf("2. Found known acceptor at %u\n",splicesitepos));
532 right_known[rightoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
533 }
534 FREE(matches);
535
536 } else {
537 /* splicesitepos = leftoffset + cL; cL = 0 to < glengthL - 1 */
538 matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
539 leftoffset,leftoffset+glengthL-2,/*sign*/-1);
540 for (i = 0; i < nmatches; i++) {
541 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
542 debug5(printf("3. Found known antiacceptor at %u\n",splicesitepos));
543 left_known[splicesitepos - leftoffset] = KNOWN_SPLICESITE_REWARD;
544 }
545 FREE(matches);
546
547 /* splicesitepos+1U = rightoffset - cR + 2; cR = 0 to < glengthR - 1 */
548 matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
549 rightoffset-glengthR+4,rightoffset+2,/*sign*/-1);
550 for (i = 0; i < nmatches; i++) {
551 splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
552 debug5(printf("4. Found known antidonor at %u\n",splicesitepos));
553 right_known[rightoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
554 }
555 FREE(matches);
556
557 }
558 } else {
559 if (cdna_direction > 0) {
560 /* splicesitepos+1U = (chrhigh - chroffset) - leftoffset - cL + 2; cL = 0 to < glengthL - 1 */
561 matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
562 (chrhigh - chroffset) - leftoffset - glengthL + 4,
563 (chrhigh - chroffset) - leftoffset + 2,/*sign*/-1);
564 for (i = 0; i < nmatches; i++) {
565 splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
566 debug5(printf("5. Found known antidonor at %u\n",splicesitepos));
567 left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
568 }
569 FREE(matches);
570
571 /* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
572 matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
573 (chrhigh - chroffset) - rightoffset,
574 (chrhigh - chroffset) - rightoffset + glengthR - 2,/*sign*/-1);
575 for (i = 0; i < nmatches; i++) {
576 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
577 debug5(printf("6. Found known antiacceptor at %u\n",splicesitepos));
578 right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
579 }
580 FREE(matches);
581
582 } else {
583 /* splicesitepos+1U = (chrhigh - chroffset) - leftoffset - cL + 2; cL = 0 to < glengthL - 1 */
584 matches = IIT_get_highs_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
585 (chrhigh - chroffset) - leftoffset - glengthL + 4,
586 (chrhigh - chroffset) - leftoffset + 2,/*sign*/+1);
587 for (i = 0; i < nmatches; i++) {
588 splicesitepos = IIT_interval_high(splicing_iit,matches[i]);
589 debug5(printf("7. Found known acceptor at %u\n",splicesitepos));
590 left_known[(chrhigh - chroffset) - leftoffset - splicesitepos + 2] = KNOWN_SPLICESITE_REWARD;
591 }
592 FREE(matches);
593
594 /* splicesitepos = (chrhigh - chroffset) - rightoffset + cR; cR = 0 to < glengthR - 1 */
595 matches = IIT_get_lows_signed(&nmatches,splicing_iit,splicing_divint_crosstable[chrnum],
596 (chrhigh - chroffset) - rightoffset,
597 (chrhigh - chroffset) - rightoffset + glengthR - 2,/*sign*/+1);
598 for (i = 0; i < nmatches; i++) {
599 splicesitepos = IIT_interval_low(splicing_iit,matches[i]);
600 debug5(printf("8. Found known donor at %u\n",splicesitepos));
601 right_known[splicesitepos - (chrhigh - chroffset) + rightoffset] = KNOWN_SPLICESITE_REWARD;
602 }
603 FREE(matches);
604 }
605 }
606 }
607
608 return;
609 }
610
611
612 #if defined(HAVE_SSE2)
613 /* Returns finalscore */
614 static int
bridge_intron_gap_8_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,Direction8_T ** directionsL_upper_nogap,Direction8_T ** directionsL_lower_nogap,Direction8_T ** directionsR_upper_nogap,Direction8_T ** directionsR_lower_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)615 bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR, int *best_introntype,
616 Score8_T **matrixL_upper, Score8_T **matrixL_lower,
617 Score8_T **matrixR_upper, Score8_T **matrixR_lower,
618 Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
619 Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
620 int *left_known, int *right_known,
621 int rlength, int glengthL, int glengthR,
622 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
623 int leftoffset, int rightoffset,
624 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
625 bool jump_late_p) {
626 int rL, rR, cL, cR;
627 int cloL, chighL;
628 int cloR, chighR;
629 int bestscore = NEG_INFINITY_8, score, scoreL, scoreI, scoreR;
630 Univcoord_T splicesitepos1, splicesitepos2;
631 bool bestp;
632
633 scoreI = 0; /* Because we constrain splices to given introns */
634
635 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
636 debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
637 if ((cloL = rL - lbandL) < 1) {
638 cloL = 1;
639 }
640 if ((chighL = rL + ubandL) > glengthL-1) {
641 chighL = glengthL-1;
642 }
643
644 if ((cloR = rR - lbandR) < 1) {
645 cloR = 1;
646 }
647 if ((chighR = rR + ubandR) > glengthR-1) {
648 chighR = glengthR-1;
649 }
650
651 /* Test indels on left and right */
652 for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
653 /* The following check limits genomic inserts (horizontal) and
654 multiple cDNA inserts (vertical). */
655 if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
656 scoreL = (int) matrixL_lower[rL][cL];
657 #if 0
658 if (directionsL_lower_nogap[rL][cL] != DIAG) {
659 /* Favor gaps away from intron if possible */
660 scoreL -= 100;
661 }
662 #endif
663
664 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
665 for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
666 if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
667 scoreR = (int) matrixR_lower[rR][cR];
668 #if 0
669 if (directionsR_lower_nogap[rR][cR] != DIAG) {
670 /* Favor gaps away from intron if possible */
671 scoreR -= 100;
672 }
673 #endif
674
675 if ((score = scoreL + scoreI + scoreR) > bestscore ||
676 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
677 bestp = false;
678 if (watsonp == true) {
679 splicesitepos1 = leftoffset + cL;
680 splicesitepos2 = rightoffset - cR + 1;
681 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
682 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
683 bestp = true;
684 }
685 } else {
686 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
687 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
688 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
689 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
690 bestp = true;
691 }
692 }
693 if (bestp == true) {
694 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
695 cL,cR,scoreL,scoreR,score));
696 bestscore = score;
697 *bestrL = rL;
698 *bestrR = rR;
699 *bestcL = cL;
700 *bestcR = cR;
701 } else {
702 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
703 cL,cR,scoreL,scoreR,score));
704 }
705 }
706 }
707 }
708
709 for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
710 if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
711 scoreR = (int) matrixR_upper[cR][rR];
712 #if 0
713 if (directionsR_upper_nogap[cR][rR] != DIAG) {
714 /* Favor gaps away from intron if possible */
715 scoreR -= 100;
716 }
717 #endif
718
719 if ((score = scoreL + scoreI + scoreR) > bestscore ||
720 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
721 bestp = false;
722 if (watsonp == true) {
723 splicesitepos1 = leftoffset + cL;
724 splicesitepos2 = rightoffset - cR + 1;
725 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
726 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
727 bestp = true;
728 }
729 } else {
730 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
731 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
732 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
733 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
734 bestp = true;
735 }
736 }
737 if (bestp == true) {
738 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
739 cL,cR,scoreL,scoreR,score));
740 bestscore = score;
741 *bestrL = rL;
742 *bestrR = rR;
743 *bestcL = cL;
744 *bestcR = cR;
745 } else {
746 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
747 cL,cR,scoreL,scoreR,score));
748 }
749 }
750 }
751 }
752 }
753 }
754
755 for (/* at main diagonal*/; cL < chighL; cL++) {
756 /* The following check limits genomic inserts (horizontal) and
757 multiple cDNA inserts (vertical). */
758 if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
759 scoreL = (int) matrixL_upper[cL][rL];
760 #if 0
761 if (directionsL_upper_nogap[cL][rL] != DIAG) {
762 /* Favor gaps away from intron if possible */
763 scoreL -= 100;
764 }
765 #endif
766
767 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
768 for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
769 if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
770 scoreR = (int) matrixR_lower[rR][cR];
771 #if 0
772 if (directionsR_lower_nogap[rR][cR] != DIAG) {
773 /* Favor gaps away from intron if possible */
774 scoreR -= 100;
775 }
776 #endif
777
778 if ((score = scoreL + scoreI + scoreR) > bestscore ||
779 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
780 bestp = false;
781 if (watsonp == true) {
782 splicesitepos1 = leftoffset + cL;
783 splicesitepos2 = rightoffset - cR + 1;
784 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
785 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
786 bestp = true;
787 }
788 } else {
789 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
790 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
791 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
792 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
793 bestp = true;
794 }
795 }
796 if (bestp == true) {
797 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
798 cL,cR,scoreL,scoreR,score));
799 bestscore = score;
800 *bestrL = rL;
801 *bestrR = rR;
802 *bestcL = cL;
803 *bestcR = cR;
804 } else {
805 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
806 cL,cR,scoreL,scoreR,score));
807 }
808 }
809 }
810 }
811
812 for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
813 if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
814 scoreR = (int) matrixR_upper[cR][rR];
815 #if 0
816 if (directionsR_upper_nogap[cR][rR] != DIAG) {
817 /* Favor gaps away from intron if possible */
818 scoreR -= 100;
819 }
820 #endif
821
822 if ((score = scoreL + scoreI + scoreR) > bestscore ||
823 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
824 bestp = false;
825 if (watsonp == true) {
826 splicesitepos1 = leftoffset + cL;
827 splicesitepos2 = rightoffset - cR + 1;
828 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
829 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
830 bestp = true;
831 }
832 } else {
833 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
834 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
835 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
836 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
837 bestp = true;
838 }
839 }
840 if (bestp == true) {
841 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
842 cL,cR,scoreL,scoreR,score));
843 bestscore = score;
844 *bestrL = rL;
845 *bestrR = rR;
846 *bestcL = cL;
847 *bestcR = cR;
848 } else {
849 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
850 cL,cR,scoreL,scoreR,score));
851 }
852 }
853 }
854 }
855 }
856 }
857 }
858
859 *best_introntype = NONINTRON;
860 return bestscore;
861 }
862
863
864 /* Returns finalscore */
865 static int
bridge_intron_gap_8_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)866 bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
867 Score8_T **matrixL_upper, Score8_T **matrixL_lower,
868 Score8_T **matrixR_upper, Score8_T **matrixR_lower,
869 #if 0
870 Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
871 Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
872 int goffsetL, int rev_goffsetR, int canonical_reward,
873 #endif
874 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
875 int *left_known, int *right_known, int rlength, int glengthL, int glengthR,
876 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
877 int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
878 bool halfp, bool finalp) {
879 int rL, rR, cL, cR;
880 int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
881 int cloL, chighL;
882 int cloR, chighR;
883 /* int introntype; */
884 int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, scoreI;
885 int bestscore_with_dinucl = NEG_INFINITY_8;
886 double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
887 Univcoord_T splicesitepos;
888 char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
889 int *leftdi, *rightdi;
890 bool use_dinucl_p;
891
892 int *intron_score_array;
893
894 if (cdna_direction > 0) {
895 if (finalp == true) {
896 intron_score_array = intron_score_array_sense_final;
897 } else {
898 intron_score_array = intron_score_array_sense_prelim;
899 }
900 } else if (cdna_direction < 0) {
901 if (finalp == true) {
902 intron_score_array = intron_score_array_antisense_final;
903 } else {
904 intron_score_array = intron_score_array_antisense_prelim;
905 }
906 } else {
907 if (finalp == true) {
908 intron_score_array = intron_score_array_either_final;
909 } else {
910 intron_score_array = intron_score_array_either_prelim;
911 }
912 }
913
914 /* Read dinucleotides */
915 leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
916 rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
917
918 for (cL = 0; cL < glengthL - 1; cL++) {
919 left1 = gsequenceL[cL];
920 left1_alt = gsequenceL_alt[cL];
921 left2 = gsequenceL[cL+1];
922 left2_alt = gsequenceL_alt[cL+1];
923 /* Assertions may not hold for transcriptome alignment */
924 /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
925 /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
926
927 if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
928 leftdi[cL] = LEFT_GT;
929 } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
930 leftdi[cL] = LEFT_GC;
931 } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
932 leftdi[cL] = LEFT_AT;
933 #ifndef PMAP
934 } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
935 leftdi[cL] = LEFT_CT;
936 #endif
937 } else {
938 leftdi[cL] = 0x00;
939 }
940 }
941 leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
942
943 for (cR = 0; cR < glengthR - 1; cR++) {
944 right2 = rev_gsequenceR[-cR-1];
945 right2_alt = rev_gsequenceR_alt[-cR-1];
946 right1 = rev_gsequenceR[-cR];
947 right1_alt = rev_gsequenceR_alt[-cR];
948 /* Assertions may not hold for transcriptome alignment */
949 /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
950 /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
951
952 if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
953 rightdi[cR] = RIGHT_AG;
954 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
955 rightdi[cR] = RIGHT_AC;
956 #ifndef PMAP
957 } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
958 rightdi[cR] = RIGHT_GC;
959 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
960 rightdi[cR] = RIGHT_AT;
961 #endif
962 } else {
963 rightdi[cR] = 0x00;
964 }
965 }
966 rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
967
968
969 left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
970 right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
971
972 debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
973 if (watsonp == true) {
974 if (cdna_direction > 0) {
975 for (cL = 0; cL < glengthL - 1; cL++) {
976 splicesitepos = chroffset + leftoffset + cL;
977 if (left_known[cL]) {
978 left_probabilities[cL] = 1.0;
979 } else {
980 left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
981 debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
982 }
983 }
984
985 for (cR = 0; cR < glengthR - 1; cR++) {
986 splicesitepos = chroffset + rightoffset - cR + 1;
987 if (right_known[cR]) {
988 right_probabilities[cR] = 1.0;
989 } else {
990 right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
991 debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
992 }
993 }
994
995 } else {
996 for (cL = 0; cL < glengthL - 1; cL++) {
997 splicesitepos = chroffset + leftoffset + cL;
998 if (left_known[cL]) {
999 left_probabilities[cL] = 1.0;
1000 } else {
1001 left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1002 debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1003 }
1004 }
1005
1006 for (cR = 0; cR < glengthR - 1; cR++) {
1007 splicesitepos = chroffset + rightoffset - cR + 1;
1008 if (right_known[cR]) {
1009 right_probabilities[cR] = 1.0;
1010 } else {
1011 right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1012 debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1013 }
1014 }
1015 }
1016
1017 } else {
1018 if (cdna_direction > 0) {
1019 for (cL = 0; cL < glengthL - 1; cL++) {
1020 splicesitepos = chrhigh - leftoffset - cL + 1;
1021 if (left_known[cL]) {
1022 left_probabilities[cL] = 1.0;
1023 } else {
1024 left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1025 debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1026 }
1027 }
1028
1029 for (cR = 0; cR < glengthR - 1; cR++) {
1030 splicesitepos = chrhigh - rightoffset + cR;
1031 if (right_known[cR]) {
1032 right_probabilities[cR] = 1.0;
1033 } else {
1034 right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1035 debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1036 }
1037 }
1038
1039 } else {
1040 for (cL = 0; cL < glengthL - 1; cL++) {
1041 splicesitepos = chrhigh - leftoffset - cL + 1;
1042 if (left_known[cL]) {
1043 left_probabilities[cL] = 1.0;
1044 } else {
1045 left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1046 debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1047 }
1048 }
1049
1050 for (cR = 0; cR < glengthR - 1; cR++) {
1051 splicesitepos = chrhigh - rightoffset + cR;
1052 if (right_known[cR]) {
1053 right_probabilities[cR] = 1.0;
1054 } else {
1055 right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1056 debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1057 }
1058 }
1059 }
1060 }
1061
1062 /* Search using probs and without simultaneously */
1063 bestscore = NEG_INFINITY_8;
1064 bestprob_with_score = bestprob_with_dinucl = 0.0;
1065 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1066 debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
1067 if ((cloL = rL - lbandL) < 1) {
1068 cloL = 1;
1069 }
1070 if ((chighL = rL + ubandL) > glengthL-1) {
1071 chighL = glengthL-1;
1072 }
1073
1074 if ((cloR = rR - lbandR) < 1) {
1075 cloR = 1;
1076 }
1077 if ((chighR = rR + ubandR) > glengthR-1) {
1078 chighR = glengthR-1;
1079 }
1080
1081 debug3(printf("A. Test no indels\n"));
1082 cL = rL;
1083 probL = left_probabilities[cL];
1084 scoreL = (int) matrixL_upper[cL][rL];
1085
1086 cR = rR;
1087 probR = right_probabilities[cR];
1088 scoreR = (int) matrixR_upper[cR][rR];
1089
1090 #ifdef USE_SCOREI
1091 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1092 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1093 #else
1094 scoreI = 0;
1095 #endif
1096
1097 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1098 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1099 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1100 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1101 bestscore = score;
1102 *bestrL = rL;
1103 *bestrR = rR;
1104 *bestcL = cL;
1105 *bestcR = cR;
1106 bestprob_with_score = probL + probR;
1107 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1108 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1109 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1110 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1111 *bestrL = rL;
1112 *bestrR = rR;
1113 *bestcL = cL;
1114 *bestcR = cR;
1115 bestprob_with_score = probL + probR;
1116 } else {
1117 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1118 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1119 }
1120
1121 /* Perform only without indels */
1122 if (scoreI > 0) {
1123 debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
1124 cL,cR,scoreI,probL,probR,probL+probR));
1125 if (probL + probR > bestprob_with_dinucl) {
1126 bestscore_with_dinucl = scoreL + scoreI + scoreR;
1127 bestcL_with_dinucl = cL;
1128 bestcR_with_dinucl = cR;
1129 bestrL_with_dinucl = rL;
1130 bestrR_with_dinucl = rR;
1131 bestprob_with_dinucl = probL + probR;
1132 }
1133 }
1134
1135
1136 debug3(printf("B. Test indel on right (1)\n"));
1137 /* Test indel on right */
1138 cL = rL;
1139 probL = left_probabilities[cL];
1140 scoreL = (int) matrixL_upper[cL][rL];
1141 #if 0
1142 if (directionsL_upper_nogap[cL][rL] != DIAG) {
1143 /* Favor gaps away from intron if possible */
1144 scoreL -= 100;
1145 }
1146 #endif
1147
1148 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1149 for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1150 probR = right_probabilities[cR];
1151 scoreR = (int) matrixR_lower[rR][cR];
1152 #if 0
1153 if (directionsR_lower_nogap[rR][cR] != DIAG) {
1154 /* Favor gaps away from intron if possible */
1155 scoreR -= 100;
1156 }
1157 #endif
1158
1159 #ifdef USE_SCOREI
1160 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1161 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1162 #else
1163 scoreI = 0;
1164 #endif
1165
1166 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1167 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1168 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1169 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1170 bestscore = score;
1171 *bestrL = rL;
1172 *bestrR = rR;
1173 *bestcL = cL;
1174 *bestcR = cR;
1175 bestprob_with_score = probL + probR;
1176 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1177 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1178 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1179 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1180 *bestrL = rL;
1181 *bestrR = rR;
1182 *bestcL = cL;
1183 *bestcR = cR;
1184 bestprob_with_score = probL + probR;
1185 } else {
1186 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1187 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1188 debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1189 }
1190 }
1191
1192 debug3(printf("Skip main diagonal\n"));
1193 for (/*skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1194 probR = right_probabilities[cR];
1195 scoreR = (int) matrixR_upper[cR][rR];
1196 #if 0
1197 if (directionsR_upper_nogap[cR][rR] != DIAG) {
1198 /* Favor gaps away from intron if possible */
1199 scoreR -= 100;
1200 }
1201 #endif
1202
1203 #ifdef USE_SCOREI
1204 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1205 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1206 #else
1207 scoreI = 0;
1208 #endif
1209
1210 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1211 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1212 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1213 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1214 bestscore = score;
1215 *bestrL = rL;
1216 *bestrR = rR;
1217 *bestcL = cL;
1218 *bestcR = cR;
1219 bestprob_with_score = probL + probR;
1220 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1221 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1222 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1223 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1224 *bestrL = rL;
1225 *bestrR = rR;
1226 *bestcL = cL;
1227 *bestcR = cR;
1228 bestprob_with_score = probL + probR;
1229 } else {
1230 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1231 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1232 debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1233 }
1234 }
1235
1236 debug3(printf("C. Test indel on left (1)\n"));
1237 /* Test indel on left */
1238 cR = rR;
1239 probR = right_probabilities[cR];
1240 scoreR = (int) matrixR_upper[cR][rR];
1241 #if 0
1242 if (directionsR_upper_nogap[cR][rR] != DIAG) {
1243 /* Favor gaps away from intron if possible */
1244 scoreR -= 100;
1245 }
1246 #endif
1247
1248 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1249 for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
1250 probL = left_probabilities[cL];
1251 scoreL = (int) matrixL_lower[rL][cL];
1252 #if 0
1253 if (directionsL_lower_nogap[rL][cL] != DIAG) {
1254 /* Favor gaps away from intron if possible */
1255 scoreL -= 100;
1256 }
1257 #endif
1258
1259 #ifdef USE_SCOREI
1260 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1261 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1262 #else
1263 scoreI = 0;
1264 #endif
1265
1266 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1267 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1268 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1269 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1270 bestscore = score;
1271 *bestrL = rL;
1272 *bestrR = rR;
1273 *bestcL = cL;
1274 *bestcR = cR;
1275 bestprob_with_score = probL + probR;
1276 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1277 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1278 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1279 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1280 *bestrL = rL;
1281 *bestrR = rR;
1282 *bestcL = cL;
1283 *bestcR = cR;
1284 bestprob_with_score = probL + probR;
1285 } else {
1286 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1287 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1288 debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1289 }
1290 }
1291
1292 debug3(printf("Skip main diagonal\n"));
1293 for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
1294 probL = left_probabilities[cL];
1295 scoreL = (int) matrixL_upper[cL][rL];
1296 #if 0
1297 if (directionsL_upper_nogap[cL][rL] != DIAG) {
1298 /* Favor gaps away from intron if possible */
1299 scoreL -= 100;
1300 }
1301 #endif
1302
1303 #ifdef USE_SCOREI
1304 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1305 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1306 #else
1307 scoreI = 0;
1308 #endif
1309
1310 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1311 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1312 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1313 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1314 bestscore = score;
1315 *bestrL = rL;
1316 *bestrR = rR;
1317 *bestcL = cL;
1318 *bestcR = cR;
1319 bestprob_with_score = probL + probR;
1320 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1321 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1322 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1323 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1324 *bestrL = rL;
1325 *bestrR = rR;
1326 *bestcL = cL;
1327 *bestcR = cR;
1328 bestprob_with_score = probL + probR;
1329 } else {
1330 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1331 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1332 debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1333 }
1334 }
1335 }
1336
1337 if (bestprob_with_score > 2*PROB_CEILING) {
1338 /* Probability is good with best alignment, so take that */
1339 debug(printf("Best alignment based on score alone has good probability\n"));
1340 use_dinucl_p = false; /* was previously true (bug) */
1341 } else if (bestprob_with_dinucl == 0.0) {
1342 debug(printf("No dinucleotides found\n"));
1343 use_dinucl_p = false;
1344 } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
1345 /* Dinucleotide-based solution is bad, so use alignment */
1346 debug(printf("Dinucleotide-based solution is bad on both sites\n"));
1347 use_dinucl_p = false;
1348 } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
1349 debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
1350 bestscore_with_dinucl,bestscore));
1351 use_dinucl_p = false;
1352 } else {
1353 use_dinucl_p = true;
1354 }
1355
1356 debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
1357 if (use_dinucl_p == true) {
1358 debug(printf("SIMD 8. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
1359 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
1360 /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
1361 debug(printf("Switch to dinucleotide-based solution\n"));
1362 *bestcL = bestcL_with_dinucl;
1363 *bestcR = bestcR_with_dinucl;
1364 *bestrL = bestrL_with_dinucl;
1365 *bestrR = bestrR_with_dinucl;
1366 bestscore = bestscore_with_dinucl;
1367 }
1368
1369 FREEA(rightdi);
1370 FREEA(leftdi);
1371 FREEA(left_probabilities);
1372 FREEA(right_probabilities);
1373
1374 if (bestscore < 0) {
1375 return bestscore;
1376 } else if (halfp == true) {
1377 /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
1378 scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
1379 return (bestscore - scoreI/2);
1380 } else {
1381 return bestscore;
1382 }
1383 }
1384
1385
1386 static int
bridge_intron_gap_8_ud(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,Direction8_T ** directionsL_upper_nogap,Direction8_T ** directionsL_lower_nogap,Direction8_T ** directionsR_upper_nogap,Direction8_T ** directionsR_lower_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)1387 bridge_intron_gap_8_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1388 int *best_introntype, double *left_prob, double *right_prob,
1389 Score8_T **matrixL_upper, Score8_T **matrixL_lower,
1390 Score8_T **matrixR_upper, Score8_T **matrixR_lower,
1391 Direction8_T **directionsL_upper_nogap, Direction8_T **directionsL_lower_nogap,
1392 Direction8_T **directionsR_upper_nogap, Direction8_T **directionsR_lower_nogap,
1393 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
1394 int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
1395 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1396 int leftoffset, int rightoffset,
1397 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
1398 bool halfp, bool finalp, bool jump_late_p) {
1399 int finalscore;
1400 int *left_known, *right_known;
1401
1402 debug(printf("Running bridge_intron_gap_8_ud\n"));
1403
1404 if (glengthL+1 <= 0) {
1405 fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
1406 abort();
1407 }
1408
1409 if (glengthR+1 <= 0) {
1410 fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
1411 abort();
1412 }
1413
1414 left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
1415 right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
1416 get_known_splicesites(left_known,right_known,glengthL,glengthR,
1417 /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
1418 cdna_direction,watsonp,chrnum,chroffset,chrhigh);
1419
1420 if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
1421 /* Constrain to given introns */
1422 finalscore = bridge_intron_gap_8_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
1423 matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
1424 directionsL_upper_nogap,directionsL_lower_nogap,
1425 directionsR_upper_nogap,directionsR_lower_nogap,
1426 left_known,right_known,rlength,glengthL,glengthR,
1427 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
1428 leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
1429 } else {
1430 finalscore = bridge_intron_gap_8_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
1431 matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
1432 #if 0
1433 directionsL_upper_nogap,directionsL_lower_nogap,
1434 directionsR_upper_nogap,directionsR_lower_nogap,
1435 goffsetL,rev_goffsetR,canonical_reward,
1436 #endif
1437 gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,
1438 left_known,right_known,rlength,glengthL,glengthR,
1439 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
1440 leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
1441 }
1442
1443
1444 #if 0
1445 /* Determine if result meets given constraints */
1446 if (*finalscore < 0) {
1447 result = false;
1448 } else if (novelsplicingp == true) {
1449 result = true;
1450 } else if (splicing_iit == NULL) {
1451 result = true;
1452 } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
1453 /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
1454 if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
1455 debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
1456 result = false;
1457 } else {
1458 result = true;
1459 }
1460 } else {
1461 /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
1462 result = true;
1463 }
1464 #endif
1465
1466 if (finalscore >= 0) {
1467 get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
1468 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
1469 cdna_direction,watsonp);
1470 }
1471
1472 FREEA(right_known);
1473 FREEA(left_known);
1474
1475 #if defined(DEBUG) || defined(DEBUG3)
1476 printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
1477 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
1478 #endif
1479
1480 return finalscore;
1481 }
1482 #endif
1483
1484
1485 #if defined(HAVE_SSE2)
1486 static int
bridge_intron_gap_16_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,Direction16_T ** directionsL_upper_nogap,Direction16_T ** directionsL_lower_nogap,Direction16_T ** directionsR_upper_nogap,Direction16_T ** directionsR_lower_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)1487 bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1488 int *best_introntype,
1489 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
1490 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
1491 Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
1492 Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
1493 int *left_known, int *right_known,
1494 int rlength, int glengthL, int glengthR,
1495 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1496 int leftoffset, int rightoffset,
1497 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
1498 bool jump_late_p) {
1499 int rL, rR, cL, cR;
1500 int cloL, chighL;
1501 int cloR, chighR;
1502 int bestscore = NEG_INFINITY_16, score, scoreL, scoreI, scoreR;
1503 Univcoord_T splicesitepos1, splicesitepos2;
1504 bool bestp;
1505
1506 scoreI = 0; /* Because we constrain splices to given introns */
1507
1508 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1509 debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
1510 if ((cloL = rL - lbandL) < 1) {
1511 cloL = 1;
1512 }
1513 if ((chighL = rL + ubandL) > glengthL-1) {
1514 chighL = glengthL-1;
1515 }
1516
1517 if ((cloR = rR - lbandR) < 1) {
1518 cloR = 1;
1519 }
1520 if ((chighR = rR + ubandR) > glengthR-1) {
1521 chighR = glengthR-1;
1522 }
1523
1524 /* Test indels on left and right */
1525 for (cL = cloL; cL < /* left of main diagonal*/rL; cL++) {
1526 /* The following check limits genomic inserts (horizontal) and
1527 multiple cDNA inserts (vertical). */
1528 if (left_known[cL] > 0 && directionsL_lower_nogap[rL][cL] == DIAG) {
1529 scoreL = (int) matrixL_lower[rL][cL];
1530 #if 0
1531 if (directionsL_lower_nogap[rL][cL] != DIAG) {
1532 /* Favor gaps away from intron if possible */
1533 scoreL -= 100;
1534 }
1535 #endif
1536
1537 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1538 for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1539 if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
1540 scoreR = (int) matrixR_lower[rR][cR];
1541 #if 0
1542 if (directionsR_lower_nogap[rR][cR] != DIAG) {
1543 /* Favor gaps away from intron if possible */
1544 scoreR -= 100;
1545 }
1546 #endif
1547
1548 if ((score = scoreL + scoreI + scoreR) > bestscore ||
1549 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
1550 bestp = false;
1551 if (watsonp == true) {
1552 splicesitepos1 = leftoffset + cL;
1553 splicesitepos2 = rightoffset - cR + 1;
1554 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1555 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1556 bestp = true;
1557 }
1558 } else {
1559 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1560 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1561 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1562 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1563 bestp = true;
1564 }
1565 }
1566 if (bestp == true) {
1567 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1568 cL,cR,scoreL,scoreR,score));
1569 bestscore = score;
1570 *bestrL = rL;
1571 *bestrR = rR;
1572 *bestcL = cL;
1573 *bestcR = cR;
1574 } else {
1575 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1576 cL,cR,scoreL,scoreR,score));
1577 }
1578 }
1579 }
1580 }
1581
1582 for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1583 if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
1584 scoreR = (int) matrixR_upper[cR][rR];
1585 #if 0
1586 if (directionsR_upper_nogap[cR][rR] != DIAG) {
1587 /* Favor gaps away from intron if possible */
1588 scoreR -= 100;
1589 }
1590 #endif
1591
1592 if ((score = scoreL + scoreI + scoreR) > bestscore ||
1593 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
1594 bestp = false;
1595 if (watsonp == true) {
1596 splicesitepos1 = leftoffset + cL;
1597 splicesitepos2 = rightoffset - cR + 1;
1598 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1599 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1600 bestp = true;
1601 }
1602 } else {
1603 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1604 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1605 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1606 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1607 bestp = true;
1608 }
1609 }
1610 if (bestp == true) {
1611 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1612 cL,cR,scoreL,scoreR,score));
1613 bestscore = score;
1614 *bestrL = rL;
1615 *bestrR = rR;
1616 *bestcL = cL;
1617 *bestcR = cR;
1618 } else {
1619 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1620 cL,cR,scoreL,scoreR,score));
1621 }
1622 }
1623 }
1624 }
1625 }
1626 }
1627
1628 for (/* at main diagonal*/; cL < chighL; cL++) {
1629 /* The following check limits genomic inserts (horizontal) and
1630 multiple cDNA inserts (vertical). */
1631 if (left_known[cL] > 0 && directionsL_upper_nogap[cL][rL] == DIAG) {
1632 scoreL = (int) matrixL_upper[cL][rL];
1633 #if 0
1634 if (directionsL_upper_nogap[cL][rL] != DIAG) {
1635 /* Favor gaps away from intron if possible */
1636 scoreL -= 100;
1637 }
1638 #endif
1639
1640 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
1641 for (cR = cloR; cR < /* left of main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
1642 if (right_known[cR] > 0 && directionsR_lower_nogap[rR][cR] == DIAG) {
1643 scoreR = (int) matrixR_lower[rR][cR];
1644 #if 0
1645 if (directionsR_lower_nogap[rR][cR] != DIAG) {
1646 /* Favor gaps away from intron if possible */
1647 scoreR -= 100;
1648 }
1649 #endif
1650
1651 if ((score = scoreL + scoreI + scoreR) > bestscore ||
1652 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
1653 bestp = false;
1654 if (watsonp == true) {
1655 splicesitepos1 = leftoffset + cL;
1656 splicesitepos2 = rightoffset - cR + 1;
1657 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1658 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1659 bestp = true;
1660 }
1661 } else {
1662 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1663 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1664 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1665 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1666 bestp = true;
1667 }
1668 }
1669 if (bestp == true) {
1670 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1671 cL,cR,scoreL,scoreR,score));
1672 bestscore = score;
1673 *bestrL = rL;
1674 *bestrR = rR;
1675 *bestcL = cL;
1676 *bestcR = cR;
1677 } else {
1678 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1679 cL,cR,scoreL,scoreR,score));
1680 }
1681 }
1682 }
1683 }
1684
1685 for (/* at main diagonal*/; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
1686 if (right_known[cR] > 0 && directionsR_upper_nogap[cR][rR] == DIAG) {
1687 scoreR = (int) matrixR_upper[cR][rR];
1688 #if 0
1689 if (directionsR_upper_nogap[cR][rR] != DIAG) {
1690 /* Favor gaps away from intron if possible */
1691 scoreR -= 100;
1692 }
1693 #endif
1694
1695 if ((score = scoreL + scoreI + scoreR) > bestscore ||
1696 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
1697 bestp = false;
1698 if (watsonp == true) {
1699 splicesitepos1 = leftoffset + cL;
1700 splicesitepos2 = rightoffset - cR + 1;
1701 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1702 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
1703 bestp = true;
1704 }
1705 } else {
1706 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
1707 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
1708 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
1709 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
1710 bestp = true;
1711 }
1712 }
1713 if (bestp == true) {
1714 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
1715 cL,cR,scoreL,scoreR,score));
1716 bestscore = score;
1717 *bestrL = rL;
1718 *bestrR = rR;
1719 *bestcL = cL;
1720 *bestcR = cR;
1721 } else {
1722 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
1723 cL,cR,scoreL,scoreR,score));
1724 }
1725 }
1726 }
1727 }
1728 }
1729 }
1730 }
1731
1732 *best_introntype = NONINTRON;
1733 return bestscore;
1734 }
1735
1736
1737 /* Returns finalscore */
1738 static int
bridge_intron_gap_16_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)1739 bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
1740 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
1741 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
1742 #if 0
1743 Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
1744 Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
1745 int goffsetL, int rev_goffsetR, int canonical_reward,
1746 #endif
1747 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
1748 int *left_known, int *right_known, int rlength, int glengthL, int glengthR,
1749 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
1750 int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
1751 bool halfp, bool finalp) {
1752 int rL, rR, cL, cR;
1753 int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
1754 int cloL, chighL;
1755 int cloR, chighR;
1756 /* int introntype; */
1757 int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, scoreI;
1758 int bestscore_with_dinucl = NEG_INFINITY_16;
1759 double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
1760 Univcoord_T splicesitepos;
1761 char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
1762 int *leftdi, *rightdi;
1763 bool use_dinucl_p;
1764
1765 int *intron_score_array;
1766
1767 if (cdna_direction > 0) {
1768 if (finalp == true) {
1769 intron_score_array = intron_score_array_sense_final;
1770 } else {
1771 intron_score_array = intron_score_array_sense_prelim;
1772 }
1773 } else if (cdna_direction < 0) {
1774 if (finalp == true) {
1775 intron_score_array = intron_score_array_antisense_final;
1776 } else {
1777 intron_score_array = intron_score_array_antisense_prelim;
1778 }
1779 } else {
1780 if (finalp == true) {
1781 intron_score_array = intron_score_array_either_final;
1782 } else {
1783 intron_score_array = intron_score_array_either_prelim;
1784 }
1785 }
1786
1787
1788 /* Read dinucleotides */
1789 leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
1790 rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
1791
1792 for (cL = 0; cL < glengthL - 1; cL++) {
1793 left1 = gsequenceL[cL];
1794 left1_alt = gsequenceL_alt[cL];
1795 left2 = gsequenceL[cL+1];
1796 left2_alt = gsequenceL_alt[cL+1];
1797 /* Assertions may not hold for transcriptome alignment */
1798 /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
1799 /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
1800
1801 if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
1802 leftdi[cL] = LEFT_GT;
1803 } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
1804 leftdi[cL] = LEFT_GC;
1805 } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
1806 leftdi[cL] = LEFT_AT;
1807 #ifndef PMAP
1808 } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
1809 leftdi[cL] = LEFT_CT;
1810 #endif
1811 } else {
1812 leftdi[cL] = 0x00;
1813 }
1814 }
1815 leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
1816
1817 for (cR = 0; cR < glengthR - 1; cR++) {
1818 right2 = rev_gsequenceR[-cR-1];
1819 right2_alt = rev_gsequenceR_alt[-cR-1];
1820 right1 = rev_gsequenceR[-cR];
1821 right1_alt = rev_gsequenceR_alt[-cR];
1822 /* Assertions may not hold for transcriptome alignment */
1823 /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
1824 /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
1825
1826 if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
1827 rightdi[cR] = RIGHT_AG;
1828 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
1829 rightdi[cR] = RIGHT_AC;
1830 #ifndef PMAP
1831 } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
1832 rightdi[cR] = RIGHT_GC;
1833 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
1834 rightdi[cR] = RIGHT_AT;
1835 #endif
1836 } else {
1837 rightdi[cR] = 0x00;
1838 }
1839 }
1840 rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
1841
1842
1843 left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
1844 right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
1845
1846 debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
1847 if (watsonp == true) {
1848 if (cdna_direction > 0) {
1849 for (cL = 0; cL < glengthL - 1; cL++) {
1850 splicesitepos = chroffset + leftoffset + cL;
1851 if (left_known[cL]) {
1852 left_probabilities[cL] = 1.0;
1853 } else {
1854 left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1855 debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1856 }
1857 }
1858
1859 for (cR = 0; cR < glengthR - 1; cR++) {
1860 splicesitepos = chroffset + rightoffset - cR + 1;
1861 if (right_known[cR]) {
1862 right_probabilities[cR] = 1.0;
1863 } else {
1864 right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1865 debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1866 }
1867 }
1868
1869 } else {
1870 for (cL = 0; cL < glengthL - 1; cL++) {
1871 splicesitepos = chroffset + leftoffset + cL;
1872 if (left_known[cL]) {
1873 left_probabilities[cL] = 1.0;
1874 } else {
1875 left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1876 debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1877 }
1878 }
1879
1880 for (cR = 0; cR < glengthR - 1; cR++) {
1881 splicesitepos = chroffset + rightoffset - cR + 1;
1882 if (right_known[cR]) {
1883 right_probabilities[cR] = 1.0;
1884 } else {
1885 right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1886 debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1887 }
1888 }
1889 }
1890
1891 } else {
1892 if (cdna_direction > 0) {
1893 for (cL = 0; cL < glengthL - 1; cL++) {
1894 splicesitepos = chrhigh - leftoffset - cL + 1;
1895 if (left_known[cL]) {
1896 left_probabilities[cL] = 1.0;
1897 } else {
1898 left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
1899 debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1900 }
1901 }
1902
1903 for (cR = 0; cR < glengthR - 1; cR++) {
1904 splicesitepos = chrhigh - rightoffset + cR;
1905 if (right_known[cR]) {
1906 right_probabilities[cR] = 1.0;
1907 } else {
1908 right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
1909 debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1910 }
1911 }
1912
1913 } else {
1914 for (cL = 0; cL < glengthL - 1; cL++) {
1915 splicesitepos = chrhigh - leftoffset - cL + 1;
1916 if (left_known[cL]) {
1917 left_probabilities[cL] = 1.0;
1918 } else {
1919 left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
1920 debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
1921 }
1922 }
1923
1924 for (cR = 0; cR < glengthR - 1; cR++) {
1925 splicesitepos = chrhigh - rightoffset + cR;
1926 if (right_known[cR]) {
1927 right_probabilities[cR] = 1.0;
1928 } else {
1929 right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
1930 debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
1931 }
1932 }
1933 }
1934 }
1935
1936 /* Search using probs and without simultaneously */
1937 bestscore = NEG_INFINITY_16;
1938 bestprob_with_score = bestprob_with_dinucl = 0.0;
1939 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
1940 debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
1941 if ((cloL = rL - lbandL) < 1) {
1942 cloL = 1;
1943 }
1944 if ((chighL = rL + ubandL) > glengthL-1) {
1945 chighL = glengthL-1;
1946 }
1947
1948 if ((cloR = rR - lbandR) < 1) {
1949 cloR = 1;
1950 }
1951 if ((chighR = rR + ubandR) > glengthR-1) {
1952 chighR = glengthR-1;
1953 }
1954
1955 debug3(printf("A. Test no indels\n"));
1956 cL = rL;
1957 probL = left_probabilities[cL];
1958 scoreL = (int) matrixL_upper[cL][rL];
1959
1960 cR = rR;
1961 probR = right_probabilities[cR];
1962 scoreR = (int) matrixR_upper[cR][rR];
1963
1964 #ifdef USE_SCOREI
1965 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
1966 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
1967 #else
1968 scoreI = 0;
1969 #endif
1970
1971 if ((score = scoreL + scoreI + scoreR) > bestscore) {
1972 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1973 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1974 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1975 bestscore = score;
1976 *bestrL = rL;
1977 *bestrR = rR;
1978 *bestcL = cL;
1979 *bestcR = cR;
1980 bestprob_with_score = probL + probR;
1981 } else if (score == bestscore && probL + probR > bestprob_with_score) {
1982 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1983 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1984 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
1985 *bestrL = rL;
1986 *bestrR = rR;
1987 *bestcL = cL;
1988 *bestcR = cR;
1989 bestprob_with_score = probL + probR;
1990 } else {
1991 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
1992 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
1993 }
1994
1995 /* Perform only without indels */
1996 if (scoreI > 0) {
1997 debug3(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
1998 cL,cR,scoreI,probL,probR,probL+probR));
1999 if (probL + probR > bestprob_with_dinucl) {
2000 bestscore_with_dinucl = scoreL + scoreI + scoreR;
2001 bestcL_with_dinucl = cL;
2002 bestcR_with_dinucl = cR;
2003 bestrL_with_dinucl = rL;
2004 bestrR_with_dinucl = rR;
2005 bestprob_with_dinucl = probL + probR;
2006 }
2007 }
2008
2009
2010 debug3(printf("B. Test indel on right\n"));
2011 /* Test indel on right */
2012 cL = rL;
2013 probL = left_probabilities[cL];
2014 scoreL = (int) matrixL_upper[cL][rL];
2015 #if 0
2016 if (directionsL_upper_nogap[cL][rL] != DIAG) {
2017 /* Favor gaps away from intron if possible */
2018 scoreL -= 100;
2019 }
2020 #endif
2021
2022 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2023 for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
2024 probR = right_probabilities[cR];
2025 scoreR = (int) matrixR_lower[rR][cR];
2026 #if 0
2027 if (directionsR_lower_nogap[rR][cR] != DIAG) {
2028 /* Favor gaps away from intron if possible */
2029 scoreR -= 100;
2030 }
2031 #endif
2032
2033 #ifdef USE_SCOREI
2034 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2035 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2036 #else
2037 scoreI = 0;
2038 #endif
2039
2040 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2041 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2042 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2043 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2044 bestscore = score;
2045 *bestrL = rL;
2046 *bestrR = rR;
2047 *bestcL = cL;
2048 *bestcR = cR;
2049 bestprob_with_score = probL + probR;
2050 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2051 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2052 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2053 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2054 *bestrL = rL;
2055 *bestrR = rR;
2056 *bestcL = cL;
2057 *bestcR = cR;
2058 bestprob_with_score = probL + probR;
2059 } else {
2060 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2061 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2062 }
2063 }
2064
2065 debug3(printf("Skip main diagonal\n"));
2066 for (/*Skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2067 probR = right_probabilities[cR];
2068 scoreR = (int) matrixR_upper[cR][rR];
2069 #if 0
2070 if (directionsR_upper_nogap[cR][rR] != DIAG) {
2071 /* Favor gaps away from intron if possible */
2072 scoreR -= 100;
2073 }
2074 #endif
2075
2076 #ifdef USE_SCOREI
2077 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2078 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2079 #else
2080 scoreI = 0;
2081 #endif
2082
2083 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2084 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2085 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2086 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2087 bestscore = score;
2088 *bestrL = rL;
2089 *bestrR = rR;
2090 *bestcL = cL;
2091 *bestcR = cR;
2092 bestprob_with_score = probL + probR;
2093 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2094 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2095 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2096 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2097 *bestrL = rL;
2098 *bestrR = rR;
2099 *bestcL = cL;
2100 *bestcR = cR;
2101 bestprob_with_score = probL + probR;
2102 } else {
2103 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2104 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2105 }
2106 }
2107
2108
2109 debug3(printf("C. Test indel on left (2)\n"));
2110 /* Test indel on left */
2111 cR = rR;
2112 probR = right_probabilities[cR];
2113 scoreR = (int) matrixR_upper[cR][rR];
2114 #if 0
2115 if (directionsR_upper_nogap[cR][rR] != DIAG) {
2116 /* Favor gaps away from intron if possible */
2117 scoreR -= 100;
2118 }
2119 #endif
2120
2121 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2122 for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
2123 probL = left_probabilities[cL];
2124 scoreL = (int) matrixL_lower[rL][cL];
2125 #if 0
2126 if (directionsL_lower_nogap[rL][cL] != DIAG) {
2127 /* Favor gaps away from intron if possible */
2128 scoreL -= 100;
2129 }
2130 #endif
2131
2132 #ifdef USE_SCOREI
2133 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2134 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2135 #else
2136 scoreI = 0;
2137 #endif
2138
2139 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2140 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2141 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2142 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2143 bestscore = score;
2144 *bestrL = rL;
2145 *bestrR = rR;
2146 *bestcL = cL;
2147 *bestcR = cR;
2148 bestprob_with_score = probL + probR;
2149 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2150 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2151 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2152 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2153 *bestrL = rL;
2154 *bestrR = rR;
2155 *bestcL = cL;
2156 *bestcR = cR;
2157 bestprob_with_score = probL + probR;
2158 } else {
2159 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2160 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2161 }
2162 }
2163
2164 debug3(printf("Skip main diagonal\n"));
2165 for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
2166 probL = left_probabilities[cL];
2167 scoreL = (int) matrixL_upper[cL][rL];
2168 #if 0
2169 if (directionsL_upper_nogap[cL][rL] != DIAG) {
2170 /* Favor gaps away from intron if possible */
2171 scoreL -= 100;
2172 }
2173 #endif
2174
2175 #ifdef USE_SCOREI
2176 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2177 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2178 #else
2179 scoreI = 0;
2180 #endif
2181
2182 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2183 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2184 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2185 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2186 bestscore = score;
2187 *bestrL = rL;
2188 *bestrR = rR;
2189 *bestcL = cL;
2190 *bestcR = cR;
2191 bestprob_with_score = probL + probR;
2192 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2193 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2194 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2195 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2196 *bestrL = rL;
2197 *bestrR = rR;
2198 *bestcL = cL;
2199 *bestcR = cR;
2200 bestprob_with_score = probL + probR;
2201 } else {
2202 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2203 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2204 }
2205 }
2206 }
2207
2208 if (bestprob_with_score > 2*PROB_CEILING) {
2209 /* Probability is good with best alignment, so take that */
2210 debug(printf("Best alignment based on score alone has good probability\n"));
2211 use_dinucl_p = false; /* was previously true (bug) */
2212 } else if (bestprob_with_dinucl == 0.0) {
2213 debug(printf("No dinucleotides found\n"));
2214 use_dinucl_p = false;
2215 } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
2216 /* Dinucleotide-based solution is bad, so use alignment */
2217 debug(printf("Dinucleotide-based solution is bad on both sites\n"));
2218 use_dinucl_p = false;
2219 } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
2220 debug(printf("Dinucleotide-based solution requires very bad alignment, because bestscore_with_dinucl %d < bestscore %d - 9\n",
2221 bestscore_with_dinucl,bestscore));
2222 use_dinucl_p = false;
2223 } else {
2224 use_dinucl_p = true;
2225 }
2226
2227 debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
2228 if (use_dinucl_p == true) {
2229 debug(printf("SIMD 16. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
2230 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
2231
2232 /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
2233 debug(printf("Switch to dinucleotide-based solution\n"));
2234 *bestcL = bestcL_with_dinucl;
2235 *bestcR = bestcR_with_dinucl;
2236 *bestrL = bestrL_with_dinucl;
2237 *bestrR = bestrR_with_dinucl;
2238 bestscore = bestscore_with_dinucl;
2239 }
2240
2241 FREEA(rightdi);
2242 FREEA(leftdi);
2243 FREEA(left_probabilities);
2244 FREEA(right_probabilities);
2245
2246 if (bestscore < 0) {
2247 return bestscore;
2248 } else if (halfp == true) {
2249 /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
2250 scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
2251 return (bestscore - scoreI/2);
2252 } else {
2253 return bestscore;
2254 }
2255 }
2256
2257
2258
2259 static int
bridge_intron_gap_16_ud(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,Direction16_T ** directionsL_upper_nogap,Direction16_T ** directionsL_lower_nogap,Direction16_T ** directionsR_upper_nogap,Direction16_T ** directionsR_lower_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)2260 bridge_intron_gap_16_ud (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2261 int *best_introntype, double *left_prob, double *right_prob,
2262 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
2263 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
2264 Direction16_T **directionsL_upper_nogap, Direction16_T **directionsL_lower_nogap,
2265 Direction16_T **directionsR_upper_nogap, Direction16_T **directionsR_lower_nogap,
2266 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2267 int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
2268 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2269 int leftoffset, int rightoffset,
2270 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2271 bool halfp, bool finalp, bool jump_late_p) {
2272 int finalscore;
2273 int *left_known, *right_known;
2274
2275 debug(printf("Running bridge_intron_gap_16_ud\n"));
2276
2277 if (glengthL+1 <= 0) {
2278 fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
2279 abort();
2280 }
2281
2282 if (glengthR+1 <= 0) {
2283 fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
2284 abort();
2285 }
2286
2287 left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
2288 right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
2289 get_known_splicesites(left_known,right_known,glengthL,glengthR,
2290 /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
2291 cdna_direction,watsonp,chrnum,chroffset,chrhigh);
2292
2293 if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
2294 /* Constrain to given introns */
2295 finalscore = bridge_intron_gap_16_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
2296 matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
2297 directionsL_upper_nogap,directionsL_lower_nogap,
2298 directionsR_upper_nogap,directionsR_lower_nogap,
2299 left_known,right_known,rlength,glengthL,glengthR,
2300 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2301 leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
2302
2303 } else {
2304 finalscore = bridge_intron_gap_16_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
2305 matrixL_upper,matrixL_lower,matrixR_upper,matrixR_lower,
2306 #if 0
2307 directionsL_upper_nogap,directionsL_lower_nogap,
2308 directionsR_upper_nogap,directionsR_lower_nogap,
2309 goffsetL,rev_goffsetR,canonical_reward,
2310 #endif
2311 gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,
2312 left_known,right_known,rlength,glengthL,glengthR,
2313 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2314 leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
2315 }
2316
2317
2318 #if 0
2319 /* Determine if result meets given constraints */
2320 if (*finalscore < 0) {
2321 result = false;
2322 } else if (novelsplicingp == true) {
2323 result = true;
2324 } else if (splicing_iit == NULL) {
2325 result = true;
2326 } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
2327 /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
2328 if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
2329 debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
2330 result = false;
2331 } else {
2332 result = true;
2333 }
2334 } else {
2335 /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
2336 result = true;
2337 }
2338 #endif
2339
2340 if (finalscore >= 0) {
2341 get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
2342 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
2343 cdna_direction,watsonp);
2344 }
2345
2346 FREEA(right_known);
2347 FREEA(left_known);
2348
2349 #if defined(DEBUG) || defined(DEBUG3)
2350 printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
2351 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
2352 #endif
2353
2354 return finalscore;
2355 }
2356 #endif
2357
2358
2359 #ifndef HAVE_SSE2
2360 static int
bridge_intron_gap_intron_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool jump_late_p)2361 bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2362 int *best_introntype,
2363 Score32_T **matrixL, Score32_T **matrixR,
2364 Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2365 int *left_known, int *right_known,
2366 int rlength, int glengthL, int glengthR,
2367 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2368 int leftoffset, int rightoffset,
2369 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2370 bool jump_late_p) {
2371 int rL, rR, cL, cR;
2372 int cloL, chighL;
2373 int cloR, chighR;
2374 int bestscore = NEG_INFINITY_32, score, scoreL, scoreI, scoreR;
2375 Univcoord_T splicesitepos1, splicesitepos2;
2376 bool bestp;
2377
2378 scoreI = 0; /* Because we constrain splices to given introns */
2379
2380 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
2381 debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
2382 if ((cloL = rL - lbandL) < 1) {
2383 cloL = 1;
2384 }
2385 if ((chighL = rL + ubandL) > glengthL-1) {
2386 chighL = glengthL-1;
2387 }
2388
2389 if ((cloR = rR - lbandR) < 1) {
2390 cloR = 1;
2391 }
2392 if ((chighR = rR + ubandR) > glengthR-1) {
2393 chighR = glengthR-1;
2394 }
2395
2396 /* Test indels on left and right */
2397 for (cL = cloL; cL < chighL; cL++) {
2398 /* The following check limits genomic inserts (horizontal) and
2399 multiple cDNA inserts (vertical). */
2400 if (left_known[cL] > 0 && directionsL_nogap[cL][rL] == DIAG) {
2401 scoreL = (int) matrixL[cL][rL];
2402 #if 0
2403 if (directionsL_nogap[cL][rL] != DIAG) {
2404 /* Favor gaps away from intron if possible */
2405 scoreL -= 100;
2406 }
2407 #endif
2408
2409 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2410 for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2411 if (right_known[cR] > 0 && directionsR_nogap[cR][rR] == DIAG) {
2412 scoreR = (int) matrixR[cR][rR];
2413 #if 0
2414 if (directionsR_nogap[cR][rR] != DIAG) {
2415 /* Favor gaps away from intron if possible */
2416 scoreR -= 100;
2417 }
2418 #endif
2419
2420 if ((score = scoreL + scoreI + scoreR) > bestscore ||
2421 (score >= bestscore && jump_late_p)) { /* Use >= for jump late */
2422 bestp = false;
2423 if (watsonp == true) {
2424 splicesitepos1 = leftoffset + cL;
2425 splicesitepos2 = rightoffset - cR + 1;
2426 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
2427 splicesitepos1,splicesitepos2+1U,/*sign*/cdna_direction) == true) {
2428 bestp = true;
2429 }
2430 } else {
2431 splicesitepos1 = (chrhigh - chroffset) - leftoffset - cL + 1;
2432 splicesitepos2 = (chrhigh - chroffset) - rightoffset + cR;
2433 if (IIT_exists_with_divno_signed(splicing_iit,splicing_divint_crosstable[chrnum],
2434 splicesitepos2,splicesitepos1+1U,/*sign*/-cdna_direction) == true) {
2435 bestp = true;
2436 }
2437 }
2438 if (bestp == true) {
2439 debug3(printf("At %d left to %d right, score is (%d)+(%d) = %d (bestscore)\n",
2440 cL,cR,scoreL,scoreR,score));
2441 bestscore = score;
2442 *bestrL = rL;
2443 *bestrR = rR;
2444 *bestcL = cL;
2445 *bestcR = cR;
2446 } else {
2447 debug3a(printf("At %d left to %d right, score is (%d)+(%d) = %d\n",
2448 cL,cR,scoreL,scoreR,score));
2449 }
2450 }
2451 }
2452 }
2453 }
2454 }
2455 }
2456
2457 *best_introntype = NONINTRON;
2458 return bestscore;
2459 }
2460
2461
2462 /* Returns finalscore */
2463 static int
bridge_intron_gap_site_level(int * bestrL,int * bestrR,int * bestcL,int * bestcR,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int * left_known,int * right_known,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int lbandL,int ubandL,int lbandR,int ubandR,int leftoffset,int rightoffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp)2464 bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2465 Score32_T **matrixL, Score32_T **matrixR,
2466 Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2467 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2468 int goffsetL, int rev_goffsetR, int *left_known, int *right_known,
2469 int rlength, int glengthL, int glengthR,
2470 int cdna_direction, bool watsonp, int lbandL, int ubandL, int lbandR, int ubandR,
2471 int leftoffset, int rightoffset, Univcoord_T chroffset, Univcoord_T chrhigh,
2472 bool halfp, bool finalp) {
2473 int rL, rR, cL, cR;
2474 int bestrL_with_dinucl, bestrR_with_dinucl, bestcL_with_dinucl, bestcR_with_dinucl;
2475 int cloL, chighL;
2476 int cloR, chighR;
2477 int introntype;
2478 int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, scoreI;
2479 int bestscore_with_dinucl = NEG_INFINITY_32;
2480 double *left_probabilities, *right_probabilities, probL, probR, bestprob_with_score, bestprob_with_dinucl;
2481 Univcoord_T splicesitepos;
2482 char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt;
2483 int *leftdi, *rightdi;
2484 bool use_dinucl_p;
2485
2486 int *intron_score_array;
2487
2488 if (cdna_direction > 0) {
2489 if (finalp == true) {
2490 intron_score_array = intron_score_array_sense_final;
2491 } else {
2492 intron_score_array = intron_score_array_sense_prelim;
2493 }
2494 } else if (cdna_direction < 0) {
2495 if (finalp == true) {
2496 intron_score_array = intron_score_array_antisense_final;
2497 } else {
2498 intron_score_array = intron_score_array_antisense_prelim;
2499 }
2500 } else {
2501 if (finalp == true) {
2502 intron_score_array = intron_score_array_either_final;
2503 } else {
2504 intron_score_array = intron_score_array_either_prelim;
2505 }
2506 }
2507
2508
2509 /* Read dinucleotides */
2510 leftdi = (int *) MALLOCA((glengthL+1) * sizeof(int));
2511 rightdi = (int *) MALLOCA((glengthR+1) * sizeof(int));
2512
2513 for (cL = 0; cL < glengthL - 1; cL++) {
2514 left1 = gsequenceL[cL];
2515 left1_alt = gsequenceL_alt[cL];
2516 left2 = gsequenceL[cL+1];
2517 left2_alt = gsequenceL_alt[cL+1];
2518 /* Assertions may not hold for transcriptome alignment */
2519 /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
2520 /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
2521
2522 if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
2523 leftdi[cL] = LEFT_GT;
2524 } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
2525 leftdi[cL] = LEFT_GC;
2526 } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
2527 leftdi[cL] = LEFT_AT;
2528 #ifndef PMAP
2529 } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
2530 leftdi[cL] = LEFT_CT;
2531 #endif
2532 } else {
2533 leftdi[cL] = 0x00;
2534 }
2535 }
2536 leftdi[glengthL-1] = leftdi[glengthL] = 0x00;
2537
2538 for (cR = 0; cR < glengthR - 1; cR++) {
2539 right2 = rev_gsequenceR[-cR-1];
2540 right2_alt = rev_gsequenceR_alt[-cR-1];
2541 right1 = rev_gsequenceR[-cR];
2542 right1_alt = rev_gsequenceR_alt[-cR];
2543 /* Assertions may not hold for transcriptome alignment */
2544 /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
2545 /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
2546
2547 if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
2548 rightdi[cR] = RIGHT_AG;
2549 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
2550 rightdi[cR] = RIGHT_AC;
2551 #ifndef PMAP
2552 } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
2553 rightdi[cR] = RIGHT_GC;
2554 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
2555 rightdi[cR] = RIGHT_AT;
2556 #endif
2557 } else {
2558 rightdi[cR] = 0x00;
2559 }
2560 }
2561 rightdi[glengthR-1] = rightdi[glengthR] = 0x00;
2562
2563
2564 left_probabilities = (double *) MALLOCA(glengthL * sizeof(double));
2565 right_probabilities = (double *) MALLOCA(glengthR * sizeof(double));
2566
2567 debug3(printf("watsonp is %d. cdna_direction is %d\n",watsonp,cdna_direction));
2568 if (watsonp == true) {
2569 if (cdna_direction > 0) {
2570 for (cL = 0; cL < glengthL - 1; cL++) {
2571 splicesitepos = chroffset + leftoffset + cL;
2572 if (left_known[cL]) {
2573 left_probabilities[cL] = 1.0;
2574 } else {
2575 left_probabilities[cL] = Maxent_hr_donor_prob(splicesitepos,chroffset);
2576 debug3(printf("left donor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2577 }
2578 }
2579
2580 for (cR = 0; cR < glengthR - 1; cR++) {
2581 splicesitepos = chroffset + rightoffset - cR + 1;
2582 if (right_known[cR]) {
2583 right_probabilities[cR] = 1.0;
2584 } else {
2585 right_probabilities[cR] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
2586 debug3(printf("right acceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2587 }
2588 }
2589
2590 } else {
2591 for (cL = 0; cL < glengthL - 1; cL++) {
2592 splicesitepos = chroffset + leftoffset + cL;
2593 if (left_known[cL]) {
2594 left_probabilities[cL] = 1.0;
2595 } else {
2596 left_probabilities[cL] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
2597 debug3(printf("left antiacceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2598 }
2599 }
2600
2601 for (cR = 0; cR < glengthR - 1; cR++) {
2602 splicesitepos = chroffset + rightoffset - cR + 1;
2603 if (right_known[cR]) {
2604 right_probabilities[cR] = 1.0;
2605 } else {
2606 right_probabilities[cR] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
2607 debug3(printf("right antidonor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2608 }
2609 }
2610 }
2611
2612 } else {
2613 if (cdna_direction > 0) {
2614 for (cL = 0; cL < glengthL - 1; cL++) {
2615 splicesitepos = chrhigh - leftoffset - cL + 1;
2616 if (left_known[cL]) {
2617 left_probabilities[cL] = 1.0;
2618 } else {
2619 left_probabilities[cL] = Maxent_hr_antidonor_prob(splicesitepos,chroffset);
2620 debug3(printf("left antidonor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2621 }
2622 }
2623
2624 for (cR = 0; cR < glengthR - 1; cR++) {
2625 splicesitepos = chrhigh - rightoffset + cR;
2626 if (right_known[cR]) {
2627 right_probabilities[cR] = 1.0;
2628 } else {
2629 right_probabilities[cR] = Maxent_hr_antiacceptor_prob(splicesitepos,chroffset);
2630 debug3(printf("right antiacceptor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2631 }
2632 }
2633
2634 } else {
2635 for (cL = 0; cL < glengthL - 1; cL++) {
2636 splicesitepos = chrhigh - leftoffset - cL + 1;
2637 if (left_known[cL]) {
2638 left_probabilities[cL] = 1.0;
2639 } else {
2640 left_probabilities[cL] = Maxent_hr_acceptor_prob(splicesitepos,chroffset);
2641 debug3(printf("left acceptor probability at cL %d is %f\n",cL,left_probabilities[cL]));
2642 }
2643 }
2644
2645 for (cR = 0; cR < glengthR - 1; cR++) {
2646 splicesitepos = chrhigh - rightoffset + cR;
2647 if (right_known[cR]) {
2648 right_probabilities[cR] = 1.0;
2649 } else {
2650 right_probabilities[cR] = Maxent_hr_donor_prob(splicesitepos,chroffset);
2651 debug3(printf("right donor probability at cR %d is %f\n",cR,right_probabilities[cR]));
2652 }
2653 }
2654 }
2655 }
2656
2657 /* Search using probs and without simultaneously */
2658 bestscore = NEG_INFINITY_32;
2659 bestprob_with_score = bestprob_with_dinucl = 0.0;
2660 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
2661 debug3(printf("\nAt row %d on left and %d on right\n",rL,rR));
2662 if ((cloL = rL - lbandL) < 1) {
2663 cloL = 1;
2664 }
2665 if ((chighL = rL + ubandL) > glengthL-1) {
2666 chighL = glengthL-1;
2667 }
2668
2669 if ((cloR = rR - lbandR) < 1) {
2670 cloR = 1;
2671 }
2672 if ((chighR = rR + ubandR) > glengthR-1) {
2673 chighR = glengthR-1;
2674 }
2675
2676 debug3(printf("A. Test no indels\n"));
2677 cL = rL;
2678 probL = left_probabilities[cL];
2679 scoreL = (int) matrixL[cL][rL];
2680
2681 cR = rR;
2682 probR = right_probabilities[cR];
2683 scoreR = (int) matrixR[cR][rR];
2684
2685 #ifdef USE_SCOREI
2686 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2687 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2688 #else
2689 scoreI = 0;
2690 #endif
2691
2692 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2693 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2694 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2695 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2696 bestscore = score;
2697 *bestrL = rL;
2698 *bestrR = rR;
2699 *bestcL = cL;
2700 *bestcR = cR;
2701 bestprob_with_score = probL + probR;
2702 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2703 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2704 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2705 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2706 *bestrL = rL;
2707 *bestrR = rR;
2708 *bestcL = cL;
2709 *bestcR = cR;
2710 bestprob_with_score = probL + probR;
2711 } else {
2712 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2713 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2714 }
2715
2716 /* Perform only without indels */
2717 if (scoreI > 0) {
2718 debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
2719 cL,cR,scoreI,probL,probR,probL+probR));
2720 if (probL + probR > bestprob_with_dinucl) {
2721 bestscore_with_dinucl = scoreL + scoreI + scoreR;
2722 bestcL_with_dinucl = cL;
2723 bestcR_with_dinucl = cR;
2724 bestrL_with_dinucl = rL;
2725 bestrR_with_dinucl = rR;
2726 bestprob_with_dinucl = probL + probR;
2727 }
2728 }
2729
2730
2731 debug3(printf("B. Test indel on right\n"));
2732 /* Test indel on right */
2733 cL = rL;
2734 probL = left_probabilities[cL];
2735 scoreL = (int) matrixL[cL][rL];
2736 #if 0
2737 if (directionsL_nogap[cL][rL] != DIAG) {
2738 /* Favor gaps away from intron if possible */
2739 scoreL -= 100;
2740 }
2741 #endif
2742
2743 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2744 for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
2745 probR = right_probabilities[cR];
2746 scoreR = (int) matrixR[cR][rR];
2747 #if 0
2748 if (directionsR_nogap[cR][rR] != DIAG) {
2749 /* Favor gaps away from intron if possible */
2750 scoreR -= 100;
2751 }
2752 #endif
2753
2754 #ifdef USE_SCOREI
2755 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2756 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2757 #else
2758 scoreI = 0;
2759 #endif
2760
2761 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2762 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2763 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2764 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2765 bestscore = score;
2766 *bestrL = rL;
2767 *bestrR = rR;
2768 *bestcL = cL;
2769 *bestcR = cR;
2770 bestprob_with_score = probL + probR;
2771 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2772 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2773 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2774 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2775 *bestrL = rL;
2776 *bestrR = rR;
2777 *bestcL = cL;
2778 *bestcR = cR;
2779 bestprob_with_score = probL + probR;
2780 } else {
2781 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2782 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2783 }
2784 }
2785
2786 debug3(printf("C. Test indel on left (3)\n"));
2787 /* Test indel on left */
2788 cR = rR;
2789 probR = right_probabilities[cR];
2790 scoreR = (int) matrixR[cR][rR];
2791 #if 0
2792 if (directionsR_nogap[cR][rR] != DIAG) {
2793 /* Favor gaps away from intron if possible */
2794 scoreR -= 100;
2795 }
2796 #endif
2797
2798 /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
2799 for (cL = cloL; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
2800 probL = left_probabilities[cL];
2801 scoreL = (int) matrixL[cL][rL];
2802 #if 0
2803 if (directionsL_nogap[cL][rL] != DIAG) {
2804 /* Favor gaps away from intron if possible */
2805 scoreL -= 100;
2806 }
2807 #endif
2808
2809 #ifdef USE_SCOREI
2810 /* scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp); */
2811 scoreI = intron_score_array[leftdi[cL] & rightdi[cR]];
2812 #else
2813 scoreI = 0;
2814 #endif
2815
2816 if ((score = scoreL + scoreI + scoreR) > bestscore) {
2817 debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2818 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2819 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2820 bestscore = score;
2821 *bestrL = rL;
2822 *bestrR = rR;
2823 *bestcL = cL;
2824 *bestcR = cR;
2825 bestprob_with_score = probL + probR;
2826 } else if (score == bestscore && probL + probR > bestprob_with_score) {
2827 debug3(printf("Improved prob: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2828 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2829 debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
2830 *bestrL = rL;
2831 *bestrR = rR;
2832 *bestcL = cL;
2833 *bestcR = cR;
2834 bestprob_with_score = probL + probR;
2835 } else {
2836 debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
2837 cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
2838 }
2839 }
2840 }
2841
2842 if (bestprob_with_score > 2*PROB_CEILING) {
2843 /* Probability is good with best alignment, so take that */
2844 debug(printf("Best alignment based on score alone has good probability\n"));
2845 use_dinucl_p = false; /* was previously true (bug) */
2846 } else if (bestprob_with_dinucl == 0.0) {
2847 debug(printf("No dinucleotides found\n"));
2848 use_dinucl_p = false; /* was previously true (bug) */
2849 } else if (0 && left_probabilities[bestcL_with_dinucl] < PROB_CEILING && right_probabilities[bestcR_with_dinucl] < PROB_CEILING) {
2850 /* Dinucleotide-based solution is bad, so use alignment */
2851 debug(printf("Dinucleotide-based solution is bad on both sites\n"));
2852 use_dinucl_p = false;
2853 } else if (bestscore_with_dinucl < 0 || bestscore_with_dinucl < bestscore - 9) {
2854 debug(printf("Dinucleotide-based solution requires very bad alignment\n"));
2855 use_dinucl_p = false;
2856 } else {
2857 use_dinucl_p = true;
2858 }
2859
2860 debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f)\n",bestscore,bestprob_with_score));
2861 if (use_dinucl_p == true) {
2862 debug(printf("Non-SIMD. bestscore %d (bestprob_with_score %f) vs bestscore_with_dinucl %d (bestprob_with_dinucl %f and %f)\n",
2863 bestscore,bestprob_with_score,bestscore_with_dinucl,left_probabilities[bestcL_with_dinucl],right_probabilities[bestcR_with_dinucl]));
2864 /* Best alignment yields bad probability, and dinucleotide-based alignment yields good probability, so switch */
2865 debug(printf("Switch to dinucleotide-based solution\n"));
2866 *bestcL = bestcL_with_dinucl;
2867 *bestcR = bestcR_with_dinucl;
2868 *bestrL = bestrL_with_dinucl;
2869 *bestrR = bestrR_with_dinucl;
2870 bestscore = bestscore_with_dinucl;
2871 }
2872
2873
2874 FREEA(rightdi);
2875 FREEA(leftdi);
2876 FREEA(left_probabilities);
2877 FREEA(right_probabilities);
2878
2879 if (bestscore < 0) {
2880 return bestscore;
2881 } else if (halfp == true) {
2882 /* scoreI = intron_score(&introntype,leftdi[*bestcL],rightdi[*bestcR],cdna_direction,canonical_reward,finalp); */
2883 scoreI = intron_score_array[leftdi[*bestcL] & rightdi[*bestcR]];
2884 return (bestscore - scoreI/2);
2885 } else {
2886 return bestscore;
2887 }
2888 }
2889
2890
2891 static int
bridge_intron_gap(int * bestrL,int * bestrR,int * bestcL,int * bestcR,int * best_introntype,double * left_prob,double * right_prob,Score32_T ** matrixL,Score32_T ** matrixR,Direction32_T ** directionsL_nogap,Direction32_T ** directionsR_nogap,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int goffsetL,int rev_goffsetR,int rlength,int glengthL,int glengthR,int cdna_direction,bool watsonp,int extraband_paired,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,bool halfp,bool finalp,bool jump_late_p)2892 bridge_intron_gap (int *bestrL, int *bestrR, int *bestcL, int *bestcR,
2893 int *best_introntype, double *left_prob, double *right_prob,
2894 Score32_T **matrixL, Score32_T **matrixR,
2895 Direction32_T **directionsL_nogap, Direction32_T **directionsR_nogap,
2896 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
2897 int goffsetL, int rev_goffsetR, int rlength, int glengthL, int glengthR,
2898 int cdna_direction, bool watsonp, int extraband_paired,
2899 int leftoffset, int rightoffset, Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
2900 bool halfp, bool finalp, bool jump_late_p) {
2901 int finalscore;
2902 int *left_known, *right_known;
2903 int ubandL, lbandL, ubandR, lbandR;
2904
2905
2906 if (glengthL+1 <= 0) {
2907 fprintf(stderr,"Problem with glengthL = %d\n",glengthL);
2908 abort();
2909 }
2910
2911 if (glengthR+1 <= 0) {
2912 fprintf(stderr,"Problem with glengthR = %d\n",glengthR);
2913 abort();
2914 }
2915
2916 #if 1
2917 /* Allows unlimited indel lengths */
2918 ubandL = glengthL - rlength + extraband_paired;
2919 lbandL = extraband_paired;
2920
2921 ubandR = glengthR - rlength + extraband_paired;
2922 lbandR = extraband_paired;
2923 #else
2924 /* Limit indels to 3 bp around splice sites. Doesn't work on PacBio reads. */
2925 ubandL = 3;
2926 lbandL = 3;
2927
2928 ubandR = 3;
2929 lbandR = 3;
2930 #endif
2931
2932 left_known = (int *) CALLOCA(glengthL+1,sizeof(int));
2933 right_known = (int *) CALLOCA(glengthR+1,sizeof(int));
2934 get_known_splicesites(left_known,right_known,glengthL,glengthR,
2935 /*leftoffset*/goffsetL,/*rightoffset*/rev_goffsetR,
2936 cdna_direction,watsonp,chrnum,chroffset,chrhigh);
2937
2938 if (novelsplicingp == false && splicing_iit != NULL && (donor_typeint < 0 || acceptor_typeint < 0)) {
2939 /* Constrain to given introns */
2940 finalscore = bridge_intron_gap_intron_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),&(*best_introntype),
2941 matrixL,matrixR,directionsL_nogap,directionsR_nogap,
2942 left_known,right_known,rlength,glengthL,glengthR,
2943 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2944 leftoffset,rightoffset,chrnum,chroffset,chrhigh,jump_late_p);
2945
2946 } else {
2947 finalscore = bridge_intron_gap_site_level(&(*bestrL),&(*bestrR),&(*bestcL),&(*bestcR),
2948 matrixL,matrixR,directionsL_nogap,directionsR_nogap,
2949 gsequenceL,gsequenceL_alt,rev_gsequenceR,rev_gsequenceR_alt,goffsetL,rev_goffsetR,
2950 left_known,right_known,rlength,glengthL,glengthR,
2951 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
2952 leftoffset,rightoffset,chroffset,chrhigh,halfp,finalp);
2953 }
2954
2955
2956 #if 0
2957 /* Determine if result meets given constraints */
2958 if (*finalscore < 0) {
2959 result = false;
2960 } else if (novelsplicingp == true) {
2961 result = true;
2962 } else if (splicing_iit == NULL) {
2963 result = true;
2964 } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
2965 /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
2966 if (left_known[*bestcL] == 0 || right_known[*bestcR] == 0) {
2967 debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
2968 result = false;
2969 } else {
2970 result = true;
2971 }
2972 } else {
2973 /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
2974 result = true;
2975 }
2976 #endif
2977
2978 if (finalscore >= 0) {
2979 get_splicesite_probs(&(*left_prob),&(*right_prob),*bestcL,*bestcR,
2980 left_known,right_known,leftoffset,rightoffset,chroffset,chrhigh,
2981 cdna_direction,watsonp);
2982 }
2983
2984 FREEA(right_known);
2985 FREEA(left_known);
2986
2987 #if defined(DEBUG) || defined(DEBUG3)
2988 printf("Returning final score of %d at (%d,%d) left to (%d,%d) right, with probs %f and %f\n",
2989 finalscore,*bestrL,*bestcL,*bestrR,*bestcR,*left_prob,*right_prob);
2990 #endif
2991
2992 return finalscore;
2993 }
2994 #endif
2995
2996
2997 static List_T
genome_gap_simple(int * finalscore,int * best_introntype,int * new_leftgenomepos,int * new_rightgenomepos,double * left_prob,double * right_prob,int * exonhead,int * nmatches,int * nmismatches,char * rsequence,char * rsequenceuc,char * rev_rsequence,char * rev_rsequenceuc,int rlength,char * gsequenceL,char * gsequenceL_alt,char * rev_gsequenceR,char * rev_gsequenceR_alt,int roffset,int rev_roffset,int leftoffset,int rightoffset,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,Pairpool_T pairpool,int mismatchtype,int cdna_direction,bool watsonp,int genestrand,int dynprogindex,bool halfp)2998 genome_gap_simple (int *finalscore, int *best_introntype, int *new_leftgenomepos, int *new_rightgenomepos,
2999 double *left_prob, double *right_prob, int *exonhead, int *nmatches, int *nmismatches,
3000 char *rsequence, char *rsequenceuc, char *rev_rsequence, char *rev_rsequenceuc, int rlength,
3001 char *gsequenceL, char *gsequenceL_alt, char *rev_gsequenceR, char *rev_gsequenceR_alt,
3002 int roffset, int rev_roffset, int leftoffset, int rightoffset,
3003 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh, Pairpool_T pairpool,
3004 int mismatchtype, int cdna_direction, bool watsonp, int genestrand, int dynprogindex, bool halfp) {
3005 List_T pairs = NULL;
3006 Pair_T gappair;
3007 bool result;
3008 int bestrL, bestrR, rL, rR, r;
3009 int querycoord, genomecoord;
3010 int na1, na2, na2_alt, c1, c1_uc, c2, c2_alt;
3011 char left1, left1_alt, left2, left2_alt, right2, right2_alt, right1, right1_alt;
3012 int bestscore = 0, bestscoreI = 0, scoreL, scoreI, scoreR, pairscore, score;
3013 int leftdi, rightdi;
3014 int *left_known, *right_known;
3015 int introntype;
3016 Pairdistance_T **pairdistance_array_type;
3017
3018 int *intron_score_array;
3019
3020 /* Assuming finalp is false */
3021 if (cdna_direction > 0) {
3022 intron_score_array = intron_score_array_sense_prelim;
3023 } else if (cdna_direction < 0) {
3024 intron_score_array = intron_score_array_antisense_prelim;
3025 } else {
3026 intron_score_array = intron_score_array_either_prelim;
3027 }
3028
3029
3030 debug(printf("Starting genome_gap_simple with cdna_direction %d and watsonp %d\n",cdna_direction,watsonp));
3031 pairdistance_array_type = pairdistance_array[mismatchtype];
3032
3033 left_known = (int *) CALLOCA(rlength+1,sizeof(int));
3034 right_known = (int *) CALLOCA(rlength+1,sizeof(int));
3035 get_known_splicesites(left_known,right_known,/*glengthL*/rlength,/*glengthR*/rlength,
3036 leftoffset,rightoffset,
3037 cdna_direction,watsonp,chrnum,chroffset,chrhigh);
3038
3039 scoreR = 0;
3040 for (rR = 1; rR < rlength; rR++) {
3041 na1 = rev_rsequenceuc[1-rR];
3042 na2 = rev_gsequenceR[1-rR];
3043 na2_alt = rev_gsequenceR_alt[1-rR];
3044 pairscore = pairdistance_array_type[na1][na2];
3045 if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3046 pairscore = score;
3047 }
3048 scoreR += pairscore;
3049 }
3050
3051 scoreL = 0;
3052 for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
3053 na1 = rsequenceuc[rL-1];
3054 na2 = gsequenceL[rL-1];
3055 na2_alt = gsequenceL_alt[rL-1];
3056 pairscore = pairdistance_array_type[na1][na2];
3057 if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3058 pairscore = score;
3059 }
3060 scoreL += pairscore;
3061
3062 /* Read dinucleotides */
3063 left1 = gsequenceL[rL];
3064 left1_alt = gsequenceL_alt[rL];
3065 left2 = gsequenceL[rL+1];
3066 left2_alt = gsequenceL_alt[rL+1];
3067
3068 if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
3069 leftdi = LEFT_GT;
3070 } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
3071 leftdi = LEFT_GC;
3072 } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
3073 leftdi = LEFT_AT;
3074 #ifndef PMAP
3075 } else if ((left1 == 'C' || left1_alt == 'C') && (left2 == 'T' || left2_alt == 'T')) {
3076 leftdi = LEFT_CT;
3077 #endif
3078 } else {
3079 leftdi = 0x00;
3080 }
3081
3082 right2 = rev_gsequenceR[-rR-1];
3083 right2_alt = rev_gsequenceR_alt[-rR-1];
3084 right1 = rev_gsequenceR[-rR];
3085 right1_alt = rev_gsequenceR_alt[-rR];
3086 if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
3087 rightdi = RIGHT_AG;
3088 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
3089 rightdi = RIGHT_AC;
3090 #ifndef PMAP
3091 } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
3092 rightdi = RIGHT_GC;
3093 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
3094 rightdi = RIGHT_AT;
3095 #endif
3096 } else {
3097 rightdi = 0x00;
3098 }
3099
3100 #if 0
3101 scoreI = intron_score(&introntype,leftdi,rightdi,cdna_direction,canonical_reward,/*finalp*/false);
3102 #endif
3103 introntype = leftdi & rightdi;
3104 scoreI = intron_score_array[introntype];
3105
3106 if ((introntype != NONINTRON || left_known[rL] > 0 || right_known[rR] > 0) &&
3107 (score = scoreL + left_known[rL] + scoreI + right_known[rR] + scoreR) >= bestscore) { /* Use >= for jump late */
3108 debug(printf("At %d left (%c%c) to %d right (%c%c), score is (%d)+(%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
3109 rL,left1,left2,rR,right2,right1,scoreL,left_known[rL],scoreI,right_known[rR],scoreR,
3110 scoreL+left_known[rL]+scoreI+right_known[rR]+scoreR));
3111 bestscore = score;
3112 bestscoreI = scoreI;
3113 bestrL = /* *bestcL = */ rL;
3114 bestrR = /* *bestcR = */ rR;
3115 *best_introntype = introntype;
3116 } else {
3117 debug(printf("At %d left (%c%c) to %d right (%c%c), score is (%d)+(%d)+(%d)+(%d)+(%d) = %d\n",
3118 rL,left1,left2,rR,right2,right1,scoreL,left_known[rL],scoreI,right_known[rR],scoreR,
3119 scoreL+left_known[rL]+scoreI+right_known[rR]+scoreR));
3120 }
3121
3122 /* Subtract pairscore from cumulative scoreR */
3123 na1 = rev_rsequenceuc[1-rR];
3124 na2 = rev_gsequenceR[1-rR];
3125 na2_alt = rev_gsequenceR_alt[1-rR];
3126 pairscore = pairdistance_array_type[na1][na2];
3127 if ((score = pairdistance_array_type[na1][na2_alt]) > pairscore) {
3128 pairscore = score;
3129 }
3130 scoreR -= pairscore;
3131 }
3132
3133 if (halfp == true) {
3134 *finalscore = (int) (bestscore - bestscoreI/2);
3135 } else {
3136 *finalscore = (int) bestscore;
3137 }
3138
3139 if (*finalscore <= 0) {
3140 result = false;
3141 } else if (novelsplicingp == true) {
3142 result = true;
3143 } else if (splicing_iit == NULL) {
3144 result = true;
3145 } else if (donor_typeint >= 0 && acceptor_typeint >= 0) {
3146 /* If novelsplicingp is false and using splicing at splice site level, require sites to be known */
3147 if (left_known[bestrL] == 0 || right_known[bestrR] == 0) {
3148 debug(printf("Novel splicing not allowed, so bridge_intron_gap returning false\n"));
3149 result = false;
3150 } else {
3151 result = true;
3152 }
3153 } else {
3154 /* If novelsplicingp is false and using splicing at splice site level, result was already constrained */
3155 result = true;
3156 }
3157
3158 if (result == true) {
3159 get_splicesite_probs(&(*left_prob),&(*right_prob),bestrL,bestrR,
3160 left_known,right_known,leftoffset,rightoffset,
3161 chroffset,chrhigh,cdna_direction,watsonp);
3162 debug(printf("Probabilities are %f and %f\n",*left_prob,*right_prob));
3163 if (*left_prob < 0.90 || *right_prob < 0.90) {
3164 result = false;
3165 }
3166 }
3167
3168 if (result == true) {
3169 *nmatches = *nmismatches = 0;
3170
3171 /* Push from left to right, so we don't need to do List_reverse() later */
3172 for (r = 1; r <= bestrL; r++) {
3173 querycoord = genomecoord = r-1;
3174
3175 c1 = rsequence[querycoord];
3176 c1_uc = rsequenceuc[querycoord];
3177 c2 = gsequenceL[genomecoord];
3178 c2_alt = gsequenceL_alt[genomecoord];
3179
3180 if (c2 == '*') {
3181 /* Don't push pairs past end of chromosome */
3182 debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
3183 leftoffset,genomecoord,chroffset,chrhigh,watsonp));
3184
3185 } else if (c1_uc == c2 || c1_uc == c2_alt) {
3186 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - match\n",
3187 r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3188 *nmatches += 1;
3189 pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3190 c1,DYNPROG_MATCH_COMP,c2,c2_alt,dynprogindex);
3191
3192 } else if (Dynprog_consistent_p(c1_uc,/*g*/c2,/*g_alt*/c2_alt,genestrand) == true) {
3193 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - ambiguous\n",
3194 r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3195 *nmatches += 1;
3196 pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3197 c1,AMBIGUOUS_COMP,c2,c2_alt,dynprogindex);
3198
3199 } else {
3200 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - mismatch\n",
3201 r,/*c*/r,roffset+querycoord,leftoffset+genomecoord,c1_uc,c2));
3202 *nmismatches += 1;
3203 pairs = Pairpool_push(pairs,pairpool,roffset+querycoord,leftoffset+genomecoord,
3204 c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
3205 }
3206 }
3207
3208 debug(printf("Pushing a gap\n"));
3209 *new_leftgenomepos = leftoffset+(bestrL-1);
3210 *new_rightgenomepos = *exonhead = rightoffset-(bestrR-1);
3211 #ifndef NOGAPHOLDER
3212 pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/0,/*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3213 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3214 gappair = (Pair_T) pairs->first;
3215 gappair->introntype = introntype;
3216 gappair->donor_prob = *left_prob;
3217 gappair->acceptor_prob = *right_prob;
3218 #endif
3219
3220 for (r = bestrR; r > 0; r--) {
3221 querycoord = genomecoord = 1-r;
3222
3223 c1 = rev_rsequence[querycoord];
3224 c1_uc = rev_rsequenceuc[querycoord];
3225 c2 = rev_gsequenceR[genomecoord];
3226 c2_alt = rev_gsequenceR_alt[genomecoord];
3227
3228 if (c2 == '*') {
3229 /* Don't push pairs past end of chromosome */
3230 debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
3231 rightoffset,genomecoord,chroffset,chrhigh,watsonp));
3232
3233 } else if (c1_uc == c2 || c1_uc == c2_alt) {
3234 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - match\n",
3235 r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3236 *nmatches += 1;
3237 pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3238 c1,DYNPROG_MATCH_COMP,c2,c2_alt,dynprogindex);
3239
3240 } else if (Dynprog_consistent_p(c1_uc,/*g*/c2,/*g_alt*/c2_alt,genestrand) == true) {
3241 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - ambiguous\n",
3242 r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3243 *nmatches += 1;
3244 pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3245 c1,AMBIGUOUS_COMP,c2,c2_alt,dynprogindex);
3246
3247 } else {
3248 debug(printf("Pushing simple %d,%d [%d,%d] (%c,%c) - mismatch\n",
3249 r,/*c*/r,rev_roffset+querycoord,rightoffset+genomecoord,c1_uc,c2));
3250 *nmismatches += 1;
3251 pairs = Pairpool_push(pairs,pairpool,rev_roffset+querycoord,rightoffset+genomecoord,
3252 c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
3253 }
3254 }
3255
3256 }
3257
3258 FREEA(right_known);
3259 FREEA(left_known);
3260
3261 return pairs;
3262 }
3263
3264
3265
3266
3267 /* A genome gap is usually an intron. Sequence 2L and 2R represent
3268 the two genomic ends of the intron. */
3269 List_T
Dynprog_genome_gap(int * dynprogindex,int * finalscore,int * new_leftgenomepos,int * new_rightgenomepos,double * left_prob,double * right_prob,int * nmatches,int * nmismatches,int * nopens,int * nindels,int * exonhead,int * introntype,T dynprogL,T dynprogR,char * rsequence,char * rsequenceuc,int rlength,int glengthL,int glengthR,int roffset,int goffsetL,int rev_goffsetR,Chrnum_T chrnum,Univcoord_T chroffset,Univcoord_T chrhigh,int cdna_direction,bool watsonp,int genestrand,bool jump_late_p,Pairpool_T pairpool,int extraband_paired,double defect_rate,int maxpeelback,bool halfp,bool finalp)3270 Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos, int *new_rightgenomepos,
3271 double *left_prob, double *right_prob,
3272 int *nmatches, int *nmismatches, int *nopens, int *nindels, int *exonhead, int *introntype,
3273 T dynprogL, T dynprogR,
3274 char *rsequence, char *rsequenceuc, int rlength, int glengthL, int glengthR,
3275 int roffset, int goffsetL, int rev_goffsetR,
3276 Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
3277 int cdna_direction, bool watsonp, int genestrand,
3278 bool jump_late_p, Pairpool_T pairpool, int extraband_paired,
3279 double defect_rate, int maxpeelback, bool halfp, bool finalp) {
3280 List_T pairs = NULL;
3281 Pair_T gappair;
3282 char *gsequenceL, *gsequenceL_alt, *rev_gsequenceR, *rev_gsequenceR_alt;
3283 char *rev_rsequence, *rev_rsequenceuc;
3284 Mismatchtype_T mismatchtype;
3285 /* int canonical_reward; */
3286 int rev_roffset, bestrL = -1, bestrR, bestcL, bestcR;
3287 int lbandL, ubandL, lbandR, ubandR;
3288 int open, extend;
3289 #if defined(HAVE_SSE2)
3290 Score8_T **matrix8L_upper, **matrix8L_lower, **matrix8R_upper, **matrix8R_lower;
3291 Direction8_T **directions8L_upper_nogap, **directions8L_upper_Egap,
3292 **directions8L_lower_nogap, **directions8L_lower_Egap,
3293 **directions8R_upper_nogap, **directions8R_upper_Egap,
3294 **directions8R_lower_nogap, **directions8R_lower_Egap;
3295 bool use8p;
3296
3297 Score16_T **matrix16L_upper, **matrix16L_lower, **matrix16R_upper, **matrix16R_lower;
3298 Direction16_T **directions16L_upper_nogap, **directions16L_upper_Egap,
3299 **directions16L_lower_nogap, **directions16L_lower_Egap,
3300 **directions16R_upper_nogap, **directions16R_upper_Egap,
3301 **directions16R_lower_nogap, **directions16R_lower_Egap;
3302 #else
3303 Score32_T **matrixL, **matrixR;
3304 Direction32_T **directionsL_nogap, **directionsL_Egap, **directionsL_Fgap,
3305 **directionsR_nogap, **directionsR_Egap, **directionsR_Fgap;
3306 #endif
3307 /* int queryjump, genomejump; */
3308
3309 debug(printf("\n"));
3310 debug(printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'));
3311 debug(printf("Aligning genome gap with cdna_direction %d, defect_rate %f\n",cdna_direction,defect_rate));
3312 #ifdef EXTRACT_GENOMICSEG
3313 debug(printf("At genomic offset %d-%d, %.*s\n",goffsetL,goffsetL+glengthL-1,glengthL,gsequenceL));
3314 debug(printf("At genomic offset %d-%d, %.*s\n",rev_goffsetR-glengthR+1,rev_goffsetR,glengthR,&(rev_gsequenceR[-glengthR+1])));
3315 #endif
3316 debug(printf("\n"));
3317
3318 /* ?check if offsets are too close. But this eliminates a segment
3319 of the cDNA. Should check in stage 3, and do single gap instead. */
3320 /*
3321 if (goffsetL+glengthL-1 >= rev_goffsetR-glengthR+1) {
3322 debug(printf("Bounds don't make sense\n"));
3323 *finalscore = NEG_INFINITY_16;
3324 return NULL;
3325 }
3326 */
3327
3328 *nmatches = *nmismatches = *nopens = *nindels = 0;
3329 *left_prob = *right_prob = 0.0;
3330 *introntype = NONINTRON;
3331 if (rlength <= 1) {
3332 *finalscore = NEG_INFINITY_32;
3333 return (List_T) NULL;
3334 }
3335
3336 if (defect_rate < DEFECT_HIGHQ) {
3337 mismatchtype = HIGHQ;
3338 if (rlength > maxpeelback * 4) {
3339 debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3340 rlength,maxpeelback));
3341 open = SINGLE_OPEN_HIGHQ;
3342 extend = SINGLE_EXTEND_HIGHQ;
3343 } else {
3344 open = PAIRED_OPEN_HIGHQ;
3345 extend = PAIRED_EXTEND_HIGHQ;
3346 }
3347 #if 0
3348 if (splicingp == false) {
3349 canonical_reward = 0;
3350 } else if (finalp == true) {
3351 canonical_reward = FINAL_CANONICAL_INTRON_HIGHQ;
3352 } else {
3353 canonical_reward = CANONICAL_INTRON_HIGHQ;
3354 }
3355 #endif
3356
3357 } else if (defect_rate < DEFECT_MEDQ) {
3358 mismatchtype = MEDQ;
3359 if (rlength > maxpeelback * 4) {
3360 debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3361 rlength,maxpeelback));
3362 open = SINGLE_OPEN_MEDQ;
3363 extend = SINGLE_EXTEND_MEDQ;
3364 } else {
3365 open = PAIRED_OPEN_MEDQ;
3366 extend = PAIRED_EXTEND_MEDQ;
3367 }
3368 #if 0
3369 if (splicingp == false) {
3370 canonical_reward = 0;
3371 } else if (finalp == true) {
3372 canonical_reward = FINAL_CANONICAL_INTRON_MEDQ;
3373 } else {
3374 canonical_reward = CANONICAL_INTRON_MEDQ;
3375 }
3376 #endif
3377
3378 } else {
3379 mismatchtype = LOWQ;
3380 if (rlength > maxpeelback * 4) {
3381 debug(printf("rlength %d is greater than maxpeelback %d * 4, so using single gap penalties\n",
3382 rlength,maxpeelback));
3383 open = SINGLE_OPEN_LOWQ;
3384 extend = SINGLE_EXTEND_LOWQ;
3385 } else {
3386 open = PAIRED_OPEN_LOWQ;
3387 extend = PAIRED_EXTEND_LOWQ;
3388 }
3389 #if 0
3390 if (splicingp == false) {
3391 canonical_reward = 0;
3392 } else if (finalp == true) {
3393 canonical_reward = FINAL_CANONICAL_INTRON_LOWQ;
3394 } else {
3395 canonical_reward = CANONICAL_INTRON_LOWQ;
3396 }
3397 #endif
3398 }
3399
3400 if (rlength > dynprogL->max_rlength || glengthL > dynprogL->max_glength) {
3401 debug(printf("rlength %d or glengthL %d is too long. Returning NULL\n",rlength,glengthL));
3402 *new_leftgenomepos = goffsetL-1;
3403 *new_rightgenomepos = rev_goffsetR+1;
3404 *exonhead = rev_roffset = roffset+rlength-1;
3405 #ifndef NOGAPHOLDER
3406 /*
3407 queryjump = rev_roffset - roffset + 1;
3408 genomejump = rev_goffsetR - goffsetL + 1;
3409 pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,false);
3410 */
3411 #endif
3412 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3413 *finalscore = NEG_INFINITY_32;
3414 *introntype = NONINTRON;
3415 return (List_T) NULL;
3416 }
3417
3418 if (rlength > dynprogR->max_rlength || glengthR > dynprogR->max_glength) {
3419 debug(printf("rlength %d or glengthR %d is too long. Returning NULL\n",rlength,glengthR));
3420 *new_leftgenomepos = goffsetL-1;
3421 *new_rightgenomepos = rev_goffsetR+1;
3422 *exonhead = rev_roffset = roffset+rlength-1;
3423 #ifndef NOGAPHOLDER
3424 /*
3425 queryjump = rev_roffset - roffset + 1;
3426 genomejump = rev_goffsetR - goffsetL + 1;
3427 pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,false);
3428 */
3429 #endif
3430 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3431 *finalscore = NEG_INFINITY_32;
3432 *introntype = NONINTRON;
3433 return (List_T) NULL;
3434 }
3435
3436 rev_rsequence = &(rsequence[rlength-1]);
3437 rev_rsequenceuc = &(rsequenceuc[rlength-1]);
3438 debug(printf("At query offset %d-%d, %.*s\n",roffset,roffset+rlength-1,rlength,rsequence));
3439
3440 rev_roffset = roffset+rlength-1;
3441
3442 gsequenceL = (char *) MALLOCA((glengthL+1) * sizeof(char));
3443 gsequenceL_alt = (char *) MALLOCA((glengthL+1) * sizeof(char));
3444 rev_gsequenceR = (char *) MALLOCA((glengthR+1) * sizeof(char));
3445 rev_gsequenceR_alt = (char *) MALLOCA((glengthR+1) * sizeof(char));
3446
3447 if (watsonp) {
3448 Genome_get_segment_blocks_right(gsequenceL,gsequenceL_alt,/*left*/chroffset+goffsetL,
3449 glengthL,chrhigh,/*revcomp*/false);
3450 Genome_get_segment_blocks_left(rev_gsequenceR,rev_gsequenceR_alt,/*right*/chroffset+rev_goffsetR+1,
3451 glengthR,chroffset,/*revcomp*/false);
3452 } else {
3453 Genome_get_segment_blocks_left(gsequenceL,gsequenceL_alt,/*right*/chrhigh-goffsetL+1,
3454 glengthL,chroffset,/*revcomp*/true);
3455 Genome_get_segment_blocks_right(rev_gsequenceR,rev_gsequenceR_alt,/*left*/chrhigh-rev_goffsetR,
3456 glengthR,chrhigh,/*revcomp*/true);
3457 }
3458 if (gsequenceL[0] == '\0' || rev_gsequenceR[0] == '\0') {
3459 FREEA(rev_gsequenceR_alt);
3460 FREEA(rev_gsequenceR);
3461 FREEA(gsequenceL_alt);
3462 FREEA(gsequenceL);
3463 *finalscore = NEG_INFINITY_32;
3464 return (List_T) NULL;
3465 }
3466
3467
3468 debug(printf("At genomic offset %d-%d, %.*s\n",goffsetL,goffsetL+glengthL-1,glengthL,gsequenceL));
3469 debug(printf("At genomic offset %d-%d, %.*s\n",rev_goffsetR-glengthR+1,rev_goffsetR,glengthR,rev_gsequenceR));
3470
3471 /* In low-identity alignments, the simple procedure can
3472 lead to multiple mismatches, which will invalidate the intron
3473 because of its neighborhood */
3474 if (finalp == false && defect_rate < DEFECT_MEDQ &&
3475 (pairs = genome_gap_simple(&(*finalscore),&(*introntype),&(*new_leftgenomepos),&(*new_rightgenomepos),
3476 &(*left_prob),&(*right_prob),&(*exonhead),&(*nmatches),&(*nmismatches),
3477 rsequence,rsequenceuc,rev_rsequence,rev_rsequenceuc,
3478 rlength,gsequenceL,gsequenceL_alt,
3479 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3480 roffset,rev_roffset,goffsetL,rev_goffsetR,
3481 chrnum,chroffset,chrhigh,pairpool,mismatchtype,
3482 cdna_direction,watsonp,genestrand,*dynprogindex,halfp)) != NULL) {
3483 debug(printf("simple procedure worked\n"));
3484
3485 FREEA(rev_gsequenceR_alt);
3486 FREEA(rev_gsequenceR);
3487 FREEA(gsequenceL_alt);
3488 FREEA(gsequenceL);
3489
3490 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3491 return pairs; /* Already reversed */
3492 }
3493
3494
3495 #if defined(HAVE_SSE2)
3496 /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
3497 if (rlength < use8p_size[mismatchtype] || (glengthL < use8p_size[mismatchtype] && glengthR < use8p_size[mismatchtype])) {
3498 use8p = true;
3499 } else {
3500 use8p = false;
3501 }
3502 #endif
3503
3504 #if defined(HAVE_SSE2)
3505 if (use8p == true) {
3506 Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3507 debug3(printf("Computing matrix8L_upper\n"));
3508 matrix8L_upper = Dynprog_simd_8_upper(&directions8L_upper_nogap,&directions8L_upper_Egap,dynprogL,
3509 rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3510 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3511 goffsetL,chroffset,chrhigh,watsonp,
3512 #endif
3513 mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
3514
3515 debug3(printf("Computing matrix8L_lower\n"));
3516 matrix8L_lower = Dynprog_simd_8_lower(&directions8L_lower_nogap,&directions8L_lower_Egap,dynprogL,
3517 rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3518 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3519 goffsetL,chroffset,chrhigh,watsonp,
3520 #endif
3521 mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
3522
3523
3524 Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3525 debug3(printf("Computing matrix8R_upper\n"));
3526 matrix8R_upper = Dynprog_simd_8_upper(&directions8R_upper_nogap,&directions8R_upper_Egap,dynprogR,
3527 rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3528 rlength,glengthR,
3529 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3530 rev_goffsetR,chroffset,chrhigh,watsonp,
3531 #endif
3532 mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
3533
3534 debug3(printf("Computing matrix8R_lower\n"));
3535 matrix8R_lower = Dynprog_simd_8_lower(&directions8R_lower_nogap,&directions8R_lower_Egap,dynprogR,
3536 rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3537 rlength,glengthR,
3538 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3539 rev_goffsetR,chroffset,chrhigh,watsonp,
3540 #endif
3541 mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
3542
3543 if ((*finalscore = bridge_intron_gap_8_ud(&bestrL,&bestrR,&bestcL,&bestcR,
3544 &(*introntype),&(*left_prob),&(*right_prob),
3545 matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
3546 directions8L_upper_nogap,directions8L_lower_nogap,
3547 directions8R_upper_nogap,directions8R_lower_nogap,
3548 gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3549 goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3550 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
3551 goffsetL,rev_goffsetR,
3552 chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3553 FREEA(rev_gsequenceR_alt);
3554 FREEA(rev_gsequenceR);
3555 FREEA(gsequenceL_alt);
3556 FREEA(gsequenceL);
3557
3558 return (List_T) NULL;
3559
3560 } else {
3561 *new_leftgenomepos = goffsetL+(bestcL-1);
3562 *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3563 debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3564
3565 *exonhead = rev_roffset-(bestrR-1);
3566
3567 if (bestcR >= bestrR) {
3568 pairs = Dynprog_traceback_8_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3569 directions8R_upper_nogap,directions8R_upper_Egap,
3570 bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3571 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3572 rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3573 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3574 } else {
3575 pairs = Dynprog_traceback_8_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3576 directions8R_lower_nogap,directions8R_lower_Egap,
3577 bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3578 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3579 rev_roffset,rev_goffsetR,pairpool,genestrand,/*revp*/true,
3580 *dynprogindex);
3581 }
3582 pairs = List_reverse(pairs);
3583
3584 /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3585 /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3586 /* No need to revise queryjump or genomejump, because the above
3587 coordinates are internal to the gap. */
3588
3589 debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3590 (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3591 Intron_type_string(*introntype),*left_prob,*right_prob));
3592
3593 #ifndef NOGAPHOLDER
3594 pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3595 /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3596 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3597 gappair = (Pair_T) pairs->first;
3598 gappair->introntype = *introntype;
3599 gappair->donor_prob = *left_prob;
3600 gappair->acceptor_prob = *right_prob;
3601 #endif
3602
3603 if (bestcL >= bestrL) {
3604 pairs = Dynprog_traceback_8_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3605 directions8L_upper_nogap,directions8L_upper_Egap,
3606 bestrL,bestcL,rsequence,rsequenceuc,gsequenceL,gsequenceL_alt,
3607 roffset,goffsetL,pairpool,/*revp*/false,
3608 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3609 } else {
3610 pairs = Dynprog_traceback_8_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3611 directions8L_lower_nogap,directions8L_lower_Egap,
3612 bestrL,bestcL,rsequence,rsequenceuc,gsequenceL,gsequenceL_alt,
3613 roffset,goffsetL,pairpool,genestrand,/*revp*/false,
3614 *dynprogindex);
3615 }
3616
3617 if (List_length(pairs) == 1) {
3618 /* Only a gap inserted */
3619 pairs = (List_T) NULL;
3620 }
3621
3622 FREEA(rev_gsequenceR_alt);
3623 FREEA(rev_gsequenceR);
3624 FREEA(gsequenceL_alt);
3625 FREEA(gsequenceL);
3626
3627 debug(printf("End of dynprog genome gap\n"));
3628
3629 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3630 debug3(Pair_dump_list(pairs,true));
3631 debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3632
3633 if (Pair_maxnegscore(pairs) < -10) {
3634 *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
3635 return (List_T) NULL;
3636 } else {
3637 return List_reverse(pairs);
3638 }
3639 }
3640
3641 } else {
3642 /* Use 16-mers */
3643 Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3644 debug3(printf("Computing matrix16L_upper\n"));
3645 matrix16L_upper = Dynprog_simd_16_upper(&directions16L_upper_nogap,&directions16L_upper_Egap,dynprogL,
3646 rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3647 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3648 goffsetL,chroffset,chrhigh,watsonp,
3649 #endif
3650 mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
3651
3652 debug3(printf("Computing matrix16L_lower\n"));
3653 matrix16L_lower = Dynprog_simd_16_lower(&directions16L_lower_nogap,&directions16L_lower_Egap,dynprogL,
3654 rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3655 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3656 goffsetL,chroffset,chrhigh,watsonp,
3657 #endif
3658 mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
3659
3660 Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3661 debug3(printf("Computing matrix16R_upper\n"));
3662 matrix16R_upper = Dynprog_simd_16_upper(&directions16R_upper_nogap,&directions16R_upper_Egap,dynprogR,
3663 rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3664 rlength,glengthR,
3665 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3666 rev_goffsetR,chroffset,chrhigh,watsonp,
3667 #endif
3668 mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
3669
3670 debug3(printf("Computing matrix16R_lower\n"));
3671 matrix16R_lower = Dynprog_simd_16_lower(&directions16R_lower_nogap,&directions16R_lower_Egap,dynprogR,
3672 rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3673 rlength,glengthR,
3674 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
3675 rev_goffsetR,chroffset,chrhigh,watsonp,
3676 #endif
3677 mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
3678
3679 if ((*finalscore = bridge_intron_gap_16_ud(&bestrL,&bestrR,&bestcL,&bestcR,
3680 &(*introntype),&(*left_prob),&(*right_prob),
3681 matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
3682 directions16L_upper_nogap,directions16L_lower_nogap,
3683 directions16R_upper_nogap,directions16R_lower_nogap,
3684 gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3685 goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3686 cdna_direction,watsonp,lbandL,ubandL,lbandR,ubandR,
3687 goffsetL,rev_goffsetR,
3688 chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3689
3690 FREEA(rev_gsequenceR_alt);
3691 FREEA(rev_gsequenceR);
3692 FREEA(gsequenceL_alt);
3693 FREEA(gsequenceL);
3694
3695 return (List_T) NULL;
3696
3697 } else {
3698 *new_leftgenomepos = goffsetL+(bestcL-1);
3699 *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3700 debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3701
3702 *exonhead = rev_roffset-(bestrR-1);
3703
3704 if (bestcR >= bestrR) {
3705 pairs = Dynprog_traceback_16_upper(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3706 directions16R_upper_nogap,directions16R_upper_Egap,
3707 bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3708 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3709 rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3710 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3711 } else {
3712 pairs = Dynprog_traceback_16_lower(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3713 directions16R_lower_nogap,directions16R_lower_Egap,
3714 bestrR,bestcR,rev_rsequence,rev_rsequenceuc,
3715 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3716 rev_roffset,rev_goffsetR,pairpool,genestrand,/*revp*/true,
3717 *dynprogindex);
3718 }
3719 pairs = List_reverse(pairs);
3720
3721 /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3722 /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3723 /* No need to revise queryjump or genomejump, because the above
3724 coordinates are internal to the gap. */
3725
3726 debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3727 (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3728 Intron_type_string(*introntype),*left_prob,*right_prob));
3729 #ifndef NOGAPHOLDER
3730 pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3731 /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3732 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3733 gappair = (Pair_T) pairs->first;
3734 gappair->introntype = *introntype;
3735 gappair->donor_prob = *left_prob;
3736 gappair->acceptor_prob = *right_prob;
3737 #endif
3738
3739 if (bestcL >= bestrL) {
3740 pairs = Dynprog_traceback_16_upper(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3741 directions16L_upper_nogap,directions16L_upper_Egap,
3742 bestrL,bestcL,rsequence,rsequenceuc,
3743 gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,/*revp*/false,
3744 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3745 } else {
3746 pairs = Dynprog_traceback_16_lower(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3747 directions16L_lower_nogap,directions16L_lower_Egap,
3748 bestrL,bestcL,rsequence,rsequenceuc,
3749 gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,
3750 genestrand,/*revp*/false,*dynprogindex);
3751 }
3752
3753 if (List_length(pairs) == 1) {
3754 /* Only a gap inserted */
3755 pairs = (List_T) NULL;
3756 }
3757
3758 FREEA(rev_gsequenceR_alt);
3759 FREEA(rev_gsequenceR);
3760 FREEA(gsequenceL_alt);
3761 FREEA(gsequenceL);
3762
3763 debug(printf("End of dynprog genome gap\n"));
3764
3765 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3766 debug3(Pair_dump_list(pairs,true));
3767 debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3768 if (Pair_maxnegscore(pairs) < -10) {
3769 *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
3770 return (List_T) NULL;
3771 } else {
3772 return List_reverse(pairs);
3773 }
3774 }
3775
3776 }
3777
3778 #else
3779 /* Non-SIMD methods */
3780 Dynprog_compute_bands(&lbandL,&ubandL,rlength,glengthL,extraband_paired,/*widebandp*/true);
3781 debug3(printf("Computing matrixL\n"));
3782 matrixL = Dynprog_standard(&directionsL_nogap,&directionsL_Egap,&directionsL_Fgap,dynprogL,
3783 rsequence,gsequenceL,gsequenceL_alt,rlength,glengthL,
3784 goffsetL,chroffset,chrhigh,watsonp,
3785 mismatchtype,open,extend,lbandL,ubandL,
3786 jump_late_p,/*revp*/false,/*saturation*/NEG_INFINITY_INT,
3787 /*upperp*/true,/*lowerp*/true);
3788
3789 Dynprog_compute_bands(&lbandR,&ubandR,rlength,glengthR,extraband_paired,/*widebandp*/true);
3790 debug3(printf("Computing matrixR\n"));
3791 matrixR = Dynprog_standard(&directionsR_nogap,&directionsR_Egap,&directionsR_Fgap,dynprogR,
3792 rev_rsequence,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3793 rlength,glengthR,rev_goffsetR,chroffset,chrhigh,watsonp,
3794 mismatchtype,open,extend,lbandL,ubandR,
3795 /*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT,
3796 /*upperp*/true,/*lowerp*/true);
3797
3798 if ((*finalscore = bridge_intron_gap(&bestrL,&bestrR,&bestcL,&bestcR,
3799 &(*introntype),&(*left_prob),&(*right_prob),
3800 matrixL,matrixR,directionsL_nogap,directionsR_nogap,
3801 gsequenceL,gsequenceL_alt,&(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3802 goffsetL,rev_goffsetR,rlength,glengthL,glengthR,
3803 cdna_direction,watsonp,extraband_paired,goffsetL,rev_goffsetR,
3804 chrnum,chroffset,chrhigh,halfp,finalp,jump_late_p)) < 0) {
3805
3806 FREEA(gsequenceL_alt);
3807 FREEA(rev_gsequenceR_alt);
3808 FREEA(gsequenceL);
3809 FREEA(rev_gsequenceR);
3810
3811 return (List_T) NULL;
3812
3813 } else {
3814 *new_leftgenomepos = goffsetL+(bestcL-1);
3815 *new_rightgenomepos = rev_goffsetR-(bestcR-1);
3816 debug(printf("New leftgenomepos = %d, New rightgenomepos = %d\n",*new_leftgenomepos,*new_rightgenomepos));
3817
3818 *exonhead = rev_roffset-(bestrR-1);
3819
3820 pairs = Dynprog_traceback_std(NULL,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3821 directionsR_nogap,directionsR_Egap,directionsR_Fgap,bestrR,bestcR,
3822 rev_rsequence,rev_rsequenceuc,
3823 &(rev_gsequenceR[glengthR-1]),&(rev_gsequenceR_alt[glengthR-1]),
3824 rev_roffset,rev_goffsetR,pairpool,/*revp*/true,
3825 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3826 pairs = List_reverse(pairs);
3827
3828 /* queryjump = (rev_roffset-bestrR) - (roffset+bestrL) + 1; */
3829 /* genomejump = (rev_goffsetR-bestcR) - (goffsetL+bestcL) + 1; */
3830 /* No need to revise queryjump or genomejump, because the above
3831 coordinates are internal to the gap. */
3832
3833 debug(printf("Pushing a gap with genomejump %d, introntype %s, prob %f and %f\n",
3834 (*new_rightgenomepos)-(*new_leftgenomepos)-1,
3835 Intron_type_string(*introntype),*left_prob,*right_prob));
3836 #ifndef NOGAPHOLDER
3837 pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/(rev_roffset-bestrR) - (roffset+bestrL) + 1,
3838 /*genomejump*/(*new_rightgenomepos)-(*new_leftgenomepos)-1,
3839 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
3840 gappair = (Pair_T) pairs->first;
3841 gappair->introntype = *introntype;
3842 gappair->donor_prob = *left_prob;
3843 gappair->acceptor_prob = *right_prob;
3844 #endif
3845
3846 pairs = Dynprog_traceback_std(pairs,&(*nmatches),&(*nmismatches),&(*nopens),&(*nindels),
3847 directionsL_nogap,directionsL_Egap,directionsL_Fgap,bestrL,bestcL,
3848 rsequence,rsequenceuc,
3849 gsequenceL,gsequenceL_alt,roffset,goffsetL,pairpool,/*revp*/false,
3850 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
3851
3852 if (List_length(pairs) == 1) {
3853 /* Only a gap inserted */
3854 pairs = (List_T) NULL;
3855 }
3856
3857 FREEA(gsequenceL_alt);
3858 FREEA(rev_gsequenceR_alt);
3859 FREEA(gsequenceL);
3860 FREEA(rev_gsequenceR);
3861
3862 debug(printf("End of dynprog genome gap\n"));
3863
3864 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
3865 debug3(Pair_dump_list(pairs,true));
3866 debug3(printf("maxnegscore = %d\n",Pair_maxnegscore(pairs)));
3867 if (Pair_maxnegscore(pairs) < -10) {
3868 *finalscore = -100; /* Otherwise calling procedure will act on finalscore */
3869 return (List_T) NULL;
3870 } else {
3871 return List_reverse(pairs);
3872 }
3873 }
3874 #endif
3875
3876 }
3877
3878
3879