1 static char rcsid[] = "$Id: dynprog_cdna.c 214361 2018-03-21 01:24:28Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "dynprog_cdna.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h> /* For ceil, log, pow */
11 #include <ctype.h> /* For tolower */
12
13
14 #include "bool.h"
15 #include "except.h"
16 #include "assert.h"
17 #include "mem.h"
18 #include "comp.h"
19 #include "pair.h"
20 #include "pairdef.h"
21 #include "complement.h"
22 #include "dynprog_simd.h"
23
24 /* These values were set to -5, -4, -3, but this led to chopped ends
25 in GMAP alignments, and failure to find chimeras */
26 #define MISMATCH_HIGHQ -3
27 #define MISMATCH_MEDQ -2
28 #define MISMATCH_LOWQ -1
29
30 /* cDNA insertions are biologically not meaningful, so look for a good
31 gap opening somewhere */
32 #define CDNA_OPEN_HIGHQ -10
33 #define CDNA_OPEN_MEDQ -10
34 #define CDNA_OPEN_LOWQ -10
35
36 #define CDNA_EXTEND_HIGHQ -7
37 #define CDNA_EXTEND_MEDQ -7
38 #define CDNA_EXTEND_LOWQ -7
39
40 #define INSERT_PAIRS 9
41
42
43 #ifdef DEBUG
44 #define debug(x) x
45 #else
46 #define debug(x)
47 #endif
48
49 #ifdef DEBUG3
50 #define debug3(x) x
51 #else
52 #define debug3(x)
53 #endif
54
55 #ifdef DEBUG8
56 #define debug8(x) x
57 #else
58 #define debug8(x)
59 #endif
60
61 #define T Dynprog_T
62
63
64 /************************************************************************
65 * get_genomic_nt
66 ************************************************************************/
67
68 static char complCode[128] = COMPLEMENT_LC;
69
70 static char
get_genomic_nt(char * g_alt,int genomicpos,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)71 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
72 bool watsonp) {
73 char c2, c2_alt;
74 Univcoord_T pos;
75
76 #if 0
77 /* If the read has a deletion, then we will extend beyond 0 or genomiclength, so do not restrict. */
78 if (genomicpos < 0) {
79 return '*';
80
81 } else if (genomicpos >= genomiclength) {
82 return '*';
83
84 }
85 #endif
86
87 if (watsonp) {
88 if ((pos = chroffset + genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
89 *g_alt = '*';
90 return '*';
91
92 } else if (pos >= chrhigh) {
93 *g_alt = '*';
94 return '*';
95
96 #if 0
97 } else if (genome) {
98 /* Not necessary, because Genome_get_char_blocks should work */
99 debug8(printf("At %u, genomicnt is %c\n",
100 genomicpos,Genome_get_char(genome,pos)));
101 return Genome_get_char(genome,pos);
102 #endif
103
104 } else {
105 /* GMAP with user-supplied genomic segment */
106 debug8(printf("At %u, genomicnt is %c\n",
107 genomicpos,Genome_get_char_blocks(&(g_alt),pos)));
108 return Genome_get_char_blocks(&(*g_alt),pos);
109 }
110
111 } else {
112 if ((pos = chrhigh - genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
113 *g_alt = '*';
114 return '*';
115
116 } else if (pos >= chrhigh) {
117 *g_alt = '*';
118 return '*';
119
120 #if 0
121 } else if (genome) {
122 /* Not necessary, because Genome_get_char_blocks should work */
123 c2 = Genome_get_char(genome,pos);
124 #endif
125
126 } else {
127 /* GMAP with user-supplied genomic segment */
128 c2 = Genome_get_char_blocks(&c2_alt,pos);
129 }
130 debug8(printf("At %u, genomicnt is %c\n",genomicpos,complCode[(int) c2]));
131 *g_alt = complCode[(int) c2_alt];
132 return complCode[(int) c2];
133 }
134 }
135
136
137 #if defined(HAVE_SSE2)
138 /* Columns are always genomic. Rows are always query. Bridging across common columns */
139 static void
bridge_cdna_gap_8_ud(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,int glength,int rlengthL,int rlengthR,int lbandL,int ubandL,int lbandR,int ubandR,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)140 bridge_cdna_gap_8_ud (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
141 Score8_T **matrixL_upper, Score8_T **matrixL_lower,
142 Score8_T **matrixR_upper, Score8_T **matrixR_lower,
143 int glength, int rlengthL, int rlengthR,
144 int lbandL, int ubandL, int lbandR, int ubandR,
145 int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
146 int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, pen, end_reward = 0;
147 int rL, rR, cL, cR;
148 int rloL, rhighL;
149 int rloR, rhighR;
150
151 #if 0
152 /* Perform computations */
153 lbandL = rlengthL - glength + extraband_paired;
154 ubandL = extraband_paired;
155
156 lbandR = rlengthR - glength + extraband_paired;
157 ubandR = extraband_paired;
158 #endif
159
160 /* Need a double loop on rows here, in contrast with a single loop
161 for introns, because we allow a genomic insertion that doesn't
162 match the cDNA. So we need to add a penalty for a genomic
163 insertion */
164
165 if (jump_late_p) {
166 for (cL = 1; cL < glength; cL++) {
167
168 /* Note: opening penalty is added at the bottom of the loop */
169 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
170 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
171 if ((rloL = cL - ubandL) < 1) {
172 rloL = 1;
173 }
174 if ((rhighL = cL + lbandL) > rlengthL-1) {
175 rhighL = rlengthL-1;
176 }
177
178 if ((rloR = cR - ubandR) < 1) {
179 rloR = 1;
180 }
181 if ((rhighR = cR + lbandR) > rlengthR-1) {
182 rhighR = rlengthR-1;
183 }
184
185 for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
186 scoreL = (int) matrixL_upper[cL][rL];
187
188 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
189 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
190 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
191 scoreR = (int) matrixR_upper[cR][rR];
192
193 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
194 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
195 rL,rR,scoreL,scoreR,pen,end_reward,score));
196
197 bestscore = score;
198 *bestcL = cL;
199 *bestcR = cR;
200 *bestrL = rL;
201 *bestrR = rR;
202
203 } else {
204 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
205 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
206 }
207 }
208
209 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
210 scoreR = (int) matrixR_lower[rR][cR];
211
212 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
213 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
214 rL,rR,scoreL,scoreR,pen,end_reward,score));
215
216 bestscore = score;
217 *bestcL = cL;
218 *bestcR = cR;
219 *bestrL = rL;
220 *bestrR = rR;
221
222 } else {
223 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
224 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
225 }
226 }
227 }
228
229 for (/*at main diagonal*/; rL <= rhighL; rL++) {
230 scoreL = (int) matrixL_lower[rL][cL];
231
232 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
233 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
234 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
235 scoreR = (int) matrixR_upper[cR][rR];
236
237 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
238 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
239 rL,rR,scoreL,scoreR,pen,end_reward,score));
240
241 bestscore = score;
242 *bestcL = cL;
243 *bestcR = cR;
244 *bestrL = rL;
245 *bestrR = rR;
246
247 } else {
248 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
249 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
250 }
251 }
252
253 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
254 scoreR = (int) matrixR_lower[rR][cR];
255
256 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
257 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
258 rL,rR,scoreL,scoreR,pen,end_reward,score));
259
260 bestscore = score;
261 *bestcL = cL;
262 *bestcR = cR;
263 *bestrL = rL;
264 *bestrR = rR;
265
266 } else {
267 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
268 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
269 }
270 }
271 }
272
273 pen = open - extend; /* Subtract extend to compensate for
274 its addition in the for loop */
275 }
276 }
277
278 } else {
279 /* Do not jump late */
280 for (cL = 1; cL < glength; cL++) {
281
282 /* Note: opening penalty is added at the bottom of the loop */
283 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
284 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
285 if ((rloL = cL - ubandL) < 1) {
286 rloL = 1;
287 }
288 if ((rhighL = cL + lbandL) > rlengthL-1) {
289 rhighL = rlengthL-1;
290 }
291
292 if ((rloR = cR - ubandR) < 1) {
293 rloR = 1;
294 }
295 if ((rhighR = cR + lbandR) > rlengthR-1) {
296 rhighR = rlengthR-1;
297 }
298
299 for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
300 scoreL = (int) matrixL_upper[cL][rL];
301
302 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
303 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
304 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
305 scoreR = (int) matrixR_upper[cR][rR];
306
307 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
308 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
309 rL,rR,scoreL,scoreR,pen,end_reward,score));
310
311 bestscore = score;
312 *bestcL = cL;
313 *bestcR = cR;
314 *bestrL = rL;
315 *bestrR = rR;
316
317 } else {
318 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
319 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
320 }
321 }
322
323 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
324 scoreR = (int) matrixR_lower[rR][cR];
325
326 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
327 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
328 rL,rR,scoreL,scoreR,pen,end_reward,score));
329
330 bestscore = score;
331 *bestcL = cL;
332 *bestcR = cR;
333 *bestrL = rL;
334 *bestrR = rR;
335
336 } else {
337 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
338 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
339 }
340 }
341 }
342
343 for (/*at main diagonal*/; rL <= rhighL; rL++) {
344 scoreL = (int) matrixL_lower[rL][cL];
345
346 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
347 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
348 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
349 scoreR = (int) matrixR_upper[cR][rR];
350
351 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
352 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
353 rL,rR,scoreL,scoreR,pen,end_reward,score));
354
355 bestscore = score;
356 *bestcL = cL;
357 *bestcR = cR;
358 *bestrL = rL;
359 *bestrR = rR;
360
361 } else {
362 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
363 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
364 }
365 }
366
367 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
368 scoreR = (int) matrixR_lower[rR][cR];
369
370 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
371 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
372 rL,rR,scoreL,scoreR,pen,end_reward,score));
373
374 bestscore = score;
375 *bestcL = cL;
376 *bestcR = cR;
377 *bestrL = rL;
378 *bestrR = rR;
379
380 } else {
381 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
382 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
383 }
384 }
385 }
386
387 pen = open - extend; /* Subtract extend to compensate for
388 its addition in the for loop */
389 }
390 }
391 }
392
393 *finalscore = (int) bestscore;
394 debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
395 *finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
396
397 return;
398 }
399 #endif
400
401 #if defined(HAVE_SSE2)
402 /* Columns are always genomic. Rows are always query. Bridging across common columns */
403 static void
bridge_cdna_gap_16_ud(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,int glength,int rlengthL,int rlengthR,int lbandL,int ubandL,int lbandR,int ubandR,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)404 bridge_cdna_gap_16_ud (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
405 Score16_T **matrixL_upper, Score16_T **matrixL_lower,
406 Score16_T **matrixR_upper, Score16_T **matrixR_lower,
407 int glength, int rlengthL, int rlengthR,
408 int lbandL, int ubandL, int lbandR, int ubandR,
409 int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
410 int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, pen, end_reward = 0;
411 int rL, rR, cL, cR;
412 int rloL, rhighL;
413 int rloR, rhighR;
414
415 #if 0
416 /* Perform computations */
417 lbandL = rlengthL - glength + extraband_paired;
418 ubandL = extraband_paired;
419
420 lbandR = rlengthR - glength + extraband_paired;
421 ubandR = extraband_paired;
422 #endif
423
424 /* Need a double loop on rows here, in contrast with a single loop
425 for introns, because we allow a genomic insertion that doesn't
426 match the cDNA. So we need to add a penalty for a genomic
427 insertion */
428
429 if (jump_late_p) {
430 for (cL = 1; cL < glength; cL++) {
431
432 /* Note: opening penalty is added at the bottom of the loop */
433 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
434 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
435 if ((rloL = cL - ubandL) < 1) {
436 rloL = 1;
437 }
438 if ((rhighL = cL + lbandL) > rlengthL-1) {
439 rhighL = rlengthL-1;
440 }
441
442 if ((rloR = cR - ubandR) < 1) {
443 rloR = 1;
444 }
445 if ((rhighR = cR + lbandR) > rlengthR-1) {
446 rhighR = rlengthR-1;
447 }
448
449 for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
450 scoreL = (int) matrixL_upper[cL][rL];
451
452 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
453 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
454 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
455 scoreR = (int) matrixR_upper[cR][rR];
456
457 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
458 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
459 rL,rR,scoreL,scoreR,pen,end_reward,score));
460
461 bestscore = score;
462 *bestcL = cL;
463 *bestcR = cR;
464 *bestrL = rL;
465 *bestrR = rR;
466
467 } else {
468 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
469 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
470 }
471 }
472
473 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
474 scoreR = (int) matrixR_lower[rR][cR];
475
476 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
477 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
478 rL,rR,scoreL,scoreR,pen,end_reward,score));
479
480 bestscore = score;
481 *bestcL = cL;
482 *bestcR = cR;
483 *bestrL = rL;
484 *bestrR = rR;
485
486 } else {
487 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
488 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
489 }
490 }
491 }
492
493 for (/*at main diagonal*/; rL <= rhighL; rL++) {
494 scoreL = (int) matrixL_lower[rL][cL];
495
496 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
497 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
498 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
499 scoreR = (int) matrixR_upper[cR][rR];
500
501 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
502 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
503 rL,rR,scoreL,scoreR,pen,end_reward,score));
504
505 bestscore = score;
506 *bestcL = cL;
507 *bestcR = cR;
508 *bestrL = rL;
509 *bestrR = rR;
510
511 } else {
512 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
513 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
514 }
515 }
516
517 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
518 scoreR = (int) matrixR_lower[rR][cR];
519
520 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
521 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
522 rL,rR,scoreL,scoreR,pen,end_reward,score));
523
524 bestscore = score;
525 *bestcL = cL;
526 *bestcR = cR;
527 *bestrL = rL;
528 *bestrR = rR;
529
530 } else {
531 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
532 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
533 }
534 }
535 }
536
537 pen = open - extend; /* Subtract extend to compensate for
538 its addition in the for loop */
539 }
540 }
541
542 } else {
543 /* Do not jump late */
544 for (cL = 1; cL < glength; cL++) {
545
546 /* Note: opening penalty is added at the bottom of the loop */
547 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
548 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
549 if ((rloL = cL - ubandL) < 1) {
550 rloL = 1;
551 }
552 if ((rhighL = cL + lbandL) > rlengthL-1) {
553 rhighL = rlengthL-1;
554 }
555
556 if ((rloR = cR - ubandR) < 1) {
557 rloR = 1;
558 }
559 if ((rhighR = cR + lbandR) > rlengthR-1) {
560 rhighR = rlengthR-1;
561 }
562
563 for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
564 scoreL = (int) matrixL_upper[cL][rL];
565
566 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
567 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
568 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
569 scoreR = (int) matrixR_upper[cR][rR];
570
571 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
572 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
573 rL,rR,scoreL,scoreR,pen,end_reward,score));
574
575 bestscore = score;
576 *bestcL = cL;
577 *bestcR = cR;
578 *bestrL = rL;
579 *bestrR = rR;
580
581 } else {
582 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
583 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
584 }
585 }
586
587 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
588 scoreR = (int) matrixR_lower[rR][cR];
589
590 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
591 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
592 rL,rR,scoreL,scoreR,pen,end_reward,score));
593
594 bestscore = score;
595 *bestcL = cL;
596 *bestcR = cR;
597 *bestrL = rL;
598 *bestrR = rR;
599
600 } else {
601 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
602 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
603 }
604 }
605 }
606
607 for (/*at main diagonal*/; rL <= rhighL; rL++) {
608 scoreL = (int) matrixL_lower[rL][cL];
609
610 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
611 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
612 for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
613 scoreR = (int) matrixR_upper[cR][rR];
614
615 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
616 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
617 rL,rR,scoreL,scoreR,pen,end_reward,score));
618
619 bestscore = score;
620 *bestcL = cL;
621 *bestcR = cR;
622 *bestrL = rL;
623 *bestrR = rR;
624
625 } else {
626 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
627 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
628 }
629 }
630
631 for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
632 scoreR = (int) matrixR_lower[rR][cR];
633
634 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
635 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
636 rL,rR,scoreL,scoreR,pen,end_reward,score));
637
638 bestscore = score;
639 *bestcL = cL;
640 *bestcR = cR;
641 *bestrL = rL;
642 *bestrR = rR;
643
644 } else {
645 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
646 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
647 }
648 }
649 }
650
651 pen = open - extend; /* Subtract extend to compensate for
652 its addition in the for loop */
653 }
654 }
655 }
656
657 *finalscore = (int) bestscore;
658 debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
659 *finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
660
661 return;
662 }
663 #endif
664
665
666
667 #ifndef HAVE_SSE2
668 /* Columns are always genomic. Rows are always query. Bridging across common columns */
669 static void
bridge_cdna_gap(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score16_T ** matrixL,Score16_T ** matrixR,int glength,int rlengthL,int rlengthR,int extraband_paired,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)670 bridge_cdna_gap (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
671 #ifdef HAVE_SSE2
672 Score16_T **matrixL, Score16_T **matrixR,
673 #else
674 Score32_T **matrixL, Score32_T **matrixR,
675 #endif
676 int glength, int rlengthL, int rlengthR, int extraband_paired,
677 int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
678 int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, pen, end_reward = 0;
679 int rL, rR, cL, cR;
680 int lbandL, ubandL, rloL, rhighL;
681 int lbandR, ubandR, rloR, rhighR;
682
683 /* Perform computations */
684 lbandL = rlengthL - glength + extraband_paired;
685 ubandL = extraband_paired;
686
687 lbandR = rlengthR - glength + extraband_paired;
688 ubandR = extraband_paired;
689
690 /* Need a double loop on rows here, in contrast with a single loop
691 for introns, because we allow a genomic insertion that doesn't
692 match the cDNA. So we need to add a penalty for a genomic
693 insertion */
694
695 if (jump_late_p) {
696 for (cL = 1; cL < glength; cL++) {
697
698 /* Note: opening penalty is added at the bottom of the loop */
699 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
700 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
701 if ((rloL = cL - ubandL) < 1) {
702 rloL = 1;
703 }
704 if ((rhighL = cL + lbandL) > rlengthL-1) {
705 rhighL = rlengthL-1;
706 }
707
708 if ((rloR = cR - ubandR) < 1) {
709 rloR = 1;
710 }
711 if ((rhighR = cR + lbandR) > rlengthR-1) {
712 rhighR = rlengthR-1;
713 }
714
715 for (rL = rloL; rL <= rhighL; rL++) {
716 scoreL = (int) matrixL[cL][rL];
717
718 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
719 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
720 for (rR = rloR; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
721 scoreR = (int) matrixR[cR][rR];
722
723 if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) { /* Use >= for jump late */
724 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
725 rL,rR,scoreL,scoreR,pen,end_reward,score));
726
727 bestscore = score;
728 *bestcL = cL;
729 *bestcR = cR;
730 *bestrL = rL;
731 *bestrR = rR;
732
733 } else {
734 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
735 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
736 }
737 }
738 }
739 pen = open - extend; /* Subtract extend to compensate for
740 its addition in the for loop */
741 }
742 }
743
744 } else {
745 /* Do not jump late */
746 for (cL = 1; cL < glength; cL++) {
747
748 /* Note: opening penalty is added at the bottom of the loop */
749 for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
750 /* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
751 if ((rloL = cL - ubandL) < 1) {
752 rloL = 1;
753 }
754 if ((rhighL = cL + lbandL) > rlengthL-1) {
755 rhighL = rlengthL-1;
756 }
757
758 if ((rloR = cR - ubandR) < 1) {
759 rloR = 1;
760 }
761 if ((rhighR = cR + lbandR) > rlengthR-1) {
762 rhighR = rlengthR-1;
763 }
764
765 for (rL = rloL; rL <= rhighL; rL++) {
766 scoreL = (int) matrixL[cL][rL];
767
768 /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
769 debug3(printf(" Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
770 for (rR = rloR; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
771 scoreR = (int) matrixR[cR][rR];
772
773 if ((score = scoreL + scoreR + pen + end_reward) > bestscore) { /* Use > for jump early */
774 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
775 rL,rR,scoreL,scoreR,pen,end_reward,score));
776
777 bestscore = score;
778 *bestcL = cL;
779 *bestcR = cR;
780 *bestrL = rL;
781 *bestrR = rR;
782
783 } else {
784 debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
785 rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
786 }
787 }
788 }
789 pen = open - extend; /* Subtract extend to compensate for
790 its addition in the for loop */
791 }
792 }
793 }
794
795 *finalscore = (int) bestscore;
796 debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
797 *finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
798
799 return;
800 }
801 #endif
802
803
804 /* Sequences rsequenceL and rsequenceR represent the two ends of the cDNA insertion */
805 List_T
Dynprog_cdna_gap(int * dynprogindex,int * finalscore,bool * incompletep,T dynprogL,T dynprogR,char * rsequenceL,char * rsequence_ucL,char * rev_rsequenceR,char * rev_rsequence_ucR,int rlengthL,int rlengthR,int glength,int roffsetL,int rev_roffsetR,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int genestrand,bool jump_late_p,Pairpool_T pairpool,int extraband_paired,double defect_rate)806 Dynprog_cdna_gap (int *dynprogindex, int *finalscore, bool *incompletep,
807 T dynprogL, T dynprogR, char *rsequenceL, char *rsequence_ucL,
808 char *rev_rsequenceR, char *rev_rsequence_ucR,
809 #if 0
810 char *gsequence, char *gsequence_uc,
811 #endif
812 int rlengthL, int rlengthR, int glength,
813 int roffsetL, int rev_roffsetR, int goffset,
814 Univcoord_T chroffset, Univcoord_T chrhigh,
815 bool watsonp, int genestrand, bool jump_late_p, Pairpool_T pairpool,
816 int extraband_paired, double defect_rate) {
817 List_T pairs = NULL;
818 char *gsequence, *gsequence_alt, *rev_gsequence, *rev_gsequence_alt;
819 Mismatchtype_T mismatchtype;
820 int lbandL, ubandL, lbandR, ubandR;
821 int open, extend;
822 #if defined(HAVE_SSE2)
823 Score8_T **matrix8L_upper, **matrix8L_lower, **matrix8R_upper, **matrix8R_lower;
824 Direction8_T **directions8L_upper_nogap, **directions8L_upper_Egap,
825 **directions8L_lower_nogap, **directions8L_lower_Egap,
826 **directions8R_upper_nogap, **directions8R_upper_Egap,
827 **directions8R_lower_nogap, **directions8R_lower_Egap;
828 bool use8p;
829
830 Score16_T **matrix16L_upper, **matrix16L_lower, **matrix16R_upper, **matrix16R_lower;
831 Direction16_T **directions16L_upper_nogap, **directions16L_upper_Egap,
832 **directions16L_lower_nogap, **directions16L_lower_Egap,
833 **directions16R_upper_nogap, **directions16R_upper_Egap,
834 **directions16R_lower_nogap, **directions16R_lower_Egap;
835 #else
836 Score32_T **matrixL, **matrixR;
837 Direction32_T **directionsL_nogap, **directionsL_Egap, **directionsL_Fgap,
838 **directionsR_nogap, **directionsR_Egap, **directionsR_Fgap;
839 #endif
840 int rev_goffset, bestrL, bestrR, bestcL, bestcR, k;
841 int nmatches, nmismatches, nopens, nindels;
842 int queryjump, genomejump;
843 char c2, c2_alt;
844
845
846 if (glength <= 1) {
847 return NULL;
848 }
849
850 debug(printf("\n"));
851 debug(printf("%c: ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'));
852 debug(printf("Aligning cdna gap\n"));
853 #ifdef EXTRACT_GENOMICSEG
854 debug(printf("At genomic offset %d-%d, %.*s\n",goffset,goffset+glength-1,glength,gsequence));
855 #endif
856 debug(printf("\n"));
857
858 /* ?check if offsets are too close. But this eliminates a segment
859 of the cDNA. Should check in stage 3, and do single gap instead. */
860 /*
861 if (roffsetL+rlengthL-1 >= rev_roffsetR-rlengthR+1) {
862 debug(printf("Bounds don't make sense\n"));
863 *finalscore = NEG_INFINITY_16;
864 return NULL;
865 }
866 */
867
868 if (defect_rate < DEFECT_HIGHQ) {
869 mismatchtype = HIGHQ;
870 /* mismatch = MISMATCH_HIGHQ; */
871 open = CDNA_OPEN_HIGHQ;
872 extend = CDNA_EXTEND_HIGHQ;
873 } else if (defect_rate < DEFECT_MEDQ) {
874 mismatchtype = MEDQ;
875 /* mismatch = MISMATCH_MEDQ; */
876 open = CDNA_OPEN_MEDQ;
877 extend = CDNA_EXTEND_MEDQ;
878 } else {
879 mismatchtype = LOWQ;
880 /* mismatch = MISMATCH_LOWQ; */
881 open = CDNA_OPEN_LOWQ;
882 extend = CDNA_EXTEND_LOWQ;
883 }
884
885 if (glength > dynprogR->max_glength || rlengthR > dynprogR->max_rlength) {
886 debug(printf("glength %d or rlengthR %d is too long. Returning NULL\n",glength,rlengthR));
887 #if 0
888 rev_goffset = goffset + glength - 1;
889 queryjump = rev_roffsetR - roffsetL + 1;
890 genomejump = rev_goffset - goffset + 1;
891 pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,
892 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
893 #endif
894 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
895 return (List_T) NULL;
896 }
897
898 if (glength > dynprogL->max_glength || rlengthL > dynprogL->max_rlength) {
899 debug(printf("glength %d or rlengthL %d is too long. Returning NULL\n",glength,rlengthL));
900 #if 0
901 rev_goffset = goffset + glength - 1;
902 queryjump = rev_roffsetR - roffsetL + 1;
903 genomejump = rev_goffset - goffset + 1;
904 pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,
905 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
906 #endif
907 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
908 return (List_T) NULL;
909 }
910
911 #if 0
912 /* Right side looks like 5' end */
913 /* Note: sequence 1 and 2 flipped, because 1 has extramaterial */
914 rev_gsequence = &(gsequence[glength-1]);
915 rev_gsequence_uc = &(gsequence_uc[glength-1]);
916 #endif
917 rev_goffset = goffset+glength-1;
918
919 debug(printf("At query offset %d-%d, %.*s\n",roffsetL,roffsetL+rlengthL-1,rlengthL,rsequenceL));
920 debug(printf("At query offset %d-%d, %.*s\n",rev_roffsetR-rlengthR+1,rev_roffsetR,rlengthR,&(rev_rsequenceR[-rlengthR+1])));
921 debug(printf("Whole piece at query offset %d-%d, %.*s\n",roffsetL,rev_roffsetR,rev_roffsetR-roffsetL+1,rsequenceL));
922
923 #if defined(HAVE_SSE2)
924 /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
925 if (glength < use8p_size[mismatchtype] || (rlengthL < use8p_size[mismatchtype] && rlengthR <= use8p_size[mismatchtype])) {
926 use8p = true;
927 } else {
928 use8p = false;
929 }
930 #endif
931
932
933 rev_gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
934 rev_gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
935 gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
936 gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
937
938 if (watsonp) {
939 Genome_get_segment_blocks_left(rev_gsequence,rev_gsequence_alt,/*right*/chroffset+rev_goffset+1,
940 glength,chroffset,/*revcomp*/false);
941 Genome_get_segment_blocks_right(gsequence,gsequence_alt,/*left*/chroffset+goffset,
942 glength,chrhigh,/*revcomp*/false);
943 } else {
944 Genome_get_segment_blocks_right(rev_gsequence,rev_gsequence_alt,/*left*/chrhigh-rev_goffset,
945 glength,chrhigh,/*revcomp*/true);
946 Genome_get_segment_blocks_left(gsequence,gsequence_alt,/*right*/chrhigh-goffset+1,
947 glength,chroffset,/*revcomp*/true);
948 }
949 if (gsequence[0] == '\0' || rev_gsequence[0] == '\0') {
950 FREEA(gsequence_alt);
951 FREEA(gsequence);
952 FREEA(rev_gsequence_alt);
953 FREEA(rev_gsequence);
954 return (List_T) NULL;
955 }
956
957
958 #if defined(HAVE_SSE2)
959 if (use8p == true) {
960 Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
961 matrix8L_upper = Dynprog_simd_8_upper(&directions8L_upper_nogap,&directions8L_upper_Egap,dynprogL,
962 rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
963 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
964 goffset,chroffset,chrhigh,watsonp,
965 #endif
966 mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
967
968 matrix8L_lower = Dynprog_simd_8_lower(&directions8L_lower_nogap,&directions8L_lower_Egap,dynprogL,
969 rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
970 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
971 goffset,chroffset,chrhigh,watsonp,
972 #endif
973 mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
974
975
976 Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
977 matrix8R_upper = Dynprog_simd_8_upper(&directions8R_upper_nogap,&directions8R_upper_Egap,dynprogR,
978 rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
979 rlengthR,glength,
980 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
981 rev_goffset,chroffset,chrhigh,watsonp,
982 #endif
983 mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
984
985 matrix8R_lower = Dynprog_simd_8_lower(&directions8R_lower_nogap,&directions8R_lower_Egap,dynprogR,
986 rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
987 rlengthR,glength,
988 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
989 rev_goffset,chroffset,chrhigh,watsonp,
990 #endif
991 mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
992
993
994 nmatches = nmismatches = nopens = nindels = 0;
995 bridge_cdna_gap_8_ud(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,
996 matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
997 glength,rlengthL,rlengthR,lbandL,ubandL,lbandR,ubandR,
998 open,extend,roffsetL,rev_roffsetR,jump_late_p);
999
1000 if (bestcR >= bestrR) {
1001 pairs = Dynprog_traceback_8_upper(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1002 directions8R_upper_nogap,directions8R_upper_Egap,
1003 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1004 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1005 rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1006 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1007 } else {
1008 pairs = Dynprog_traceback_8_lower(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1009 directions8R_lower_nogap,directions8R_lower_Egap,
1010 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1011 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1012 rev_roffsetR,rev_goffset,pairpool,genestrand,/*revp*/true,
1013 *dynprogindex);
1014 }
1015
1016 pairs = List_reverse(pairs);
1017
1018 queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1019 genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1020 /* No need to revise queryjump or genomejump, because the above
1021 coordinates are internal to the gap. */
1022
1023 if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1024 /* Add cDNA insertion, if any */
1025 for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1026 debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1027 pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1028 /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1029 }
1030 debug(printf("\n"));
1031
1032
1033 /* This loop not yet checked for get_genomic_nt giving correct answer */
1034 for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1035 c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1036 debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1037 #if 0
1038 assert(c2 == gsequence[k-goffset]);
1039 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1040 gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1041 #else
1042 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1043 #endif
1044 }
1045 debug(printf("\n"));
1046
1047 } else {
1048
1049 /* Add gapholder to be solved in the future */
1050 #ifndef NOGAPHOLDER
1051 debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1052 pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1053 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1054 #endif
1055 *incompletep = true;
1056 }
1057
1058 if (bestcL >= bestrL) {
1059 pairs = Dynprog_traceback_8_upper(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1060 directions8L_upper_nogap,directions8L_upper_Egap,
1061 bestrL,bestcL,rsequenceL,rsequence_ucL,
1062 gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1063 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1064 } else {
1065 pairs = Dynprog_traceback_8_lower(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1066 directions8L_lower_nogap,directions8L_lower_Egap,
1067 bestrL,bestcL,rsequenceL,rsequence_ucL,
1068 gsequence,gsequence_alt,roffsetL,goffset,pairpool,
1069 genestrand,/*revp*/false,*dynprogindex);
1070 }
1071
1072 if (List_length(pairs) == 1) {
1073 /* Only a gap added */
1074 pairs = (List_T) NULL;
1075 }
1076
1077 FREEA(gsequence_alt);
1078 FREEA(gsequence);
1079 FREEA(rev_gsequence_alt);
1080 FREEA(rev_gsequence);
1081
1082 debug(printf("End of dynprog cDNA gap\n"));
1083
1084 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1085 return List_reverse(pairs);
1086
1087 } else {
1088 /* Use 16-mers */
1089 Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
1090 matrix16L_upper = Dynprog_simd_16_upper(&directions16L_upper_nogap,&directions16L_upper_Egap,dynprogL,
1091 rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1092 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1093 goffset,chroffset,chrhigh,watsonp,
1094 #endif
1095 mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
1096
1097 matrix16L_lower = Dynprog_simd_16_lower(&directions16L_lower_nogap,&directions16L_lower_Egap,dynprogL,
1098 rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1099 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1100 goffset,chroffset,chrhigh,watsonp,
1101 #endif
1102 mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
1103
1104 Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
1105 matrix16R_upper = Dynprog_simd_16_upper(&directions16R_upper_nogap,&directions16R_upper_Egap,dynprogR,
1106 rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1107 rlengthR,glength,
1108 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1109 rev_goffset,chroffset,chrhigh,watsonp,
1110 #endif
1111 mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
1112
1113 matrix16R_lower = Dynprog_simd_16_lower(&directions16R_lower_nogap,&directions16R_lower_Egap,dynprogR,
1114 rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1115 rlengthR,glength,
1116 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1117 rev_goffset,chroffset,chrhigh,watsonp,
1118 #endif
1119 mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
1120
1121 nmatches = nmismatches = nopens = nindels = 0;
1122
1123 bridge_cdna_gap_16_ud(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,
1124 matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
1125 glength,rlengthL,rlengthR,lbandL,ubandL,lbandR,ubandR,
1126 open,extend,roffsetL,rev_roffsetR,jump_late_p);
1127
1128 if (bestcR >= bestrR) {
1129 pairs = Dynprog_traceback_16_upper(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1130 directions16R_upper_nogap,directions16R_upper_Egap,
1131 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1132 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1133 rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1134 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1135 } else {
1136 pairs = Dynprog_traceback_16_lower(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1137 directions16R_lower_nogap,directions16R_lower_Egap,
1138 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1139 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1140 rev_roffsetR,rev_goffset,pairpool,genestrand,/*revp*/true,
1141 *dynprogindex);
1142 }
1143
1144 pairs = List_reverse(pairs);
1145
1146 queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1147 genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1148 /* No need to revise queryjump or genomejump, because the above
1149 coordinates are internal to the gap. */
1150
1151 if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1152 /* Add cDNA insertion, if any */
1153 for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1154 debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1155 pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1156 /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1157 }
1158 debug(printf("\n"));
1159
1160
1161 /* This loop not yet checked for get_genomic_nt giving correct answer */
1162 for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1163 c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1164 debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1165 #if 0
1166 assert(c2 == gsequence[k-goffset]);
1167 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1168 gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1169 #else
1170 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1171 #endif
1172 }
1173 debug(printf("\n"));
1174
1175 } else {
1176
1177 /* Add gapholder to be solved in the future */
1178 #ifndef NOGAPHOLDER
1179 debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1180 pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1181 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1182 #endif
1183 *incompletep = true;
1184 }
1185
1186 if (bestcL >= bestrL) {
1187 pairs = Dynprog_traceback_16_upper(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1188 directions16L_upper_nogap,directions16L_upper_Egap,
1189 bestrL,bestcL,rsequenceL,rsequence_ucL,
1190 gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1191 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1192 } else {
1193 pairs = Dynprog_traceback_16_lower(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1194 directions16L_lower_nogap,directions16L_lower_Egap,
1195 bestrL,bestcL,rsequenceL,rsequence_ucL,
1196 gsequence,gsequence_alt,roffsetL,goffset,pairpool,
1197 genestrand,/*revp*/false,*dynprogindex);
1198 }
1199
1200 if (List_length(pairs) == 1) {
1201 /* Only a gap added */
1202 pairs = (List_T) NULL;
1203 }
1204
1205 FREEA(gsequence_alt);
1206 FREEA(gsequence);
1207 FREEA(rev_gsequence_alt);
1208 FREEA(rev_gsequence);
1209
1210 debug(printf("End of dynprog cDNA gap\n"));
1211
1212 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1213 return List_reverse(pairs);
1214 }
1215
1216 #else
1217 /* Non-SIMD methods */
1218 Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
1219 matrixL = Dynprog_standard(&directionsL_nogap,&directionsL_Egap,&directionsL_Fgap,dynprogL,
1220 rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1221 goffset,chroffset,chrhigh,watsonp,
1222 mismatchtype,open,extend,lbandL,ubandL,
1223 jump_late_p,/*revp*/false,/*saturation*/NEG_INFINITY_INT,
1224 /*upperp*/true,/*lowerp*/true);
1225
1226 Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
1227 matrixR = Dynprog_standard(&directionsR_nogap,&directionsR_Egap,&directionsR_Fgap,dynprogR,
1228 rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1229 rlengthR,glength,rev_goffset,chroffset,chrhigh,watsonp,
1230 mismatchtype,open,extend,lbandL,ubandR,
1231 /*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT,
1232 /*upperp*/true,/*lowerp*/true);
1233
1234 nmatches = nmismatches = nopens = nindels = 0;
1235
1236 bridge_cdna_gap(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,matrixL,matrixR,
1237 glength,rlengthL,rlengthR,extraband_paired,
1238 open,extend,roffsetL,rev_roffsetR,jump_late_p);
1239
1240 pairs = Dynprog_traceback_std(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1241 directionsR_nogap,directionsR_Egap,directionsR_Fgap,bestrR,bestcR,
1242 rev_rsequenceR,rev_rsequence_ucR,
1243 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1244 rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1245 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1246
1247 pairs = List_reverse(pairs);
1248
1249 queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1250 genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1251 /* No need to revise queryjump or genomejump, because the above
1252 coordinates are internal to the gap. */
1253
1254 if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1255 /* Add cDNA insertion, if any */
1256 for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1257 debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1258 pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1259 /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1260 }
1261 debug(printf("\n"));
1262
1263
1264 /* This loop not yet checked for get_genomic_nt giving correct answer */
1265 for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1266 c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1267 debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1268 #if 0
1269 assert(c2 == gsequence[k-goffset]);
1270 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1271 gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1272 #else
1273 pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1274 #endif
1275 }
1276 debug(printf("\n"));
1277
1278 } else {
1279
1280 /* Add gapholder to be solved in the future */
1281 #ifndef NOGAPHOLDER
1282 debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1283 pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1284 /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1285 #endif
1286 *incompletep = true;
1287 }
1288
1289 pairs = Dynprog_traceback_std(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1290 directionsL_nogap,directionsL_Egap,directionsL_Fgap,bestrL,bestcL,
1291 rsequenceL,rsequence_ucL,
1292 gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1293 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1294
1295 if (List_length(pairs) == 1) {
1296 /* Only a gap added */
1297 pairs = (List_T) NULL;
1298 }
1299
1300 FREEA(gsequence_alt);
1301 FREEA(gsequence);
1302 FREEA(rev_gsequence_alt);
1303 FREEA(rev_gsequence);
1304
1305 debug(printf("End of dynprog cDNA gap\n"));
1306
1307 *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1308 return List_reverse(pairs);
1309
1310 #endif
1311
1312 }
1313
1314