1 static char rcsid[] = "$Id: dynprog_cdna.c 214361 2018-03-21 01:24:28Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "dynprog_cdna.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h>		/* For ceil, log, pow */
11 #include <ctype.h>		/* For tolower */
12 
13 
14 #include "bool.h"
15 #include "except.h"
16 #include "assert.h"
17 #include "mem.h"
18 #include "comp.h"
19 #include "pair.h"
20 #include "pairdef.h"
21 #include "complement.h"
22 #include "dynprog_simd.h"
23 
24 /* These values were set to -5, -4, -3, but this led to chopped ends
25    in GMAP alignments, and failure to find chimeras */
26 #define MISMATCH_HIGHQ -3
27 #define MISMATCH_MEDQ -2
28 #define MISMATCH_LOWQ -1
29 
30 /* cDNA insertions are biologically not meaningful, so look for a good
31    gap opening somewhere */
32 #define CDNA_OPEN_HIGHQ -10
33 #define CDNA_OPEN_MEDQ -10
34 #define CDNA_OPEN_LOWQ -10
35 
36 #define CDNA_EXTEND_HIGHQ -7
37 #define CDNA_EXTEND_MEDQ -7
38 #define CDNA_EXTEND_LOWQ -7
39 
40 #define INSERT_PAIRS 9
41 
42 
43 #ifdef DEBUG
44 #define debug(x) x
45 #else
46 #define debug(x)
47 #endif
48 
49 #ifdef DEBUG3
50 #define debug3(x) x
51 #else
52 #define debug3(x)
53 #endif
54 
55 #ifdef DEBUG8
56 #define debug8(x) x
57 #else
58 #define debug8(x)
59 #endif
60 
61 #define T Dynprog_T
62 
63 
64 /************************************************************************
65  * get_genomic_nt
66  ************************************************************************/
67 
68 static char complCode[128] = COMPLEMENT_LC;
69 
70 static char
get_genomic_nt(char * g_alt,int genomicpos,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)71 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
72 		bool watsonp) {
73   char c2, c2_alt;
74   Univcoord_T pos;
75 
76 #if 0
77   /* If the read has a deletion, then we will extend beyond 0 or genomiclength, so do not restrict. */
78   if (genomicpos < 0) {
79     return '*';
80 
81   } else if (genomicpos >= genomiclength) {
82     return '*';
83 
84   }
85 #endif
86 
87   if (watsonp) {
88     if ((pos = chroffset + genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
89       *g_alt = '*';
90       return '*';
91 
92     } else if (pos >= chrhigh) {
93       *g_alt = '*';
94       return '*';
95 
96 #if 0
97     } else if (genome) {
98       /* Not necessary, because Genome_get_char_blocks should work */
99       debug8(printf("At %u, genomicnt is %c\n",
100 		    genomicpos,Genome_get_char(genome,pos)));
101       return Genome_get_char(genome,pos);
102 #endif
103 
104     } else {
105       /* GMAP with user-supplied genomic segment */
106       debug8(printf("At %u, genomicnt is %c\n",
107 		    genomicpos,Genome_get_char_blocks(&(g_alt),pos)));
108       return Genome_get_char_blocks(&(*g_alt),pos);
109     }
110 
111   } else {
112     if ((pos = chrhigh - genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
113       *g_alt = '*';
114       return '*';
115 
116     } else if (pos >= chrhigh) {
117       *g_alt = '*';
118       return '*';
119 
120 #if 0
121     } else if (genome) {
122       /* Not necessary, because Genome_get_char_blocks should work */
123       c2 = Genome_get_char(genome,pos);
124 #endif
125 
126     } else {
127       /* GMAP with user-supplied genomic segment */
128       c2 = Genome_get_char_blocks(&c2_alt,pos);
129     }
130     debug8(printf("At %u, genomicnt is %c\n",genomicpos,complCode[(int) c2]));
131     *g_alt = complCode[(int) c2_alt];
132     return complCode[(int) c2];
133   }
134 }
135 
136 
137 #if defined(HAVE_SSE2)
138 /* Columns are always genomic.  Rows are always query.  Bridging across common columns */
139 static void
bridge_cdna_gap_8_ud(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score8_T ** matrixL_upper,Score8_T ** matrixL_lower,Score8_T ** matrixR_upper,Score8_T ** matrixR_lower,int glength,int rlengthL,int rlengthR,int lbandL,int ubandL,int lbandR,int ubandR,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)140 bridge_cdna_gap_8_ud (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
141 		      Score8_T **matrixL_upper, Score8_T **matrixL_lower,
142 		      Score8_T **matrixR_upper, Score8_T **matrixR_lower,
143 		      int glength, int rlengthL, int rlengthR,
144 		      int lbandL, int ubandL, int lbandR, int ubandR,
145 		      int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
146   int bestscore = NEG_INFINITY_8, score, scoreL, scoreR, pen, end_reward = 0;
147   int rL, rR, cL, cR;
148   int rloL, rhighL;
149   int rloR, rhighR;
150 
151 #if 0
152   /* Perform computations */
153   lbandL = rlengthL - glength + extraband_paired;
154   ubandL = extraband_paired;
155 
156   lbandR = rlengthR - glength + extraband_paired;
157   ubandR = extraband_paired;
158 #endif
159 
160   /* Need a double loop on rows here, in contrast with a single loop
161      for introns, because we allow a genomic insertion that doesn't
162      match the cDNA.  So we need to add a penalty for a genomic
163      insertion */
164 
165   if (jump_late_p) {
166     for (cL = 1; cL < glength; cL++) {
167 
168       /* Note: opening penalty is added at the bottom of the loop */
169       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
170 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
171 	if ((rloL = cL - ubandL) < 1) {
172 	  rloL = 1;
173 	}
174 	if ((rhighL = cL + lbandL) > rlengthL-1) {
175 	  rhighL = rlengthL-1;
176 	}
177 
178 	if ((rloR = cR - ubandR) < 1) {
179 	  rloR = 1;
180 	}
181 	if ((rhighR = cR + lbandR) > rlengthR-1) {
182 	  rhighR = rlengthR-1;
183 	}
184 
185 	for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
186 	  scoreL = (int) matrixL_upper[cL][rL];
187 
188 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
189 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
190 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
191 	    scoreR = (int) matrixR_upper[cR][rR];
192 
193 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
194 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
195 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
196 
197 	      bestscore = score;
198 	      *bestcL = cL;
199 	      *bestcR = cR;
200 	      *bestrL = rL;
201 	      *bestrR = rR;
202 
203 	    } else {
204 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
205 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
206 	    }
207 	  }
208 
209 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
210 	    scoreR = (int) matrixR_lower[rR][cR];
211 
212 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
213 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
214 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
215 
216 	      bestscore = score;
217 	      *bestcL = cL;
218 	      *bestcR = cR;
219 	      *bestrL = rL;
220 	      *bestrR = rR;
221 
222 	    } else {
223 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
224 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
225 	    }
226 	  }
227 	}
228 
229 	for (/*at main diagonal*/; rL <= rhighL; rL++) {
230 	  scoreL = (int) matrixL_lower[rL][cL];
231 
232 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
233 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
234 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
235 	    scoreR = (int) matrixR_upper[cR][rR];
236 
237 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
238 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
239 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
240 
241 	      bestscore = score;
242 	      *bestcL = cL;
243 	      *bestcR = cR;
244 	      *bestrL = rL;
245 	      *bestrR = rR;
246 
247 	    } else {
248 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
249 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
250 	    }
251 	  }
252 
253 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
254 	    scoreR = (int) matrixR_lower[rR][cR];
255 
256 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
257 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
258 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
259 
260 	      bestscore = score;
261 	      *bestcL = cL;
262 	      *bestcR = cR;
263 	      *bestrL = rL;
264 	      *bestrR = rR;
265 
266 	    } else {
267 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
268 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
269 	    }
270 	  }
271 	}
272 
273 	pen = open - extend;	/* Subtract extend to compensate for
274                                    its addition in the for loop */
275       }
276     }
277 
278   } else {
279     /* Do not jump late */
280     for (cL = 1; cL < glength; cL++) {
281 
282       /* Note: opening penalty is added at the bottom of the loop */
283       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
284 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
285 	if ((rloL = cL - ubandL) < 1) {
286 	  rloL = 1;
287 	}
288 	if ((rhighL = cL + lbandL) > rlengthL-1) {
289 	  rhighL = rlengthL-1;
290 	}
291 
292 	if ((rloR = cR - ubandR) < 1) {
293 	  rloR = 1;
294 	}
295 	if ((rhighR = cR + lbandR) > rlengthR-1) {
296 	  rhighR = rlengthR-1;
297 	}
298 
299 	for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
300 	  scoreL = (int) matrixL_upper[cL][rL];
301 
302 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
303 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
304 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
305 	    scoreR = (int) matrixR_upper[cR][rR];
306 
307 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
308 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
309 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
310 
311 	      bestscore = score;
312 	      *bestcL = cL;
313 	      *bestcR = cR;
314 	      *bestrL = rL;
315 	      *bestrR = rR;
316 
317 	    } else {
318 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
319 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
320 	    }
321 	  }
322 
323 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
324 	    scoreR = (int) matrixR_lower[rR][cR];
325 
326 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
327 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
328 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
329 
330 	      bestscore = score;
331 	      *bestcL = cL;
332 	      *bestcR = cR;
333 	      *bestrL = rL;
334 	      *bestrR = rR;
335 
336 	    } else {
337 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
338 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
339 	    }
340 	  }
341 	}
342 
343 	for (/*at main diagonal*/; rL <= rhighL; rL++) {
344 	  scoreL = (int) matrixL_lower[rL][cL];
345 
346 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
347 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
348 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
349 	    scoreR = (int) matrixR_upper[cR][rR];
350 
351 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
352 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
353 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
354 
355 	      bestscore = score;
356 	      *bestcL = cL;
357 	      *bestcR = cR;
358 	      *bestrL = rL;
359 	      *bestrR = rR;
360 
361 	    } else {
362 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
363 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
364 	    }
365 	  }
366 
367 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
368 	    scoreR = (int) matrixR_lower[rR][cR];
369 
370 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
371 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
372 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
373 
374 	      bestscore = score;
375 	      *bestcL = cL;
376 	      *bestcR = cR;
377 	      *bestrL = rL;
378 	      *bestrR = rR;
379 
380 	    } else {
381 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
382 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
383 	    }
384 	  }
385 	}
386 
387 	pen = open - extend;	/* Subtract extend to compensate for
388                                    its addition in the for loop */
389       }
390     }
391   }
392 
393   *finalscore = (int) bestscore;
394   debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
395 		*finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
396 
397   return;
398 }
399 #endif
400 
401 #if defined(HAVE_SSE2)
402 /* Columns are always genomic.  Rows are always query.  Bridging across common columns */
403 static void
bridge_cdna_gap_16_ud(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score16_T ** matrixL_upper,Score16_T ** matrixL_lower,Score16_T ** matrixR_upper,Score16_T ** matrixR_lower,int glength,int rlengthL,int rlengthR,int lbandL,int ubandL,int lbandR,int ubandR,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)404 bridge_cdna_gap_16_ud (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
405 		      Score16_T **matrixL_upper, Score16_T **matrixL_lower,
406 		      Score16_T **matrixR_upper, Score16_T **matrixR_lower,
407 		      int glength, int rlengthL, int rlengthR,
408 		      int lbandL, int ubandL, int lbandR, int ubandR,
409 		      int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
410   int bestscore = NEG_INFINITY_16, score, scoreL, scoreR, pen, end_reward = 0;
411   int rL, rR, cL, cR;
412   int rloL, rhighL;
413   int rloR, rhighR;
414 
415 #if 0
416   /* Perform computations */
417   lbandL = rlengthL - glength + extraband_paired;
418   ubandL = extraband_paired;
419 
420   lbandR = rlengthR - glength + extraband_paired;
421   ubandR = extraband_paired;
422 #endif
423 
424   /* Need a double loop on rows here, in contrast with a single loop
425      for introns, because we allow a genomic insertion that doesn't
426      match the cDNA.  So we need to add a penalty for a genomic
427      insertion */
428 
429   if (jump_late_p) {
430     for (cL = 1; cL < glength; cL++) {
431 
432       /* Note: opening penalty is added at the bottom of the loop */
433       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
434 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
435 	if ((rloL = cL - ubandL) < 1) {
436 	  rloL = 1;
437 	}
438 	if ((rhighL = cL + lbandL) > rlengthL-1) {
439 	  rhighL = rlengthL-1;
440 	}
441 
442 	if ((rloR = cR - ubandR) < 1) {
443 	  rloR = 1;
444 	}
445 	if ((rhighR = cR + lbandR) > rlengthR-1) {
446 	  rhighR = rlengthR-1;
447 	}
448 
449 	for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
450 	  scoreL = (int) matrixL_upper[cL][rL];
451 
452 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
453 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
454 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
455 	    scoreR = (int) matrixR_upper[cR][rR];
456 
457 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
458 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
459 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
460 
461 	      bestscore = score;
462 	      *bestcL = cL;
463 	      *bestcR = cR;
464 	      *bestrL = rL;
465 	      *bestrR = rR;
466 
467 	    } else {
468 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
469 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
470 	    }
471 	  }
472 
473 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
474 	    scoreR = (int) matrixR_lower[rR][cR];
475 
476 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
477 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
478 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
479 
480 	      bestscore = score;
481 	      *bestcL = cL;
482 	      *bestcR = cR;
483 	      *bestrL = rL;
484 	      *bestrR = rR;
485 
486 	    } else {
487 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
488 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
489 	    }
490 	  }
491 	}
492 
493 	for (/*at main diagonal*/; rL <= rhighL; rL++) {
494 	  scoreL = (int) matrixL_lower[rL][cL];
495 
496 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
497 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
498 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
499 	    scoreR = (int) matrixR_upper[cR][rR];
500 
501 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
502 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
503 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
504 
505 	      bestscore = score;
506 	      *bestcL = cL;
507 	      *bestcR = cR;
508 	      *bestrL = rL;
509 	      *bestrR = rR;
510 
511 	    } else {
512 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
513 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
514 	    }
515 	  }
516 
517 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
518 	    scoreR = (int) matrixR_lower[rR][cR];
519 
520 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
521 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
522 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
523 
524 	      bestscore = score;
525 	      *bestcL = cL;
526 	      *bestcR = cR;
527 	      *bestrL = rL;
528 	      *bestrR = rR;
529 
530 	    } else {
531 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
532 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
533 	    }
534 	  }
535 	}
536 
537 	pen = open - extend;	/* Subtract extend to compensate for
538                                    its addition in the for loop */
539       }
540     }
541 
542   } else {
543     /* Do not jump late */
544     for (cL = 1; cL < glength; cL++) {
545 
546       /* Note: opening penalty is added at the bottom of the loop */
547       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
548 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
549 	if ((rloL = cL - ubandL) < 1) {
550 	  rloL = 1;
551 	}
552 	if ((rhighL = cL + lbandL) > rlengthL-1) {
553 	  rhighL = rlengthL-1;
554 	}
555 
556 	if ((rloR = cR - ubandR) < 1) {
557 	  rloR = 1;
558 	}
559 	if ((rhighR = cR + lbandR) > rlengthR-1) {
560 	  rhighR = rlengthR-1;
561 	}
562 
563 	for (rL = rloL; rL < /*to main diagonal*/cL; rL++) {
564 	  scoreL = (int) matrixL_upper[cL][rL];
565 
566 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
567 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
568 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
569 	    scoreR = (int) matrixR_upper[cR][rR];
570 
571 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
572 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
573 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
574 
575 	      bestscore = score;
576 	      *bestcL = cL;
577 	      *bestcR = cR;
578 	      *bestrL = rL;
579 	      *bestrR = rR;
580 
581 	    } else {
582 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
583 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
584 	    }
585 	  }
586 
587 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
588 	    scoreR = (int) matrixR_lower[rR][cR];
589 
590 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
591 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
592 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
593 
594 	      bestscore = score;
595 	      *bestcL = cL;
596 	      *bestcR = cR;
597 	      *bestrL = rL;
598 	      *bestrR = rR;
599 
600 	    } else {
601 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
602 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
603 	    }
604 	  }
605 	}
606 
607 	for (/*at main diagonal*/; rL <= rhighL; rL++) {
608 	  scoreL = (int) matrixL_lower[rL][cL];
609 
610 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
611 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
612 	  for (rR = rloR; rR < /*to main diagonal*/cR && rR < rightoffset-leftoffset-rL; rR++) {
613 	    scoreR = (int) matrixR_upper[cR][rR];
614 
615 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
616 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
617 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
618 
619 	      bestscore = score;
620 	      *bestcL = cL;
621 	      *bestcR = cR;
622 	      *bestrL = rL;
623 	      *bestrR = rR;
624 
625 	    } else {
626 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
627 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
628 	    }
629 	  }
630 
631 	  for (/*at main diagonal*/; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
632 	    scoreR = (int) matrixR_lower[rR][cR];
633 
634 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
635 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
636 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
637 
638 	      bestscore = score;
639 	      *bestcL = cL;
640 	      *bestcR = cR;
641 	      *bestrL = rL;
642 	      *bestrR = rR;
643 
644 	    } else {
645 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
646 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
647 	    }
648 	  }
649 	}
650 
651 	pen = open - extend;	/* Subtract extend to compensate for
652                                    its addition in the for loop */
653       }
654     }
655   }
656 
657   *finalscore = (int) bestscore;
658   debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
659 		*finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
660 
661   return;
662 }
663 #endif
664 
665 
666 
667 #ifndef HAVE_SSE2
668 /* Columns are always genomic.  Rows are always query.  Bridging across common columns */
669 static void
bridge_cdna_gap(int * finalscore,int * bestcL,int * bestcR,int * bestrL,int * bestrR,Score16_T ** matrixL,Score16_T ** matrixR,int glength,int rlengthL,int rlengthR,int extraband_paired,int open,int extend,int leftoffset,int rightoffset,bool jump_late_p)670 bridge_cdna_gap (int *finalscore, int *bestcL, int *bestcR, int *bestrL, int *bestrR,
671 #ifdef HAVE_SSE2
672 		 Score16_T **matrixL, Score16_T **matrixR,
673 #else
674 		 Score32_T **matrixL, Score32_T **matrixR,
675 #endif
676 		 int glength, int rlengthL, int rlengthR, int extraband_paired,
677 		 int open, int extend, int leftoffset, int rightoffset, bool jump_late_p) {
678   int bestscore = NEG_INFINITY_32, score, scoreL, scoreR, pen, end_reward = 0;
679   int rL, rR, cL, cR;
680   int lbandL, ubandL, rloL, rhighL;
681   int lbandR, ubandR, rloR, rhighR;
682 
683   /* Perform computations */
684   lbandL = rlengthL - glength + extraband_paired;
685   ubandL = extraband_paired;
686 
687   lbandR = rlengthR - glength + extraband_paired;
688   ubandR = extraband_paired;
689 
690   /* Need a double loop on rows here, in contrast with a single loop
691      for introns, because we allow a genomic insertion that doesn't
692      match the cDNA.  So we need to add a penalty for a genomic
693      insertion */
694 
695   if (jump_late_p) {
696     for (cL = 1; cL < glength; cL++) {
697 
698       /* Note: opening penalty is added at the bottom of the loop */
699       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
700 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
701 	if ((rloL = cL - ubandL) < 1) {
702 	  rloL = 1;
703 	}
704 	if ((rhighL = cL + lbandL) > rlengthL-1) {
705 	  rhighL = rlengthL-1;
706 	}
707 
708 	if ((rloR = cR - ubandR) < 1) {
709 	  rloR = 1;
710 	}
711 	if ((rhighR = cR + lbandR) > rlengthR-1) {
712 	  rhighR = rlengthR-1;
713 	}
714 
715 	for (rL = rloL; rL <= rhighL; rL++) {
716 	  scoreL = (int) matrixL[cL][rL];
717 
718 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
719 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
720 	  for (rR = rloR; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
721 	    scoreR = (int) matrixR[cR][rR];
722 
723 	    if ((score = scoreL + scoreR + pen + end_reward) >= bestscore) {  /* Use >= for jump late */
724 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
725 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
726 
727 	      bestscore = score;
728 	      *bestcL = cL;
729 	      *bestcR = cR;
730 	      *bestrL = rL;
731 	      *bestrR = rR;
732 
733 	    } else {
734 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
735 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
736 	    }
737 	  }
738 	}
739 	pen = open - extend;	/* Subtract extend to compensate for
740                                    its addition in the for loop */
741       }
742     }
743 
744   } else {
745     /* Do not jump late */
746     for (cL = 1; cL < glength; cL++) {
747 
748       /* Note: opening penalty is added at the bottom of the loop */
749       for (cR = glength-cL, pen = 0; cR >= 0; cR--, pen += extend) {
750 	/* debug3(printf("\nAt row %d on left and %d on right\n",cL,cR)); */
751 	if ((rloL = cL - ubandL) < 1) {
752 	  rloL = 1;
753 	}
754 	if ((rhighL = cL + lbandL) > rlengthL-1) {
755 	  rhighL = rlengthL-1;
756 	}
757 
758 	if ((rloR = cR - ubandR) < 1) {
759 	  rloR = 1;
760 	}
761 	if ((rhighR = cR + lbandR) > rlengthR-1) {
762 	  rhighR = rlengthR-1;
763 	}
764 
765 	for (rL = rloL; rL <= rhighL; rL++) {
766 	  scoreL = (int) matrixL[cL][rL];
767 
768 	  /* Disallow leftoffset + rL >= rightoffset - rR, or rR >= rightoffset - leftoffset - rL */
769 	  debug3(printf("  Disallowing rR to be >= %d\n",rightoffset-leftoffset-rL));
770 	  for (rR = rloR; rR <= rhighR && rR < rightoffset-leftoffset-rL; rR++) {
771 	    scoreR = (int) matrixR[cR][rR];
772 
773 	    if ((score = scoreL + scoreR + pen + end_reward) > bestscore) {  /* Use > for jump early */
774 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d)+(%d) = %d (bestscore)\n",
775 			    rL,rR,scoreL,scoreR,pen,end_reward,score));
776 
777 	      bestscore = score;
778 	      *bestcL = cL;
779 	      *bestcR = cR;
780 	      *bestrL = rL;
781 	      *bestrR = rR;
782 
783 	    } else {
784 	      debug3(printf("At %d left to %d right, score is (%d)+(%d)+(%d) = %d\n",
785 			    rL,rR,scoreL,scoreR,pen,scoreL+scoreR+pen));
786 	    }
787 	  }
788 	}
789 	pen = open - extend;	/* Subtract extend to compensate for
790                                    its addition in the for loop */
791       }
792     }
793   }
794 
795   *finalscore = (int) bestscore;
796   debug3(printf("Returning final score of %d at (%d,%d) left to (%d,%d) right\n",
797 		*finalscore,*bestcL,*bestrL,*bestcR,*bestrR));
798 
799   return;
800 }
801 #endif
802 
803 
804 /* Sequences rsequenceL and rsequenceR represent the two ends of the cDNA insertion */
805 List_T
Dynprog_cdna_gap(int * dynprogindex,int * finalscore,bool * incompletep,T dynprogL,T dynprogR,char * rsequenceL,char * rsequence_ucL,char * rev_rsequenceR,char * rev_rsequence_ucR,int rlengthL,int rlengthR,int glength,int roffsetL,int rev_roffsetR,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int genestrand,bool jump_late_p,Pairpool_T pairpool,int extraband_paired,double defect_rate)806 Dynprog_cdna_gap (int *dynprogindex, int *finalscore, bool *incompletep,
807 		  T dynprogL, T dynprogR, char *rsequenceL, char *rsequence_ucL,
808 		  char *rev_rsequenceR, char *rev_rsequence_ucR,
809 #if 0
810 		  char *gsequence, char *gsequence_uc,
811 #endif
812 		  int rlengthL, int rlengthR, int glength,
813 		  int roffsetL, int rev_roffsetR, int goffset,
814 		  Univcoord_T chroffset, Univcoord_T chrhigh,
815 		  bool watsonp, int genestrand, bool jump_late_p, Pairpool_T pairpool,
816 		  int extraband_paired, double defect_rate) {
817   List_T pairs = NULL;
818   char *gsequence, *gsequence_alt, *rev_gsequence, *rev_gsequence_alt;
819   Mismatchtype_T mismatchtype;
820   int lbandL, ubandL, lbandR, ubandR;
821   int open, extend;
822 #if defined(HAVE_SSE2)
823   Score8_T **matrix8L_upper, **matrix8L_lower, **matrix8R_upper, **matrix8R_lower;
824   Direction8_T **directions8L_upper_nogap, **directions8L_upper_Egap,
825     **directions8L_lower_nogap, **directions8L_lower_Egap,
826     **directions8R_upper_nogap, **directions8R_upper_Egap,
827     **directions8R_lower_nogap, **directions8R_lower_Egap;
828   bool use8p;
829 
830   Score16_T **matrix16L_upper, **matrix16L_lower, **matrix16R_upper, **matrix16R_lower;
831   Direction16_T **directions16L_upper_nogap, **directions16L_upper_Egap,
832     **directions16L_lower_nogap, **directions16L_lower_Egap,
833     **directions16R_upper_nogap, **directions16R_upper_Egap,
834     **directions16R_lower_nogap, **directions16R_lower_Egap;
835 #else
836   Score32_T **matrixL, **matrixR;
837   Direction32_T **directionsL_nogap, **directionsL_Egap, **directionsL_Fgap,
838     **directionsR_nogap, **directionsR_Egap, **directionsR_Fgap;
839 #endif
840   int rev_goffset, bestrL, bestrR, bestcL, bestcR, k;
841   int nmatches, nmismatches, nopens, nindels;
842   int queryjump, genomejump;
843   char c2, c2_alt;
844 
845 
846   if (glength <= 1) {
847     return NULL;
848   }
849 
850   debug(printf("\n"));
851   debug(printf("%c:  ",*dynprogindex > 0 ? (*dynprogindex-1)%26+'a' : (-(*dynprogindex)-1)%26+'A'));
852   debug(printf("Aligning cdna gap\n"));
853 #ifdef EXTRACT_GENOMICSEG
854   debug(printf("At genomic offset %d-%d, %.*s\n",goffset,goffset+glength-1,glength,gsequence));
855 #endif
856   debug(printf("\n"));
857 
858   /* ?check if offsets are too close.  But this eliminates a segment
859      of the cDNA.  Should check in stage 3, and do single gap instead. */
860   /*
861   if (roffsetL+rlengthL-1 >= rev_roffsetR-rlengthR+1) {
862     debug(printf("Bounds don't make sense\n"));
863     *finalscore = NEG_INFINITY_16;
864     return NULL;
865   }
866   */
867 
868   if (defect_rate < DEFECT_HIGHQ) {
869     mismatchtype = HIGHQ;
870     /* mismatch = MISMATCH_HIGHQ; */
871     open = CDNA_OPEN_HIGHQ;
872     extend = CDNA_EXTEND_HIGHQ;
873   } else if (defect_rate < DEFECT_MEDQ) {
874     mismatchtype = MEDQ;
875     /* mismatch = MISMATCH_MEDQ; */
876     open = CDNA_OPEN_MEDQ;
877     extend = CDNA_EXTEND_MEDQ;
878   } else {
879     mismatchtype = LOWQ;
880     /* mismatch = MISMATCH_LOWQ; */
881     open = CDNA_OPEN_LOWQ;
882     extend = CDNA_EXTEND_LOWQ;
883   }
884 
885   if (glength > dynprogR->max_glength || rlengthR > dynprogR->max_rlength) {
886     debug(printf("glength %d or rlengthR %d is too long.  Returning NULL\n",glength,rlengthR));
887 #if 0
888     rev_goffset = goffset + glength - 1;
889     queryjump = rev_roffsetR - roffsetL + 1;
890     genomejump = rev_goffset - goffset + 1;
891     pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,
892 				    /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
893 #endif
894     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
895     return (List_T) NULL;
896   }
897 
898   if (glength > dynprogL->max_glength || rlengthL > dynprogL->max_rlength) {
899     debug(printf("glength %d or rlengthL %d is too long.  Returning NULL\n",glength,rlengthL));
900 #if 0
901     rev_goffset = goffset + glength - 1;
902     queryjump = rev_roffsetR - roffsetL + 1;
903     genomejump = rev_goffset - goffset + 1;
904     pairs = Pairpool_push_gapholder(NULL,pairpool,queryjump,genomejump,
905 				    /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
906 #endif
907     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
908     return (List_T) NULL;
909   }
910 
911 #if 0
912   /* Right side looks like 5' end */
913   /* Note: sequence 1 and 2 flipped, because 1 has extramaterial */
914   rev_gsequence = &(gsequence[glength-1]);
915   rev_gsequence_uc = &(gsequence_uc[glength-1]);
916 #endif
917   rev_goffset = goffset+glength-1;
918 
919   debug(printf("At query offset %d-%d, %.*s\n",roffsetL,roffsetL+rlengthL-1,rlengthL,rsequenceL));
920   debug(printf("At query offset %d-%d, %.*s\n",rev_roffsetR-rlengthR+1,rev_roffsetR,rlengthR,&(rev_rsequenceR[-rlengthR+1])));
921   debug(printf("Whole piece at query offset %d-%d, %.*s\n",roffsetL,rev_roffsetR,rev_roffsetR-roffsetL+1,rsequenceL));
922 
923 #if defined(HAVE_SSE2)
924   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
925   if (glength < use8p_size[mismatchtype] || (rlengthL < use8p_size[mismatchtype] && rlengthR <= use8p_size[mismatchtype])) {
926     use8p = true;
927   } else {
928     use8p = false;
929   }
930 #endif
931 
932 
933   rev_gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
934   rev_gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
935   gsequence = (char *) MALLOCA((glength+1) * sizeof(char));
936   gsequence_alt = (char *) MALLOCA((glength+1) * sizeof(char));
937 
938   if (watsonp) {
939     Genome_get_segment_blocks_left(rev_gsequence,rev_gsequence_alt,/*right*/chroffset+rev_goffset+1,
940 				   glength,chroffset,/*revcomp*/false);
941     Genome_get_segment_blocks_right(gsequence,gsequence_alt,/*left*/chroffset+goffset,
942 				    glength,chrhigh,/*revcomp*/false);
943   } else {
944     Genome_get_segment_blocks_right(rev_gsequence,rev_gsequence_alt,/*left*/chrhigh-rev_goffset,
945 				    glength,chrhigh,/*revcomp*/true);
946     Genome_get_segment_blocks_left(gsequence,gsequence_alt,/*right*/chrhigh-goffset+1,
947 				   glength,chroffset,/*revcomp*/true);
948   }
949   if (gsequence[0] == '\0' || rev_gsequence[0] == '\0') {
950     FREEA(gsequence_alt);
951     FREEA(gsequence);
952     FREEA(rev_gsequence_alt);
953     FREEA(rev_gsequence);
954     return (List_T) NULL;
955   }
956 
957 
958 #if defined(HAVE_SSE2)
959   if (use8p == true) {
960     Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
961     matrix8L_upper = Dynprog_simd_8_upper(&directions8L_upper_nogap,&directions8L_upper_Egap,dynprogL,
962 					  rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
963 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
964 					  goffset,chroffset,chrhigh,watsonp,
965 #endif
966 					  mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
967 
968     matrix8L_lower = Dynprog_simd_8_lower(&directions8L_lower_nogap,&directions8L_lower_Egap,dynprogL,
969 					  rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
970 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
971 					  goffset,chroffset,chrhigh,watsonp,
972 #endif
973 					  mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
974 
975 
976     Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
977     matrix8R_upper = Dynprog_simd_8_upper(&directions8R_upper_nogap,&directions8R_upper_Egap,dynprogR,
978 					  rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
979 					  rlengthR,glength,
980 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
981 					  rev_goffset,chroffset,chrhigh,watsonp,
982 #endif
983 					  mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
984 
985     matrix8R_lower = Dynprog_simd_8_lower(&directions8R_lower_nogap,&directions8R_lower_Egap,dynprogR,
986 					  rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
987 					  rlengthR,glength,
988 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
989 					  rev_goffset,chroffset,chrhigh,watsonp,
990 #endif
991 					  mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
992 
993 
994     nmatches = nmismatches = nopens = nindels = 0;
995     bridge_cdna_gap_8_ud(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,
996 			 matrix8L_upper,matrix8L_lower,matrix8R_upper,matrix8R_lower,
997 			 glength,rlengthL,rlengthR,lbandL,ubandL,lbandR,ubandR,
998 			 open,extend,roffsetL,rev_roffsetR,jump_late_p);
999 
1000     if (bestcR >= bestrR) {
1001       pairs = Dynprog_traceback_8_upper(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1002 					directions8R_upper_nogap,directions8R_upper_Egap,
1003 					bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1004 					&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1005 					rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1006 					chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1007     } else {
1008       pairs = Dynprog_traceback_8_lower(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1009 					directions8R_lower_nogap,directions8R_lower_Egap,
1010 					bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1011 					&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1012 					rev_roffsetR,rev_goffset,pairpool,genestrand,/*revp*/true,
1013 					*dynprogindex);
1014     }
1015 
1016     pairs = List_reverse(pairs);
1017 
1018     queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1019     genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1020     /* No need to revise queryjump or genomejump, because the above
1021        coordinates are internal to the gap. */
1022 
1023     if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1024       /* Add cDNA insertion, if any */
1025       for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1026 	debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1027 	pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1028 			      /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1029       }
1030       debug(printf("\n"));
1031 
1032 
1033       /* This loop not yet checked for get_genomic_nt giving correct answer */
1034       for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1035 	c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1036 	debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1037 #if 0
1038 	assert(c2 == gsequence[k-goffset]);
1039 	pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1040 			      gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1041 #else
1042 	pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1043 #endif
1044       }
1045       debug(printf("\n"));
1046 
1047     } else {
1048 
1049       /* Add gapholder to be solved in the future */
1050 #ifndef NOGAPHOLDER
1051       debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1052       pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1053 				      /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1054 #endif
1055       *incompletep = true;
1056     }
1057 
1058     if (bestcL >= bestrL) {
1059       pairs = Dynprog_traceback_8_upper(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1060 					directions8L_upper_nogap,directions8L_upper_Egap,
1061 					bestrL,bestcL,rsequenceL,rsequence_ucL,
1062 					gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1063 					chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1064     } else {
1065       pairs = Dynprog_traceback_8_lower(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1066 					directions8L_lower_nogap,directions8L_lower_Egap,
1067 					bestrL,bestcL,rsequenceL,rsequence_ucL,
1068 					gsequence,gsequence_alt,roffsetL,goffset,pairpool,
1069 					genestrand,/*revp*/false,*dynprogindex);
1070     }
1071 
1072     if (List_length(pairs) == 1) {
1073       /* Only a gap added */
1074       pairs = (List_T) NULL;
1075     }
1076 
1077     FREEA(gsequence_alt);
1078     FREEA(gsequence);
1079     FREEA(rev_gsequence_alt);
1080     FREEA(rev_gsequence);
1081 
1082     debug(printf("End of dynprog cDNA gap\n"));
1083 
1084     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1085     return List_reverse(pairs);
1086 
1087   } else {
1088     /* Use 16-mers */
1089     Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
1090     matrix16L_upper = Dynprog_simd_16_upper(&directions16L_upper_nogap,&directions16L_upper_Egap,dynprogL,
1091 					    rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1092 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1093 					    goffset,chroffset,chrhigh,watsonp,
1094 #endif
1095 					    mismatchtype,open,extend,ubandL,jump_late_p,/*revp*/false);
1096 
1097     matrix16L_lower = Dynprog_simd_16_lower(&directions16L_lower_nogap,&directions16L_lower_Egap,dynprogL,
1098 					    rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1099 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1100 					    goffset,chroffset,chrhigh,watsonp,
1101 #endif
1102 					    mismatchtype,open,extend,lbandL,jump_late_p,/*revp*/false);
1103 
1104     Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
1105     matrix16R_upper = Dynprog_simd_16_upper(&directions16R_upper_nogap,&directions16R_upper_Egap,dynprogR,
1106 					    rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1107 					    rlengthR,glength,
1108 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1109 					    rev_goffset,chroffset,chrhigh,watsonp,
1110 #endif
1111 					    mismatchtype,open,extend,ubandR,/*for revp true*/!jump_late_p,/*revp*/true);
1112 
1113     matrix16R_lower = Dynprog_simd_16_lower(&directions16R_lower_nogap,&directions16R_lower_Egap,dynprogR,
1114 					    rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1115 					    rlengthR,glength,
1116 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
1117 					    rev_goffset,chroffset,chrhigh,watsonp,
1118 #endif
1119 					    mismatchtype,open,extend,lbandR,/*for revp true*/!jump_late_p,/*revp*/true);
1120 
1121     nmatches = nmismatches = nopens = nindels = 0;
1122 
1123     bridge_cdna_gap_16_ud(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,
1124 			 matrix16L_upper,matrix16L_lower,matrix16R_upper,matrix16R_lower,
1125 			 glength,rlengthL,rlengthR,lbandL,ubandL,lbandR,ubandR,
1126 			 open,extend,roffsetL,rev_roffsetR,jump_late_p);
1127 
1128     if (bestcR >= bestrR) {
1129       pairs = Dynprog_traceback_16_upper(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1130 					 directions16R_upper_nogap,directions16R_upper_Egap,
1131 					 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1132 					 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1133 					 rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1134 					 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1135     } else {
1136       pairs = Dynprog_traceback_16_lower(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1137 					 directions16R_lower_nogap,directions16R_lower_Egap,
1138 					 bestrR,bestcR,rev_rsequenceR,rev_rsequence_ucR,
1139 					 &(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1140 					 rev_roffsetR,rev_goffset,pairpool,genestrand,/*revp*/true,
1141 					 *dynprogindex);
1142     }
1143 
1144     pairs = List_reverse(pairs);
1145 
1146     queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1147     genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1148     /* No need to revise queryjump or genomejump, because the above
1149        coordinates are internal to the gap. */
1150 
1151     if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1152       /* Add cDNA insertion, if any */
1153       for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1154 	debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1155 	pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1156 			      /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1157       }
1158       debug(printf("\n"));
1159 
1160 
1161       /* This loop not yet checked for get_genomic_nt giving correct answer */
1162       for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1163 	c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1164 	debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1165 #if 0
1166 	assert(c2 == gsequence[k-goffset]);
1167 	pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1168 			      gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1169 #else
1170 	pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1171 #endif
1172       }
1173       debug(printf("\n"));
1174 
1175     } else {
1176 
1177       /* Add gapholder to be solved in the future */
1178 #ifndef NOGAPHOLDER
1179       debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1180       pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1181 				      /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1182 #endif
1183       *incompletep = true;
1184     }
1185 
1186     if (bestcL >= bestrL) {
1187       pairs = Dynprog_traceback_16_upper(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1188 					 directions16L_upper_nogap,directions16L_upper_Egap,
1189 					 bestrL,bestcL,rsequenceL,rsequence_ucL,
1190 					 gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1191 					 chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1192     } else {
1193       pairs = Dynprog_traceback_16_lower(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1194 					 directions16L_lower_nogap,directions16L_lower_Egap,
1195 					 bestrL,bestcL,rsequenceL,rsequence_ucL,
1196 					 gsequence,gsequence_alt,roffsetL,goffset,pairpool,
1197 					 genestrand,/*revp*/false,*dynprogindex);
1198     }
1199 
1200     if (List_length(pairs) == 1) {
1201       /* Only a gap added */
1202       pairs = (List_T) NULL;
1203     }
1204 
1205     FREEA(gsequence_alt);
1206     FREEA(gsequence);
1207     FREEA(rev_gsequence_alt);
1208     FREEA(rev_gsequence);
1209 
1210     debug(printf("End of dynprog cDNA gap\n"));
1211 
1212     *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1213     return List_reverse(pairs);
1214   }
1215 
1216 #else
1217   /* Non-SIMD methods */
1218   Dynprog_compute_bands(&lbandL,&ubandL,rlengthL,glength,extraband_paired,/*widebandp*/true);
1219   matrixL = Dynprog_standard(&directionsL_nogap,&directionsL_Egap,&directionsL_Fgap,dynprogL,
1220 			     rsequenceL,gsequence,gsequence_alt,rlengthL,glength,
1221 			     goffset,chroffset,chrhigh,watsonp,
1222 			     mismatchtype,open,extend,lbandL,ubandL,
1223 			     jump_late_p,/*revp*/false,/*saturation*/NEG_INFINITY_INT,
1224 			     /*upperp*/true,/*lowerp*/true);
1225 
1226   Dynprog_compute_bands(&lbandR,&ubandR,rlengthR,glength,extraband_paired,/*widebandp*/true);
1227   matrixR = Dynprog_standard(&directionsR_nogap,&directionsR_Egap,&directionsR_Fgap,dynprogR,
1228 			     rev_rsequenceR,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1229 			     rlengthR,glength,rev_goffset,chroffset,chrhigh,watsonp,
1230 			     mismatchtype,open,extend,lbandL,ubandR,
1231 			     /*for revp true*/!jump_late_p,/*revp*/true,/*saturation*/NEG_INFINITY_INT,
1232 			     /*upperp*/true,/*lowerp*/true);
1233 
1234   nmatches = nmismatches = nopens = nindels = 0;
1235 
1236   bridge_cdna_gap(&(*finalscore),&bestcL,&bestcR,&bestrL,&bestrR,matrixL,matrixR,
1237 		  glength,rlengthL,rlengthR,extraband_paired,
1238 		  open,extend,roffsetL,rev_roffsetR,jump_late_p);
1239 
1240   pairs = Dynprog_traceback_std(NULL,&nmatches,&nmismatches,&nopens,&nindels,
1241 				directionsR_nogap,directionsR_Egap,directionsR_Fgap,bestrR,bestcR,
1242 				rev_rsequenceR,rev_rsequence_ucR,
1243 				&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
1244 				rev_roffsetR,rev_goffset,pairpool,/*revp*/true,
1245 				chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1246 
1247   pairs = List_reverse(pairs);
1248 
1249   queryjump = (rev_roffsetR-bestrR) - (roffsetL+bestrL) + 1;
1250   genomejump = (rev_goffset-bestcR) - (goffset+bestcL) + 1;
1251   /* No need to revise queryjump or genomejump, because the above
1252      coordinates are internal to the gap. */
1253 
1254   if (queryjump == INSERT_PAIRS && genomejump == INSERT_PAIRS) {
1255     /* Add cDNA insertion, if any */
1256     for (k = rev_roffsetR-bestrR; k >= roffsetL+bestrL; k--) {
1257       debug(printf("cDNA insertion, Pushing [%d,%d] (%c,-)\n",k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL]));
1258       pairs = Pairpool_push(pairs,pairpool,k,rev_goffset-bestcR+1,rsequenceL[k-roffsetL],SHORTGAP_COMP,
1259 			    /*genome*/' ',/*genomealt*/' ',*dynprogindex);
1260     }
1261     debug(printf("\n"));
1262 
1263 
1264     /* This loop not yet checked for get_genomic_nt giving correct answer */
1265     for (k = rev_goffset-bestcR; k >= goffset+bestcL; k--) {
1266       c2 = get_genomic_nt(&c2_alt,k,chroffset,chrhigh,watsonp);
1267       debug(printf("genome insertion, Pushing [%d,%d] (-,%c)\n",roffsetL+bestrL,k,c2));
1268 #if 0
1269       assert(c2 == gsequence[k-goffset]);
1270       pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,
1271 			    gsequence[k-goffset],/*genomealt*/GENOMEALT_DEFERRED,*dynprogindex);
1272 #else
1273       pairs = Pairpool_push(pairs,pairpool,roffsetL+bestrL,k,' ',SHORTGAP_COMP,c2,c2_alt,*dynprogindex);
1274 #endif
1275     }
1276     debug(printf("\n"));
1277 
1278   } else {
1279 
1280     /* Add gapholder to be solved in the future */
1281 #ifndef NOGAPHOLDER
1282     debug(printf("Pushing a gap with queryjump = %d, genomejump = %d\n",queryjump,genomejump));
1283     pairs = Pairpool_push_gapholder(pairs,pairpool,queryjump,genomejump,
1284 				    /*leftpair*/NULL,/*rightpair*/NULL,/*knownp*/false);
1285 #endif
1286     *incompletep = true;
1287   }
1288 
1289   pairs = Dynprog_traceback_std(pairs,&nmatches,&nmismatches,&nopens,&nindels,
1290 				directionsL_nogap,directionsL_Egap,directionsL_Fgap,bestrL,bestcL,
1291 				rsequenceL,rsequence_ucL,
1292 				gsequence,gsequence_alt,roffsetL,goffset,pairpool,/*revp*/false,
1293 				chroffset,chrhigh,watsonp,genestrand,*dynprogindex);
1294 
1295   if (List_length(pairs) == 1) {
1296     /* Only a gap added */
1297     pairs = (List_T) NULL;
1298   }
1299 
1300   FREEA(gsequence_alt);
1301   FREEA(gsequence);
1302   FREEA(rev_gsequence_alt);
1303   FREEA(rev_gsequence);
1304 
1305   debug(printf("End of dynprog cDNA gap\n"));
1306 
1307   *dynprogindex += (*dynprogindex > 0 ? +1 : -1);
1308   return List_reverse(pairs);
1309 
1310 #endif
1311 
1312 }
1313 
1314