1 static char rcsid[] = "$Id: dynprog.c 214361 2018-03-21 01:24:28Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "dynprog.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <math.h> /* For ceil, log, pow */
11 #include <ctype.h> /* For tolower */
12 #ifdef HAVE_SSE2
13 #include <emmintrin.h>
14 #endif
15 #ifdef HAVE_SSE4_1
16 #include <smmintrin.h>
17 #endif
18
19
20 #include "bool.h"
21 #include "except.h"
22 #include "assert.h"
23 #include "mem.h"
24 #include "comp.h"
25 #include "pair.h"
26 #include "pairdef.h"
27 #include "boyer-moore.h"
28 #include "intron.h"
29 #include "complement.h"
30 #include "splicetrie.h"
31 #include "maxent.h"
32 #include "maxent_hr.h"
33 #include "fastlog.h"
34
35
36 /* Tests whether get_genomic_nt == genomicseg in compute_scores procedures */
37 /* #define EXTRACT_GENOMICSEG 1 */
38
39
40 /* Prints parameters and results */
41 #ifdef DEBUG
42 #define debug(x) x
43 #else
44 #define debug(x)
45 #endif
46
47 /* Prints matrices */
48 #ifdef DEBUG2
49 #define debug2(x) x
50 #else
51 #define debug2(x)
52 #endif
53
54 /* F gap */
55 #ifdef DEBUG3
56 #define debug3(x) x
57 #else
58 #define debug3(x)
59 #endif
60
61 /* Getting genomic nt */
62 #ifdef DEBUG8
63 #define debug8(x) x
64 #else
65 #define debug8(x)
66 #endif
67
68 /* Old matrix computations, details */
69 #ifdef DEBUG12A
70 #define debug12a(x) x
71 #else
72 #define debug12a(x)
73 #endif
74
75 /* Comparing SIMD with standard code. Define in dynprog.h */
76 #ifdef DEBUG_SIMD
77 #define debug_simd(x) x
78 #else
79 #define debug_simd(x)
80 #endif
81
82
83 /*
84 #define RIGHTANGLE 1
85 */
86
87 #if defined(DEBUG2) || defined(DEBUG_SIMD)
88 #define NEG_INFINITY_DISPLAY (-99)
89 #endif
90
91
92 /* Previously allowed lower mismatch scores on end to allow more
93 complete alignments to the end, and because ends are typically of
94 lower quality. Previously made equal to FULLMATCH, because
95 criterion is nmatches >= nmismatches. However, extensions at ends
96 appear to defeat purpose of trimming, so increase mismatch at end
97 from -3 to -4. */
98 #define MISMATCH_ENDQ -5
99
100
101 #define T Dynprog_T
102
103
104 char *
Dynprog_endalign_string(Endalign_T endalign)105 Dynprog_endalign_string (Endalign_T endalign) {
106 switch (endalign) {
107 case QUERYEND_GAP: return "queryend_gap";
108 case QUERYEND_INDELS: return "queryend_indels";
109 case QUERYEND_NOGAPS: return "queryend_nogaps";
110 case BEST_LOCAL: return "best_local";
111 default:
112 printf("endalign %d not recognized\n",endalign);
113 return "";
114 }
115 }
116
117
118
119 int
Dynprog_score(int matches,int mismatches,int qopens,int qindels,int topens,int tindels,double defect_rate)120 Dynprog_score (int matches, int mismatches, int qopens, int qindels, int topens, int tindels,
121 double defect_rate) {
122
123 if (defect_rate < DEFECT_HIGHQ) {
124 return FULLMATCH*matches + MISMATCH_HIGHQ*mismatches + SINGLE_OPEN_HIGHQ*qopens + SINGLE_EXTEND_HIGHQ*qindels
125 + SINGLE_OPEN_HIGHQ*topens + SINGLE_EXTEND_HIGHQ*tindels;
126 } else if (defect_rate < DEFECT_MEDQ) {
127 return FULLMATCH*matches + MISMATCH_MEDQ*mismatches + SINGLE_OPEN_MEDQ*qopens + SINGLE_EXTEND_MEDQ*qindels
128 + SINGLE_OPEN_MEDQ*topens + SINGLE_EXTEND_MEDQ*tindels;
129 } else {
130 return FULLMATCH*matches + MISMATCH_LOWQ*mismatches + SINGLE_OPEN_LOWQ*qopens + SINGLE_EXTEND_LOWQ*qindels
131 + SINGLE_OPEN_LOWQ*topens + SINGLE_EXTEND_LOWQ*tindels;
132 }
133 }
134
135
136 #if !defined(HAVE_SSE2) || defined(DEBUG2) || defined(DEBUG_SIMD)
137 static char complCode[128] = COMPLEMENT_LC;
138
139 static char
get_genomic_nt(char * g_alt,int genomicpos,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp)140 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
141 bool watsonp) {
142 char c2, c2_alt;
143 Univcoord_T pos;
144
145 #if 0
146 /* If the read has a deletion, then we will extend beyond 0 or genomiclength, so do not restrict. */
147 if (genomicpos < 0) {
148 return '*';
149
150 } else if (genomicpos >= genomiclength) {
151 return '*';
152
153 }
154 #endif
155
156 if (watsonp) {
157 if ((pos = chroffset + genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
158 *g_alt = '*';
159 return '*';
160
161 } else if (pos >= chrhigh) {
162 *g_alt = '*';
163 return '*';
164
165 #if 0
166 } else if (genome) {
167 /* Not necessary, because Genome_get_char_blocks should work */
168 debug8(printf("At %u, genomicnt is %c\n",
169 genomicpos,Genome_get_char(genome,pos)));
170 return Genome_get_char(genome,pos);
171 #endif
172
173 } else {
174 /* GMAP with user-supplied genomic segment */
175 debug8(printf("At %u, genomicnt is %c\n",
176 genomicpos,Genome_get_char_blocks(pos)));
177 return Genome_get_char_blocks(&(*g_alt),pos);
178 }
179
180 } else {
181 if ((pos = chrhigh - genomicpos) < chroffset) { /* Must be <, and not <=, or dynamic programming will fail */
182 *g_alt = '*';
183 return '*';
184
185 } else if (pos >= chrhigh) {
186 *g_alt = '*';
187 return '*';
188
189 #if 0
190 } else if (genome) {
191 /* Not necessary, because Genome_get_char_blocks should work */
192 c2 = Genome_get_char(genome,pos);
193 #endif
194
195 } else {
196 /* GMAP with user-supplied genomic segment */
197 c2 = Genome_get_char_blocks(&c2_alt,pos);
198 }
199 debug8(printf("At %u, genomicnt is %c\n",genomicpos,complCode[(int) c2]));
200 *g_alt = complCode[(int) c2_alt];
201 return complCode[(int) c2];
202 }
203 }
204 #endif
205
206
207 /************************************************************************
208 * Matrix
209 ************************************************************************/
210
211 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
212 /* Makes a matrix of dimensions 0..glength x 0..rlength inclusive */
213 static Score32_T **
Matrix32_alloc(int rlength,int glength,Score32_T ** ptrs,Score32_T * space)214 Matrix32_alloc (int rlength, int glength, Score32_T **ptrs, Score32_T *space) {
215 Score32_T **matrix;
216 int i;
217
218 if (glength <= 0 || rlength <= 0) {
219 fprintf(stderr,"dynprog: lengths are negative: %d %d\n",rlength,glength);
220 abort();
221 }
222
223 matrix = ptrs;
224 matrix[0] = space;
225 for (i = 1; i <= glength; i++) {
226 matrix[i] = &(matrix[i-1][rlength + 1]);
227 }
228
229 memset((void *) space,0,(glength+1)*(rlength+1)*sizeof(Score32_T));
230
231 return matrix;
232 }
233
234 #endif
235
236
237 #if defined(DEBUG2) || defined(DEBUG_SIMD)
238 void
Dynprog_Matrix32_print(Score32_T ** matrix,int rlength,int glength,char * rsequence,char * gsequence,char * gsequencealt,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,bool revp,int lband,int uband)239 Dynprog_Matrix32_print (Score32_T **matrix, int rlength, int glength, char *rsequence,
240 char *gsequence, char *gsequencealt,
241 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh,
242 bool watsonp, bool revp, int lband, int uband) {
243 int i, j;
244 char g_alt;
245
246 /* j */
247 printf(" "); /* For i */
248 printf(" ");
249 for (j = 0; j <= glength; ++j) {
250 printf(" %2d ",j);
251 }
252 printf("\n");
253
254 if (gsequence) {
255 printf(" "); /* For i */
256 printf(" ");
257 for (j = 0; j <= glength; ++j) {
258 if (j == 0) {
259 printf(" ");
260 } else {
261 printf(" %c ",revp ? gsequence[-j+1] : gsequence[j-1]);
262 }
263 }
264 printf("\n");
265 }
266
267 if (gsequencealt != gsequence) {
268 printf(" "); /* For i */
269 printf(" ");
270 for (j = 0; j <= glength; ++j) {
271 if (j == 0) {
272 printf(" ");
273 } else {
274 if (revp == false) {
275 printf(" %c ",get_genomic_nt(&g_alt,goffset+j-1,chroffset,chrhigh,watsonp));
276 } else {
277 printf(" %c ",get_genomic_nt(&g_alt,goffset+1-j,chroffset,chrhigh,watsonp));
278 }
279 }
280 }
281 printf("\n");
282 }
283
284 for (i = 0; i <= rlength; ++i) {
285 printf("%2d ",i);
286 if (i == 0) {
287 printf(" ");
288 } else {
289 printf("%c ",revp ? rsequence[-i+1] : rsequence[i-1]);
290 }
291 for (j = 0; j <= glength; ++j) {
292 if (j < i - lband) {
293 printf(" . ");
294 } else if (j > i + uband) {
295 printf(" . ");
296 } else if (matrix[j][i] < NEG_INFINITY_DISPLAY) {
297 printf("%3d ",NEG_INFINITY_DISPLAY);
298 } else {
299 printf("%3d ",matrix[j][i]);
300 }
301 }
302 printf("\n");
303 }
304 printf("\n");
305
306 return;
307 }
308 #endif
309
310
311
312 #ifdef DEBUG12
313 struct Int3_T {
314 Score16_T Egap;
315 Score16_T Fgap;
316 Score16_T nogap;
317 };
318 #endif
319
320
321 #ifdef DEBUG12
322 /* Makes a matrix of dimensions 0..glength x 0..rlength inclusive */
323 static struct Int3_T **
Matrix3_alloc(int rlength,int glength,struct Int3_T ** ptrs,struct Int3_T * space)324 Matrix3_alloc (int rlength, int glength, struct Int3_T **ptrs, struct Int3_T *space) {
325 struct Int3_T **matrix;
326 int i;
327
328 if (glength <= 0 || rlength <= 0) {
329 fprintf(stderr,"dynprog: lengths are negative: %d %d\n",rlength,glength);
330 abort();
331 }
332
333 matrix = ptrs;
334 matrix[0] = space;
335 for (i = 1; i <= glength; i++) {
336 matrix[i] = &(matrix[i-1][rlength + 1]);
337 }
338
339 memset((void *) space,0,(glength+1)*(rlength+1)*sizeof(struct Int3_T));
340
341 return matrix;
342 }
343 #endif
344
345
346 #ifdef DEBUG12A
347 static void
Matrix3_print(struct Int3_T ** matrix,int rlength,int glength,char * rsequence,char * gsequence,char * gsequencealt,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,bool revp)348 Matrix3_print (struct Int3_T **matrix, int rlength, int glength, char *rsequence,
349 char *gsequence, char *gsequencealt,
350 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh,
351 bool watsonp, bool revp) {
352 int i, j;
353 char g_alt;
354
355 printf("G1");
356 if (gsequence) {
357 for (j = 0; j <= glength; ++j) {
358 if (j == 0) {
359 printf(" ");
360 } else {
361 printf(" %c ",revp ? gsequence[-j+1] : gsequence[j-1]);
362 }
363 }
364 printf("\n");
365 }
366
367 for (j = 0; j <= glength; ++j) {
368 if (j == 0) {
369 printf(" ");
370 } else {
371 if (revp == false) {
372 printf(" %c ",get_genomic_nt(&g_alt,goffset+j-1,chroffset,chrhigh,watsonp));
373 } else {
374 printf(" %c ",get_genomic_nt(&g_alt,goffset+1-j,chroffset,chrhigh,watsonp));
375 }
376 }
377 }
378 printf("\n");
379
380 for (i = 0; i <= rlength; ++i) {
381 if (i == 0) {
382 printf(" ");
383 } else {
384 printf("%c ",revp ? rsequence[-i+1] : rsequence[i-1]);
385 }
386 for (j = 0; j <= glength; ++j) {
387 if (matrix[j][i].Egap < NEG_INFINITY) {
388 printf("%3d ",NEG_INFINITY);
389 } else {
390 printf("%3d ",matrix[j][i].Egap);
391 }
392 }
393 printf("\n");
394 }
395 printf("\n");
396
397
398 printf("NG");
399 if (gsequence) {
400 for (j = 0; j <= glength; ++j) {
401 if (j == 0) {
402 printf(" ");
403 } else {
404 printf(" %c ",revp ? gsequence[-j+1] : gsequence[j-1]);
405 }
406 }
407 printf("\n");
408 }
409
410 for (j = 0; j <= glength; ++j) {
411 if (j == 0) {
412 printf(" ");
413 } else {
414 if (revp == false) {
415 printf(" %c ",get_genomic_nt(&g_alt,goffset+j-1,chroffset,chrhigh,watsonp));
416 } else {
417 printf(" %c ",get_genomic_nt(&g_alt,goffset+1-j,chroffset,chrhigh,watsonp));
418 }
419 }
420 }
421 printf("\n");
422
423 for (i = 0; i <= rlength; ++i) {
424 if (i == 0) {
425 printf(" ");
426 } else {
427 printf("%c ",revp ? rsequence[-i+1] : rsequence[i-1]);
428 }
429 for (j = 0; j <= glength; ++j) {
430 if (matrix[j][i].nogap < NEG_INFINITY) {
431 printf("%3d ",NEG_INFINITY);
432 } else {
433 printf("%3d ",matrix[j][i].nogap);
434 }
435 }
436 printf("\n");
437 }
438 printf("\n");
439
440
441 printf("G2");
442 if (gsequence) {
443 for (j = 0; j <= glength; ++j) {
444 if (j == 0) {
445 printf(" ");
446 } else {
447 printf(" %c ",revp ? gsequence[-j+1] : gsequence[j-1]);
448 }
449 }
450 printf("\n");
451 }
452
453 for (j = 0; j <= glength; ++j) {
454 if (j == 0) {
455 printf(" ");
456 } else {
457 if (revp == false) {
458 printf(" %c ",get_genomic_nt(&g_alt,goffset+j-1,chroffset,chrhigh,watsonp));
459 } else {
460 printf(" %c ",get_genomic_nt(&g_alt,goffset+1-j,chroffset,chrhigh,watsonp));
461 }
462 }
463 }
464 printf("\n");
465
466 for (i = 0; i <= rlength; ++i) {
467 if (i == 0) {
468 printf(" ");
469 } else {
470 printf("%c ",revp ? rsequence[-i+1] : rsequence[i-1]);
471 }
472 for (j = 0; j <= glength; ++j) {
473 if (matrix[j][i].Fgap < NEG_INFINITY) {
474 printf("%3d ",NEG_INFINITY);
475 } else {
476 printf("%3d ",matrix[j][i].Fgap);
477 }
478 }
479 printf("\n");
480 }
481 printf("\n");
482
483 return;
484 }
485 #endif
486
487
488 /************************************************************************/
489 /* Directions */
490 /************************************************************************/
491
492 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
493 /* Makes a matrix of dimensions 0..glength x 0..rlength inclusive */
494 static Direction32_T **
Directions32_alloc(int rlength,int glength,Direction32_T ** ptrs,Direction32_T * space)495 Directions32_alloc (int rlength, int glength, Direction32_T **ptrs, Direction32_T *space) {
496 Direction32_T **directions;
497 int i;
498
499 directions = ptrs;
500 directions[0] = space;
501 for (i = 1; i <= glength; i++) {
502 directions[i] = &(directions[i-1][rlength + 1]);
503 }
504
505 memset((void *) space,/*DIAG*/0,(glength+1)*(rlength+1)*sizeof(Direction32_T));
506
507 return directions;
508 }
509 #endif
510
511
512
513 #if defined(DEBUG2) || defined(DEBUG_SIMD)
514 void
Dynprog_Directions32_print(Direction32_T ** directions_nogap,Direction32_T ** directions_Egap,Direction32_T ** directions_Fgap,int rlength,int glength,char * rsequence,char * gsequence,char * gsequence_alt,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,bool revp,int lband,int uband)515 Dynprog_Directions32_print (Direction32_T **directions_nogap, Direction32_T **directions_Egap, Direction32_T **directions_Fgap,
516 int rlength, int glength, char *rsequence, char *gsequence, char *gsequence_alt,
517 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh,
518 bool watsonp, bool revp, int lband, int uband) {
519 int i, j;
520 char g_alt;
521
522 /* j */
523 printf(" "); /* For i */
524 printf(" ");
525 for (j = 0; j <= glength; ++j) {
526 printf(" %2d ",j);
527 }
528 printf("\n");
529
530 if (gsequence) {
531 printf(" "); /* For i */
532 printf(" ");
533 for (j = 0; j <= glength; ++j) {
534 if (j == 0) {
535 printf(" ");
536 } else {
537 printf(" %c ",revp ? gsequence[-j+1] : gsequence[j-1]);
538 }
539 }
540 printf("\n");
541 }
542
543 if (gsequence_alt != gsequence) {
544 printf(" "); /* For i */
545 printf(" ");
546 for (j = 0; j <= glength; ++j) {
547 if (j == 0) {
548 printf(" ");
549 } else {
550 if (revp == false) {
551 printf(" %c ",get_genomic_nt(&g_alt,goffset+j-1,chroffset,chrhigh,watsonp));
552 } else {
553 printf(" %c ",get_genomic_nt(&g_alt,goffset+1-j,chroffset,chrhigh,watsonp));
554 }
555 }
556 }
557 printf("\n");
558 }
559
560 for (i = 0; i <= rlength; ++i) {
561 printf("%2d ",i);
562 if (i == 0) {
563 printf(" ");
564 } else {
565 printf("%c ",revp ? rsequence[-i+1] : rsequence[i-1]);
566 }
567 for (j = 0; j <= glength; ++j) {
568 if (j < i - lband) {
569 printf(" ");
570 } else if (j > i + uband) {
571 printf(" ");
572 } else {
573 if (directions_Egap[j][i] == DIAG) {
574 printf("D");
575 } else {
576 /* Must be HORIZ */
577 printf("H");
578 }
579 printf("|");
580 if (directions_nogap[j][i] == DIAG) {
581 printf("D");
582 } else if (directions_nogap[j][i] == HORIZ) {
583 printf("H");
584 } else {
585 /* Must be VERT */
586 printf("V");
587 }
588 printf("|");
589 if (directions_Fgap[j][i] == DIAG) {
590 printf("D");
591 } else {
592 /* Must be VERT */
593 printf("V");
594 }
595 }
596 printf(" ");
597 }
598 printf("\n");
599 }
600 printf("\n");
601
602 return;
603 }
604 #endif
605
606
607
608
609 #define QUERY_MAXLENGTH 500
610 #define GENOMIC_MAXLENGTH 2000
611
612
613 static void
compute_maxlengths(int * max_rlength,int * max_glength,int maxlookback,int extraquerygap,int maxpeelback,int extramaterial_end,int extramaterial_paired)614 compute_maxlengths (int *max_rlength, int *max_glength,
615 int maxlookback, int extraquerygap, int maxpeelback,
616 int extramaterial_end, int extramaterial_paired) {
617 *max_rlength = maxlookback + maxpeelback;
618 if (*max_rlength < QUERY_MAXLENGTH) {
619 *max_rlength = QUERY_MAXLENGTH;
620 }
621
622 *max_glength = *max_rlength + extraquerygap;
623 if (extramaterial_end > extramaterial_paired) {
624 *max_glength += extramaterial_end;
625 } else {
626 *max_glength += extramaterial_paired;
627 }
628
629 if (*max_glength < GENOMIC_MAXLENGTH) {
630 *max_glength = GENOMIC_MAXLENGTH;
631 }
632
633 return;
634 }
635
636
637 T
Dynprog_new(int maxlookback,int extraquerygap,int maxpeelback,int extramaterial_end,int extramaterial_paired,bool doublep)638 Dynprog_new (int maxlookback, int extraquerygap, int maxpeelback,
639 int extramaterial_end, int extramaterial_paired,
640 bool doublep) {
641 T new = (T) MALLOC(sizeof(*new));
642 int max_rlength, max_glength;
643
644 compute_maxlengths(&max_rlength,&max_glength,
645 maxlookback,extraquerygap,maxpeelback,
646 extramaterial_end,extramaterial_paired);
647 new->max_rlength = max_rlength;
648 new->max_glength = max_glength;
649
650 #ifdef DEBUG12
651 new->matrix3_ptrs = (struct Int3_T **) CALLOC(max_glength+1,sizeof(struct Int3_T *));
652 new->matrix3_space = (struct Int3_T *) CALLOC((max_glength+1)*(max_rlength+1),sizeof(struct Int3_T));
653 #endif
654 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
655 new->matrix_ptrs = (Score32_T **) CALLOC(max_glength+1,sizeof(Score32_T *));
656 new->matrix_space = (Score32_T *) CALLOC((max_glength+1)*(max_glength+1),sizeof(Score32_T));
657 new->directions_ptrs_0 = (Direction32_T **) CALLOC(max_glength+1,sizeof(Direction32_T *));
658 new->directions_space_0 = (Direction32_T *) CALLOC((max_glength+1)*(max_rlength+1),sizeof(Direction32_T));
659 new->directions_ptrs_1 = (Direction32_T **) CALLOC(max_glength+1,sizeof(Direction32_T *));
660 new->directions_space_1 = (Direction32_T *) CALLOC((max_glength+1)*(max_rlength+1),sizeof(Direction32_T));
661 new->directions_ptrs_2 = (Direction32_T **) CALLOC(max_glength+1,sizeof(Direction32_T *));
662 new->directions_space_2 = (Direction32_T *) CALLOC((max_glength+1)*(max_rlength+1),sizeof(Direction32_T));
663 #endif
664 #if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
665 /* Use SIMD_NCHARS > SIMD_NSHORTS and sizeof(Score16_T) > sizeof(Score8_T) */
666 if (doublep == true) {
667 new->aligned.two.upper_matrix_ptrs = (void **) CALLOC(max_glength+1,sizeof(void *));
668 new->aligned.two.upper_matrix_space = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
669 new->aligned.two.upper_directions_ptrs_0 = (void **) CALLOC(max_glength+1,sizeof(void *));
670 new->aligned.two.upper_directions_space_0 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
671 new->aligned.two.upper_directions_ptrs_1 = (void **) CALLOC(max_glength+1,sizeof(void *));
672 new->aligned.two.upper_directions_space_1 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
673
674 new->aligned.two.lower_matrix_ptrs = (void **) CALLOC(max_rlength+1,sizeof(void *));
675 new->aligned.two.lower_matrix_space = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
676 new->aligned.two.lower_directions_ptrs_0 = (void **) CALLOC(max_rlength+1,sizeof(void *));
677 new->aligned.two.lower_directions_space_0 = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
678 new->aligned.two.lower_directions_ptrs_1 = (void **) CALLOC(max_rlength+1,sizeof(void *));
679 new->aligned.two.lower_directions_space_1 = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
680
681 new->nspaces = 2;
682
683 } else {
684 new->aligned.one.matrix_ptrs = (void **) CALLOC(max_glength+1,sizeof(void *));
685 new->aligned.one.matrix_space = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
686 new->aligned.one.directions_ptrs_0 = (void **) CALLOC(max_glength+1,sizeof(void *));
687 new->aligned.one.directions_space_0 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
688 new->aligned.one.directions_ptrs_1 = (void **) CALLOC(max_glength+1,sizeof(void *));
689 new->aligned.one.directions_space_1 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
690 new->aligned.one.directions_ptrs_2 = (void **) CALLOC(max_glength+1,sizeof(void *));
691 new->aligned.one.directions_space_2 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
692
693 new->nspaces = 1;
694 }
695 #endif
696 #if defined(DEBUG_AVX2)
697 /* Use SIMD_NCHARS > SIMD_NSHORTS and sizeof(Score16_T) > sizeof(Score8_T) */
698 if (doublep == true) {
699 new->aligned_std.two.upper_matrix_ptrs = (void **) CALLOC(max_glength+1,sizeof(void *));
700 new->aligned_std.two.upper_matrix_space = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
701 new->aligned_std.two.upper_directions_ptrs_0 = (void **) CALLOC(max_glength+1,sizeof(void *));
702 new->aligned_std.two.upper_directions_space_0 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
703 new->aligned_std.two.upper_directions_ptrs_1 = (void **) CALLOC(max_glength+1,sizeof(void *));
704 new->aligned_std.two.upper_directions_space_1 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
705
706 new->aligned_std.two.lower_matrix_ptrs = (void **) CALLOC(max_rlength+1,sizeof(void *));
707 new->aligned_std.two.lower_matrix_space = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
708 new->aligned_std.two.lower_directions_ptrs_0 = (void **) CALLOC(max_rlength+1,sizeof(void *));
709 new->aligned_std.two.lower_directions_space_0 = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
710 new->aligned_std.two.lower_directions_ptrs_1 = (void **) CALLOC(max_rlength+1,sizeof(void *));
711 new->aligned_std.two.lower_directions_space_1 = (void *) _mm_malloc((max_rlength+1)*(max_glength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
712
713 new->nspaces = 2;
714
715 } else {
716 new->aligned_std.one.matrix_ptrs = (void **) CALLOC(max_glength+1,sizeof(void *));
717 new->aligned_std.one.matrix_space = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
718 new->aligned_std.one.directions_ptrs_0 = (void **) CALLOC(max_glength+1,sizeof(void *));
719 new->aligned_std.one.directions_space_0 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
720 new->aligned_std.one.directions_ptrs_1 = (void **) CALLOC(max_glength+1,sizeof(void *));
721 new->aligned_std.one.directions_space_1 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
722 new->aligned_std.one.directions_ptrs_2 = (void **) CALLOC(max_glength+1,sizeof(void *));
723 new->aligned_std.one.directions_space_2 = (void *) _mm_malloc((max_glength+1)*(max_rlength+SIMD_NCHARS+SIMD_NCHARS)*sizeof(Score16_T),ALIGN_SIZE);
724
725 new->nspaces = 1;
726 }
727 #endif
728 return new;
729 }
730
731
732 void
Dynprog_free(T * old)733 Dynprog_free (T *old) {
734 if (*old) {
735 #ifdef DEBUG12
736 FREE((*old)->matrix3_ptrs);
737 FREE((*old)->matrix3_space);
738 #endif
739 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
740 FREE((*old)->matrix_ptrs);
741 FREE((*old)->matrix_space);
742 FREE((*old)->directions_ptrs_2);
743 FREE((*old)->directions_space_2);
744 FREE((*old)->directions_ptrs_1);
745 FREE((*old)->directions_space_1);
746 FREE((*old)->directions_ptrs_0);
747 FREE((*old)->directions_space_0);
748 #endif
749 #if defined(HAVE_SSE4_1) || defined(HAVE_SSE2)
750 if ((*old)->nspaces == 1) {
751 FREE((*old)->aligned.one.matrix_ptrs);
752 _mm_free((*old)->aligned.one.matrix_space);
753 FREE((*old)->aligned.one.directions_ptrs_2);
754 _mm_free((*old)->aligned.one.directions_space_2);
755 FREE((*old)->aligned.one.directions_ptrs_1);
756 _mm_free((*old)->aligned.one.directions_space_1);
757 FREE((*old)->aligned.one.directions_ptrs_0);
758 _mm_free((*old)->aligned.one.directions_space_0);
759 } else {
760 FREE((*old)->aligned.two.upper_matrix_ptrs);
761 _mm_free((*old)->aligned.two.upper_matrix_space);
762 FREE((*old)->aligned.two.upper_directions_ptrs_1);
763 _mm_free((*old)->aligned.two.upper_directions_space_1);
764 FREE((*old)->aligned.two.upper_directions_ptrs_0);
765 _mm_free((*old)->aligned.two.upper_directions_space_0);
766
767 FREE((*old)->aligned.two.lower_matrix_ptrs);
768 _mm_free((*old)->aligned.two.lower_matrix_space);
769 FREE((*old)->aligned.two.lower_directions_ptrs_1);
770 _mm_free((*old)->aligned.two.lower_directions_space_1);
771 FREE((*old)->aligned.two.lower_directions_ptrs_0);
772 _mm_free((*old)->aligned.two.lower_directions_space_0);
773 }
774 #endif
775 #if defined(DEBUG_AVX2)
776 if ((*old)->nspaces == 1) {
777 FREE((*old)->aligned_std.one.matrix_ptrs);
778 _mm_free((*old)->aligned_std.one.matrix_space);
779 FREE((*old)->aligned_std.one.directions_ptrs_2);
780 _mm_free((*old)->aligned_std.one.directions_space_2);
781 FREE((*old)->aligned_std.one.directions_ptrs_1);
782 _mm_free((*old)->aligned_std.one.directions_space_1);
783 FREE((*old)->aligned_std.one.directions_ptrs_0);
784 _mm_free((*old)->aligned_std.one.directions_space_0);
785 } else {
786 FREE((*old)->aligned_std.two.upper_matrix_ptrs);
787 _mm_free((*old)->aligned_std.two.upper_matrix_space);
788 FREE((*old)->aligned_std.two.upper_directions_ptrs_1);
789 _mm_free((*old)->aligned_std.two.upper_directions_space_1);
790 FREE((*old)->aligned_std.two.upper_directions_ptrs_0);
791 _mm_free((*old)->aligned_std.two.upper_directions_space_0);
792
793 FREE((*old)->aligned_std.two.lower_matrix_ptrs);
794 _mm_free((*old)->aligned_std.two.lower_matrix_space);
795 FREE((*old)->aligned_std.two.lower_directions_ptrs_1);
796 _mm_free((*old)->aligned_std.two.lower_directions_space_1);
797 FREE((*old)->aligned_std.two.lower_directions_ptrs_0);
798 _mm_free((*old)->aligned_std.two.lower_directions_space_0);
799 }
800 #endif
801
802 FREE(*old);
803 }
804
805 return;
806 }
807
808 /************************************************************************/
809
810 /* TODO: Replace pairdistance_array with reference to consistent_array */
811
812 /* These are extern arrays, used by all dynprog procedures */
813 int use8p_size[NMISMATCHTYPES];
814 Pairdistance_T **pairdistance_array[NMISMATCHTYPES];
815 #ifndef HAVE_SSE4_1
816 Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES];
817 #endif
818 bool **consistent_array[3]; /* For genestrands 0, +1, and +2 */
819 int *nt_to_int_array;
820
821
822 bool
Dynprog_consistent_p(int c,int g,int g_alt,int genestrand)823 Dynprog_consistent_p (int c, int g, int g_alt, int genestrand) {
824 if (consistent_array[genestrand][c][g] == true) {
825 return true;
826 } else {
827 return consistent_array[genestrand][c][g_alt];
828 }
829 }
830
831 static void
permute_cases(int NA1,int NA2,Pairdistance_T score,Mode_T mode)832 permute_cases (int NA1, int NA2, Pairdistance_T score, Mode_T mode) {
833 int i;
834
835 #ifdef PREUC
836 int na1, na2;
837
838 na1 = tolower(NA1);
839 na2 = tolower(NA2);
840 #endif
841
842 if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
843 #ifdef PREUC
844 consistent_array[0][na1][na2] = true;
845 consistent_array[0][na1][NA2] = true;
846 consistent_array[0][NA1][na2] = true;
847 #endif
848 consistent_array[0][NA1][NA2] = true;
849
850 } else {
851 #ifdef PREUC
852 consistent_array[+1][na1][na2] = true;
853 consistent_array[+1][na1][NA2] = true;
854 consistent_array[+1][NA1][na2] = true;
855 consistent_array[+2][na1][na2] = true;
856 consistent_array[+2][na1][NA2] = true;
857 consistent_array[+2][NA1][na2] = true;
858 #endif
859 consistent_array[+1][NA1][NA2] = true;
860 consistent_array[+2][NA1][NA2] = true;
861 }
862
863 if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
864 #ifdef PREUC
865 consistent_array[0][na2][na1] = true;
866 consistent_array[0][na2][NA1] = true;
867 consistent_array[0][NA2][na1] = true;
868 #endif
869 consistent_array[0][NA2][NA1] = true;
870
871 } else {
872 #ifdef PREUC
873 consistent_array[+1][na2][na1] = true;
874 consistent_array[+1][na2][NA1] = true;
875 consistent_array[+1][NA2][na1] = true;
876 consistent_array[+2][na2][na1] = true;
877 consistent_array[+2][na2][NA1] = true;
878 consistent_array[+2][NA2][na1] = true;
879 #endif
880 consistent_array[+1][NA2][NA1] = true;
881 consistent_array[+2][NA2][NA1] = true;
882 }
883
884
885 for (i = 0; i < NMISMATCHTYPES; i++) {
886 #ifdef PREUC
887 pairdistance_array[i][na1][na2] = score;
888 pairdistance_array[i][na1][NA2] = score;
889 pairdistance_array[i][NA1][na2] = score;
890 #endif
891 pairdistance_array[i][NA1][NA2] = score;
892
893 #ifdef PREUC
894 pairdistance_array[i][na2][na1] = score;
895 pairdistance_array[i][na2][NA1] = score;
896 pairdistance_array[i][NA2][na1] = score;
897 #endif
898 pairdistance_array[i][NA2][NA1] = score;
899 }
900
901 return;
902 }
903
904 static void
permute_cases_oneway(int NA1,int NA2,Pairdistance_T score,int genestrand)905 permute_cases_oneway (int NA1, int NA2, Pairdistance_T score, int genestrand) {
906 int i;
907
908 #ifdef PREUC
909 int na1, na2;
910
911 na1 = tolower(NA1);
912 na2 = tolower(NA2);
913 #endif
914
915 #ifdef PREUC
916 consistent_array[genestrand][na1][na2] = true;
917 consistent_array[genestrand][na1][NA2] = true;
918 consistent_array[genestrand][NA1][na2] = true;
919 #endif
920 consistent_array[genestrand][NA1][NA2] = true;
921
922 for (i = 0; i < NMISMATCHTYPES; i++) {
923 #ifdef PREUC
924 pairdistance_array[i][na1][na2] = score;
925 pairdistance_array[i][na1][NA2] = score;
926 pairdistance_array[i][NA1][na2] = score;
927 #endif
928 pairdistance_array[i][NA1][NA2] = score;
929 }
930
931 return;
932 }
933
934
935 void
Dynprog_init(Mode_T mode)936 Dynprog_init (Mode_T mode) {
937 int i, j, ptr;
938 int c, c1, c2;
939
940 nt_to_int_array = (int *) CALLOC(128,sizeof(int));
941 for (j = 0; j < 128; j++) {
942 nt_to_int_array[j] = 4;
943 }
944 nt_to_int_array['A'] = nt_to_int_array['a'] = 0;
945 nt_to_int_array['C'] = nt_to_int_array['c'] = 1;
946 nt_to_int_array['G'] = nt_to_int_array['g'] = 2;
947 nt_to_int_array['T'] = nt_to_int_array['t'] = 3;
948
949
950 use8p_size[HIGHQ] = NEG_INFINITY_8 / MISMATCH_HIGHQ - 1;
951 use8p_size[MEDQ] = NEG_INFINITY_8 / MISMATCH_MEDQ - 1;
952 use8p_size[LOWQ] = NEG_INFINITY_8 / MISMATCH_LOWQ - 1;
953 use8p_size[ENDQ] = NEG_INFINITY_8 / MISMATCH_ENDQ - 1;
954 /* printf("use8p_sizes: %d %d %d %d\n",use8p_size[HIGHQ],use8p_size[MEDQ],use8p_size[LOWQ],use8p_size[ENDQ]); */
955
956 if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
957 consistent_array[0] = (bool **) CALLOC(128,sizeof(bool *));
958 consistent_array[0][0] = (bool *) CALLOC(128*128,sizeof(bool));
959
960 ptr = 0;
961 for (j = 1; j < 128; j++) {
962 ptr += 128;
963 consistent_array[0][j] = &(consistent_array[0][0][ptr]);
964 }
965
966 } else {
967 consistent_array[+1] = (bool **) CALLOC(128,sizeof(bool *));
968 consistent_array[+1][0] = (bool *) CALLOC(128*128,sizeof(bool));
969 consistent_array[+2] = (bool **) CALLOC(128,sizeof(bool *));
970 consistent_array[+2][0] = (bool *) CALLOC(128*128,sizeof(bool));
971
972 ptr = 0;
973 for (j = 1; j < 128; j++) {
974 ptr += 128;
975 consistent_array[+1][j] = &(consistent_array[+1][0][ptr]);
976 consistent_array[+2][j] = &(consistent_array[+2][0][ptr]);
977 }
978 }
979
980 for (i = 0; i < NMISMATCHTYPES; i++) {
981 pairdistance_array[i] = (Pairdistance_T **) CALLOC(128,sizeof(Pairdistance_T *));
982 pairdistance_array[i][0] = (Pairdistance_T *) CALLOC(128*128,sizeof(Pairdistance_T));
983 #ifndef HAVE_SSE4_1
984 pairdistance_array_plus_128[i] = (Pairdistance_T **) CALLOC(128,sizeof(Pairdistance_T *));
985 pairdistance_array_plus_128[i][0] = (Pairdistance_T *) CALLOC(128*128,sizeof(Pairdistance_T));
986 #endif
987 ptr = 0;
988 for (j = 1; j < 128; j++) {
989 ptr += 128;
990 pairdistance_array[i][j] = &(pairdistance_array[i][0][ptr]);
991 #ifndef HAVE_SSE4_1
992 pairdistance_array_plus_128[i][j] = &(pairdistance_array_plus_128[i][0][ptr]);
993 #endif
994 }
995 }
996
997 #ifdef PREUC
998 for (c1 = 'A'; c1 <= 'z'; c1++) {
999 for (c2 = 'A'; c2 < 'z'; c2++) {
1000 pairdistance_array[HIGHQ][c1][c2] = MISMATCH_HIGHQ;
1001 pairdistance_array[MEDQ][c1][c2] = MISMATCH_MEDQ;
1002 pairdistance_array[LOWQ][c1][c2] = MISMATCH_LOWQ;
1003 pairdistance_array[ENDQ][c1][c2] = MISMATCH_ENDQ;
1004 }
1005 }
1006 #else
1007 for (c1 = 'A'; c1 <= 'Z'; c1++) {
1008 for (c2 = 'A'; c2 < 'Z'; c2++) {
1009 pairdistance_array[HIGHQ][c1][c2] = MISMATCH_HIGHQ;
1010 pairdistance_array[MEDQ][c1][c2] = MISMATCH_MEDQ;
1011 pairdistance_array[LOWQ][c1][c2] = MISMATCH_LOWQ;
1012 pairdistance_array[ENDQ][c1][c2] = MISMATCH_ENDQ;
1013 }
1014 }
1015 #endif
1016
1017 for (c = 'A'; c < 'Z'; c++) {
1018 permute_cases(c,c,FULLMATCH,mode);
1019 }
1020
1021 /* Exceptions */
1022 permute_cases('U','T',FULLMATCH,mode);
1023
1024 permute_cases('R','A',HALFMATCH,mode);
1025 permute_cases('R','G',HALFMATCH,mode);
1026
1027 permute_cases('Y','T',HALFMATCH,mode);
1028 permute_cases('Y','C',HALFMATCH,mode);
1029
1030 permute_cases('W','A',HALFMATCH,mode);
1031 permute_cases('W','T',HALFMATCH,mode);
1032
1033 permute_cases('S','G',HALFMATCH,mode);
1034 permute_cases('S','C',HALFMATCH,mode);
1035
1036 permute_cases('M','A',HALFMATCH,mode);
1037 permute_cases('M','C',HALFMATCH,mode);
1038
1039 permute_cases('K','G',HALFMATCH,mode);
1040 permute_cases('K','T',HALFMATCH,mode);
1041
1042 permute_cases('H','A',AMBIGUOUS,mode);
1043 permute_cases('H','T',AMBIGUOUS,mode);
1044 permute_cases('H','C',AMBIGUOUS,mode);
1045
1046 permute_cases('B','G',AMBIGUOUS,mode);
1047 permute_cases('B','C',AMBIGUOUS,mode);
1048 permute_cases('B','T',AMBIGUOUS,mode);
1049
1050 permute_cases('V','G',AMBIGUOUS,mode);
1051 permute_cases('V','A',AMBIGUOUS,mode);
1052 permute_cases('V','C',AMBIGUOUS,mode);
1053
1054 permute_cases('D','G',AMBIGUOUS,mode);
1055 permute_cases('D','A',AMBIGUOUS,mode);
1056 permute_cases('D','T',AMBIGUOUS,mode);
1057
1058 permute_cases('N','T',AMBIGUOUS,mode);
1059 permute_cases('N','C',AMBIGUOUS,mode);
1060 permute_cases('N','A',AMBIGUOUS,mode);
1061 permute_cases('N','G',AMBIGUOUS,mode);
1062
1063 permute_cases('X','T',AMBIGUOUS,mode);
1064 permute_cases('X','C',AMBIGUOUS,mode);
1065 permute_cases('X','A',AMBIGUOUS,mode);
1066 permute_cases('X','G',AMBIGUOUS,mode);
1067
1068 permute_cases('N','N',AMBIGUOUS,mode); /* Needed to start dynprog procedures with 0 at (0,0) */
1069 permute_cases('X','X',AMBIGUOUS,mode); /* Needed to start dynprog procedures with 0 at (0,0) */
1070
1071
1072 /* GMAP keeps query in same order and inverts genome, so need to keep only one direction for STRANDED */
1073 if (mode == STANDARD) {
1074 /* Skip */
1075
1076 } else if (mode == CMET_STRANDED) {
1077 /* Query-T can match Genomic-C */
1078 permute_cases_oneway('T','C',FULLMATCH,/*genestrand*/0);
1079
1080 } else if (mode == CMET_NONSTRANDED) {
1081 permute_cases_oneway('T','C',FULLMATCH,/*genestrand*/+1);
1082 permute_cases_oneway('A','G',FULLMATCH,/*genestrand*/+2);
1083
1084 } else if (mode == ATOI_STRANDED) {
1085 permute_cases_oneway('G','A',FULLMATCH,/*genestrand*/0);
1086
1087 } else if (mode == ATOI_NONSTRANDED) {
1088 permute_cases_oneway('G','A',FULLMATCH,/*genestrand*/+1);
1089 permute_cases_oneway('C','T',FULLMATCH,/*genestrand*/+2);
1090
1091 } else if (mode == TTOC_STRANDED) {
1092 permute_cases_oneway('C','T',FULLMATCH,/*genestrand*/0);
1093
1094 } else if (mode == TTOC_NONSTRANDED) {
1095 permute_cases_oneway('C','T',FULLMATCH,/*genestrand*/+1);
1096 permute_cases_oneway('G','A',FULLMATCH,/*genestrand*/+2);
1097
1098 } else {
1099 fprintf(stderr,"Mode %d not recognized\n",mode);
1100 exit(9);
1101 }
1102
1103
1104 #ifndef HAVE_SSE4_1
1105 #ifdef PREUC
1106 for (i = 0; i < NMISMATCHTYPES; i++) {
1107 for (c1 = 'A'; c1 <= 'z'; c1++) {
1108 for (c2 = 'A'; c2 < 'z'; c2++) {
1109 pairdistance_array_plus_128[i][c1][c2] = 128 + pairdistance_array[i][c1][c2];
1110 }
1111 }
1112 }
1113 #else
1114 for (i = 0; i < NMISMATCHTYPES; i++) {
1115 for (c1 = 'A'; c1 <= 'Z'; c1++) {
1116 for (c2 = 'A'; c2 < 'Z'; c2++) {
1117 pairdistance_array_plus_128[i][c1][c2] = 128 + pairdistance_array[i][c1][c2];
1118 }
1119 }
1120 }
1121 #endif
1122 #endif
1123
1124 return;
1125 }
1126
1127
1128 /************************************************************************/
1129
1130 void
Dynprog_term(Mode_T mode)1131 Dynprog_term (Mode_T mode) {
1132 int i;
1133
1134 #if 0
1135 for (i = 0; i < NJUMPTYPES; i++) {
1136 FREE(jump_penalty_array[i]);
1137 }
1138 #endif
1139
1140 for (i = 0; i < NMISMATCHTYPES; i++) {
1141 /*
1142 for (j = 0; j < 128; j++) {
1143 FREE(pairdistance_array[i][j]);
1144 }
1145 */
1146 #ifndef HAVE_SSE4_1
1147 FREE(pairdistance_array_plus_128[i][0]);
1148 FREE(pairdistance_array_plus_128[i]);
1149 #endif
1150 FREE(pairdistance_array[i][0]);
1151 FREE(pairdistance_array[i]);
1152 }
1153 /*
1154 for (j = 0; j < 128; j++) {
1155 FREE(consistent_array[j]);
1156 }
1157 */
1158 if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) {
1159 FREE(consistent_array[0][0]);
1160 FREE(consistent_array[0]);
1161 } else {
1162 FREE(consistent_array[+1][0]);
1163 FREE(consistent_array[+1]);
1164 FREE(consistent_array[+2][0]);
1165 FREE(consistent_array[+2]);
1166 }
1167 FREE(nt_to_int_array);
1168
1169 return;
1170 }
1171
1172 /************************************************************************/
1173
1174 void
Dynprog_compute_bands(int * lband,int * uband,int rlength,int glength,int extraband,bool widebandp)1175 Dynprog_compute_bands (int *lband, int *uband, int rlength, int glength, int extraband, bool widebandp) {
1176 if (widebandp == false) {
1177 /* Just go along main diagonal */
1178 *lband = extraband;
1179 *uband = extraband;
1180 } else if (glength >= rlength) {
1181 /* Widen band to right to reach destination */
1182 *uband = glength - rlength + extraband;
1183 *lband = extraband;
1184 } else {
1185 /* Widen band to left to reach destination */
1186 *lband = rlength - glength + extraband;
1187 *uband = extraband;
1188 }
1189 return;
1190 }
1191
1192
1193 /* For upper triangular, we allow horizontal (E) jumps. For lower triangular, we allow vertical (F) jumps */
1194 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
1195 Score32_T **
Dynprog_standard(Direction32_T *** directions_nogap,Direction32_T *** directions_Egap,Direction32_T *** directions_Fgap,T this,char * rsequence,char * gsequence,char * gsequence_alt,int rlength,int glength,int goffset,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,Mismatchtype_T mismatchtype,Score32_T open,Score32_T extend,int lband,int uband,bool jump_late_p,bool revp,int saturation,bool upperp,bool lowerp)1196 Dynprog_standard (Direction32_T ***directions_nogap, Direction32_T ***directions_Egap, Direction32_T ***directions_Fgap,
1197 T this, char *rsequence, char *gsequence, char *gsequence_alt,
1198 int rlength, int glength,
1199 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
1200 Mismatchtype_T mismatchtype, Score32_T open, Score32_T extend,
1201 int lband, int uband, bool jump_late_p, bool revp, int saturation,
1202 bool upperp, bool lowerp) {
1203 #ifdef DEBUG12
1204 Score32_T bestscore;
1205 Direction32_T bestdir;
1206 struct Int3_T **matrix3;
1207 #endif
1208 Score32_T penalty;
1209 Score32_T **matrix;
1210 Score32_T c_gap, *r_gap, *nogap, last_nogap, prev_nogap, first_nogap;
1211 int r, c, na1, na2;
1212 char na2_alt;
1213 Score32_T score, pairscore;
1214 int rlo, rhigh;
1215 Pairdistance_T **pairdistance_array_type;
1216
1217 pairdistance_array_type = pairdistance_array[mismatchtype];
1218
1219 debug2(printf("Dynprog_standard. jump_late_p %d, open %d, extend %d\n",jump_late_p,open,extend));
1220 debug(printf("compute_scores_standard: "));
1221 debug(printf("Lengths are %d and %d, so bands are %d on left and %d on right\n",rlength,glength,lband,uband));
1222
1223 matrix = Matrix32_alloc(rlength,glength,this->matrix_ptrs,this->matrix_space);
1224 *directions_nogap = Directions32_alloc(rlength,glength,this->directions_ptrs_0,this->directions_space_0);
1225 *directions_Egap = Directions32_alloc(rlength,glength,this->directions_ptrs_1,this->directions_space_1);
1226 *directions_Fgap = Directions32_alloc(rlength,glength,this->directions_ptrs_2,this->directions_space_2);
1227 /* (*directions_nogap)[0][0] = STOP; -- Check for r > 0 && c > 0 instead */
1228
1229 #ifdef ZERO_INITIAL_GAP_PENALTY
1230 /* Row 0 initialization */
1231 for (c = 1; c <= uband && c <= glength; c++) {
1232 matrix[c][0] = 0;
1233 (*directions_Egap)[c][0] = HORIZ;
1234 (*directions_nogap)[c][0] = HORIZ;
1235 }
1236
1237 /* Column 0 initialization */
1238 for (r = 1; r <= lband && r <= rlength; r++) {
1239 matrix[0][r] = 0;
1240 (*directions_Fgap)[0][r] = VERT;
1241 (*directions_nogap)[0][r] = VERT;
1242 }
1243
1244 #else
1245 /* Row 0 initialization */
1246 penalty = open;
1247 for (c = 1; c <= uband && c <= glength; c++) {
1248 penalty += extend;
1249 matrix[c][0] = penalty;
1250 (*directions_Egap)[c][0] = HORIZ;
1251 (*directions_nogap)[c][0] = HORIZ;
1252 }
1253 #if 0
1254 /* Already initialized to DIAG */
1255 (*directions_Egap)[1][0] = DIAG; /* previously used STOP */
1256 #endif
1257
1258 /* Column 0 initialization */
1259 penalty = open;
1260 for (r = 1; r <= lband && r <= rlength; r++) {
1261 penalty += extend;
1262 matrix[0][r] = penalty;
1263 (*directions_Fgap)[0][r] = VERT;
1264 (*directions_nogap)[0][r] = VERT;
1265 }
1266 #if 0
1267 /* Already initialized to DIAG */
1268 (*directions_Fgap)[0][1] = DIAG; /* previously used STOP */
1269 #endif
1270 #endif
1271
1272 r_gap = (Score32_T *) MALLOCA((rlength+1) * sizeof(Score32_T));
1273 nogap = (Score32_T *) MALLOCA((rlength+1) * sizeof(Score32_T));
1274
1275 #ifdef ZERO_INITIAL_GAP_PENALTY
1276 nogap[0] = 0;
1277 for (r = 1; r <= lband && r <= rlength; r++) {
1278 r_gap[r] = NEG_INFINITY_32;
1279 nogap[r] = 0;
1280 }
1281 for ( ; r <= rlength; r++) {
1282 r_gap[r] = NEG_INFINITY_32;
1283 nogap[r] = NEG_INFINITY_32;
1284 }
1285 #else
1286 nogap[0] = 0;
1287 penalty = open;
1288 for (r = 1; r <= lband && r <= rlength; r++) {
1289 penalty += extend;
1290 r_gap[r] = NEG_INFINITY_32;
1291 nogap[r] = penalty;
1292 }
1293 for ( ; r <= rlength; r++) {
1294 r_gap[r] = NEG_INFINITY_32;
1295 nogap[r] = NEG_INFINITY_32;
1296 }
1297 #endif
1298
1299
1300 #ifdef DEBUG12
1301 matrix3 = Matrix3_alloc(rlength,glength,this->matrix3_ptrs,this->matrix3_space);
1302 matrix3[0][0].nogap = 0;
1303 matrix3[0][0].Egap = matrix3[0][0].Fgap = NEG_INFINITY_32;
1304
1305 /* Row 0 initialization */
1306 penalty = open;
1307 for (c = 1; c <= uband && c <= glength; c++) {
1308 penalty += extend;
1309 matrix3[c][0].nogap = penalty;
1310 matrix3[c][0].Egap = penalty;
1311 matrix3[c][0].Fgap = NEG_INFINITY_32;
1312 }
1313
1314 /* Column 0 initialization */
1315 penalty = open;
1316 for (r = 1; r <= lband && r <= rlength; r++) {
1317 penalty += extend;
1318 matrix3[0][r].nogap = penalty;
1319 matrix3[0][r].Egap = NEG_INFINITY_32;
1320 matrix3[0][r].Fgap = penalty;
1321 }
1322 #endif
1323
1324
1325 first_nogap = 0;
1326 if (jump_late_p) {
1327 penalty = open + extend;
1328 for (c = 1; c <= glength; c++) {
1329 if (gsequence) {
1330 na2 = revp ? gsequence[1-c] : gsequence[c-1];
1331 na2_alt = revp ? gsequence_alt[1-c] : gsequence_alt[c-1];
1332 } else if (revp == false) {
1333 na2 = get_genomic_nt(&na2_alt,goffset+c-1,chroffset,chrhigh,watsonp);
1334 } else {
1335 na2 = get_genomic_nt(&na2_alt,goffset+1-c,chroffset,chrhigh,watsonp);
1336 }
1337
1338 c_gap = NEG_INFINITY_32;
1339 if (c == 1) {
1340 rlo = 1;
1341 prev_nogap = 0; /* was nogap[rlo-1] */
1342 #ifdef ZERO_INITIAL_GAP_PENALTY
1343 last_nogap = 0;
1344 #else
1345 last_nogap = NEG_INFINITY_32 - open + 1; /* making c_gap < last_nogap + open */
1346 #endif
1347 } else if ((rlo = c - uband) < 1) {
1348 rlo = 1;
1349 #ifdef ZERO_INITIAL_GAP_PENALTY
1350 prev_nogap = 0;
1351 last_nogap = 0;
1352 #else
1353 prev_nogap = penalty;
1354 penalty += extend;
1355 last_nogap = penalty;
1356 #endif
1357 } else if (rlo == 1) {
1358 #ifdef ZERO_INITIAL_GAP_PENALTY
1359 prev_nogap = 0;
1360 #else
1361 prev_nogap = penalty;
1362 #endif
1363 /* penalty += extend; */
1364 last_nogap = NEG_INFINITY_32;
1365 #ifdef DEBUG12
1366 matrix3[c][rlo-1].Fgap = NEG_INFINITY_32;
1367 matrix3[c][rlo-1].nogap = NEG_INFINITY_32;
1368 #endif
1369 } else {
1370 prev_nogap = first_nogap;
1371 last_nogap = NEG_INFINITY_32;
1372 #ifdef DEBUG12
1373 matrix3[c][rlo-1].Fgap = NEG_INFINITY_32;
1374 matrix3[c][rlo-1].nogap = NEG_INFINITY_32;
1375 #endif
1376 }
1377
1378 if ((rhigh = c + lband) > rlength) {
1379 rhigh = rlength;
1380 #ifdef DEBUG12
1381 } else {
1382 matrix3[c-1][rhigh].Egap = NEG_INFINITY_32;
1383 matrix3[c-1][rhigh].nogap = NEG_INFINITY_32;
1384 #endif
1385 }
1386
1387 for (r = rlo; r <= rhigh; r++) {
1388 na1 = revp ? rsequence[1-r] : rsequence[r-1];
1389
1390 #ifdef DEBUG12
1391 /* FGAP */
1392 bestscore = matrix3[c][r-1].nogap + open /* + extend */;
1393 bestdir = DIAG;
1394
1395 if ((score = matrix3[c][r-1].Fgap /* + extend */) >= bestscore) { /* Use >= for jump late */
1396 bestscore = score;
1397 bestdir = VERT;
1398 }
1399
1400 matrix3[c][r].Fgap = bestscore + extend;
1401 #endif
1402
1403 /* FGAP alt */
1404 #ifdef DEBUG12A
1405 printf("Fgap at r %d, c %d: matrix3[c][r-1].nogap %d vs last_nogap %d\n",r,c,matrix3[c][r-1].nogap,last_nogap);
1406 printf("Fgap at r %d, c %d: matrix3[c][r-1].Fgap %d vs c_gap %d\n",r,c,matrix3[c][r-1].Fgap,c_gap);
1407 #endif
1408 #ifdef DEBUG12
1409 assert(matrix3[c][r-1].nogap == last_nogap);
1410 assert(matrix3[c][r-1].Fgap == c_gap);
1411 #endif
1412 debug3(printf("std Fgap at r %d, c %d: c_gap + extend %d vs last_nogap + open + extend %d\n",r,c,c_gap + extend,last_nogap + open + extend));
1413 if (lowerp == true && c_gap /* + extend */ >= (score = last_nogap + open /* + extend */)) { /* Use >= for jump late */
1414 c_gap += extend;
1415 (*directions_Fgap)[c][r] = VERT;
1416 } else {
1417 c_gap = score + extend;
1418 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1419 }
1420
1421
1422 #ifdef DEBUG12
1423 /* EGAP */
1424 bestscore = matrix3[c-1][r].nogap + open /* + extend */;
1425 bestdir = DIAG;
1426
1427 if ((score = matrix3[c-1][r].Egap /* + extend */) >= bestscore) { /* Use >= for jump late */
1428 bestscore = score;
1429 bestdir = HORIZ;
1430 }
1431
1432 matrix3[c][r].Egap = bestscore + extend;
1433 #endif
1434
1435 /* EGAP alt */
1436 #ifdef DEBUG12A
1437 printf("Egap at r %d, c %d: matrix3[c-1][r].nogap %d vs nogap[r] %d\n",r,c,matrix3[c-1][r].nogap,nogap[r]);
1438 printf("Egap at r %d, c %d: matrix3[c-1][r].Egap %d vs r_gap[r] %d\n",r,c,matrix3[c-1][r].Egap,r_gap[r]);
1439 #endif
1440 #ifdef DEBUG12
1441 assert(matrix3[c-1][r].nogap == nogap[r]);
1442 assert(matrix3[c-1][r].Egap == r_gap[r]);
1443 #endif
1444 /* debug3(printf("Egap at r %d, c %d: r_gap[r] %d vs nogap[r] + open %d\n",r,c,r_gap[r],nogap[r]+open)); */
1445 if (upperp == true && r_gap[r] /* + extend */ >= (score = nogap[r] + open /* + extend */)) { /* Use >= for jump late */
1446 r_gap[r] += extend;
1447 (*directions_Egap)[c][r] = HORIZ;
1448 } else {
1449 r_gap[r] = score + extend;
1450 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1451 }
1452
1453
1454 /* NOGAP */
1455 pairscore = pairdistance_array_type[na1][na2];
1456 if ((score = pairdistance_array_type[na1][(int) na2_alt]) > pairscore) {
1457 pairscore = score;
1458 }
1459 #ifdef DEBUG12
1460 bestscore = matrix3[c-1][r-1].nogap + pairscore;
1461 bestdir = DIAG;
1462
1463 if ((score = matrix3[c][r].Egap) >= bestscore) { /* Use >= for jump late */
1464 bestscore = score;
1465 bestdir = HORIZ;
1466 }
1467
1468 if ((score = matrix3[c][r].Fgap) >= bestscore) { /* Use >= for jump late */
1469 bestscore = score;
1470 bestdir = VERT;
1471 }
1472
1473 matrix3[c][r].nogap = bestscore;
1474 #endif
1475
1476 /* NOGAP alt */
1477 #ifdef DEBUG12A
1478 printf("nogap at r %d, c %d: matrix3[c-1][r-1].nogap %d vs prev_nogap %d\n",r,c,matrix3[c-1][r-1].nogap,prev_nogap);
1479 printf("nogap at r %d, c %d: matrix3[c][r].Fgap %d vs c_gap %d\n",r,c,matrix3[c][r].Fgap,c_gap);
1480 printf("nogap at r %d, c %d: matrix3[c][r].Egap %d vs r_gap[r] %d\n",r,c,matrix3[c][r].Egap,r_gap[r]);
1481 #endif
1482 #ifdef DEBUG12
1483 assert(matrix3[c-1][r-1].nogap == prev_nogap);
1484 assert(matrix3[c][r].Fgap == c_gap);
1485 assert(matrix3[c][r].Egap == r_gap[r]);
1486 #endif
1487 last_nogap = prev_nogap + pairscore;
1488 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1489 /* debug3(printf("assign nogap at r %d, c %d: H + pairscore %d vs r_horiz + extend %d vs vert + extend %d\n",
1490 r,c,last_nogap,r_gap[r],c_gap)); */
1491 if (upperp == true && r_gap[r] >= last_nogap) { /* Use >= for jump late */
1492 last_nogap = r_gap[r];
1493 (*directions_nogap)[c][r] = HORIZ;
1494 }
1495 if (lowerp == true && c_gap >= last_nogap) { /* Use >= for jump late */
1496 last_nogap = c_gap;
1497 (*directions_nogap)[c][r] = VERT;
1498 }
1499 /* (*directions_nogap)[c][r] = bestdir2; */
1500
1501 prev_nogap = nogap[r]; /* Save for next inner loop, before we wipe it out */
1502 matrix[c][r] = nogap[r] =
1503 (last_nogap < saturation) ? saturation : last_nogap; /* Save for next outer loop */
1504 if (r == rlo) {
1505 debug12a(printf("At row %d, storing first_nogap to be nogap[r] %d\n",r,nogap[r]));
1506 first_nogap = last_nogap;
1507 }
1508 }
1509 debug12a(printf("\n"));
1510 }
1511
1512 } else {
1513 /* Do not jump late */
1514 penalty = open + extend;
1515 for (c = 1; c <= glength; c++) {
1516 if (gsequence) {
1517 na2 = revp ? gsequence[1-c] : gsequence[c-1];
1518 na2_alt = revp ? gsequence_alt[1-c] : gsequence_alt[c-1];
1519 } else if (revp == false) {
1520 na2 = get_genomic_nt(&na2_alt,goffset+c-1,chroffset,chrhigh,watsonp);
1521 } else {
1522 na2 = get_genomic_nt(&na2_alt,goffset+1-c,chroffset,chrhigh,watsonp);
1523 }
1524
1525 c_gap = NEG_INFINITY_32;
1526 if (c == 1) {
1527 rlo = 1;
1528 prev_nogap = 0; /* was nogap[rlo-1] */
1529 #ifdef ZERO_INITIAL_GAP_PENALTY
1530 last_nogap = 0;
1531 #else
1532 last_nogap = NEG_INFINITY_32 - open + 1; /* making c_gap < last_nogap + open */
1533 #endif
1534 } else if ((rlo = c - uband) < 1) {
1535 rlo = 1;
1536 #ifdef ZERO_INITIAL_GAP_PENALTY
1537 prev_nogap = 0;
1538 last_nogap = 0;
1539 #else
1540 prev_nogap = penalty;
1541 penalty += extend;
1542 last_nogap = penalty;
1543 #endif
1544 } else if (rlo == 1) {
1545 #ifdef ZERO_INITIAL_GAP_PENALTY
1546 prev_nogap = 0;
1547 #else
1548 prev_nogap = penalty;
1549 #endif
1550 /* penalty += extend; */
1551 last_nogap = NEG_INFINITY_32;
1552 #ifdef DEBUG12
1553 matrix3[c][rlo-1].Fgap = NEG_INFINITY_32;
1554 matrix3[c][rlo-1].nogap = NEG_INFINITY_32;
1555 #endif
1556 } else {
1557 prev_nogap = first_nogap;
1558 last_nogap = NEG_INFINITY_32;
1559 #ifdef DEBUG12
1560 matrix3[c][rlo-1].Fgap = NEG_INFINITY_32;
1561 matrix3[c][rlo-1].nogap = NEG_INFINITY_32;
1562 #endif
1563 }
1564
1565 if ((rhigh = c + lband) > rlength) {
1566 rhigh = rlength;
1567 #ifdef DEBUG12
1568 } else {
1569 matrix3[c-1][rhigh].Egap = NEG_INFINITY_32;
1570 matrix3[c-1][rhigh].nogap = NEG_INFINITY_32;
1571 #endif
1572 }
1573
1574 for (r = rlo; r <= rhigh; r++) {
1575 na1 = revp ? rsequence[1-r] : rsequence[r-1];
1576
1577 #ifdef DEBUG12
1578 /* FGAP */
1579 bestscore = matrix3[c][r-1].nogap + open /* + extend */;
1580 bestdir = DIAG;
1581
1582 if ((score = matrix3[c][r-1].Fgap /* + extend */) > bestscore) { /* Use > for jump early */
1583 bestscore = score;
1584 bestdir = VERT;
1585 }
1586
1587 matrix3[c][r].Fgap = bestscore + extend;
1588 #endif
1589
1590 /* FGAP alt */
1591 #ifdef DEBUG12A
1592 printf("Fgap at r %d, c %d: matrix3[c][r-1].nogap %d vs last_nogap %d\n",r,c,matrix3[c][r-1].nogap,last_nogap);
1593 printf("Fgap at r %d, c %d: matrix3[c][r-1].Fgap %d vs c_gap %d\n",r,c,matrix3[c][r-1].Fgap,c_gap);
1594 #endif
1595 #ifdef DEBUG12
1596 assert(matrix3[c][r-1].nogap == last_nogap);
1597 assert(matrix3[c][r-1].Fgap == c_gap);
1598 #endif
1599 debug3(printf("std Fgap at r %d, c %d: c_gap + extend %d vs last_nogap + open + extend %d\n",r,c,c_gap + extend,last_nogap + open + extend));
1600 if (lowerp == true && c_gap /* + extend */ > (score = last_nogap + open /* + extend */)) { /* Use > for jump early */
1601 c_gap += extend;
1602 (*directions_Fgap)[c][r] = VERT;
1603 } else {
1604 c_gap = score + extend;
1605 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1606 }
1607
1608
1609 #ifdef DEBUG12
1610 /* EGAP */
1611 bestscore = matrix3[c-1][r].nogap + open /* + extend */;
1612 bestdir = DIAG;
1613
1614 if ((score = matrix3[c-1][r].Egap /* + extend */) > bestscore) { /* Use > for jump early */
1615 bestscore = score;
1616 bestdir = HORIZ;
1617 }
1618
1619 matrix3[c][r].Egap = bestscore + extend;
1620 #endif
1621
1622 /* EGAP alt */
1623 #ifdef DEBUG12A
1624 printf("Egap at r %d, c %d: matrix3[c-1][r].nogap %d vs nogap[r] %d\n",r,c,matrix3[c-1][r].nogap,nogap[r]);
1625 printf("Egap at r %d, c %d: matrix3[c-1][r].Egap %d vs r_gap[r] %d\n",r,c,matrix3[c-1][r].Egap,r_gap[r]);
1626 #endif
1627 #ifdef DEBUG12
1628 assert(matrix3[c-1][r].nogap == nogap[r]);
1629 assert(matrix3[c-1][r].Egap == r_gap[r]);
1630 #endif
1631 /* debug3(printf("Egap at r %d, c %d: r_gap[r] %d vs nogap[r] + open %d\n",r,c,r_gap[r],nogap[r]+open)); */
1632 if (upperp == true && r_gap[r] /* + extend */ > (score = nogap[r] + open /* + extend */)) { /* Use > for jump early */
1633 r_gap[r] += extend;
1634 (*directions_Egap)[c][r] = HORIZ;
1635 } else {
1636 r_gap[r] = score + extend;
1637 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1638 }
1639
1640
1641 /* NOGAP */
1642 pairscore = pairdistance_array_type[na1][na2];
1643 if ((score = pairdistance_array_type[na1][(int) na2_alt]) > pairscore) {
1644 pairscore = score;
1645 }
1646 #ifdef DEBUG12
1647 bestscore = matrix3[c-1][r-1].nogap + pairscore;
1648 bestdir = DIAG;
1649
1650 if ((score = matrix3[c][r].Egap) > bestscore) { /* Use > for jump early */
1651 bestscore = score;
1652 bestdir = HORIZ;
1653 }
1654
1655 if ((score = matrix3[c][r].Fgap) > bestscore) { /* Use > for jump early */
1656 bestscore = score;
1657 bestdir = VERT;
1658 }
1659
1660 matrix3[c][r].nogap = bestscore;
1661 #endif
1662
1663 /* NOGAP alt */
1664 #ifdef DEBUG12A
1665 printf("nogap at r %d, c %d: matrix3[c-1][r-1].nogap %d vs prev_nogap %d\n",r,c,matrix3[c-1][r-1].nogap,prev_nogap);
1666 printf("nogap at r %d, c %d: matrix3[c][r].Fgap %d vs c_gap %d\n",r,c,matrix3[c][r].Fgap,c_gap);
1667 printf("nogap at r %d, c %d: matrix3[c][r].Egap %d vs r_gap[r] %d\n",r,c,matrix3[c][r].Egap,r_gap[r]);
1668 #endif
1669 #ifdef DEBUG12
1670 assert(matrix3[c-1][r-1].nogap == prev_nogap);
1671 assert(matrix3[c][r].Fgap == c_gap);
1672 assert(matrix3[c][r].Egap == r_gap[r]);
1673 #endif
1674 last_nogap = prev_nogap + pairscore;
1675 /* bestdir2 = DIAG; -- Already initialized to DIAG */
1676 /* debug3(printf("assign nogap at r %d, c %d: H + pairscore %d vs r_horiz + extend %d vs vert + extend %d\n",
1677 r,c,last_nogap,r_gap[r],c_gap)); */
1678 if (upperp == true && r_gap[r] > last_nogap) { /* Use > for jump early */
1679 last_nogap = r_gap[r];
1680 (*directions_nogap)[c][r] = HORIZ;
1681 }
1682 if (lowerp == true && c_gap > last_nogap) { /* Use > for jump early */
1683 last_nogap = c_gap;
1684 (*directions_nogap)[c][r] = VERT;
1685 }
1686 /* (*directions_nogap)[c][r] = bestdir2; */
1687
1688 prev_nogap = nogap[r]; /* Save for next inner loop, before we wipe it out */
1689 matrix[c][r] = nogap[r] =
1690 (last_nogap < saturation) ? saturation : last_nogap; /* Save for next outer loop */
1691 if (r == rlo) {
1692 debug12a(printf("At row %d, storing first_nogap to be nogap[r] %d\n",r,nogap[r]));
1693 first_nogap = last_nogap;
1694 }
1695 }
1696 debug12a(printf("\n"));
1697 }
1698 }
1699
1700 debug2(printf("STD: Dynprog_standard\n"));
1701 debug2(Dynprog_Matrix32_print(matrix,rlength,glength,rsequence,gsequence,gsequence_alt,
1702 goffset,chroffset,chrhigh,watsonp,revp,lband,uband));
1703 debug2(Dynprog_Directions32_print(*directions_nogap,*directions_Egap,*directions_Fgap,
1704 rlength,glength,rsequence,gsequence,gsequence_alt,
1705 goffset,chroffset,chrhigh,watsonp,revp,lband,uband));
1706 debug12a(Matrix3_print(matrix3,rlength,glength,rsequence,gsequence,gsequence_alt,
1707 goffset,chroffset,chrhigh,watsonp,revp));
1708
1709 FREEA(r_gap);
1710 FREEA(nogap);
1711
1712 return matrix;
1713 }
1714 #endif
1715
1716
1717
1718 #define LAZY_INDEL 1 /* Don't advance to next coordinate on final indel, since could go over chromosome bounds. */
1719
1720 /* Identical to Dynprog_traceback_8 and Dynprog_traceback_16, except for types of directions matrices */
1721
1722 List_T
Dynprog_traceback_std(List_T pairs,int * nmatches,int * nmismatches,int * nopens,int * nindels,Direction32_T ** directions_nogap,Direction32_T ** directions_Egap,Direction32_T ** directions_Fgap,int r,int c,char * rsequence,char * rsequenceuc,char * gsequence,char * gsequence_alt,int queryoffset,int genomeoffset,Pairpool_T pairpool,bool revp,Univcoord_T chroffset,Univcoord_T chrhigh,bool watsonp,int genestrand,int dynprogindex)1723 Dynprog_traceback_std (List_T pairs, int *nmatches, int *nmismatches, int *nopens, int *nindels,
1724 Direction32_T **directions_nogap, Direction32_T **directions_Egap, Direction32_T **directions_Fgap,
1725 int r, int c, char *rsequence, char *rsequenceuc, char *gsequence, char *gsequence_alt,
1726 int queryoffset, int genomeoffset, Pairpool_T pairpool, bool revp,
1727 Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, int genestrand, int dynprogindex) {
1728 char c1, c1_uc, c2, c2_alt;
1729 int dist;
1730 bool add_dashes_p;
1731 int querycoord, genomecoord;
1732 Direction32_T dir;
1733 #ifdef DEBUG_SIMD
1734 char c2_single;
1735 #endif
1736
1737 debug(printf("Starting traceback at r=%d,c=%d (roffset=%d, goffset=%d)\n",r,c,queryoffset,genomeoffset));
1738
1739 while (r > 0 && c > 0) { /* dir != STOP */
1740 if ((dir = directions_nogap[c][r]) == HORIZ) {
1741 dist = 1;
1742 while (c > 0 && directions_Egap[c--][r] != DIAG) {
1743 dist++;
1744 }
1745 #if 0
1746 if (c == 0) {
1747 /* Directions in column 0 can sometimes be DIAG */
1748 dir = VERT;
1749 } else {
1750 dir = directions_nogap[c][r];
1751 }
1752 #endif
1753
1754 debug(printf("H%d: ",dist));
1755 pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,/*genomesequence*/NULL,
1756 queryoffset,genomeoffset,pairpool,revp,chroffset,chrhigh,
1757 watsonp,dynprogindex);
1758 if (add_dashes_p == true) {
1759 *nopens += 1;
1760 *nindels += dist;
1761 }
1762 debug(printf("\n"));
1763
1764 } else if (dir == VERT) {
1765 dist = 1;
1766 while (r > 0 && directions_Fgap[c][r--] != DIAG) {
1767 dist++;
1768 }
1769 #if 0
1770 if (r == 0) {
1771 /* Directions in row 0 can sometimes be DIAG */
1772 dir = HORIZ;
1773 } else {
1774 dir = directions_nogap[c][r];
1775 }
1776 #endif
1777
1778 debug(printf("V%d: ",dist));
1779 pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
1780 queryoffset,genomeoffset,pairpool,revp,
1781 dynprogindex);
1782 *nopens += 1;
1783 *nindels += dist;
1784 debug(printf("\n"));
1785
1786 } else {
1787 querycoord = r-1;
1788 genomecoord = c-1;
1789 if (revp == true) {
1790 querycoord = -querycoord;
1791 genomecoord = -genomecoord;
1792 }
1793
1794 c1 = rsequence[querycoord];
1795 c1_uc = rsequenceuc[querycoord];
1796 c2 = gsequence[genomecoord];
1797 c2_alt = gsequence_alt[genomecoord];
1798 #ifdef DEBUG_SIMD
1799 c2_single = get_genomic_nt(&c2_alt,genomeoffset+genomecoord,chroffset,chrhigh,watsonp);
1800 if (c2 != c2_single) {
1801 abort();
1802 }
1803 #endif
1804
1805 #ifdef EXTRACT_GENOMICSEG
1806 assert(c2 == genomesequence[genomecoord]);
1807 #endif
1808
1809 if (c2 == '*') {
1810 /* Don't push pairs past end of chromosome */
1811 debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
1812 genomeoffset,genomecoord,chroffset,chrhigh,watsonp));
1813
1814 } else if (/*querysequenceuc[querycoord]*/c1_uc == c2 || c1_uc == c2_alt) {
1815 debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - match\n",
1816 r,c,queryoffset+querycoord,genomeoffset+genomecoord,c1_uc,c2));
1817 *nmatches += 1;
1818 pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
1819 c1,DYNPROG_MATCH_COMP,c2,c2_alt,dynprogindex);
1820
1821 } else if (Dynprog_consistent_p(c1_uc,/*g*/c2,/*g_alt*/c2_alt,genestrand) == true) {
1822 debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - ambiguous\n",
1823 r,c,queryoffset+querycoord,genomeoffset+genomecoord,c1_uc,c2));
1824 *nmatches += 1;
1825 pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
1826 c1,AMBIGUOUS_COMP,c2,c2_alt,dynprogindex);
1827
1828 } else {
1829 debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - mismatch\n",
1830 r,c,queryoffset+querycoord,genomeoffset+genomecoord,c1_uc,c2));
1831 *nmismatches += 1;
1832 pairs = Pairpool_push(pairs,pairpool,queryoffset+querycoord,genomeoffset+genomecoord,
1833 c1,MISMATCH_COMP,c2,c2_alt,dynprogindex);
1834 }
1835
1836 r--; c--;
1837 }
1838 }
1839
1840 if (r == 0 && c == 0) {
1841 /* Finished with a diagonal step */
1842
1843 } else if (c == 0) {
1844 dist = r;
1845 debug(printf("V%d: ",dist));
1846 pairs = Pairpool_add_queryskip(pairs,r,/*c*/0+LAZY_INDEL,dist,rsequence,
1847 queryoffset,genomeoffset,pairpool,revp,
1848 dynprogindex);
1849 *nopens += 1;
1850 *nindels += dist;
1851 debug(printf("\n"));
1852
1853 } else {
1854 assert(r == 0);
1855 dist = c;
1856 debug(printf("H%d: ",dist));
1857 pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,/*r*/0+LAZY_INDEL,c,dist,/*genomesequence*/NULL,
1858 queryoffset,genomeoffset,pairpool,revp,chroffset,chrhigh,
1859 watsonp,dynprogindex);
1860 if (add_dashes_p == true) {
1861 *nopens += 1;
1862 *nindels += dist;
1863 }
1864 debug(printf("\n"));
1865 }
1866
1867 return pairs;
1868 }
1869
1870
1871