1 static char rcsid[] = "$Id: translation.c 218189 2019-01-17 13:16:23Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "translation.h"
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>		/* For toupper */
11 #include "assert.h"
12 #include "mem.h"
13 #include "comp.h"
14 #include "pairdef.h"
15 #include "complement.h"
16 #include "list.h"
17 
18 
19 #define IGNORE_MARGIN 6
20 #define HORIZON 99
21 #define MIN_NPAIRS 30
22 
23 
24 /* Finding coding sequence bounds */
25 #ifdef DEBUG
26 #define debug(x) x
27 #else
28 #define debug(x)
29 #endif
30 
31 /* Translation via reference */
32 #ifdef DEBUG1
33 #define debug1(x) x
34 #else
35 #define debug1(x)
36 #endif
37 
38 /* Mark and assign cdna */
39 #ifdef DEBUG2
40 #define debug2(x) x
41 #else
42 #define debug2(x)
43 #endif
44 
45 /* Translate cDNA for PMAP */
46 #ifdef DEBUG3
47 #define debug3(x) x
48 #else
49 #define debug3(x)
50 #endif
51 
52 
53 /* Assignments to aaphase_g */
54 #ifdef DEBUG4
55 #define debug4(x) x
56 #else
57 #define debug4(x)
58 #endif
59 
60 /* Assignments to aaphase_3 */
61 #ifdef DEBUG5
62 #define debug5(x) x
63 #else
64 #define debug5(x)
65 #endif
66 
67 
68 
69 typedef enum {FRAME0, FRAME1, FRAME2, NOFRAME} Frame_T;
70 static char complCode[128] = COMPLEMENT_UC;
71 static char uppercaseCode[128] = UPPERCASE_U2T;
72 
73 #define T Translation_T
74 struct T {
75   int querypos;
76   char aa;
77   char initc;
78   Frame_T frame;
79 };
80 
81 
82 static struct T *
Translation_array_new(struct Pair_T * pairs,int translationlen)83 Translation_array_new (struct Pair_T *pairs, int translationlen) {
84   struct T *new;
85   int i;
86 
87   new = (struct T *) CALLOC(translationlen,sizeof(struct T));
88   for (i = 0; i < translationlen; i++) {
89     new[i].querypos = pairs[i].querypos;
90     new[i].aa = ' ';
91     new[i].initc = ' ';
92     new[i].frame = NOFRAME;
93   }
94 
95   return new;
96 }
97 
98 #ifdef DEBUG
99 static void
Translation_dump(struct Pair_T * pairs,struct T * translation,int translationlen)100 Translation_dump (struct Pair_T *pairs, struct T *translation, int translationlen) {
101   int i;
102 
103   for (i = 0; i < translationlen; i++) {
104     if (pairs[i].aaphase_g == 0 || pairs[i].aaphase_e == 0) {
105       printf("=> %c %c ",pairs[i].aa_g,pairs[i].aa_e);
106     } else {
107       printf("       ");
108     }
109     printf("%d: querypos %d %d ",i,pairs[i].querypos,pairs[i].aapos);
110     switch (translation[i].frame) {
111     case NOFRAME: printf("%c %c %c",' ',' ',' '); break;
112     case FRAME0: printf("%c %c %c",translation[i].aa,' ',' '); break;
113     case FRAME1: printf("%c %c %c",' ',translation[i].aa,' '); break;
114     case FRAME2: printf("%c %c %c",' ',' ',translation[i].aa); break;
115     }
116     printf("\n");
117   }
118   return;
119 }
120 #endif
121 
122 
123 /************************************************************************/
124 
125 
126 #if 0
127 /* 1.  Standard code */
128 static char
129 Translation_get_codon_old (char a, char b, char c) {
130   switch (b) {
131   case 'T':
132     switch (a) {
133     case 'T':
134       switch (c) {
135       case 'T': case 'C': case 'Y': return 'F';
136       case 'A': case 'G': case 'R': return 'L';
137       default: 	return 'X';
138       }
139     case 'C': return 'L';
140     case 'A':
141       switch (c) {
142       case 'G': return 'M';
143       case 'T': case 'A': case 'C': case 'H': return 'I';
144       default: return 'X';
145       }
146     case 'G': return 'V';
147     default: return 'X';
148   }
149   case 'C':
150     switch (a) {
151     case 'T': return 'S';
152     case 'C': return 'P';
153     case 'A': return 'T';
154     case 'G': return 'A';
155     default: return 'X';
156     }
157   case 'A':
158     switch (a) {
159     case 'T':
160       switch (c) {
161       case 'T': case 'C': case 'Y': return 'Y';
162       case 'A': case 'G': case 'R': return '*';
163       default: return 'X';
164       }
165     case 'C':
166       switch (c) {
167       case 'T': case 'C': case 'Y': return 'H';
168       case 'A': case 'G': case 'R': return 'Q';
169       default: return 'X';
170       }
171     case 'A':
172       switch (c) {
173       case 'T': case 'C': case 'Y': return 'N';
174       case 'A': case 'G': case 'R': return 'K';
175       default: return 'X';
176       }
177     case 'G':
178       switch (c) {
179       case 'T': case 'C': case 'Y': return 'D';
180       case 'A': case 'G': case 'R': return 'E';
181       default: return 'X';
182       }
183     default: return 'X';
184     }
185   case 'G':
186     switch (a) {
187     case 'T':
188       switch (c) {
189       case 'T': case 'C': case 'Y': return 'C';
190       case 'A': return '*';
191       case 'G': return 'W';
192       default: return 'X';
193       }
194     case 'C': return 'R';
195     case 'A':
196       switch (c) {
197       case 'T': case 'C': case 'Y': return 'S';
198       case 'A': case 'G': case 'R': return 'R';
199       default: return 'X';
200       }
201     case 'G': return 'G';
202     default: return 'X';
203     }
204   default: return 'X';
205   }
206   return 'X';
207 }
208 #endif
209 
210 
211 static char *translation_table;
212 static char *initiation_table;
213 
214 /* Taken from http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi" on April 2016 */
215 
216 void
Translation_setup(int translation_code,bool alt_initiation_codons_p)217 Translation_setup (int translation_code, bool alt_initiation_codons_p) {
218   switch (translation_code) {
219   case 1: /* The Standard Code */
220     translation_table = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
221     initiation_table  = "---M---------------M---------------M----------------------------";
222     break;
223 
224   case 2: /* The Vertebrate Mitochondrial Code */
225     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG";
226     initiation_table  = "--------------------------------MMMM---------------M------------";
227     break;
228 
229   case 3: /* The Yeast Mitochondrial Code */
230     translation_table = "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
231     initiation_table  = "----------------------------------MM----------------------------";
232     break;
233 
234   case 4: /* The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code */
235     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
236     initiation_table  = "--MM---------------M------------MMMM---------------M------------";
237     break;
238 
239   case 5: /* The Invertebrate Mitochondrial Code */
240     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG";
241     initiation_table  = "---M----------------------------MMMM---------------M------------";
242     break;
243 
244   case 6: /* The Ciliate, Dasycladacean, and Hexamita Nuclear Code */
245     translation_table = "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
246     initiation_table  = "-----------------------------------M----------------------------";
247     break;
248 
249   case 9: /* The Echinoderm and Flatworm Mitochondrial Code */
250     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
251     initiation_table  = "-----------------------------------M---------------M------------";
252     break;
253 
254   case 10: /* The Euplotid Nuclear Code */
255     translation_table = "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
256     initiation_table  = "-----------------------------------M----------------------------";
257     break;
258 
259   case 11: /* The Bacterial, Archael, and Plant Plastid Code */
260     translation_table = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
261     initiation_table  = "---M---------------M------------MMMM---------------M------------";
262     break;
263 
264   case 12: /* The Alternative Yeast Nuclear Code */
265     translation_table = "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
266     initiation_table  = "-------------------M---------------M----------------------------";
267     break;
268 
269   case 13: /* The Ascidian Mitochondrial Code */
270     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG";
271     initiation_table  = "---M------------------------------MM---------------M------------";
272     break;
273 
274   case 14: /* The Alternative Flatworm Mitochondrial Code */
275     translation_table = "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
276     initiation_table  = "-----------------------------------M----------------------------";
277     break;
278 
279   case 16: /* Chlorophycean Mitochondrial Code */
280     translation_table = "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
281     initiation_table  = "-----------------------------------M----------------------------";
282     break;
283 
284   case 21: /* Trematode Mitochondrial Code */
285     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG";
286     initiation_table  = "-----------------------------------M---------------M------------";
287     break;
288 
289   case 22: /* Scenedesmus obliquus Mitochondrial Code */
290     translation_table = "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
291     initiation_table  = "-----------------------------------M----------------------------";
292     break;
293 
294   case 23: /* Thraustochytrium Mitochondrial Code */
295     translation_table = "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
296     initiation_table  = "--------------------------------M--M---------------M------------";
297     break;
298 
299   case 24: /* Pterobranchia Mitochondrial Code */
300     translation_table = "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG";
301     initiation_table  = "---M---------------M---------------M---------------M------------";
302     break;
303 
304   case 25: /* Candidate Division SR1 and Gracilibacteria Code */
305     translation_table = "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
306     initiation_table  = "---M-------------------------------M---------------M------------";
307     break;
308 
309   case 26: /* Pachysolen tannophilus Nuclear Code */
310     translation_table = "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";
311     initiation_table  = "-------------------M---------------M----------------------------";
312     break;
313 
314   default:
315     fprintf(stderr,"Translation code %d is not supported\n",translation_code);
316     exit(9);
317   }
318 
319   if (alt_initiation_codons_p == false) {
320     initiation_table  = "-----------------------------------M----------------------------";
321   }
322 
323   return;
324 }
325 
326 
327 static char
Translation_get_codon(char * initc,char base1,char base2,char base3)328 Translation_get_codon (char *initc, char base1, char base2, char base3) {
329   int codoni;
330 
331   switch (base1) {
332   case 'T': codoni = 0; break;
333   case 'C': codoni = 16; break;
334   case 'A': codoni = 32; break;
335   case 'G': codoni = 48; break;
336   default: codoni = -16;		/* Negative enough so += 12 and += 3 will still yield a negative value */
337   }
338 
339   switch (base2) {
340   case 'T': codoni += 0; break;
341   case 'C': codoni += 4; break;
342   case 'A': codoni += 8; break;
343   case 'G': codoni += 12; break;
344   default: codoni = -16;
345   }
346 
347   switch (base3) {
348   case 'T': codoni += 0; break;
349   case 'C': codoni += 1; break;
350   case 'A': codoni += 2; break;
351   case 'G': codoni += 3; break;
352   default: codoni = -16;
353   }
354 
355   if (codoni < 0) {
356     *initc = '-';
357     return 'X';
358   } else {
359     *initc = initiation_table[codoni];
360     return translation_table[codoni];
361   }
362 }
363 
364 
365 #ifndef PMAP
366 static void
find_bounds_forward(Frame_T * translation_frame,int * translation_starti,int * translation_endi,int * translation_length,bool * endstopp,struct T * translation,int translationlen,bool fulllengthp)367 find_bounds_forward (Frame_T *translation_frame, int *translation_starti,
368 		     int *translation_endi, int *translation_length,
369 		     bool *endstopp, struct T *translation,
370 		     int translationlen, bool fulllengthp) {
371   int beststart0, beststart1, beststart2, bestend0, bestend1, bestend2;
372   int bestorf0 = 0, bestorf1 = 0, bestorf2 = 0, orf0 = 0, orf1 = 0, orf2 = 0;
373   int start0 = 0, start1 = 0, start2 = 0;
374   bool needmet0p, needmet1p, needmet2p;
375   bool endstop0p = false, endstop1p = false, endstop2p = false;
376   char codon, initc;
377   int i, frame;
378 
379   if (fulllengthp == true) {
380     needmet0p = needmet1p = needmet2p = true;
381   } else {
382     needmet0p = needmet1p = needmet2p = false;
383   }
384 
385   debug(printf("Starting find_bounds_forward, from 0 to translationlen %d\n",translationlen));
386   for (i = 0; i < translationlen; i++) {
387     debug(printf("%d %c frame %d: %d %d %d\n",i,translation[i].aa,translation[i].frame,orf0,orf1,orf2));
388     frame = translation[i].frame;
389     if ((codon = translation[i].aa) != ' ') {
390       initc = translation[i].initc;
391       if (frame == FRAME0) {
392 	if (needmet0p) {
393 	  if (initc == 'M') {
394 	    orf0 = 1;
395 	    start0 = i;
396 	    needmet0p = false;
397 	  }
398 	} else if (codon == '*') {
399 	  orf0++;
400 	  if (orf0 > bestorf0) {
401 	    debug(printf("Frame 0: Best orf is %d\n",orf0));
402 	    bestorf0 = orf0;
403 	    beststart0 = start0;
404 	    bestend0 = i;
405 	    endstop0p = true;
406 	  }
407 	  needmet0p = true;
408 	} else {
409 	  debug(printf("Incrementing orf0\n"));
410 	  orf0++;
411 	}
412       } else if (frame == FRAME1) {
413 	if (needmet1p) {
414 	  if (initc == 'M') {
415 	    orf1 = 1;
416 	    start1 = i;
417 	    needmet1p = false;
418 	  }
419 	} else if (codon == '*') {
420 	  orf1++;
421 	  if (orf1 > bestorf1) {
422 	    debug(printf("Frame 1: Best orf is %d\n",orf1));
423 	    bestorf1 = orf1;
424 	    beststart1 = start1;
425 	    bestend1 = i;
426 	    endstop1p = true;
427 	  }
428 	  needmet1p = true;
429 	} else {
430 	  debug(printf("Incrementing orf1\n"));
431 	  orf1++;
432 	}
433       } else if (frame == FRAME2) {
434 	if (needmet2p) {
435 	  if (initc == 'M') {
436 	    orf2 = 1;
437 	    start2 = i;
438 	    needmet2p = false;
439 	  }
440 	} else if (codon == '*') {
441 	  orf2++;
442 	  if (orf2 > bestorf2) {
443 	    debug(printf("Frame 2: Best orf is %d\n",orf2));
444 	    bestorf2 = orf2;
445 	    beststart2 = start2;
446 	    bestend2 = i;
447 	    endstop2p = true;
448 	  }
449 	  needmet2p = true;
450 	} else {
451 	  debug(printf("Incrementing orf2\n"));
452 	  orf2++;
453 	}
454       } else {
455 	fprintf(stderr,"No frame at %d\n",i);
456       }
457     }
458   }
459 
460   /* Handle last segments */
461   if (orf0 > bestorf0) {
462     debug(printf("Frame 0: Best orf is %d\n",orf0));
463     bestorf0 = orf0;
464     beststart0 = start0;
465     bestend0 = translationlen-1;
466     endstop0p = false;
467   }
468   if (orf1 > bestorf1) {
469     debug(printf("Frame 1: Best orf is %d\n",orf1));
470     bestorf1 = orf1;
471     beststart1 = start1;
472     bestend1 = translationlen-1;
473     endstop1p = false;
474   }
475   if (orf2 > bestorf2) {
476     debug(printf("Frame 2: Best orf is %d\n",orf2));
477     bestorf2 = orf2;
478     beststart2 = start2;
479     bestend2 = translationlen-1;
480     endstop2p = false;
481   }
482 
483   /* Find overall best */
484   *translation_length = bestorf0;
485   *endstopp = endstop0p;
486   if (bestorf1 > *translation_length) {
487     *translation_length = bestorf1;
488     *endstopp = endstop1p;
489   }
490   if (bestorf2 > *translation_length) {
491     *translation_length = bestorf2;
492     *endstopp = endstop2p;
493   }
494 
495   if (bestorf2 == *translation_length) {
496     debug(printf("Assigning frame 2\n"));
497     *translation_frame = FRAME2;
498     *translation_starti = beststart2;
499     *translation_endi = bestend2;
500   } else if (bestorf1 == *translation_length) {
501     debug(printf("Assigning frame 1\n"));
502     *translation_frame = FRAME1;
503     *translation_starti = beststart1;
504     *translation_endi = bestend1;
505   } else if (bestorf0 == *translation_length) {
506     debug(printf("Assigning frame 0\n"));
507     *translation_frame = FRAME0;
508     *translation_starti = beststart0;
509     *translation_endi = bestend0;
510   } else {
511     abort();
512   }
513 
514   debug(printf("Frame0: %d, Frame1: %d, Frame2: %d\n",bestorf0,bestorf1,bestorf2));
515   debug(printf("Best orf is %d codons\n",*translation_length));
516   debug(printf("Frame0: %d %d\n",beststart0,bestend0));
517   debug(printf("Frame1: %d %d\n",beststart1,bestend1));
518   debug(printf("Frame2: %d %d\n",beststart2,bestend2));
519   debug(printf("Value of endstopp is %d\n",*endstopp));
520 
521   return;
522 }
523 #endif
524 
525 #ifndef PMAP
526 static void
find_bounds_backward(Frame_T * translation_frame,int * translation_starti,int * translation_endi,int * translation_length,bool * endstopp,struct T * translation,int translationlen,bool fulllengthp)527 find_bounds_backward (Frame_T *translation_frame, int *translation_starti,
528 		      int *translation_endi, int *translation_length,
529 		      bool *endstopp, struct T *translation, int translationlen, bool fulllengthp) {
530   int beststart0, beststart1, beststart2, bestend0, bestend1, bestend2;
531   int bestorf0 = 0, bestorf1 = 0, bestorf2 = 0, orf0 = 0, orf1 = 0, orf2 = 0;
532   int start0 = translationlen-1, start1 = translationlen-1, start2 = translationlen-1;
533   bool needmet0p, needmet1p, needmet2p;
534   bool endstop0p = false, endstop1p = false, endstop2p = false;
535   char codon, initc;
536   int i, frame;
537 
538   if (fulllengthp == true) {
539     needmet0p = needmet1p = needmet2p = true;
540   } else {
541     needmet0p = needmet1p = needmet2p = false;
542   }
543 
544   debug(printf("Starting find_bounds_forward, from translationlen-1 %d to 0\n",translationlen-1));
545   for (i = translationlen-1; i >= 0; --i) {
546     debug(printf("%d %c frame %d: %d %d %d\n",i,translation[i].aa,translation[i].frame,orf0,orf1,orf2));
547     frame = translation[i].frame;
548     if ((codon = translation[i].aa) != ' ') {
549       initc = translation[i].initc;
550       if (frame == FRAME0) {
551 	if (needmet0p) {
552 	  if (initc == 'M') {
553 	    orf0 = 1;
554 	    start0 = i;
555 	    needmet0p = false;
556 	  }
557 	} else if (codon == '*') {
558 	  orf0++;
559 	  if (orf0 > bestorf0) {
560 	    debug(printf("Frame 0: Best orf is %d\n",orf0));
561 	    bestorf0 = orf0;
562 	    bestend0 = i;
563 	    beststart0 = start0;
564 	    endstop0p = true;
565 	  }
566 	  needmet0p = true;
567 	} else {
568 	  orf0++;
569 	}
570       } else if (frame == FRAME1) {
571 	if (needmet1p) {
572 	  if (initc == 'M') {
573 	    orf1 = 1;
574 	    start1 = i;
575 	    needmet1p = false;
576 	  }
577 	} else if (codon == '*') {
578 	  orf1++;
579 	  if (orf1 > bestorf1) {
580 	    debug(printf("Frame 1: Best orf is %d\n",orf1));
581 	    bestorf1 = orf1;
582 	    bestend1 = i;
583 	    beststart1 = start1;
584 	    endstop1p = true;
585 	  }
586 	  needmet1p = true;
587 	} else {
588 	  orf1++;
589 	}
590       } else if (frame == FRAME2) {
591 	if (needmet2p) {
592 	  if (initc == 'M') {
593 	    orf2 = 1;
594 	    start2 = i;
595 	    needmet2p = false;
596 	  }
597 	} else if (codon == '*') {
598 	  orf2++;
599 	  if (orf2 > bestorf2) {
600 	    debug(printf("Frame 2: Best orf is %d\n",orf2));
601 	    bestorf2 = orf2;
602 	    bestend2 = i;
603 	    beststart2 = start2;
604 	    endstop2p = true;
605 	  }
606 	  needmet2p = true;
607 	} else {
608 	  orf2++;
609 	}
610       }
611     }
612   }
613 
614   /* Handle last segments */
615   if (orf0 > bestorf0) {
616     debug(printf("Frame 0: Best orf is %d\n",orf0));
617     bestorf0 = orf0;
618     bestend0 = 0;
619     beststart0 = start0;
620     endstop0p = false;
621   }
622   if (orf1 > bestorf1) {
623     debug(printf("Frame 1: Best orf is %d\n",orf1));
624     bestorf1 = orf1;
625     bestend1 = 0;
626     beststart1 = start1;
627     endstop1p = false;
628   }
629   if (orf2 > bestorf2) {
630     debug(printf("Frame 2: Best orf is %d\n",orf2));
631     bestorf2 = orf2;
632     bestend2 = 0;
633     beststart2 = start2;
634     endstop2p = false;
635   }
636 
637   /* Find overall best */
638   *translation_length = bestorf0;
639   *endstopp = endstop0p;
640   if (bestorf1 > *translation_length) {
641     *translation_length = bestorf1;
642     *endstopp = endstop1p;
643   }
644   if (bestorf2 > *translation_length) {
645     *translation_length = bestorf2;
646     *endstopp = endstop2p;
647   }
648 
649   if (bestorf0 == *translation_length) {
650     debug(printf("Assigning frame 0\n"));
651     *translation_frame = FRAME0;
652     *translation_starti = beststart0;
653     *translation_endi = bestend0;
654   } else if (bestorf1 == *translation_length) {
655     debug(printf("Assigning frame 1\n"));
656     *translation_frame = FRAME1;
657     *translation_starti = beststart1;
658     *translation_endi = bestend1;
659   } else if (bestorf2 == *translation_length) {
660     debug(printf("Assigning frame 2\n"));
661     *translation_frame = FRAME2;
662     *translation_starti = beststart2;
663     *translation_endi = bestend2;
664   } else {
665     abort();
666   }
667 
668   debug(printf("Frame0: %d, Frame1: %d, Frame2: %d\n",bestorf0,bestorf1,bestorf2));
669   debug(printf("Best orf is %d codons\n",*translation_length));
670   debug(printf("Frame0: %d %d\n",beststart0,bestend0));
671   debug(printf("Frame1: %d %d\n",beststart1,bestend1));
672   debug(printf("Frame2: %d %d\n",beststart2,bestend2));
673   debug(printf("Value of endstopp is %d\n",*endstopp));
674 
675   return;
676 }
677 #endif
678 
679 
680 #ifndef PMAP
681 static void
find_bounds_forward_fromstart(Frame_T * translation_frame,int * translation_starti,int * translation_endi,int * translation_length,bool * endstopp,struct T * translation,int translationlen,int cds_startpos)682 find_bounds_forward_fromstart (Frame_T *translation_frame, int *translation_starti,
683 			       int *translation_endi, int *translation_length,
684 			       bool *endstopp, struct T *translation, int translationlen,
685 			       int cds_startpos) {
686   Frame_T frame_fromstart;
687   int phase_fromstart;
688   int beststart0, bestend0;
689   int bestorf0 = 0, orf0 = 0;
690   int start0 = 0;
691   bool endstop0p = false;
692   char codon;
693   int i;
694 
695   phase_fromstart = (cds_startpos - 1) % 3;
696   if (translation[0].querypos % 3 == phase_fromstart) {
697     frame_fromstart = translation[0].frame;
698   } else if (translation[1].querypos % 3 == phase_fromstart) {
699     frame_fromstart = translation[1].frame;
700   } else if (translation[2].querypos % 3 == phase_fromstart) {
701     frame_fromstart = translation[2].frame;
702   } else {
703     fprintf(stderr,"Something wrong with Translation_T object\n");
704     abort();
705   }
706 
707   for (i = 0; i < translationlen && endstop0p == false; i++) {
708     if (translation[i].querypos >= cds_startpos - 1 &&
709 	translation[i].frame == frame_fromstart &&
710 	(codon = translation[i].aa) != ' ') {
711       debug(printf("%d %c\n",i,translation[i].aa));
712       if (orf0 == 0) {
713 	start0 = i;
714       }
715       if (codon == '*') {
716 	orf0++;
717 	if (orf0 > bestorf0) {
718 	  debug(printf("Frame 0: Best orf is %d\n",orf0));
719 	  bestorf0 = orf0;
720 	  beststart0 = start0;
721 	  bestend0 = i;
722 	  endstop0p = true;
723 	}
724 
725       } else {
726 	debug(printf("Incrementing orf0\n"));
727 	orf0++;
728       }
729     }
730   }
731 
732   /* Handle last segments */
733   if (orf0 > bestorf0) {
734     debug(printf("Frame 0: Best orf is %d\n",orf0));
735     bestorf0 = orf0;
736     beststart0 = start0;
737     bestend0 = translationlen-1;
738     endstop0p = false;
739   }
740 
741   /* Find overall best */
742   *translation_length = bestorf0;
743   *endstopp = endstop0p;
744 
745   debug(printf("Assigning frame %d\n",frame_fromstart));
746   *translation_frame = frame_fromstart;
747   *translation_starti = beststart0;
748   *translation_endi = bestend0;
749 
750   return;
751 }
752 #endif
753 
754 
755 #ifndef PMAP
756 static void
find_bounds_backward_fromstart(Frame_T * translation_frame,int * translation_starti,int * translation_endi,int * translation_length,bool * endstopp,struct T * translation,int translationlen,int cds_startpos)757 find_bounds_backward_fromstart (Frame_T *translation_frame, int *translation_starti,
758 				int *translation_endi, int *translation_length,
759 				bool *endstopp, struct T *translation, int translationlen,
760 				int cds_startpos) {
761   Frame_T frame_fromstart;
762   int phase_fromstart;
763   int beststart0, bestend0;
764   int bestorf0 = 0, orf0 = 0;
765   int start0 = translationlen-1;
766   bool endstop0p = false;
767   char codon;
768   int i;
769 
770   phase_fromstart = (translationlen - cds_startpos) % 3;
771   if (translation[translationlen-1].querypos % 3 == phase_fromstart) {
772     frame_fromstart = translation[translationlen-1].frame;
773   } else if (translation[translationlen-2].querypos % 3 == phase_fromstart) {
774     frame_fromstart = translation[translationlen-2].frame;
775   } else if (translation[translationlen-3].querypos % 3 == phase_fromstart) {
776     frame_fromstart = translation[translationlen-3].frame;
777   } else {
778     fprintf(stderr,"Something wrong with Translation_T object\n");
779     abort();
780   }
781 
782   for (i = translationlen-1; i >= 0 && endstop0p == false; --i) {
783     if (translation[i].querypos >= cds_startpos - 1 &&
784 	translation[i].frame == frame_fromstart &&
785 	(codon = translation[i].aa) != ' ') {
786       debug(printf("%d %c\n",i,translation[i].aa));
787       if (orf0 == 0) {
788 	start0 = i;
789       }
790       if (codon == '*') {
791 	orf0++;
792 	if (orf0 > bestorf0) {
793 	  debug(printf("Frame 0: Best orf is %d\n",orf0));
794 	  bestorf0 = orf0;
795 	  bestend0 = i;
796 	  beststart0 = start0;
797 	  endstop0p = true;
798 	}
799       } else {
800 	debug(printf("Incrementing orf0\n"));
801 	orf0++;
802       }
803     }
804   }
805 
806   /* Handle last segments */
807   if (orf0 > bestorf0) {
808     debug(printf("Frame 0: Best orf is %d\n",orf0));
809     bestorf0 = orf0;
810     bestend0 = 0;
811     beststart0 = start0;
812     endstop0p = false;
813   }
814 
815   /* Find overall best */
816   *translation_length = bestorf0;
817   *endstopp = endstop0p;
818 
819   debug(printf("Assigning frame %d\n",frame_fromstart));
820   *translation_frame = frame_fromstart;
821   *translation_starti = beststart0;
822   *translation_endi = bestend0;
823 
824   return;
825 }
826 #endif
827 
828 
829 
830 #ifdef PMAP
831 static void
translate_pairs_cdna(int * translation_starti,int * translation_endi,int * translation_length,struct Pair_T * pairs,int npairs,char * queryaaseq_ptr)832 translate_pairs_cdna (int *translation_starti, int *translation_endi, int *translation_length,
833 		      struct Pair_T *pairs, int npairs, char *queryaaseq_ptr) {
834   struct Pair_T *ptr, *pair;
835   int i;
836 
837   /* Go backward so we put aa at beginning of codon */
838   /* printf("lastquerypos is %d\n",pairs[npairs-1].querypos); */
839   i = npairs-1;
840   while (i >= 0 && pairs[i].querypos % 3 != 2) {
841     i--;
842   }
843   *translation_endi = i;
844   if (*translation_endi < 0) {
845     *translation_endi = 0;
846   }
847 
848   debug3(printf("Entering translate_pairs_cdna with npairs=%d, translation_endi = %d\n",
849 		npairs,*translation_endi));
850 
851   *translation_starti = *translation_endi;
852   *translation_length = 0;
853 
854   ptr = &(pairs[(*translation_endi)+1]);
855   for (i = *translation_endi; i >= 0; --i) {
856     pair = --ptr;
857     pair->aapos = pair->querypos/3 + 1;
858 
859     if (pair->cdna == ' ') {
860       /* pair->aa_e = ' '; */
861     } else {
862       debug5(printf("(0) Assigning aaphase_e to pair %d, based on aaphase_g\n",i));
863       if ((pair->aaphase_e = pair->querypos % 3) == 0) {
864 	pair->aa_e = queryaaseq_ptr[pair->querypos/3];
865 	*translation_starti = i;
866 	(*translation_length) += 1;
867       }
868     }
869   }
870 
871   return;
872 }
873 #endif
874 
875 
876 static struct Translation_T *
translate_pairs_forward(struct Pair_T * pairs,int npairs,bool revcompp)877 translate_pairs_forward (struct Pair_T *pairs, int npairs, bool revcompp) {
878   struct T *translation;
879   struct Pair_T *ptr, *pair;
880   int i, gpos = 0;
881   char codon, initc, nt2 = 'X', nt1 = 'X', nt0 = 'X';
882 
883   translation = Translation_array_new(pairs,npairs);
884 
885   /* Go backward so we put aa at beginning of codon */
886   ptr = &(pairs[npairs]);
887   for (i = npairs-1; i >= 0; --i) {
888     pair = --ptr;
889     if (pair->gapp == true) {
890       /* translation[i].aa = ' '; */
891     } else if (pair->genome == ' ') {
892       /* translation[i].aa = ' '; */
893     } else {
894       nt2 = nt1;
895       nt1 = nt0;
896       nt0 = revcompp ? complCode[(int) pair->genome] : uppercaseCode[(int) pair->genome];
897 
898       codon = Translation_get_codon(&initc,nt0,nt1,nt2);
899       if (gpos < 2 && codon == 'X') {
900 	/* Do not want to set translation[i].frame until the codon starts */
901 	/* translation[i].aa = ' '; */
902       } else {
903 	translation[i].aa = codon;
904 	translation[i].initc = initc;
905 	switch (gpos % 3) {
906 	case 0: translation[i].frame = FRAME0; break;
907 	case 1: translation[i].frame = FRAME1; break;
908 	case 2: translation[i].frame = FRAME2; break;
909 	}
910       }
911       gpos++;
912     }
913   }
914 
915   return translation;
916 }
917 
918 static struct Translation_T *
translate_pairs_backward(struct Pair_T * pairs,int npairs,bool revcompp)919 translate_pairs_backward (struct Pair_T *pairs, int npairs, bool revcompp) {
920   struct T *translation;
921   struct Pair_T *ptr, *pair;
922   int i, gpos = 0;
923   char codon, initc, nt2 = 'X', nt1 = 'X', nt0 = 'X';
924 
925   translation = Translation_array_new(pairs,npairs);
926 
927   /* Go forwards so we put aa at beginning of codon */
928   ptr = pairs;
929   for (i = 0; i < npairs; i++) {
930     pair = ptr++;
931     if (pair->gapp == true) {
932       /* translation[i].aa = ' '; */
933     } else if (pair->genome == ' ') {
934       /* translation[i].aa = ' '; */
935     } else {
936       nt2 = nt1;
937       nt1 = nt0;
938       nt0 = revcompp ? complCode[(int) pair->genome] : uppercaseCode[(int) pair->genome];
939 
940       codon = Translation_get_codon(&initc,nt0,nt1,nt2);
941       if (gpos < 2 && codon == 'X') {
942 	/* Do not want to set translation[i].frame until the codon starts */
943 	/* translation[i].aa = ' '; */
944       } else {
945 	translation[i].aa = codon;
946 	translation[i].initc = initc;
947 	switch (gpos % 3) {
948 	case 0: translation[i].frame = FRAME0; break;
949 	case 1: translation[i].frame = FRAME1; break;
950 	case 2: translation[i].frame = FRAME2; break;
951 	}
952       }
953       gpos++;
954     }
955   }
956 
957   return translation;
958 }
959 
960 
961 /************************************************************************/
962 
963 
964 #ifndef PMAP
965 static int
count_cdna_forward_strict(int * nexti,struct Pair_T * pairs,int npairs,int starti)966 count_cdna_forward_strict (int *nexti, struct Pair_T *pairs, int npairs, int starti) {
967   int ncdna = 0, j;
968 
969   j = starti;
970   while (j < npairs) {
971     if (ncdna >= 3 && pairs[j].cdna != ' ') {
972       *nexti = j;
973       return ncdna;
974     } else if (pairs[j].cdna != ' ') {
975       ncdna++;
976     }
977     j++;
978   }
979 
980   *nexti = j;
981   return ncdna;
982 }
983 #endif
984 
985 
986 static int
count_cdna_forward(int * nexti,struct Pair_T * pairs,int starti,int endi)987 count_cdna_forward (int *nexti, struct Pair_T *pairs, int starti, int endi) {
988   int ncdna = 0, j;
989 
990   j = starti;
991   while (j <= endi) {
992     if (j > starti && pairs[j].aaphase_g == 0 && pairs[j].cdna != ' ') {
993       *nexti = j;
994       return ncdna;
995     } else if (pairs[j].cdna != ' ') {
996       ncdna++;
997     }
998     j++;
999   }
1000 
1001   *nexti = j;
1002   return ncdna;
1003 }
1004 
1005 
1006 static int
count_cdna_forward_mod3(int * nexti,struct Pair_T * pairs,int starti,int endi)1007 count_cdna_forward_mod3 (int *nexti, struct Pair_T *pairs, int starti, int endi) {
1008   int ncdna = 0, j;
1009 
1010   j = starti;
1011   while (j <= endi && ncdna <= HORIZON) {
1012     if (j > starti && pairs[j].aaphase_g == 0 && pairs[j].cdna != ' ' &&
1013 	(ncdna % 3) == 0) {
1014       *nexti = j;
1015       return ncdna;
1016     } else if (pairs[j].cdna != ' ') {
1017       ncdna++;
1018     }
1019     j++;
1020   }
1021 
1022   *nexti = j;
1023   return 1;			/* any answer that is not mod 0 */
1024 }
1025 
1026 #ifndef PMAP
1027 static int
count_cdna_backward_strict(int * nexti,struct Pair_T * pairs,int starti)1028 count_cdna_backward_strict (int *nexti, struct Pair_T *pairs, int starti) {
1029   int ncdna = 0, j;
1030 
1031   j = starti;
1032   while (j >= 0) {
1033     if (ncdna >= 3 && pairs[j].cdna != ' ') {
1034       *nexti = j;
1035       return ncdna;
1036     } else if (pairs[j].cdna != ' ') {
1037       ncdna++;
1038     }
1039     j--;
1040   }
1041 
1042   *nexti = j;
1043   return ncdna;
1044 }
1045 #endif
1046 
1047 static int
count_cdna_backward(int * nexti,struct Pair_T * pairs,int starti,int endi)1048 count_cdna_backward (int *nexti, struct Pair_T *pairs, int starti, int endi) {
1049   int ncdna = 0, j;
1050 
1051   j = starti;
1052   while (j >= endi) {
1053     if (j < starti && pairs[j].aaphase_g == 0 && pairs[j].cdna != ' ') {
1054       *nexti = j;
1055       return ncdna;
1056     } else if (pairs[j].cdna != ' ') {
1057       ncdna++;
1058     }
1059     j--;
1060   }
1061 
1062   *nexti = j;
1063   return ncdna;
1064 }
1065 
1066 
1067 static int
count_cdna_backward_mod3(int * nexti,struct Pair_T * pairs,int starti,int endi)1068 count_cdna_backward_mod3 (int *nexti, struct Pair_T *pairs, int starti, int endi) {
1069   int ncdna = 0, j;
1070 
1071   j = starti;
1072   while (j >= endi && ncdna <= HORIZON) {
1073     if (j < starti && pairs[j].aaphase_g == 0 && pairs[j].cdna != ' ' &&
1074 	(ncdna % 3) == 0) {
1075       *nexti = j;
1076       return ncdna;
1077     } else if (pairs[j].cdna != ' ') {
1078       ncdna++;
1079     }
1080     j--;
1081   }
1082 
1083   *nexti = j;
1084   return 1;			/* any answer that is not mod 0 */
1085 }
1086 
1087 
1088 #if 0
1089 static int
1090 count_genomic_strict (int *nexti, struct Pair_T *pairs, int npairs, int starti) {
1091   int ngenomic = 0, j;
1092 
1093   j = starti;
1094   while (j < npairs) {
1095     /* Need to check gapp in addition to genome, because genome
1096        characters are provided at ends of introns */
1097     if (ngenomic >= 3 && pairs[j].gapp == false && pairs[j].genome != ' ') {
1098       *nexti = j;
1099       return ngenomic;
1100     } else if (pairs[j].gapp == false && pairs[j].genome != ' ') {
1101       ngenomic++;
1102     }
1103     j++;
1104   }
1105 
1106   *nexti = j;
1107   return ngenomic;
1108 }
1109 #endif
1110 
1111 #if 0
1112 static int
1113 count_genomic (int *nexti, struct Pair_T *pairs, int starti, int endi) {
1114   int ngenomic = 0, j;
1115 
1116   j = starti;
1117   while (j <= endi) {
1118     if (j > starti && pairs[j].aaphase_e == 0 && pairs[j].genome != ' ') {
1119       *nexti = j;
1120       return ngenomic;
1121     } else if (pairs[j].gapp == false && pairs[j].genome != ' ') {
1122       ngenomic++;
1123     }
1124     j++;
1125   }
1126 
1127   *nexti = j;
1128   return ngenomic;
1129 }
1130 #endif
1131 
1132 #if 0
1133 static int
1134 count_genomic_mod3 (int *nexti, struct Pair_T *pairs, int starti, int endi) {
1135   int ngenomic = 0, j;
1136 
1137   j = starti;
1138   while (j <= endi && ngenomic <= HORIZON) {
1139     if (j > starti && pairs[j].aaphase_e == 0 && pairs[j].genome != ' ' &&
1140 	(ngenomic % 3) == 0) {
1141       *nexti = j;
1142       return ngenomic;
1143     } else if (pairs[j].gapp == false && pairs[j].genome != ' ') {
1144       ngenomic++;
1145     }
1146     j++;
1147   }
1148 
1149   *nexti = j;
1150   return 1;			/* any answer that is not mod 0 */
1151 }
1152 #endif
1153 
1154 /************************************************************************/
1155 
1156 static int
get_codon_forward(int * nexti,struct Pair_T * pairs,int npairs,int starti,bool revcompp)1157 get_codon_forward (int *nexti, struct Pair_T *pairs, int npairs, int starti, bool revcompp) {
1158   char nt2 = 'X', nt1 = 'X', nt0 = 'X';
1159   int j2, j1, j0;
1160   int ncdna = 0, j;
1161   char initc;
1162 
1163   j = starti;
1164   j0 = j1 = j2 = starti;
1165   while (j < npairs && ncdna < 3) {
1166     if (pairs[j].cdna != ' ') {
1167       nt0 = nt1;
1168       nt1 = nt2;
1169       nt2 = revcompp ? complCode[(int) pairs[j].cdna] : uppercaseCode[(int) pairs[j].cdna];
1170       j0 = j1;
1171       j1 = j2;
1172       j2 = j;
1173       ncdna++;
1174     }
1175     j++;
1176   }
1177 
1178   /* assign function depends on nexti pointing to a valid position */
1179   while (j <= npairs - 1 && pairs[j].cdna == ' ') {
1180     j++;
1181   }
1182 
1183   *nexti = j;
1184 
1185   if (j2 > j0) {
1186     /* Loops handle indels */
1187     for (j = j0; j < j1; j++) {
1188       debug5(printf("(1) Assigning aaphase_e of 0 to pair %d\n",j));
1189       pairs[j].aaphase_e = 0;
1190     }
1191     for (j = j1; j < j2; j++) {
1192       debug5(printf("(2) Assigning aaphase_e of 1 to pair %d\n",j));
1193       pairs[j].aaphase_e = 1;
1194     }
1195     for (j = j2; j < *nexti; j++) {
1196       debug5(printf("(3) Assigning aaphase_e of 2 to pair %d\n",j));
1197       pairs[j].aaphase_e = 2;
1198     }
1199   }
1200 
1201   return Translation_get_codon(&initc,nt0,nt1,nt2);
1202 }
1203 
1204 static int
get_codon_backward(int * nexti,struct Pair_T * pairs,int starti,bool revcompp)1205 get_codon_backward (int *nexti, struct Pair_T *pairs, int starti, bool revcompp) {
1206   char nt2 = 'X', nt1 = 'X', nt0 = 'X';
1207   int j2, j1, j0;
1208   int ncdna = 0, j;
1209   char initc;
1210 
1211   j = starti;
1212   j0 = j1 = j2 = starti;
1213   while (j >= 0 && ncdna < 3) {
1214     if (pairs[j].cdna != ' ') {
1215       nt0 = nt1;
1216       nt1 = nt2;
1217       nt2 = revcompp ? complCode[(int) pairs[j].cdna] : uppercaseCode[(int) pairs[j].cdna];
1218       j0 = j1;
1219       j1 = j2;
1220       j2 = j;
1221       ncdna++;
1222     }
1223     --j;
1224   }
1225 
1226   /* assign function depends on nexti pointing to a valid position */
1227   while (j >= 0 && (pairs[j].cdna == ' ')) {
1228     --j;
1229   }
1230 
1231   *nexti = j;
1232 
1233   if (j2 < j0) {
1234     /* Loops handle indels */
1235     for (j = j0; j > j1; --j) {
1236       debug5(printf("(4) Assigning aaphase_e of 0 to pair %d\n",j));
1237       pairs[j].aaphase_e = 0;
1238     }
1239     for (j = j1; j > j2; --j) {
1240       debug5(printf("(5) Assigning aaphase_e of 1 to pair %d\n",j));
1241       pairs[j].aaphase_e = 1;
1242     }
1243     for (j = j2; j > *nexti; --j) {
1244       debug5(printf("(6) Assigning aaphase_e of 2 to pair %d\n",j));
1245       pairs[j].aaphase_e = 2;
1246     }
1247   }
1248 
1249   return Translation_get_codon(&initc,nt0,nt1,nt2);
1250 }
1251 
1252 
1253 #ifdef PMAP
1254 static int
get_codon_genomic(int * nexti,struct Pair_T * pairs,int npairs,int starti)1255 get_codon_genomic (int *nexti, struct Pair_T *pairs, int npairs, int starti) {
1256   char nt2 = 'X', nt1 = 'X', nt0 = 'X';
1257   int j0, j1, j2;
1258   int ngenomic = 0, j;
1259   char initc;
1260 
1261   j = starti;
1262   while (ngenomic < 3) {
1263     if (pairs[j].gapp == false && pairs[j].genome != ' ') {
1264       nt0 = nt1;
1265       nt1 = nt2;
1266       nt2 = uppercaseCode[(int) pairs[j].genome];
1267       j0 = j1;
1268       j1 = j2;
1269       j2 = j;
1270       ngenomic++;
1271     }
1272     j++;
1273   }
1274 
1275   /* assign function depends on nexti pointing to a valid position */
1276   while (j <= npairs-1 && (pairs[j].gapp == true || pairs[j].genome == ' ')) {
1277     j++;
1278   }
1279 
1280   *nexti = j;
1281 
1282   /* Loops handle indels */
1283   for (j = j0; j < j1; j++) {
1284     debug4(printf("(1) Assigning aaphase_g to pair %d\n",j));
1285     pairs[j].aaphase_g = 0;
1286   }
1287   for (j = j1; j < j2; j++) {
1288     debug4(printf("(2) Assigning aaphase_g to pair %d\n",j));
1289     pairs[j].aaphase_g = 1;
1290   }
1291   for (j = j2; j < *nexti; j++) {
1292     debug4(printf("(3) Assigning aaphase_g to pair %d\n",j));
1293     pairs[j].aaphase_g = 2;
1294   }
1295 
1296   return Translation_get_codon(&initc,nt0,nt1,nt2);
1297 }
1298 #endif
1299 
1300 
1301 
1302 static int
assign_cdna_forward(int ncdna,struct Pair_T * pairs,int npairs,bool revcompp,int starti)1303 assign_cdna_forward (int ncdna, struct Pair_T *pairs, int npairs, bool revcompp, int starti) {
1304   struct Pair_T *pair;
1305   int i, nexti, j = 0;
1306   int codon = ' ';
1307 
1308   i = starti;
1309   while (i < npairs && pairs[i].cdna == ' ') {
1310     i++;
1311   }
1312   while (j < ncdna) {
1313     pair = &(pairs[i]);
1314     codon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
1315     debug2(Pair_dump_one(pair,true));
1316     debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
1317     i = nexti;
1318     j += 3;
1319   }
1320   return codon;
1321 }
1322 
1323 static void
terminate_cdna_forward(struct Pair_T * pairs,int npairs,bool revcompp,int starti)1324 terminate_cdna_forward (struct Pair_T *pairs, int npairs, bool revcompp, int starti) {
1325   struct Pair_T *pair;
1326   int i, nexti;
1327   char lastcodon = ' ';
1328 
1329   i = starti;
1330   while (i < npairs && pairs[i].cdna == ' ') {
1331     i++;
1332   }
1333   while (i <= npairs-3 && lastcodon != '*') {
1334     pair = &(pairs[i]);
1335     lastcodon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
1336     debug2(Pair_dump_one(pair,true));
1337     debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
1338     i = nexti;
1339   }
1340   return;
1341 }
1342 
1343 static int
assign_cdna_backward(int ncdna,struct Pair_T * pairs,bool revcompp,int starti)1344 assign_cdna_backward (int ncdna, struct Pair_T *pairs, bool revcompp, int starti) {
1345   struct Pair_T *pair;
1346   int i, nexti, j = 0;
1347   int codon = ' ';
1348 
1349   i = starti;
1350   while (i >= 0 && pairs[i].cdna == ' ') {
1351     --i;
1352   }
1353   while (j < ncdna) {
1354     pair = &(pairs[i]);
1355     codon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
1356     debug2(Pair_dump_one(pair,true));
1357     debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
1358     i = nexti;
1359     j += 3;
1360   }
1361   return codon;
1362 }
1363 
1364 static void
terminate_cdna_backward(struct Pair_T * pairs,bool revcompp,int starti)1365 terminate_cdna_backward (struct Pair_T *pairs, bool revcompp, int starti) {
1366   struct Pair_T *pair;
1367   int i, nexti;
1368   char lastcodon = ' ';
1369 
1370   i = starti;
1371   while (i >= 0 && pairs[i].cdna == ' ') {
1372     --i;
1373   }
1374 
1375   /* i > 1 is equivalent to i >= 2 */
1376   while (i > 1 && lastcodon != '*') {
1377     pair = &(pairs[i]);
1378     lastcodon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
1379     debug2(Pair_dump_one(pair,true));
1380     debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
1381     i = nexti;
1382   }
1383   return;
1384 }
1385 
1386 #ifdef PMAP
1387 static int
assign_genomic(int ngenomic,struct Pair_T * pairs,int npairs,int starti)1388 assign_genomic (int ngenomic, struct Pair_T *pairs, int npairs, int starti) {
1389   struct Pair_T *pair;
1390   int i, nexti, j = 0, codon;
1391 
1392   i = starti;
1393   while (j < ngenomic) {
1394     pair = &(pairs[i]);
1395     codon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
1396     debug2(Pair_dump_one(pair,true));
1397     debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
1398     i = nexti;
1399     j += 3;
1400   }
1401   return codon;
1402 }
1403 
1404 static void
terminate_genomic(struct Pair_T * pairs,int npairs,int starti)1405 terminate_genomic (struct Pair_T *pairs, int npairs, int starti) {
1406   struct Pair_T *pair;
1407   int i, nexti;
1408   char lastcodon = ' ';
1409 
1410   i = starti;
1411   while (i <= npairs - 3 && lastcodon != '*') {
1412     pair = &(pairs[i]);
1413     lastcodon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
1414     debug2(Pair_dump_one(pair,true));
1415     debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
1416     i = nexti;
1417   }
1418   return;
1419 }
1420 #endif
1421 
1422 
1423 
1424 #ifndef PMAP
1425 static void
mark_cdna_forward_strict(struct Pair_T * pairs,int npairs,bool revcompp,int starti,int endi)1426 mark_cdna_forward_strict (struct Pair_T *pairs, int npairs, bool revcompp, int starti, int endi) {
1427   struct Pair_T *pair;
1428   int i, nexti, ncdna, codon = ' ';
1429 
1430   debug2(printf("mark_cdna_forward_strict called with pairs #%d..%d\n",starti,endi));
1431 
1432   i = starti;
1433 
1434   /* Advance to same start as genomic */
1435   pair = &(pairs[i]);
1436   while (i < endi && pair->aaphase_g != 0) {
1437     i++;
1438     pair = &(pairs[i]);
1439   }
1440 
1441   while (i < npairs && codon != '*') {
1442     pair = &(pairs[i]);
1443     ncdna = count_cdna_forward_strict(&nexti,pairs,npairs,i);
1444     if (ncdna == 3) {
1445       codon = assign_cdna_forward(ncdna,pairs,npairs,revcompp,i);
1446     }
1447     i = nexti;
1448   }
1449 
1450   if (codon != '*') {
1451     terminate_cdna_forward(pairs,npairs,revcompp,i);
1452   }
1453 
1454   return;
1455 }
1456 #endif
1457 
1458 static void
mark_cdna_forward(struct Pair_T * pairs,int npairs,bool revcompp,int starti,int endi)1459 mark_cdna_forward (struct Pair_T *pairs, int npairs, bool revcompp, int starti, int endi) {
1460   struct Pair_T *pair;
1461   int i, nexti, nexti_alt, ncdna, ncdna_alt;
1462 
1463   debug2(printf("mark_cdna_forward called with pairs #%d..%d\n",starti,endi));
1464 
1465   i = starti;
1466   while (i < endi) {
1467     pair = &(pairs[i]);
1468     if (pair->aaphase_g != 0) {
1469       i++;
1470     } else {
1471       ncdna = count_cdna_forward(&nexti,pairs,i,endi);
1472       if (ncdna == 3) {
1473 	assign_cdna_forward(ncdna,pairs,npairs,revcompp,i);
1474       } else if ((ncdna % 3) == 0) {
1475 	debug2(printf("At %d, saw %d with mod == 0\n",pair->aapos,ncdna));
1476 	assign_cdna_forward(ncdna,pairs,npairs,revcompp,i);
1477       } else if (i + 2 > endi) {
1478 	debug2(printf("At %d, saw last codon: %d+2 > %d\n",pair->aapos,i,endi));
1479 	assign_cdna_forward(ncdna,pairs,npairs,revcompp,i);
1480       } else {
1481 	debug2(printf("At %d, saw %d with mod != 0\n",pair->aapos,ncdna));
1482 	ncdna_alt = count_cdna_forward_mod3(&nexti_alt,pairs,i,endi);
1483 
1484 	debug2(printf("  Alternate search yields %d\n",ncdna_alt));
1485 	if ((ncdna_alt % 3) == 0) {
1486 	  debug2(printf("  Using alternate search\n"));
1487 	  assign_cdna_forward(ncdna_alt,pairs,npairs,revcompp,i);
1488 	  nexti = nexti_alt;
1489 
1490 	} else if (ncdna < 3) {
1491 	  debug2(printf("  Skipping\n"));
1492 
1493 	} else {
1494 	  debug2(printf("  Using original count - 3 = %d\n",ncdna-3));
1495 	  assign_cdna_forward(ncdna-3,pairs,npairs,revcompp,i);
1496 	}
1497       }
1498       i = nexti;
1499     }
1500   }
1501 
1502   debug2(printf("Calling terminate_cdna_forward\n"));
1503   terminate_cdna_forward(pairs,npairs,revcompp,i);
1504 
1505   return;
1506 }
1507 
1508 #ifndef PMAP
1509 static void
mark_cdna_backward_strict(struct Pair_T * pairs,bool revcompp,int starti,int endi)1510 mark_cdna_backward_strict (struct Pair_T *pairs, bool revcompp, int starti, int endi) {
1511   struct Pair_T *pair;
1512   int i, nexti, ncdna, codon = ' ';
1513 
1514   debug2(printf("mark_cdna_backward_strict called with pairs #%d..%d\n",starti,endi));
1515 
1516   i = starti;
1517 
1518   /* Advance to same start as genomic */
1519   pair = &(pairs[i]);
1520   while (i > endi && pair->aaphase_g != 0) {
1521     i--;
1522     pair = &(pairs[i]);
1523   }
1524 
1525   while (i >= 0 && codon != '*') {
1526     pair = &(pairs[i]);
1527     ncdna = count_cdna_backward_strict(&nexti,pairs,i);
1528     if (ncdna == 3) {
1529       codon = assign_cdna_backward(ncdna,pairs,revcompp,i);
1530     }
1531     i = nexti;
1532   }
1533 
1534   if (codon != '*') {
1535     terminate_cdna_backward(pairs,revcompp,i);
1536   }
1537 
1538   return;
1539 }
1540 #endif
1541 
1542 static void
mark_cdna_backward(struct Pair_T * pairs,bool revcompp,int starti,int endi)1543 mark_cdna_backward (struct Pair_T *pairs, bool revcompp, int starti, int endi) {
1544   struct Pair_T *pair;
1545   int i, nexti, nexti_alt, ncdna, ncdna_alt;
1546 
1547   debug2(printf("mark_cdna_backward called with pairs #%d..%d\n",starti,endi));
1548 
1549   i = starti;
1550   while (i > endi) {
1551     pair = &(pairs[i]);
1552     if (pair->aaphase_g != 0) {
1553       i--;
1554     } else {
1555       ncdna = count_cdna_backward(&nexti,pairs,i,endi);
1556       if (ncdna == 3) {
1557 	assign_cdna_backward(ncdna,pairs,revcompp,i);
1558       } else if ((ncdna % 3) == 0) {
1559 	debug2(printf("At %d, saw %d with mod == 0\n",pair->aapos,ncdna));
1560 	assign_cdna_backward(ncdna,pairs,revcompp,i);
1561       } else if (i - 2 < endi) {
1562 	debug2(printf("At %d, saw last codon: %d-2 < %d\n",pair->aapos,i,endi));
1563 	assign_cdna_backward(ncdna,pairs,revcompp,i);
1564       } else {
1565 	debug2(printf("At %d, saw %d with mod != 0\n",pair->aapos,ncdna));
1566 	ncdna_alt = count_cdna_backward_mod3(&nexti_alt,pairs,i,endi);
1567 
1568 	debug2(printf("  Alternate search yields %d\n",ncdna_alt));
1569 	if ((ncdna_alt % 3) == 0) {
1570 	  debug2(printf("  Using alternate search\n"));
1571 	  assign_cdna_backward(ncdna_alt,pairs,revcompp,i);
1572 	  nexti = nexti_alt;
1573 
1574 	} else if (ncdna < 3) {
1575 	  debug2(printf("Skipping\n"));
1576 
1577 	} else {
1578 	  debug2(printf("  Using original count - 3 = %d\n",ncdna-3));
1579 	  assign_cdna_backward(ncdna-3,pairs,revcompp,i);
1580 	}
1581       }
1582       i = nexti;
1583     }
1584   }
1585 
1586   debug2(printf("Calling terminate_cdna_backward\n"));
1587   terminate_cdna_backward(pairs,revcompp,i);
1588 
1589   return;
1590 }
1591 
1592 
1593 #ifdef PMAP
1594 static void
mark_genomic_strict(struct Pair_T * pairs,int npairs,int starti,int endi)1595 mark_genomic_strict (struct Pair_T *pairs, int npairs, int starti, int endi) {
1596   struct Pair_T *pair;
1597   int i, nexti, ngenomic, codon = ' ';
1598 
1599   debug2(printf("mark_genomic_strict called with pairs #%d %d\n",starti,endi));
1600 
1601   i = starti;
1602 
1603   /* Advance to same start as cDNA */
1604   pair = &(pairs[i]);
1605   while (i < endi && pair->aaphase_e != 0) {
1606     i++;
1607     pair = &(pairs[i]);
1608   }
1609 
1610   while (i < npairs && codon != '*') {
1611     pair = &(pairs[i]);
1612     ngenomic = count_genomic_strict(&nexti,pairs,npairs,i);
1613     if (ngenomic == 3) {
1614       codon = assign_genomic(ngenomic,pairs,npairs,i);
1615     }
1616     i = nexti;
1617   }
1618 
1619   if (codon != '*') {
1620     terminate_genomic(pairs,npairs,i);
1621   }
1622 
1623   return;
1624 }
1625 
1626 static void
mark_genomic(struct Pair_T * pairs,int npairs,int starti,int endi)1627 mark_genomic (struct Pair_T *pairs, int npairs, int starti, int endi) {
1628   struct Pair_T *pair;
1629   int i, nexti, nexti_alt, ngenomic, ngenomic_alt;
1630 
1631   debug2(printf("mark_genomic called with pairs #%d %d\n",starti,endi));
1632 
1633   i = starti;
1634   while (i < endi) {
1635     pair = &(pairs[i]);
1636     if (pair->aaphase_e != 0) {
1637       i++;
1638     } else {
1639       ngenomic = count_genomic(&nexti,pairs,i,endi);
1640       if (ngenomic == 3) {
1641 	assign_genomic(ngenomic,pairs,npairs,i);
1642       } else if ((ngenomic % 3) == 0) {
1643 	debug2(printf("At %d, saw %d with mod == 0\n",pair->aapos,ngenomic));
1644 	assign_genomic(ngenomic,pairs,npairs,i);
1645       } else if (i + 2 > endi) {
1646 	debug2(printf("At %d, saw last codon: %d+2 > %d\n",pair->aapos,i,endi));
1647 	assign_genomic(ngenomic,pairs,npairs,i);
1648       } else {
1649 	debug2(printf("At %d, saw %d with mod != 0\n",pair->aapos,ngenomic));
1650 	ngenomic_alt = count_genomic_mod3(&nexti_alt,pairs,i,endi);
1651 
1652 	debug2(printf("  Alternate search yields %d\n",ngenomic_alt));
1653 	if ((ngenomic_alt % 3) == 0) {
1654 	  debug2(printf("  Using alternate search\n"));
1655 	  assign_genomic(ngenomic_alt,pairs,npairs,i);
1656 	  nexti = nexti_alt;
1657 
1658 	} else if (ngenomic < 3) {
1659 	  debug2(printf("Skipping\n"));
1660 
1661 	} else {
1662 	  debug2(printf("  Using original count - 3 = %d\n",ngenomic-3));
1663 	  assign_genomic(ngenomic-3,pairs,npairs,i);
1664 	}
1665       }
1666       i = nexti;
1667     }
1668   }
1669 
1670   terminate_genomic(pairs,npairs,i);
1671 
1672   return;
1673 }
1674 #endif
1675 
1676 
1677 
1678 #ifdef PMAP
1679 void
Translation_via_cdna(int * translation_leftpos,int * translation_rightpos,int * translation_length,int * relaastart,int * relaaend,struct Pair_T * pairs,int npairs,char * queryaaseq_ptr,bool strictp)1680 Translation_via_cdna (int *translation_leftpos, int *translation_rightpos, int *translation_length,
1681 		      int *relaastart, int *relaaend,
1682 		      struct Pair_T *pairs, int npairs, char *queryaaseq_ptr, bool strictp) {
1683   int translation_starti, translation_endi, i;
1684 
1685   /* Don't check for MIN_NPAIRS */
1686 
1687   for (i = 0; i < npairs; i++) {
1688     pairs[i].refquerypos = pairs[i].querypos;
1689     pairs[i].aa_g = pairs[i].aa_e = ' ';
1690   }
1691 
1692   translate_pairs_cdna(&translation_starti,&translation_endi,&(*translation_length),
1693 		       pairs,npairs,queryaaseq_ptr);
1694 
1695   *translation_leftpos = pairs[translation_starti].querypos;
1696   *translation_rightpos = pairs[translation_endi].querypos;
1697 
1698   debug(printf("Translation start = pair #%d (querypos %d)\n",translation_starti,*translation_leftpos));
1699   debug(printf("Translation end = pair #%d (querypos %d)\n",translation_endi,*translation_rightpos));
1700 
1701   /* Take care of genomic side */
1702 
1703   *relaastart = pairs[translation_starti].aapos;
1704   *relaaend = pairs[translation_endi].aapos;
1705 
1706   if (strictp == true) {
1707     mark_genomic_strict(pairs,npairs,translation_starti,translation_endi);
1708   } else {
1709     mark_genomic(pairs,npairs,translation_starti,translation_endi);
1710   }
1711 
1712   return;
1713 }
1714 #else
1715 void
Translation_via_genomic(int * translation_leftpos,int * translation_rightpos,int * translation_length,int * relaastart,int * relaaend,struct Pair_T * pairs,int npairs,bool backwardp,bool revcompp,bool fulllengthp,int cds_startpos,int querylength,bool strictp)1716 Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, int *translation_length,
1717 			 int *relaastart, int *relaaend,
1718 			 struct Pair_T *pairs, int npairs, bool backwardp, bool revcompp, bool fulllengthp,
1719 			 int cds_startpos, int querylength, bool strictp) {
1720   char lastaa;
1721   struct T *translation;
1722   bool endstopp;
1723   int i, j, aapos = 0;
1724   Frame_T translation_frame;
1725   int translation_starti = 0, translation_endi = 0, phase;
1726   int minpos, maxpos;
1727   bool cds_start_p = false;
1728 
1729   if (npairs < MIN_NPAIRS) {
1730     *translation_leftpos = 0;
1731     *translation_rightpos = 0;
1732     *translation_length = 0;
1733     *relaastart = 0;
1734     *relaaend = 0;
1735     return;
1736   }
1737 
1738   if (backwardp == false) {
1739     translation = translate_pairs_forward(pairs,npairs,revcompp);
1740     if (cds_startpos > 0) {
1741       find_bounds_forward_fromstart(&translation_frame,&translation_starti,
1742 				    &translation_endi,&(*translation_length),&endstopp,
1743 				    translation,npairs,cds_startpos);
1744     } else {
1745       find_bounds_forward(&translation_frame,&translation_starti,
1746 			  &translation_endi,&(*translation_length),&endstopp,
1747 			  translation,npairs,fulllengthp);
1748       if (fulllengthp == true && *translation_length == 0) {
1749 	/* fprintf(stderr,"No full length gene found.  Assuming partial length.\n"); */
1750 	find_bounds_forward(&translation_frame,&translation_starti,
1751 			    &translation_endi,&(*translation_length),&endstopp,
1752 			    translation,npairs,/*fulllengthp*/false);
1753       }
1754     }
1755 
1756   } else {
1757     translation = translate_pairs_backward(pairs,npairs,revcompp);
1758     if (cds_startpos > 0) {
1759       find_bounds_backward_fromstart(&translation_frame,&translation_starti,
1760 				     &translation_endi,&(*translation_length),&endstopp,
1761 				     translation,npairs,cds_startpos);
1762     } else {
1763       find_bounds_backward(&translation_frame,&translation_starti,
1764 			   &translation_endi,&(*translation_length),&endstopp,
1765 			   translation,npairs,fulllengthp);
1766       if (fulllengthp == true && *translation_length == 0) {
1767 	/* fprintf(stderr,"No full length gene found.  Assuming partial length.\n"); */
1768 	find_bounds_backward(&translation_frame,&translation_starti,
1769 			     &translation_endi,&(*translation_length),&endstopp,
1770 			     translation,npairs,/*fulllengthp*/false);
1771       }
1772     }
1773   }
1774   /* debug(printf("ref:\n")); */
1775   debug(Translation_dump(pairs,translation,npairs));
1776 
1777   for (i = 0; i < npairs; i++) {
1778     pairs[i].refquerypos = pairs[i].querypos;
1779     pairs[i].aa_g = pairs[i].aa_e = ' ';
1780   }
1781 
1782   if (translation_starti < 0 || translation_endi < 0) {
1783     *translation_leftpos = *translation_rightpos = -1;
1784     *relaastart = *relaaend = -1;
1785   } else {
1786     minpos = pairs[npairs-1].querypos;
1787     maxpos = pairs[0].querypos;
1788     if (backwardp == false) {
1789       /* forward */
1790       debug(printf("Translation is forward pairs %d..%d\n",translation_starti,translation_endi));
1791       for (i = translation_starti; i <= translation_endi; i++) {
1792 	/* Avoid problems with genome position advancing prematurely */
1793 	if (pairs[i].genome != ' ') {
1794 	  if (translation[i].frame == translation_frame) {
1795 	    if ((pairs[i].aa_g = translation[i].aa) != ' ') {
1796 	      if (pairs[i].querypos < minpos) {
1797 		minpos = pairs[i].querypos;
1798 	      }
1799 	      if (pairs[i].querypos > maxpos) {
1800 		maxpos = pairs[i].querypos;
1801 	      }
1802 	      lastaa = pairs[i].aa_g;
1803 	      aapos++;
1804 	      debug4(printf("(4) Assigning aaphase_g of zero to %d\n",i));
1805 	      pairs[i].aaphase_g = 0;
1806 	      cds_start_p = true;
1807 	    }
1808 
1809 	  } else if (cds_start_p == false) {
1810 	    /* Don't assign aaphase_g before cds */
1811 
1812 	  } else if (translation[i].frame != NOFRAME) {
1813 	    if ((phase = translation_frame - translation[i].frame) < 0) {
1814 	      phase += 3;
1815 	    }
1816 	    debug4(printf("(5) Assigning aaphase_g of %d to %d\n",phase,i));
1817 	    pairs[i].aaphase_g = phase;
1818 	  }
1819 	}
1820 	pairs[i].aapos = aapos;
1821       }
1822       *translation_leftpos = minpos;
1823       if ((*translation_rightpos = maxpos + 2) >= querylength) {
1824 	*translation_rightpos = querylength - 1;
1825       }
1826       if (lastaa == '*') {
1827 	*translation_length -= 1;
1828       }
1829 
1830 #if 0
1831       if (i < npairs) {
1832 	*translation_rightpos += 1;
1833 	pairs[i++].aapos = aapos;
1834       }
1835       if (i < npairs) {
1836 	*translation_rightpos += 1;
1837 	pairs[i].aapos = aapos;
1838       }
1839 #endif
1840 
1841       j = i;
1842       while (j < npairs && pairs[j].genome == ' ') {
1843 	j++;
1844       }
1845       if (j < npairs) {
1846 	debug4(printf("(6) Assigning aaphase_g of one to %d\n",j));
1847 	pairs[j++].aaphase_g = 1;
1848       }
1849       while (j < npairs && pairs[j].genome == ' ') {
1850 	j++;
1851       }
1852       if (j < npairs) {
1853 	debug4(printf("(7) Assigning aaphase_g of two to %d\n",j));
1854 	pairs[j].aaphase_g = 2;
1855       }
1856 
1857       /* Fill in aapos to the end */
1858       for ( ; i < npairs; i++) {
1859 	pairs[i].aapos = aapos;
1860       }
1861 
1862     } else {
1863       /* backward */
1864       debug(printf("Translation is backward pairs %d..%d\n",translation_starti,translation_endi));
1865 
1866       for (i = translation_starti; i >= translation_endi; --i) {
1867 	/* Avoid problems with genome position advancing prematurely */
1868 	if (pairs[i].genome != ' ') {
1869 	  if (translation[i].frame == translation_frame) {
1870 	    if ((pairs[i].aa_g = translation[i].aa) != ' ') {
1871 	      if (pairs[i].querypos < minpos) {
1872 		minpos = pairs[i].querypos;
1873 	      }
1874 	      if (pairs[i].querypos > maxpos) {
1875 		maxpos = pairs[i].querypos;
1876 	      }
1877 	      lastaa = pairs[i].aa_g;
1878 	      aapos++;
1879 	      debug4(printf("(8) Assigning aaphase_g of zero to %d\n",i));
1880 	      pairs[i].aaphase_g = 0;
1881 	      cds_start_p = true;
1882 	    }
1883 
1884 	  } else if (cds_start_p == false) {
1885 	    /* Don't assign aaphase_g before cds */
1886 
1887 	  } else if (translation[i].frame != NOFRAME) {
1888 	    if ((phase = translation_frame - translation[i].frame) < 0) {
1889 	      phase += 3;
1890 	    }
1891 	    debug4(printf("(9) Assigning aaphase_g of %d to %d\n",phase,i));
1892 	    pairs[i].aaphase_g = phase;
1893 	  }
1894 	}
1895 	pairs[i].aapos = aapos;
1896       }
1897       if ((*translation_leftpos = minpos - 2) < 0) {
1898 	*translation_leftpos = 0;
1899       }
1900       *translation_rightpos = maxpos;
1901       if (lastaa == '*') {
1902 	*translation_length -= 1;
1903       }
1904 
1905 #if 0
1906       if (i >= 0) {
1907 	*translation_leftpos -= 1;
1908 	pairs[i--].aapos = aapos;
1909       }
1910       if (i >= 0) {
1911 	*translation_leftpos -= 1;
1912 	pairs[i].aapos = aapos;
1913       }
1914 #endif
1915 
1916       j = i;
1917       while (j >= 0 && pairs[j].genome == ' ') {
1918 	j--;
1919       }
1920       if (j >= 0) {
1921 	debug4(printf("(10) Assigning aaphase_g of one to %d\n",j));
1922 	pairs[j--].aaphase_g = 1;
1923       }
1924       while (j >= 0 && pairs[j].genome == ' ') {
1925 	j--;
1926       }
1927       if (j >= 0) {
1928 	debug4(printf("(11) Assigning aaphase_g of two to %d\n",j));
1929 	pairs[j].aaphase_g = 2;
1930       }
1931 
1932       /* Fill in aapos to the end */
1933       for ( ; i >= 0; --i) {
1934 	pairs[i].aapos = aapos;
1935       }
1936     }
1937 
1938     /* Take care of cDNA side */
1939 
1940     *relaastart = pairs[translation_starti].aapos;
1941     *relaaend = pairs[translation_endi].aapos;
1942 
1943     debug(printf("Translation start = pair #%d (querypos %d, aa #%d)\n",
1944 		 translation_starti,*translation_rightpos,*relaaend));
1945     debug(printf("Translation end = pair #%d (querypos %d, aa#%d)\n",
1946 		 translation_endi,*translation_leftpos,*relaastart));
1947     debug(printf("Translation length = %d aa\n",*translation_length));
1948 
1949     if (strictp == true) {
1950       if (revcompp == false) {
1951 	mark_cdna_forward_strict(pairs,npairs,revcompp,translation_starti,translation_endi);
1952       } else {
1953 	mark_cdna_backward_strict(pairs,revcompp,translation_starti,translation_endi);
1954       }
1955     } else {
1956       if (revcompp == false) {
1957 	mark_cdna_forward(pairs,npairs,revcompp,translation_starti,translation_endi);
1958       } else {
1959 	mark_cdna_backward(pairs,revcompp,translation_starti,translation_endi);
1960       }
1961     }
1962   }
1963 
1964   FREE(translation);
1965 
1966   return;
1967 }
1968 #endif
1969 
1970 /* Pairs are always ordered by ascending cDNA position and genomic
1971    position.  For Crick strand matches, the genomic position needs to
1972    be standardized. */
1973 static void
bound_via_reference(int * start,int * end,struct Pair_T * pairs,int npairs,bool watsonp,struct Pair_T * refpairs,int nrefpairs,bool refwatsonp)1974 bound_via_reference (int *start, int *end, struct Pair_T *pairs, int npairs, bool watsonp,
1975 		     struct Pair_T *refpairs, int nrefpairs, bool refwatsonp) {
1976   int i, j, aapos = 0;
1977   int refquerypos, genomepos, refgenomepos;
1978 
1979   debug(Pair_dump_array(refpairs,nrefpairs,false));
1980 
1981   *start = *end = -1;
1982   if (refwatsonp == true && watsonp == true) {
1983     debug1(printf("refwatsonp == true && watsonp == true\n"));
1984     i = 0;
1985     j = 0;
1986     while (i < nrefpairs && j < npairs) {
1987       refquerypos = refpairs[i].querypos;
1988       refgenomepos = refpairs[i].genomepos;
1989       genomepos = pairs[j].genomepos;
1990       debug(printf("Comparing ref %d (%c) with %d\n",refgenomepos,refpairs[i].aa_e,genomepos));
1991       if (pairs[j].genome == ' ') {
1992 	debug(printf("Not incrementing aapos %d\n",aapos));
1993 	pairs[j].refquerypos = refquerypos;
1994 	pairs[j++].aapos = aapos;
1995       } else if (refgenomepos < genomepos) {
1996 	refquerypos = refpairs[i].querypos;
1997 	aapos = refpairs[i++].aapos;
1998       } else if (genomepos < refgenomepos) {
1999 	pairs[j].refquerypos = refquerypos;
2000 	pairs[j++].aapos = aapos;
2001       } else {
2002 	debug(printf("looking at refpairs %d, genomepos %d (%c)\n",
2003 		     i,refgenomepos,refpairs[i].aa_e));
2004 	if (refpairs[i].aa_e != ' ') {
2005 	  if (*start < 0) {
2006 	    *start = j;
2007 	  }
2008 	  *end = j;
2009 	}
2010 	pairs[j].aaphase_g = refpairs[i].aaphase_g;
2011 	refquerypos = refpairs[i].querypos;
2012 	aapos = refpairs[i++].aapos;
2013 	pairs[j].refquerypos = refquerypos;
2014 	pairs[j++].aapos = aapos;
2015       }
2016     }
2017 
2018     /*
2019     if (*end < npairs-1) {
2020       pairs[++*end].aapos = aapos;
2021     }
2022     if (*end < npairs-1) {
2023       pairs[++*end].aapos = aapos;
2024     }
2025     */
2026 
2027   } else if (refwatsonp == true && watsonp == false) {
2028     debug1(printf("refwatsonp == true && watsonp == false\n"));
2029     i = 0;
2030     j = npairs-1;
2031     while (i < nrefpairs && j >= 0) {
2032       refquerypos = refpairs[i].querypos;
2033       refgenomepos = refpairs[i].genomepos;
2034       genomepos = pairs[j].genomepos;
2035       debug(printf("Comparing ref %d (%c) with %d\n",refgenomepos,refpairs[i].aa_e,genomepos));
2036       if (pairs[j].genome == ' ') {
2037 	pairs[j].refquerypos = refquerypos;
2038 	pairs[j--].aapos = aapos;
2039       } else if (refgenomepos < genomepos) {
2040 	refquerypos = refpairs[i].querypos;
2041 	aapos = refpairs[i++].aapos;
2042       } else if (genomepos < refgenomepos) {
2043 	pairs[j].refquerypos = refquerypos;
2044 	pairs[j--].aapos = aapos;
2045       } else {
2046 	if (refpairs[i].aa_e != ' ') {
2047 	  if (*end < 0) {
2048 	    *end = j;
2049 	    /*
2050 	    aapos = refpairs[i].aapos;
2051 	    if (*end < npairs-1) {
2052 	      pairs[++*end].aapos = aapos;
2053 	    }
2054 	    if (*end < npairs-1) {
2055 	      pairs[++*end].aapos = aapos;
2056 	    }
2057 	    */
2058 	  }
2059 	  *start = j;
2060 	}
2061 	pairs[j].aaphase_g = refpairs[i].aaphase_g;
2062 	refquerypos = refpairs[i].querypos;
2063 	aapos = refpairs[i++].aapos;
2064 	pairs[j].refquerypos = refquerypos;
2065 	pairs[j--].aapos = aapos;
2066       }
2067     }
2068 
2069   } else if (refwatsonp == false && watsonp == true) {
2070     debug1(printf("refwatsonp == false && watsonp == true\n"));
2071     i = nrefpairs-1;
2072     j = 0;
2073     while (i >= 0 && j < npairs) {
2074       refquerypos = refpairs[i].querypos;
2075       refgenomepos = refpairs[i].genomepos;
2076       genomepos = pairs[j].genomepos;
2077       debug(printf("Comparing ref %d (%c) with %d\n",refgenomepos,refpairs[i].aa_e,genomepos));
2078       if (pairs[j].genome == ' ') {
2079 	pairs[j].refquerypos = refquerypos;
2080 	pairs[j++].aapos = aapos;
2081       } else if (refgenomepos < genomepos) {
2082 	refquerypos = refpairs[i].querypos;
2083 	aapos = refpairs[i--].aapos;
2084       } else if (genomepos < refgenomepos) {
2085 	pairs[j].refquerypos = refquerypos;
2086 	pairs[j++].aapos = aapos;
2087       } else {
2088 	if (refpairs[i].aa_e != ' ') {
2089 	  if (*start < 0) {
2090 	    *start = j;
2091 	  }
2092 	  *end = j;
2093 	}
2094 	pairs[j].aaphase_g = refpairs[i].aaphase_g;
2095 	refquerypos = refpairs[i].querypos;
2096 	aapos = refpairs[i--].aapos;
2097 	pairs[j].refquerypos = refquerypos;
2098 	pairs[j++].aapos = aapos;
2099       }
2100     }
2101 
2102     /*
2103     if (*end < npairs-1) {
2104       pairs[++*end].aapos = aapos;
2105     }
2106     if (*end < npairs-1) {
2107       pairs[++*end].aapos = aapos;
2108     }
2109     */
2110 
2111   } else {
2112     debug1(printf("refwatsonp == false && watsonp == false\n"));
2113     i = nrefpairs-1;
2114     j = npairs-1;
2115     while (i >= 0 && j >= 0) {
2116       refquerypos = refpairs[i].querypos;
2117       refgenomepos = refpairs[i].genomepos;
2118       genomepos = pairs[j].genomepos;
2119       debug(printf("Comparing ref %d (%c) with %d\n",refgenomepos,refpairs[i].aa_e,genomepos));
2120       if (pairs[j].genome == ' ') {
2121 	pairs[j].refquerypos = refquerypos;
2122 	pairs[j--].aapos = aapos;
2123       } else if (refgenomepos < genomepos) {
2124 	refquerypos = refpairs[i].querypos;
2125 	aapos = refpairs[i--].aapos;
2126       } else if (genomepos < refgenomepos) {
2127 	pairs[j].refquerypos = refquerypos;
2128 	pairs[j--].aapos = aapos;
2129       } else {
2130 	if (refpairs[i].aa_e != ' ') {
2131 	  if (*end < 0) {
2132 	    *end = j;
2133 	    /*
2134 	    aapos = refpairs[i].aapos;
2135 	    if (*end < npairs-1) {
2136 	      pairs[++*end].aapos = aapos;
2137 	    }
2138 	    if (*end < npairs-1) {
2139 	      pairs[++*end].aapos = aapos;
2140 	    }
2141 	    */
2142 	  }
2143 	  *start = j;
2144 	}
2145 	pairs[j].aaphase_g = refpairs[i].aaphase_g;
2146 	refquerypos = refpairs[i].querypos;
2147 	aapos = refpairs[i--].aapos;
2148 	pairs[j].refquerypos = refquerypos;
2149 	pairs[j--].aapos = aapos;
2150       }
2151     }
2152   }
2153 
2154   debug(printf("Bound by reference: start = %d, end = %d\n",*start,*end));
2155   debug(Pair_dump_array(pairs,npairs,false));
2156 
2157   return;
2158 }
2159 
2160 
2161 void
Translation_via_reference(int * relaastart,int * relaaend,struct Pair_T * pairs,int npairs,bool watsonp,bool backwardp,bool revcompp,struct Pair_T * refpairs,int nrefpairs,bool refwatsonp)2162 Translation_via_reference (int *relaastart, int *relaaend,
2163 			   struct Pair_T *pairs, int npairs, bool watsonp, bool backwardp, bool revcompp,
2164 			   struct Pair_T *refpairs, int nrefpairs, bool refwatsonp) {
2165   struct T *translation;
2166   int start, end, i;
2167 
2168   if (npairs < MIN_NPAIRS) {
2169     *relaastart = 0;
2170     *relaaend = 0;
2171     return;
2172   }
2173 
2174   for (i = 0; i < npairs; i++) {
2175     pairs[i].aa_g = pairs[i].aa_e = ' ';
2176   }
2177 
2178   debug2(printf("Translation_via_reference called with backwardp = %d\n",backwardp));
2179   bound_via_reference(&start,&end,pairs,npairs,watsonp,refpairs,nrefpairs,refwatsonp);
2180 
2181   if (start < 0 || end < 0) {
2182     *relaastart = *relaaend = -1;
2183   } else {
2184     *relaastart = pairs[start].aapos;
2185     *relaaend = pairs[end].aapos;
2186 
2187     if (backwardp == false) {
2188       translation = translate_pairs_forward(pairs,npairs,revcompp);
2189     } else {
2190       translation = translate_pairs_backward(pairs,npairs,revcompp);
2191     }
2192 
2193     for (i = 0; i < npairs; i++) {
2194       if (pairs[i].aaphase_g == 0) {
2195 	pairs[i].aa_g = translation[i].aa;
2196       } else {
2197 	pairs[i].aa_g = ' ';
2198       }
2199       pairs[i].aa_e = ' ';
2200     }
2201 
2202     debug2(printf("Bounding pairs #%d..%d to yield amino acid coordinates %d (%c) to %d (%c)\n",
2203 		  start,end,*relaastart,pairs[start].aa_g,*relaaend,pairs[end].aa_g));
2204 
2205     if (backwardp == false) {
2206       mark_cdna_forward(pairs,npairs,revcompp,start,end);
2207     } else {
2208       /* Note that we need to flip start and end here */
2209       mark_cdna_backward(pairs,revcompp,end,start);
2210     }
2211 
2212     debug1(Translation_dump(pairs,translation,npairs));
2213     FREE(translation);
2214 
2215   }
2216 
2217   return;
2218 }
2219 
2220 #if 0
2221 static char
2222 lookup_aa (int aapos, struct Pair_T *refpairs, int nrefpairs) {
2223   int i;
2224 
2225   for (i = 0; i < nrefpairs; i++) {
2226     if (refpairs[i].aapos == aapos && refpairs[i].aaphase_g == 0) {
2227       return refpairs[i].aa_g;
2228     }
2229   }
2230   return 'X';
2231 }
2232 #endif
2233 
2234 static int
next_aapos_fwd(struct Pair_T * pairs,int i,int npairs,int aapos)2235 next_aapos_fwd (struct Pair_T *pairs, int i, int npairs, int aapos) {
2236   struct Pair_T *this;
2237 
2238   this = &(pairs[i]);
2239   while (i < npairs && this->aapos == aapos) {
2240     this++;
2241     i++;
2242   }
2243 
2244   /* Advance to next amino acid on query side, since aapos assures
2245      us we have reached it on genome side */
2246   while (i < npairs && this->aa_e == ' ') {
2247     this++;
2248     i++;
2249   }
2250 
2251   return i;
2252 }
2253 
2254 static int
next_aapos_rev(struct Pair_T * pairs,int i,int aapos)2255 next_aapos_rev (struct Pair_T *pairs, int i, int aapos) {
2256   struct Pair_T *this;
2257 
2258   this = &(pairs[i]);
2259   while (i >= 0 && this->aapos == aapos) {
2260     --this;
2261     --i;
2262   }
2263 
2264   /* Advance to next amino acid on query side, since aapos assures
2265      us we have reached it on genome side */
2266   while (i >= 0 && this->aa_e == ' ') {
2267     --this;
2268     --i;
2269   }
2270 
2271   return i;
2272 }
2273 
2274 
2275 #define MAXMUT 100
2276 
2277 static void
fill_aa_fwd(int * strlen_g,int * strlen_c,int * netchars,char * aa_genomicseg,char * aa_queryseq,char * nt_genomicseg,char * nt_queryseq,struct Pair_T * start,struct Pair_T * end)2278 fill_aa_fwd (int *strlen_g, int *strlen_c, int *netchars, char *aa_genomicseg, char *aa_queryseq,
2279 	     char *nt_genomicseg, char *nt_queryseq, struct Pair_T *start, struct Pair_T *end) {
2280   int k1 = 0, k2 = 0, i1 = 0, i2 = 0;
2281   struct Pair_T *this;
2282 
2283   *netchars = 0;
2284   for (this = start; this <= end && i1 < MAXMUT && k1 < MAXMUT; this++) {
2285     if (this->gapp == false) {
2286       if (this->genome != ' ') {
2287 	nt_genomicseg[i1++] = uppercaseCode[(int) this->genome];
2288       } else {
2289 	*netchars += 1;
2290       }
2291       if (this->aa_g != ' ') {
2292 	aa_genomicseg[k1++] = this->aa_g;
2293       }
2294     }
2295   }
2296 
2297   for (this = start; this <= end && i2 < MAXMUT && k2 < MAXMUT; this++) {
2298     if (this->gapp == false) {
2299       if (this->cdna != ' ') {
2300 	nt_queryseq[i2++] = uppercaseCode[(int) this->cdna];
2301       } else {
2302 	*netchars -= 1;
2303       }
2304       if (this->aa_e != ' ') {
2305 	aa_queryseq[k2++] = this->aa_e;
2306       }
2307     }
2308   }
2309 
2310   if (i1 >= MAXMUT || k1 >= MAXMUT || i2 >= MAXMUT || k2 >= MAXMUT) {
2311     i1 = i2 = k1 = k2 = 0;
2312   }
2313   nt_genomicseg[i1] = '\0';
2314   aa_genomicseg[k1] = '\0';
2315   nt_queryseq[i2] = '\0';
2316   aa_queryseq[k2] = '\0';
2317 
2318   *strlen_g = k1;
2319   *strlen_c = k2;
2320 
2321   return;
2322 }
2323 
2324 static void
fill_aa_rev(int * strlen_g,int * strlen_c,int * netchars,char * aa_genomicseg,char * aa_queryseq,char * nt_genomicseg,char * nt_queryseq,struct Pair_T * start,struct Pair_T * end)2325 fill_aa_rev (int *strlen_g, int *strlen_c, int *netchars, char *aa_genomicseg, char *aa_queryseq,
2326 	     char *nt_genomicseg, char *nt_queryseq, struct Pair_T *start, struct Pair_T *end) {
2327   int k1 = 0, k2 = 0, i1 = 0, i2 = 0;
2328   struct Pair_T *this;
2329 
2330   *netchars = 0;
2331   for (this = end; this >= start && i1 < MAXMUT && k1 < MAXMUT; --this) {
2332     if (this->gapp == false) {
2333       if (this->genome != ' ') {
2334 	nt_genomicseg[i1++] = uppercaseCode[(int) this->genome];
2335       } else {
2336 	*netchars += 1;
2337       }
2338       if (this->aa_g != ' ') {
2339 	aa_genomicseg[k1++] = this->aa_g;
2340       }
2341     }
2342   }
2343 
2344   for (this = end; this >= start && i2 < MAXMUT && k2 < MAXMUT; --this) {
2345     if (this->gapp == false) {
2346       if (this->cdna != ' ') {
2347 	nt_queryseq[i2++] = uppercaseCode[(int) this->cdna];
2348       } else {
2349 	*netchars -= 1;
2350       }
2351       if (this->aa_e != ' ') {
2352 	aa_queryseq[k2++] = this->aa_e;
2353       }
2354     }
2355   }
2356 
2357   if (i1 >= MAXMUT || k1 >= MAXMUT || i2 >= MAXMUT || k2 >= MAXMUT) {
2358     i1 = i2 = k1 = k2 = 0;
2359   }
2360   nt_genomicseg[i1] = '\0';
2361   aa_genomicseg[k1] = '\0';
2362   nt_queryseq[i2] = '\0';
2363   aa_queryseq[k2] = '\0';
2364 
2365   *strlen_g = k1;
2366   *strlen_c = k2;
2367 
2368   return;
2369 }
2370 
2371 
2372 static void
print_mutation(Filestring_T fp,bool * printp,int aapos,int strlen_g,int strlen_c,int refquerypos,char * aa_genomicseg,char * aa_queryseq)2373 print_mutation (Filestring_T fp, bool *printp, int aapos, int strlen_g, int strlen_c, int refquerypos,
2374 		char *aa_genomicseg, char *aa_queryseq) {
2375   bool print_refquerypos_p = true;
2376 #if 0
2377   bool print_nt_p = true;
2378 #endif
2379 
2380   if (strlen_g > strlen_c) {
2381     if (*printp == true) FPRINTF(fp,", "); else *printp = true;
2382     if (aa_genomicseg[0] == aa_queryseq[0]) {
2383       FPRINTF(fp,"del%s%d%s ",&(aa_genomicseg[1]),aapos+1,&(aa_queryseq[1]));
2384       refquerypos += 3;
2385     } else {
2386       FPRINTF(fp,"del%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
2387     }
2388   } else if (strlen_g < strlen_c) {
2389     if (*printp == true) FPRINTF(fp,", "); else *printp = true;
2390     if (strlen_c - strlen_g > 4) {
2391       FPRINTF(fp,"ins%d+%daa ",aapos,strlen_c-strlen_g);
2392 #if 0
2393       print_nt_p = false;
2394 #endif
2395     } else if (aa_genomicseg[0] == aa_queryseq[0]) {
2396       FPRINTF(fp,"ins%s%d%s ",&(aa_genomicseg[1]),aapos,&(aa_queryseq[1]));
2397     } else {
2398       FPRINTF(fp,"ins%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
2399     }
2400   } else if (aa_genomicseg[0] == 'X' || aa_queryseq[0] == 'X') {
2401 #if 0
2402     print_nt_p = false;
2403 #endif
2404     print_refquerypos_p = false;
2405   } else {
2406     if (*printp == true) FPRINTF(fp,", "); else *printp = true;
2407     FPRINTF(fp,"%s%d%s ",aa_genomicseg,aapos,aa_queryseq);
2408   }
2409 #if 0
2410   if (print_nt_p == true) {
2411     FPRINTF(fp,"(%s>%s) ",nt_genomicseg,nt_queryseq);
2412   }
2413 #endif
2414   if (print_refquerypos_p == true) {
2415 #ifdef PMAP
2416     FPRINTF(fp,"[%d]",refquerypos+2);
2417 #else
2418     FPRINTF(fp,"[%d]",refquerypos);
2419 #endif
2420   }
2421 
2422   return;
2423 }
2424 
2425 static void
print_large_deletion(Filestring_T fp,bool * printp,int lastaapos,int nextaapos,int refquerypos)2426 print_large_deletion (Filestring_T fp, bool *printp, int lastaapos, int nextaapos, int refquerypos) {
2427 
2428   if (*printp == true) FPRINTF(fp,", "); else *printp = true;
2429   FPRINTF(fp,"del%d-%daa ",lastaapos+1,nextaapos-lastaapos-1);
2430   FPRINTF(fp,"[%d]",refquerypos+3);
2431 
2432   return;
2433 }
2434 
2435 
2436 void
Translation_print_comparison(Filestring_T fp,struct Pair_T * pairs,int npairs,int relaastart,int relaaend)2437 Translation_print_comparison (Filestring_T fp, struct Pair_T *pairs, int npairs,
2438 			      int relaastart, int relaaend) {
2439   int i, j;
2440   int aapos, strlen_g, strlen_c, netchars;
2441   bool printp = false;
2442   char nt_genomicseg[MAXMUT], aa_genomicseg[MAXMUT], nt_queryseq[MAXMUT], aa_queryseq[MAXMUT];
2443 
2444   FPRINTF(fp,"    Amino acid changes: ");
2445 
2446   if (relaastart < relaaend) {
2447     if ((aapos = pairs[i=0].aapos) == 0) {
2448       i = next_aapos_fwd(pairs,0,npairs,0);
2449     }
2450     while (i < npairs) {
2451       aapos = pairs[i].aapos;
2452       j = next_aapos_fwd(pairs,i,npairs,aapos);
2453       if (pairs[i].aa_g != ' ' && pairs[i].aa_e != ' ') {
2454 	/* not a frameshift */
2455 	fill_aa_fwd(&strlen_g,&strlen_c,&netchars,aa_genomicseg,aa_queryseq,nt_genomicseg,nt_queryseq,
2456 		    &(pairs[i]),&(pairs[j-1]));
2457 	if (strcmp(aa_genomicseg,aa_queryseq) && strcmp(nt_genomicseg,nt_queryseq)) {
2458 	  if (netchars % 3 == 0 || netchars > 12 || netchars < -12) {
2459 	    print_mutation(fp,&printp,aapos,strlen_g,strlen_c,pairs[i].refquerypos,
2460 			   aa_genomicseg,aa_queryseq);
2461 	  }
2462 	} else if (j < npairs && pairs[j].aapos - aapos > 4) {
2463 	  print_large_deletion(fp,&printp,aapos,pairs[j].aapos,pairs[i].refquerypos);
2464 	}
2465       }
2466       i = j;
2467     }
2468 
2469   } else {
2470     if ((aapos = pairs[i=npairs-1].aapos) == 0) {
2471       i = next_aapos_rev(pairs,/*i*/0,/*aapos*/0);
2472     }
2473     while (i >= 0) {
2474       aapos = pairs[i].aapos;
2475       j = next_aapos_rev(pairs,i,aapos);
2476       if (pairs[i].aa_g != ' ' && pairs[i].aa_e != ' ') {
2477 	/* not a frameshift */
2478 	fill_aa_rev(&strlen_g,&strlen_c,&netchars,aa_genomicseg,aa_queryseq,nt_genomicseg,nt_queryseq,
2479 		    &(pairs[i]),&(pairs[j+1]));
2480 	if (strcmp(aa_genomicseg,aa_queryseq) && strcmp(nt_genomicseg,nt_queryseq)) {
2481 	  if (netchars % 3 == 0 || netchars > 12 || netchars < -12) {
2482 	    print_mutation(fp,&printp,aapos,strlen_g,strlen_c,pairs[i].refquerypos,
2483 			   aa_genomicseg,aa_queryseq);
2484 	  }
2485 	} else if (j >= 0 && pairs[j].aapos - aapos > 4) {
2486 	  print_large_deletion(fp,&printp,aapos,pairs[j].aapos,pairs[i].refquerypos);
2487 	}
2488       }
2489       i = j;
2490     }
2491   }
2492 
2493   FPRINTF(fp,"\n");
2494 
2495   return;
2496 }
2497 
2498 
2499