1 /* $Id: dropfz2.c 1280 2014-08-21 00:47:55Z wrp $ */
2 /* $Revision: 1280 $  */
3 
4 /* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
5    Vistors of the University of Virginia */
6 
7 /* Licensed under the Apache License, Version 2.0 (the "License");
8    you may not use this file except in compliance with the License.
9    You may obtain a copy of the License at
10 
11    http://www.apache.org/licenses/LICENSE-2.0
12 
13    Unless required by applicable law or agreed to in writing,
14    software distributed under this License is distributed on an "AS
15    IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
16    express or implied.  See the License for the specific language
17    governing permissions and limitations under the License.
18 */
19 
20 /* 18-Sept-2006 - removed static global variables for alignment */
21 
22 /* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
23 
24 /* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
25 
26 /* implements an improved version of the fasty algorithm, see:
27 
28    W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
29    DNA sequences with protein sequences" Genomics 46:24-36
30 
31 */
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <math.h>
36 #include <ctype.h>
37 
38 #include "defs.h"
39 #include "param.h"
40 #define XTERNAL
41 #include "upam.h"
42 #include "uascii.h"
43 
44 #define NT_N 16
45 
46 /* globals for fasta */
47 #define MAXWINDOW 64
48 
49 #ifndef MAXSAV
50 #define MAXSAV 10
51 #endif
52 
53 #ifndef ALLOCN0
54 static char *verstr="3.8 June 2014";
55 #else
56 static char *verstr="3.8an0 Jul 2014";
57 #endif
58 
59 struct dstruct		/* diagonal structure for saving current run */
60 {
61    int     score;	/* hash score of current match */
62    int     start;	/* start of current match */
63    int     stop;	/* end of current match */
64    struct savestr *dmax;   /* location in vmax[] where best score data saved */
65 };
66 
67 struct savestr
68 {
69    int     score;		/* pam score with segment optimization */
70    int     score0;		/* pam score of best single segment */
71    int     gscore;		/* score from global match */
72    int     dp;			/* diagonal of match */
73    int     start;		/* start of match in lib seq */
74    int     stop;		/* end of match in lib seq */
75 };
76 
77 struct update_code_str {
78   int p_op_idx;
79   int p_op_cnt;
80   int show_code;
81   int cigar_order;
82   int show_ext;
83   char *op_map;
84 };
85 
86 #ifndef TFAST
87 static char *ori_code = "-x/=\\+*";	/* FASTX */
88 static char *cigar_code = "DXFMRI*";
89 #else
90 static char *ori_code = "+x/=\\-*";	/* TFASTX */
91 static char *cigar_code = "IXFMRD*";
92 #endif
93 
94 static struct update_code_str *
95 init_update_data(int show_code);
96 
97 static void
98 sprintf_code(char *tmp_str, struct update_code_str *, int op_idx, int op_cnt);
99 
100 static void
101 update_code(char *al_str, int al_str_max,
102 	    struct update_code_str *update_data, int op,
103 	    int sim_code, unsigned char sp0, unsigned char sp1);
104 
105 static void
106 close_update_data(char *al_str, int al_str_max,
107 		  struct update_code_str *update_data);
108 
109 void kpsort (struct savestr **v, int n);
110 extern void *init_stack(int, int);
111 extern void push_stack(void *, void *);
112 extern void *pop_stack(void *);
113 extern void *free_stack(void *);
114 
115 #define SGW1 100
116 #define SGW2 300
117 struct smgl_str {
118   int C[SGW1+1][SGW2+1];
119   int st[SGW1+1][SGW2+1];
120   int D[SGW2+7], I[SGW2+1];
121 };
122 
123 struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
124 
125 struct wgt { int  iii, ii, iv;};
126 struct wgtc {char c2, c3, c4, c5;};
127 
128 typedef struct st_s { int C, I, D;} *st_ptr;
129 
130 struct f_struct {
131   struct dstruct *diag;
132   int frame;
133   int ndo;
134   int noff;
135   int hmask;			/* hash constants */
136   int *pamh1;			/* pam based array */
137   int *pamh2;			/* pam based kfact array */
138   int *link, *harr;		/* hash arrays */
139   int kshft;			/* shift width */
140   int nsav, lowscor;		/* number of saved runs, worst saved run */
141 #ifndef TFAST
142   unsigned char *aa0x, *aa0v;	/* aa0x - 111122223333 */
143 #else
144   unsigned char *aa1x, *aa1v;	/* aa1x - 111122223333 */
145 #endif				/* aa1v - computed codons */
146   struct sx_s *cur;
147   int cur_sp_size;
148   struct wgt **weight0;
149   struct wgt **weight1;
150   struct wgtc **weight_c;
151   int *waa;
152   int *res;
153   int max_res;
154   st_ptr up, down, tp;
155   struct smgl_str smgl_s;
156 };
157 
158 #define DROP_INTERN
159 #include "drop_func.h"
160 
161 static int dmatchz(const unsigned char *aa0, int n0,
162 		   const unsigned char *aa1, int n1,
163 		   const unsigned char *aa1v,
164 		   int hoff, int window,
165 		   int **pam2, int gdelval, int ggapval, int gshift,
166 		   struct f_struct *f_str);
167 
168 int shscore(unsigned char *aa0, int n0, int **pam2);
169 int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
170 extern int ELK_to_s(double E_join, int n0, int n1, double Lambda, double K, double H);
171 
172 int savemax (struct dstruct *, int,
173 	     struct savestr *vmax, struct savestr **lowmax);
174 
175 int spam (const unsigned char *aa0, const unsigned char *aa1,
176 	  struct savestr *dmax, int **pam2,
177 	  struct f_struct *f_str);
178 int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
179 int lx_band(const unsigned char *prot_seq, int len_prot,
180 	    const unsigned char *dna_prot_seq, int len_dna_prot,
181 	    int **pam_matrix, int gopen, int gext,
182 	    int gshift, int start_diag, int width, struct f_struct *f_str);
183 
184 struct a_res_str *
185 merge_ares_chains(struct a_res_str *cur_ares,
186 		  struct a_res_str *tmpl_ares,
187 		  int score_ix, const char *msg);
188 
189 extern void w_abort (char *p, char *p1);
190 extern void aagetmap(char *to, int n);
191 
192 /* initialize for fasta */
193 /* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
194 /* Assume naa=47, and wgts[47][23] matches both upper and lower case
195 amoino acids with another amino acid.  And also assume the DNA letter
196 does not have upper/lower case difference.  If you also allow DNA
197 sequence to be upper/lower case letters, more needs be changed. Not
198 only here, but also in the alignment code, the way that pack a codon
199 into a number between 0-63 need be changed. */
200 
201 /* modified so that if **weightci==NULL, do not fiddle with characters */
202 
203 /* modified 3-Aug-2010 for NCBIstdaa alphabet, which requires MAXUC
204    28, MAXLC 56, so we must have 58, not 47, entries */
205 
206 void
init_weights(struct wgt *** weighti,struct wgtc *** weightci,int ** wgts,int gshift,int gsubs,int naa)207 init_weights(struct wgt ***weighti, struct wgtc ***weightci,
208 	     int **wgts, int gshift, int gsubs, int naa)
209 {
210   int i, j, do_wgtc=0;
211   int aa, b, a, x, y, z;
212   int *wwt, e;
213   struct wgt **weight;
214   struct wgtc **weightc;
215   char aacmap[64];
216   int temp[MAXLC+1][64]; /*change*/
217   char le[MAXLC+1][64];
218 
219   if (naa > MAXLC) {
220     fprintf(stderr,"*** dropfz2.c compilation problem naa(%d) > MAXLX(%d) ***\n",
221 	   naa, MAXLC);
222   }
223 
224   if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
225       ==NULL) {
226     fprintf(stderr," cannot allocate weights array: %d\n",naa);
227     exit(1);
228   }
229 
230   weight = *weighti;
231   /* allocate weight[aa 0..MAXLC] */
232   for (aa=0; aa <= naa; aa++) {
233     if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
234 	==NULL) {
235       fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
236       exit(1);
237     }
238   }
239 
240   /* allocate weightci[aa 0..MAXLC] */
241   if (weightci !=NULL) {
242     if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
243 					  sizeof(struct wgtc *)))==NULL) {
244       fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
245       exit(1);
246     }
247     weightc = *weightci;
248 
249     for (aa=0; aa <= naa; aa++) {
250       if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
251 	  ==NULL) {
252 	fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
253 	exit(1);
254       }
255     }
256     do_wgtc = 1;
257   }
258   else do_wgtc = 0;
259 
260   aagetmap(aacmap,64);
261 
262   for (aa = 0; aa < naa; aa++) {	/* change*/
263     wwt = wgts[aa];			/* pam matrix */
264     for (i = 0; i < 64; i++) {	/* i iterates through the codons */
265       x = -10000;		/* large negative */
266       y = i;
267       for (j = 0; j < 64; j++) {	/* j iterates through the codons */
268 	z = ((~i & j) | (i & ~j));
269 	b = 0;		/* score = 0 */
270 	if (z % 4) b-= gsubs;
271 	if (z /16) b-= gsubs;
272 	if ((z /4) % 4) b -= gsubs;
273 	b += wwt[aascii[aacmap[j]]];  /* add the match score for char j*/
274 	if (b > x) {
275 	  x = b;		/* x has the score */
276 	  y = j;		/* y has the character  (codon index)*/
277 	}
278       }
279 #ifdef DEBUG
280       if (y < 0 || y > 63) printf("%d %d %d %d ",aa, i, x, y);
281 #endif
282       temp[aa][i] = x;
283       le[aa][i] = y;
284     }
285     /*            printf("\n"); */
286   }
287 
288   for (aa= 0; aa < naa; aa++) {
289       wwt = temp[aa];
290       for (i = 0; i < 256; i++) {
291           for (x=-100,b = 0; b < 4; b++) {
292               z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));
293 	      if (x < (e=wwt[z])) {
294 		  x = e;
295 		  if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
296 	      }
297           }
298           weight[aa][i].iv=x-gshift;
299           weight[aa][i].iii = wwt[i%64];
300 
301 	  if (do_wgtc) {
302 	    weightc[aa][i].c5 = aacmap[le[aa][i%64]];
303 	    weightc[aa][i].c3 = aacmap[i%64];
304 	  }
305           x = i %16;
306           for (y = -100, b = 0; b < 3; b++) {
307               z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2)));
308               for (a = 0; a < 4; a++) {
309 		  if ((e =wwt[z+(a<<(b*2))]) > y) {
310 		      y = e;
311 		      if (do_wgtc)
312 			weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
313 		  }
314               }
315           }
316           weight[aa][i].ii = y-gshift;
317       }
318   }
319   /*106=CGGG*/
320   for (aa = 0; aa < naa; aa++) {
321     weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
322     weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
323     if (do_wgtc) {
324       weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
325 	= weightc[aa][106].c2 = 'X';
326     }
327   }
328 }
329 
330 void
free_weights(struct wgt *** weighti0,struct wgt *** weighti1,struct wgtc *** weightci,int naa)331 free_weights(struct wgt ***weighti0, struct wgt ***weighti1,
332 	     struct wgtc ***weightci, int naa)
333 {
334   int aa;
335   struct wgt **weight0;
336   struct wgt **weight1;
337   struct wgtc **weightc;
338 
339   weight0 = *weighti0;
340   weight1 = *weighti1;
341   weightc = *weightci;
342 
343   for (aa=0; aa < naa; aa++) {free(weight0[aa]);}
344   for (aa=0; aa < naa; aa++) {free(weight1[aa]);}
345   for (aa=0; aa < naa; aa++) {free(weightc[aa]);}
346 
347   free(weight0);
348   free(weight1);
349   free(weightc);
350 }
351 
352 static void
pre_com(const unsigned char * aa0,int n0,unsigned char * aa0v)353 pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v) {
354   int dnav, i;
355   dnav = (hnt[aa0[0]]<<2) + hnt[aa0[1]];
356   for (i=2; i<n0; i++) {
357     dnav = ((dnav<<2)+hnt[aa0[i]])&255;
358     if (aa0[i] == NT_N  || aa0[i-1]==NT_N || aa0[i-2] == NT_N)  {
359       aa0v[i-2] = 106;
360     }
361     else {
362       if (dnav == 106/*CGGG*/) {dnav = 42/*AGGG*/;}
363       aa0v[i-2]=dnav;
364     }
365   }
366 }
367 
368 static void
pre_com_r(const unsigned char * aa0,int n0,unsigned char * aa0v)369 pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v) {
370   int dnav, i, ir;
371   dnav = ((3-hnt[aa0[n0-1]])<<2) + 3-hnt[aa0[n0-2]];
372   for (i=2, ir=n0-3; i<n0; i++,ir--) {
373     dnav = ((dnav<<2)+3-hnt[aa0[ir]])&255;
374     if (aa0[ir] == NT_N || aa0[ir+1]==NT_N || aa0[ir+2] == NT_N)  {
375       aa0v[i-2] = 106;
376     }
377     else {
378       if (dnav == 106) dnav = 42;
379       aa0v[i-2]=dnav;
380     }
381   }
382 }
383 
384 void
init_work(unsigned char * aa0,int n0,struct pstruct * ppst,struct f_struct ** f_arg)385 init_work (unsigned char *aa0, int n0,
386 	   struct pstruct *ppst,
387 	   struct f_struct **f_arg)
388 {
389    int mhv, phv;
390    int hmax;
391    int i0, hv;
392    int pamfact;
393    int btemp;
394    struct f_struct *f_str;
395    struct bdstr *bss;
396    /* these used to be globals, but do not need to be */
397    int ktup, fact, kt1, lkt;
398 
399    int maxn0;
400    int *pwaa;
401    int i, j, q;
402    struct swstr *ss, *r_ss;
403    int *waa;
404    int *res;
405    int nsq, ip, *hsq, naat;
406 #ifndef TFAST
407    int last_n0, itemp, dnav;
408    unsigned char *fd, *fs, *aa0x, *aa0v;
409    int n0x, n0x3;
410 #endif
411 
412    if (nt[NT_N] != 'N') {
413      fprintf(stderr," nt[NT_N] (%d) != 'X' (%c) - recompile\n",NT_N,nt[NT_N]);
414      exit(1);
415    }
416 
417    if (ppst->ext_sq_set) {
418      nsq = ppst->nsqx; ip = 1;
419      hsq = ppst->hsqx;
420    }
421    else {
422      nsq = ppst->nsqx; ip = 0;
423      hsq = ppst->hsq;
424    }
425 
426    f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
427 
428    if (!ppst->param_u.fa.use_E_thresholds) {
429      btemp = 2 * ppst->param_u.fa.bestoff / 3 +
430        n0 / ppst->param_u.fa.bestscale +
431        ppst->param_u.fa.bkfact *
432        (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
433      btemp = min (btemp, ppst->param_u.fa.bestmax);
434      if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
435 
436      ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
437      if (ppst->param_u.fa.optcut_set != 1) {
438 #ifndef TFAST
439        ppst->param_u.fa.optcut = (btemp*5)/4;
440 #else
441        ppst->param_u.fa.optcut = (btemp*4)/3;
442 #endif
443      }
444    }
445 
446 #ifdef OLD_FASTA_GAP
447    ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
448 #else
449    ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
450 #endif
451    pamfact = ppst->param_u.fa.pamfact;
452    ktup = ppst->param_u.fa.ktup;
453    fact = ppst->param_u.fa.scfact * ktup;
454 
455 #ifndef TFAST
456    /* before hashing, we must set up some space and translate the sequence */
457 
458    maxn0 = n0 + 2;
459    if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
460 					     sizeof(unsigned char)))
461        == NULL) {
462      fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
463      exit (1);
464    }
465    aa0x++;
466    f_str->aa0x = aa0x;
467 
468 
469    if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
470 					     sizeof(unsigned char)))
471        == NULL) {
472      fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
473      exit (1);
474    }
475    aa0v++;
476    f_str->aa0v = aa0v;
477 
478    /* make a precomputed codon number series */
479    pre_com(aa0, n0, aa0v);
480 
481    last_n0 = 0;
482    for (itemp=0; itemp<3; itemp++) {
483      n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
484      /*         for (i=0; i<n0x; i++) {
485 	   fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
486 	   if ((i%60)==59) fprintf(stderr,"\n");
487 	   }
488 	   fprintf(stderr,"\n");
489 	   */
490      last_n0 += n0x+1;
491    }
492 
493    /*     fprintf(stderr,"\n"); */
494    n0x = n0;
495    n0x3 = n0x/3;
496 
497    /* now switch aa0 and aa0x for hashing functions */
498    fs = aa0;
499    aa0 = aa0x;
500    aa0x = fs;
501 #endif
502 
503    /* naat must always be MAXLC because library can have LC aa residues */
504    /*
505    if (ppst->ext_sq_set) naat = MAXLC;
506    else naat = MAXUC;
507    */
508    naat = MAXLC;
509 
510    init_weights(&f_str->weight0, NULL,
511 		ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
512    init_weights(&f_str->weight1, &f_str->weight_c,
513 		ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
514 
515    if (pamfact == -1)
516       pamfact = 0;
517    else if (pamfact == -2)
518       pamfact = 1;
519 
520    for (i0 = 1, mhv = -1; i0 < ppst->nsq; i0++)
521       if (hsq[i0] < NMAP && hsq[i0] > mhv)
522 	 mhv = ppst->hsq[i0];
523 
524    if (mhv <= 0)
525    {
526       fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
527       exit (1);
528    }
529 
530    for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
531 
532 /*      kshft = 2;	*/
533    kt1 = ktup - 1;
534    hv = 1;
535    for (i0 = 0; i0 < ktup; i0++)
536       hv = hv << f_str->kshft;
537    hmax = hv;
538    f_str->hmask = (hmax >> f_str->kshft) - 1;
539 
540    if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
541      fprintf (stderr, " cannot allocate hash array\n");
542      exit (1);
543    }
544    if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
545      fprintf (stderr, " cannot allocate pamh1 array\n");
546      exit (1);
547    }
548    if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
549      fprintf (stderr, " cannot allocate pamh2 array\n");
550      exit (1);
551    }
552    if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
553      fprintf (stderr, " cannot allocate hash link array");
554      exit (1);
555    }
556 
557    for (i0 = 0; i0 < hmax; i0++)
558       f_str->harr[i0] = -1;
559    for (i0 = 0; i0 < n0; i0++)
560       f_str->link[i0] = -1;
561 
562    /* encode the aa0 array */
563    phv = hv = 0;
564    lkt = kt1;
565    for (i0 = 0; i0 < min(n0,lkt); i0++) {
566      if (hsq[aa0[i0]] >= NMAP) {
567        hv=phv=0; lkt = i0+ktup; continue;
568      }
569      hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
570      phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
571    }
572 
573    for (; i0 < n0; i0++) {
574      if (hsq[aa0[i0]] >= NMAP) {
575        hv=phv=0;
576        /* restart hv, phv calculation */
577        for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
578 	 if (hsq[aa0[i0]] >= NMAP) {
579 	   hv=phv=0;
580 	   lkt = i0+ktup;
581 	   continue;
582 	 }
583 	 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
584 	 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
585        }
586      }
587      if (i0 >= n0) break;
588      hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
589      f_str->link[i0] = f_str->harr[hv];
590      f_str->harr[hv] = i0;
591      if (pamfact) {
592        f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
593        if (hsq[aa0[i0-kt1]] < NMAP)
594 	 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
595      }
596      else f_str->pamh2[hv] = fact * ktup;
597    }
598 
599 /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
600    pam2[0][0] is now undefined for consistency with blast
601 */
602 
603    if (pamfact)
604       for (i0 = 1; i0 < ppst->nsq; i0++)
605 	 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
606    else
607       for (i0 = 1; i0 < ppst->nsq; i0++)
608 	 f_str->pamh1[i0] = fact;
609 
610    f_str->ndo = 0;	/* used to save time on diagonals with long queries */
611 
612 
613 #ifndef ALLOCN0
614    if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
615 						 sizeof (struct dstruct)))==NULL) {
616       fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
617 	       MAXDIAG *sizeof (struct dstruct));
618       exit (1);
619      };
620 #else
621    if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
622 					      sizeof (struct dstruct)))==NULL) {
623       fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
624 	      (long)n0*sizeof (struct dstruct));
625       exit (1);
626      };
627 #endif
628 
629 #ifndef TFAST
630    /* done hashing, now switch aa0, aa0x back */
631    fs = aa0;
632    aa0 = aa0x;
633    aa0x = fs;
634 #else
635    if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
636 					     sizeof(unsigned char)))
637        == NULL) {
638      fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
639      exit (1);
640    }
641    f_str->aa1x++;
642 
643    if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
644 					     sizeof(unsigned char))) == NULL) {
645      fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
646      exit (1);
647    }
648    f_str->aa1v++;
649 
650 #endif
651 
652    if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
653      fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
654      exit(1);
655    }
656 
657    pwaa = waa;
658    for (i=0; i<nsq; i++) {
659      for (j=0;j<n0; j++) {
660        *pwaa = ppst->pam2[ip][i][aa0[j]];
661        pwaa++;
662      }
663    }
664    f_str->waa = waa;
665 
666 #ifndef TFAST
667    maxn0 = max(2*n0,MIN_RES);
668 #else
669    maxn0 = max(4*n0,MIN_RES);
670 #endif
671    if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
672      fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
673      exit(1);
674    }
675    f_str->res = res;
676    f_str->max_res = maxn0;
677 
678    *f_arg = f_str;
679 }
680 
681 /* pstring1 is a message to the manager, currently 512 */
682 /* pstring2 is the same information, but in a markx==10 format */
683 void
get_param(const struct pstruct * ppst,char ** pstring1,char * pstring2,struct score_count_s * s_info)684 get_param (const struct pstruct *ppst,
685 	   char **pstring1, char *pstring2,
686 	   struct score_count_s *s_info)
687 {
688   char options_str1[128];
689   char options_str2[128];
690 #ifndef TFAST
691   char *pg_str="FASTY";
692 #else
693   char *pg_str="TFASTY";
694 #endif
695 
696   if (!ppst->param_u.fa.use_E_thresholds) {
697     sprintf(options_str1,"join: %d (%.3g), opt: %d (%.3g)",
698 	    ppst->param_u.fa.cgap, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
699 	    ppst->param_u.fa.optcut, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
700     sprintf(options_str2,"pg_join: %d (%.3g)\n; pg_optcut: %d (%.3g)",
701 	    ppst->param_u.fa.cgap, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
702 	    ppst->param_u.fa.optcut, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
703   }
704   else {
705     sprintf(options_str1,"E-join: %.2g (%.3g), E-opt: %.2g (%.3g)",
706 	    ppst->param_u.fa.E_join, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
707 	    ppst->param_u.fa.E_band_opt, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
708     sprintf(options_str2,"pg_join_E(): %.2g (%.3g)\n; pg_optcut_E(): %.2g (%.3g)",
709 	    ppst->param_u.fa.E_join, (double)s_info->s_cnt[0]/(double)s_info->tot_scores,
710 	    ppst->param_u.fa.E_band_opt, (double)s_info->s_cnt[2]/(double)s_info->tot_scores);
711   }
712 
713   if (!ppst->param_u.fa.optflag)
714     sprintf (pstring1[0], "%s (%s)",pg_str, verstr);
715   else
716     sprintf (pstring1[0], "%s (%s) [optimized]",pg_str, verstr);
717 
718   sprintf (pstring1[1],
719 #ifdef OLD_FASTA_GAP
720 	   "%s matrix (%d:%d)%s, gap-pen: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d",
721 #else
722 	   "%s matrix (%d:%d)%s, open/ext: %3d/%3d, shift: %3d, subs: %3d\n ktup: %d, %s, width: %3d",
723 #endif
724 	   ppst->pam_name, ppst->pam_h,ppst->pam_l,
725 	   (ppst->ext_sq_set) ? "xS":"\0",
726 	   ppst->gdelval, ppst->ggapval,
727 	   ppst->gshift,ppst->gsubs,
728 	   ppst->param_u.fa.ktup, options_str1, ppst->param_u.fa.optwid);
729 
730   if (ppst->param_u.fa.iniflag) strcat(pstring1[0]," init1");
731 
732   if (pstring2 != NULL) {
733 #ifdef OLD_FASTA_GAP
734     sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
735 ; pg_gap-pen: %d %d\n; pg_ktup: %d\n; %s\n",
736 #else
737 	     sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)%s\n\
738 ; pg_open-ext: %d %d\n; pg_ktup: %d\n; %s\n",
739 #endif
740 		      pg_str,verstr,ppst->pam_name, ppst->pam_h,ppst->pam_l,
741 		      (ppst->ext_sq_set) ? "xS":"\0",  ppst->gdelval,
742 		      ppst->ggapval,ppst->param_u.fa.ktup,options_str2);
743 	     }
744   }
745 
746 void
close_work(const unsigned char * aa0,int n0,struct pstruct * ppst,struct f_struct ** f_arg)747 close_work (const unsigned char *aa0, int n0,
748 	    struct pstruct *ppst,
749 	    struct f_struct **f_arg)
750 {
751   struct f_struct *f_str;
752   int naat;
753 
754   f_str = *f_arg;
755 
756   if (f_str != NULL) {
757    if (ppst->ext_sq_set) naat = MAXLC;
758    else naat = MAXUC;
759     free_weights(&f_str->weight0,&f_str->weight1,&f_str->weight_c,naat);
760     free(f_str->cur);
761 #ifndef TFAST
762     f_str->aa0v--;
763     free(f_str->aa0v);
764     f_str->aa0x--;
765     free(f_str->aa0x);
766 #else	/* TFAST */
767     f_str->aa1x--;
768     free(f_str->aa1x);
769     f_str->aa1v--;
770     free(f_str->aa1v);
771 #endif
772     free(f_str->res);
773     free(f_str->waa);
774     free(f_str->diag);
775     free(f_str->link);
776     free(f_str->pamh2);
777     free(f_str->pamh1);
778     free(f_str->harr);
779     free(f_str);
780     *f_arg = NULL;
781   }
782 }
783 
do_fastz(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,const unsigned char * aa1v,const struct pstruct * ppst,struct f_struct * f_str,struct rstruct * rst,int * hoff,int shuff_flg,struct score_count_s * s_info)784 void do_fastz (const unsigned char *aa0, int n0,
785 	       const unsigned char *aa1, int n1,
786 	       const unsigned char *aa1v,
787 	       const struct pstruct *ppst, struct f_struct *f_str,
788 	       struct rstruct *rst, int *hoff, int shuff_flg,
789 	       struct score_count_s *s_info)
790 {
791    int     nd;		/* diagonal array size */
792    int     lhval;
793    int     kfact;
794    int i;
795    register struct dstruct *dptr;
796    struct savestr vmax[MAXSAV];	/* best matches saved for one sequence */
797    struct savestr *vptr[MAXSAV];
798    struct savestr *lowmax;
799    int lowscor;
800    register int tscor;
801    int xdebug = 0;
802 
803 #ifndef ALLOCN0
804    register struct dstruct *diagp;
805 #else
806    register int dpos;
807    int     lposn0;
808 #endif
809    struct dstruct *dpmax;
810    register int lpos;
811    int     tpos;
812    struct savestr *vmptr;
813    int     scor, tmp;
814    int     im, ib, nsave;
815    int ktup, kt1, ip, lkt, ktup_sq;
816    const int *hsq;
817    int c_gap, opt_cut;
818 #ifndef TFAST
819    int n0x31, n0x32;
820    n0x31 = (n0-2)/3;
821    n0x32 = n0x31+1+(n0-n0x31-1)/2;
822 #else
823    unsigned char *fs, *fd;
824    int n1x31, n1x32, last_n1, itemp;
825    n1x31 = (n1-2)/3;
826    n1x32 = n1x31+1+(n1-n1x31-1)/2;
827 #endif
828 
829   if (ppst->ext_sq_set) {
830     ip = 1;
831     hsq = ppst->hsqx;
832   }
833   else {
834     ip = 0;
835     hsq = ppst->hsq;
836   }
837 
838   ktup = ppst->param_u.fa.ktup;
839   ktup_sq = ktup*ktup;
840   if (ktup == 1) ktup_sq *= 2;
841 
842   kt1 = ktup-1;
843 
844   if (n1 < ktup) {
845     rst->score[0] = rst->score[1] = rst->score[2] = 0;
846     return;
847   }
848 
849   if (n0+n1+1 >= MAXDIAG) {
850     fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
851     rst->score[0] = rst->score[1] = rst->score[2] = -1;
852     return;
853   }
854 
855   if (ppst->param_u.fa.use_E_thresholds) {
856     c_gap = ELK_to_s(ppst->param_u.fa.E_join*ktup_sq*2.5, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
857     opt_cut = ELK_to_s(ppst->param_u.fa.E_band_opt*ktup_sq*2.0, n0, n1, ppst->pLambda, ppst->pK, ppst->pH);
858     rst->valid_stat = 0;
859   }
860   else {
861     c_gap = ppst->param_u.fa.cgap;
862     opt_cut = ppst->param_u.fa.optcut;
863     rst->valid_stat = 1;
864   }
865 
866   f_str->noff = n0 - 1;
867 
868 #ifdef ALLOCN0
869    nd = n0;
870 #endif
871 
872 #ifndef ALLOCN0
873    nd = n0 + n1;
874 #endif
875 
876    dpmax = &f_str->diag[nd];
877    for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
878    {
879       dptr->stop = -1;
880       dptr->dmax = NULL;
881       dptr++->score = 0;
882    }
883 
884    for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
885       vmptr->score = 0;
886    lowmax = vmax;
887    lowscor = 0;
888 
889   if (n1 > 1000 && aa1[0]==23 && aa1[100]==23 &&
890       aa1[1400]==23 && aa1[1401]!=23) {
891     xdebug = 1;
892   }
893   else xdebug = 0;
894 
895    /* start hashing */
896    lhval = 0;
897    lkt = kt1;
898    for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
899      /* restart lhval calculation */
900      if (hsq[aa1[lpos]]>=NMAP) {
901        lhval = 0; lkt=lpos+ktup;
902        continue;
903      }
904      lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
905    }
906 
907 #ifndef ALLOCN0
908    diagp = &f_str->diag[f_str->noff + lkt];
909    for (; lpos < n1; lpos++, diagp++) {
910      if (hsq[aa1[lpos]]>=NMAP) {
911        lpos++ ; diagp++;
912        while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
913        if (lpos >= n1) break;
914        lhval = 0;
915      }
916      lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
917      for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
918        if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
919 #else
920    lposn0 = f_str->noff + lpos;
921    for (; lpos < n1; lpos++, lposn0++) {
922      if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
923      lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
924      for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
925        dpos = lposn0 - tpos;
926        if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
927 #endif
928 	 tscor += ktup;
929 	 if ((tscor -= lpos) <= 0) {
930 	   scor = dptr->score;
931 	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && lowscor < scor) {
932 #ifdef ALLOCN0
933 	     lowscor = savemax (dptr, dpos, vmax, &lowmax);
934 #else
935 	     lowscor = savemax (dptr, dptr- f_str->diag, vmax, &lowmax);
936 #endif
937 	   }
938 	   if ((tscor += scor) >= kfact) {
939 	     dptr->score = tscor;
940 	     dptr->stop = lpos;
941 	   }
942 	   else {
943 	     dptr->score = kfact;
944 	     dptr->start = (dptr->stop = lpos) - kt1;
945 	   }
946 	 }
947 	 else {
948 	   dptr->score += f_str->pamh1[aa0[tpos]];
949 	   dptr->stop = lpos;
950 	 }
951        }
952        else {
953 	 dptr->score = f_str->pamh2[lhval];
954 	 dptr->start = (dptr->stop = lpos) - kt1;
955        }
956      }				/* end tpos */
957 
958 #ifdef ALLOCN0
959       /* reinitialize diag structure */
960    loopl:
961      if ((dptr = &f_str->diag[lpos % nd])->score > lowscor)
962        lowscor = savemax (dptr, lpos, vmax, &lowmax);
963      dptr->stop = -1;
964      dptr->dmax = NULL;
965      dptr->score = 0;
966 #endif
967    }	/* end lpos */
968 
969 #ifdef ALLOCN0
970    for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
971      if ((dptr = &f_str->diag[dpos % nd])->score > lowscor)
972        lowscor = savemax (dptr, dpos, vmax, &lowmax, f_str);
973    }
974 #else
975    for (dptr = f_str->diag; dptr < dpmax;) {
976      if (dptr->score > lowscor) savemax (dptr, dptr - f_str->diag, vmax, &lowmax);
977      dptr->stop = -1;
978      dptr->dmax = NULL;
979      dptr++->score = 0;
980    }
981    f_str->ndo = nd;
982 #endif
983 
984    for (nsave = 0, vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++) {
985      if (vmptr->score > 0) {
986        vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
987        vptr[nsave++] = vmptr;
988      }
989    }
990 
991    if (nsave <= 0) {
992      rst->score[0] = rst->score[1] = rst->score[2] = 0;
993      return;
994    }
995 
996 #ifndef TFAST
997    /* FASTX code here to modify the start, stop points for
998       the three phases of the translated protein sequence
999    */
1000 
1001    for (ib=0; ib<nsave; ib++) {
1002      if (f_str->noff-vptr[ib]->dp+vptr[ib]->start >= n0x32)
1003        vptr[ib]->dp += n0x32;
1004      if (f_str->noff-vptr[ib]->dp +vptr[ib]->start >= n0x31)
1005        vptr[ib]->dp += n0x31;
1006    }
1007 #else
1008    /* TFAST code here to modify the start, stop points for
1009       the three phases of the translated protein sequence
1010       TFAST modifies library start points, rather than
1011       query start points
1012    */
1013 
1014    for (ib=0; ib<nsave; ib++) {
1015      if (vptr[ib]->start >= n1x32) {
1016        vptr[ib]->start -= n1x32;
1017        vptr[ib]->stop -= n1x32;
1018        vptr[ib]->dp -= n1x32;
1019      }
1020      if (vptr[ib]->start >= n1x31) {
1021        vptr[ib]->start -= n1x31;
1022        vptr[ib]->stop -= n1x31;
1023        vptr[ib]->dp -= n1x31;
1024      }
1025    }
1026 #endif /* TFAST */
1027 
1028    scor = sconn (vptr, nsave, c_gap,
1029 		 ppst->param_u.fa.pgap, f_str);
1030 
1031    for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
1032      if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
1033 
1034 /*  kssort (vptr, nsave); */
1035 
1036    rst->score[1] = vmptr->score;
1037    rst->score[0] = max (scor, vmptr->score);
1038    rst->score[2] = rst->score[0];		/* initn */
1039 
1040    s_info->tot_scores++;
1041    if (rst->score[0] > c_gap) { s_info->s_cnt[0]++;}
1042 #ifndef TFAST
1043    *hoff=f_str->noff - vmptr->dp;
1044 #else /* TFAST */
1045    *hoff=vmptr->dp-f_str->noff;
1046 #endif /* TFAST */
1047    if (ppst->param_u.fa.optflag) {
1048      if (/* shuff_flg || */ rst->score[0] > opt_cut) {
1049        s_info->s_cnt[2]++;
1050        rst->valid_stat = 1;
1051        rst->score[2] = dmatchz(aa0, n0,aa1,n1, aa1v,
1052 			       *hoff,ppst->param_u.fa.optwid,
1053 			       ppst->pam2[ip],
1054 			       ppst->gdelval,ppst->ggapval,ppst->gshift,
1055 			       f_str);
1056      }
1057    }
1058 }
1059 
1060 void do_work (const unsigned char *aa0, int n0,
1061 	      const unsigned char *aa1, int n1,
1062 	      int frame,
1063 	      const struct pstruct *ppst,
1064 	      struct f_struct *f_str,
1065 	      int qr_flg, int shuff_flg, struct rstruct *rst,
1066 	      struct score_count_s *s_info)
1067 {
1068   int hoff;
1069   int last_n1, itx, dnav, n10, i, ir;
1070   unsigned char *aa1x;
1071 
1072   rst->escore = 1.0;
1073   rst->segnum = rst->seglen = 1;
1074   rst->valid_stat = 0;
1075 
1076   if (n1 < ppst->param_u.fa.ktup) {
1077     rst->score[0] = rst->score[1] = rst->score[2] = 0;
1078     return;
1079   }
1080 
1081 #ifndef TFAST
1082   do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
1083 #else
1084    /* make a precomputed codon number series */
1085 
1086   if (frame == 0) {
1087     pre_com(aa1, n1, f_str->aa1v);
1088   }
1089   else {
1090     pre_com_r(aa1, n1, f_str->aa1v);
1091   }
1092 
1093   /* make translated sequence */
1094   last_n1 = 0;
1095   aa1x = f_str->aa1x;
1096 #ifdef DEBUG
1097   if (frame > 1) {
1098     fprintf(stderr, "*** fz_walign - frame: %d - out of range [0,1]\n",frame);
1099   }
1100 #endif
1101 
1102   for (itx= frame*3; itx< frame*3+3; itx++) {
1103     n10  = saatran(aa1,&aa1x[last_n1],n1,itx);
1104     last_n1 += n10+1;
1105   }
1106   n10 = last_n1-1;
1107 
1108   do_fastz (aa0, n0, f_str->aa1x, n10, f_str->aa1v, ppst, f_str, rst, &hoff, shuff_flg, s_info);
1109 #endif
1110 
1111   rst->comp = rst->H = -1.0;
1112 }
1113 
1114 void do_opt (const unsigned char *aa0, int n0,
1115 	     const unsigned char *aa1, int n1,
1116 	     int frame,
1117 	     struct pstruct *ppst,
1118 	     struct f_struct *f_str,
1119 	     struct rstruct *rst)
1120 {
1121   int optflag, tscore, hoff;
1122   int last_n1, itx, n10, i, ir;
1123   unsigned char *aa1x;
1124   struct score_count_s s_info = {0, 0, 0, 0};
1125 
1126   optflag = ppst->param_u.fa.optflag;
1127   ppst->param_u.fa.optflag = 1;
1128 
1129 #ifndef TFAST
1130   do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, rst, &hoff, 0, &s_info);
1131 #else
1132    /* make a precomputed codon number series */
1133 
1134   if (frame == 0) {
1135     pre_com(aa1, n1, f_str->aa1v);
1136   }
1137   else {
1138     pre_com_r(aa1, n1, f_str->aa1v);
1139   }
1140 
1141   /* make translated sequence */
1142   last_n1 = 0;
1143   aa1x = f_str->aa1x;
1144   for (itx= frame*3; itx< frame*3+3; itx++) {
1145     n10  = saatran(aa1,&aa1x[last_n1],n1,itx);
1146     last_n1 += n10+1;
1147   }
1148   n10 = last_n1-1;
1149 
1150   do_fastz (aa0, n0, f_str->aa1x, n10, f_str->aa1v, ppst, f_str, rst, &hoff, 0, &s_info );
1151 #endif
1152 
1153   ppst->param_u.fa.optflag = optflag;
1154 }
1155 
1156 int
1157 savemax (struct dstruct *dptr, int dpos,
1158 	 struct savestr *vmax, struct savestr **lowmax)
1159 {
1160    struct savestr *vmptr;
1161    int i;
1162 
1163 /* check to see if this is the continuation of a run that is already saved */
1164 
1165    if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
1166 	 vmptr->start == dptr->start) {
1167       vmptr->stop = dptr->stop;
1168       if ((i = dptr->score) <= vmptr->score) return (*lowmax)->score;
1169       vmptr->score = i;
1170       if (vmptr != *lowmax) return (*lowmax)->score;
1171    }
1172    else {
1173       i = (*lowmax)->score = dptr->score;
1174       (*lowmax)->dp = dpos;
1175       (*lowmax)->start = dptr->start;
1176       (*lowmax)->stop = dptr->stop;
1177       dptr->dmax = *lowmax;
1178    }
1179 
1180    for (vmptr = vmax; vmptr < vmax+MAXSAV; vmptr++) {
1181      if (vmptr->score < i) {
1182        i = vmptr->score;
1183        *lowmax = vmptr;
1184      }
1185    }
1186    return i;
1187 }
1188 
1189 int spam (const unsigned char *aa0,
1190 	  const unsigned char *aa1,
1191 	  struct savestr *dmax, int **pam2,
1192 	  struct f_struct *f_str)
1193 {
1194    int     lpos;
1195    int     tot, mtot;
1196    struct {
1197      int     start, stop, score;
1198    } curv, maxv;
1199    const unsigned char *aa0p, *aa1p;
1200 
1201    aa1p = &aa1[lpos = dmax->start];
1202    aa0p = &aa0[lpos - dmax->dp + f_str->noff];
1203    curv.start = lpos;
1204 
1205    tot = curv.score = maxv.score = 0;
1206    for (; lpos <= dmax->stop; lpos++) {
1207      tot += pam2[*aa0p++][*aa1p++];
1208      if (tot > curv.score) {
1209        curv.stop = lpos;
1210        curv.score = tot;
1211       }
1212       else if (tot < 0) {
1213 	if (curv.score > maxv.score) {
1214 	  maxv.start = curv.start;
1215 	  maxv.stop = curv.stop;
1216 	  maxv.score = curv.score;
1217 	}
1218 	tot = curv.score = 0;
1219 	curv.start = lpos+1;
1220       }
1221    }
1222 
1223    if (curv.score > maxv.score) {
1224      maxv.start = curv.start;
1225      maxv.stop = curv.stop;
1226      maxv.score = curv.score;
1227    }
1228 
1229 /*	if (maxv.start != dmax->start || maxv.stop != dmax->stop)
1230 		printf(" new region: %3d %3d %3d %3d\n",maxv.start,
1231 			dmax->start,maxv.stop,dmax->stop);
1232 */
1233    dmax->start = maxv.start;
1234    dmax->stop = maxv.stop;
1235 
1236    return maxv.score;
1237 }
1238 
1239 #define XFACT 10
1240 
1241 int sconn (struct savestr **v, int n,
1242        int cgap, int pgap, struct f_struct *f_str)
1243 {
1244   int     i, si;
1245   struct slink {
1246     int     score;
1247     struct savestr *vp;
1248     struct slink *next;
1249   }      *start, *sl, *sj, *so, sarr[MAXSAV];
1250   int     lstart, tstart, plstop, ptstop;
1251 
1252 /*	sort the score left to right in lib pos */
1253 
1254    kpsort (v, n);
1255 
1256    start = NULL;
1257 
1258 /*	for the remaining runs, see if they fit */
1259 
1260    for (i = 0, si = 0; i < n; i++)
1261    {
1262 
1263 /*	if the score is less than the gap penalty, it never helps */
1264       if (v[i]->score < cgap)
1265 	 continue;
1266       lstart = v[i]->start;
1267       tstart = lstart - v[i]->dp + f_str->noff;
1268 
1269 /*	put the run in the group */
1270       sarr[si].vp = v[i];
1271       sarr[si].score = v[i]->score;
1272       sarr[si].next = NULL;
1273 
1274 /* 	if it fits, then increase the score */
1275       for (sl = start; sl != NULL; sl = sl->next)
1276       {
1277 	 plstop = sl->vp->stop;
1278 	 ptstop = plstop - sl->vp->dp + f_str->noff;
1279 	 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
1280 	   sarr[si].score = sl->score + v[i]->score + pgap;
1281 	   break;
1282 	 }
1283       }
1284 
1285 /*	now recalculate where the score fits */
1286       if (start == NULL)
1287 	 start = &sarr[si];
1288       else
1289 	 for (sj = start, so = NULL; sj != NULL; sj = sj->next)
1290 	 {
1291 	    if (sarr[si].score > sj->score)
1292 	    {
1293 	       sarr[si].next = sj;
1294 	       if (so != NULL)
1295 		  so->next = &sarr[si];
1296 	       else
1297 		  start = &sarr[si];
1298 	       break;
1299 	    }
1300 	    so = sj;
1301 	 }
1302       si++;
1303    }
1304 
1305    if (start != NULL)
1306       return (start->score);
1307    else
1308       return (0);
1309 }
1310 
1311 void
1312 kssort (v, n)
1313 struct savestr *v[];
1314 int     n;
1315 {
1316    int     gap, i, j;
1317    struct savestr *tmp;
1318 
1319    for (gap = n / 2; gap > 0; gap /= 2)
1320       for (i = gap; i < n; i++)
1321 	 for (j = i - gap; j >= 0; j -= gap)
1322 	 {
1323 	    if (v[j]->score >= v[j + gap]->score)
1324 	       break;
1325 	    tmp = v[j];
1326 	    v[j] = v[j + gap];
1327 	    v[j + gap] = tmp;
1328 	 }
1329 }
1330 
1331 void
1332 kpsort (struct savestr **v, int n) {
1333   int gap, i, j, k;
1334   int incs[4] = { 21, 7, 3, 1 };
1335   struct savestr *tmp;
1336   int v_start;
1337 
1338   for ( k = 0; k < 4; k++) {
1339     gap = incs[k];
1340     for (i = gap; i < n; i++) {
1341       tmp = v[i];
1342       j = i;
1343       v_start = v[i]->start;
1344       while (j >= gap && v[j - gap]->start > v_start) {
1345 	v[j] = v[j - gap];
1346 	j -= gap;
1347       }
1348       v[j] = tmp;
1349     }
1350   }
1351 }
1352 
1353 static int
1354 dmatchz(const unsigned char *aa0, int n0,
1355 	const unsigned char *aa1, int n1,
1356 	const unsigned char *aa1v,
1357 	int hoff, int window,
1358 	int **pam2, int gdelval, int ggapval, int gshift,
1359 	struct f_struct *f_str)
1360 {
1361 
1362    hoff -= window/2;
1363 
1364 #ifndef TFAST
1365    return lx_band(aa1,n1,f_str->aa0v,n0-2,
1366 		  pam2,
1367 #ifdef OLD_FASTA_GAP
1368 		  -(gdelval - ggapval),
1369 #else
1370 		  -gdelval,
1371 #endif
1372 		  -ggapval,-gshift,
1373 		  hoff,window,f_str);
1374 #else
1375    return lx_band(aa0,n0,aa1v,n1-2,
1376 		  pam2,
1377 #ifdef OLD_FASTA_GAP
1378 		  -(gdelval - ggapval),
1379 #else
1380 		  -gdelval,
1381 #endif
1382 		  -ggapval,-gshift,
1383 		  hoff,window,f_str);
1384 #endif
1385 }
1386 
1387 static void
1388 init_row(struct sx_s *row, int sp) {
1389   int i;
1390   for (i = 0; i < sp; i++) {
1391       row[i].C1 = row[i].I1 = 0;
1392       row[i].C2 = row[i].I2 = 0;
1393       row[i].C3 = row[i].I3 = 0;
1394       row[i].flag = 0;
1395   }
1396 }
1397 
1398 int lx_band(const unsigned char *prot_seq,  /* array with protein sequence numbers*/
1399 	    int len_prot,    /* length of prot. seq */
1400 	    const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
1401 	    int len_dna_prot,   /* length trans. seq. */
1402 	    int **pam_matrix,   /* scoring matrix */
1403 	    int gopen, int gext, /* gap open, gap extend penalties */
1404 	    int gshift,         /* frame-shift penalty */
1405 	    int start_diag,     /* start diagonal of band */
1406 	    int width,         /* width for band alignment */
1407 	    struct f_struct *f_str)
1408 {
1409   void *ckalloc();
1410   int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
1411   struct sx_s *last, *tmp;
1412   int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
1413   const unsigned char *dp;
1414   register struct sx_s *ap, *aq;
1415   struct wgt *wt, *ww;
1416   int aa, b, a,x,y,z;
1417 
1418   sp = width+7;
1419   gg = gopen+gext;
1420   /* sp = sp/3+1; */
1421 
1422   if (f_str->cur == NULL ) {
1423     f_str->cur_sp_size = sp;
1424     f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
1425   }
1426   else if (f_str->cur_sp_size != sp) {
1427     free(f_str->cur);
1428     f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
1429     f_str->cur_sp_size = sp;
1430   }
1431 
1432   init_row(f_str->cur, sp);
1433 
1434   /*
1435   if (start_diag %3 !=0) start_diag = start_diag/3-1;
1436   else start_diag = start_diag/3;
1437   if (width % 3 != 0) width = width/3+1;
1438   else width = width /3;
1439   */
1440 
1441   x1 = start_diag; 		/* x1 = lower bound of DNA */
1442   x2 = 1;               /* the amount of position shift from last row*/
1443 
1444   end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
1445   end_prot = min(end_prot,len_prot);
1446 
1447   /* i counts through protein sequence, x1 through DNAp */
1448 
1449   for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
1450       bd = min(x1+width, (len_dna_prot+2)/3);	/* upper bound of band */
1451       bd1 = max(0,x1);	                /* lower bound of band */
1452       wt = f_str->weight0[prot_seq[i]];
1453       del = 1-x1;   /*adjustment*/
1454       bd += del;
1455       bd1 +=del;
1456 
1457       ap = &f_str->cur[bd1]; aq = ap+1;
1458       e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0;
1459       for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
1460 	  ww = &wt[(unsigned char) *dp++];
1461 	  sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
1462 	  if (cd1 > sc) sc = cd1;
1463 	  cd1 -= gext;
1464 	  if ((ci = aq->I1) > 0) {
1465 	      if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
1466 	      else {
1467 		  ap->C1 = sc;
1468 		  sc -= gg;
1469 		  if (sc > 0) {
1470 		      if (sc > best) best =sc;
1471 		      if (cd1 < sc) cd1 = sc;
1472 		      ap->I1 = max(ci-gext, sc);
1473 		  } else ap->I1 = ci-gext;
1474 	      }
1475 	  } else {
1476 	      if (sc <= 0) {
1477 		  ap->I1 = ap->C1 = 0;
1478 	      } else {
1479 		  ap->C1 = sc; sc-=gg;
1480 		  if (sc >0) {
1481 		      if (sc > best) best =sc;
1482 		      if (cd1 < sc) cd1 = sc;
1483 		      ap->I1 = sc;
1484 		  } else ap->I1 = 0;
1485 	      }
1486 	  }
1487 	  ww = &wt[(unsigned char) *dp++];
1488 	  sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
1489 	  if (cd2 > sc) sc = cd2;
1490 	  cd2 -= gext;
1491 	  if ((ci = aq->I2) > 0) {
1492 	      if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
1493 	      else {
1494 		  ap->C2 = sc;
1495 		  sc -= gg;
1496 		  if (sc > 0) {
1497 		      if (sc > best) best =sc;
1498 		      if (cd2 < sc) cd2 = sc;
1499 		      ap->I2 = max(ci-gext, sc);
1500 		  }
1501 	      }
1502 	  } else {
1503 	      if (sc <= 0) {
1504 		  ap->I2 = ap->C2 = 0;
1505 	      } else {
1506 		  ap->C2 = sc; sc-=gg;
1507 		  if (sc >0) {
1508 		      if (sc > best) best =sc;
1509 		      if (cd2 < sc) cd2 = sc;
1510 		      ap->I2 = sc;
1511 		  } else ap->I2 = 0;
1512 	      }
1513 	  }
1514 	  ww = &wt[(unsigned char)*dp++];
1515 	  sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
1516 	  if (cd3 > sc) sc = cd3;
1517 	  cd3 -= gext;
1518 	  if ((ci = aq++->I3) > 0) {
1519 	      if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
1520 	      else {
1521 		  ap->C3 = sc;
1522 		  sc -= gg;
1523 		  if (sc > 0) {
1524 		      if (sc > best) best =sc;
1525 		      if (cd3 < sc) cd3 = sc;
1526 		      ap->I3 = max(ci-gext, sc);
1527 		  }
1528 	      }
1529 	  } else {
1530 	      if (sc <= 0) {
1531 		  ap->I3 = ap->C3 = 0;
1532 	      } else {
1533 		  ap->C3 = sc; sc-=gg;
1534 		  if (sc >0) {
1535 		      if (sc > best) best =sc;
1536 		      if (cd3 < sc) cd3 = sc;
1537 		      ap->I3 = sc;
1538 		  } else ap->I3 = 0;
1539 	      }
1540 	  }
1541       }
1542   }
1543   /*  printf("The best score is %d\n", best); */
1544   return best+gg;
1545 }
1546 
1547 /* ckalloc - allocate space; check for success */
1548 void *ckalloc(size_t amount)
1549 {
1550 	void *p;
1551 
1552 	if ((p = (void *)malloc( (size_t)amount)) == NULL)
1553 		w_abort("Ran out of memory.","");
1554 	return(p);
1555 }
1556 
1557 /* calculate the 100% identical score */
1558 int
1559 shscore(unsigned char *aa0, int n0, int **pam2)
1560 {
1561   int i, sum;
1562   for (i=0,sum=0; i<n0; i++)
1563     sum += pam2[aa0[i]][aa0[i]];
1564   return sum;
1565 }
1566 
1567 #define WIDTH 60
1568 
1569 typedef struct mat *match_ptr;
1570 
1571 typedef struct mat {
1572 	int i, j, l;
1573 	match_ptr next;
1574 } match_node;
1575 
1576 typedef struct { int i,j;} state;
1577 typedef state *state_ptr;
1578 
1579 
1580 void *ckalloc();
1581 static match_ptr small_global(), global();
1582 static int local_align(), find_best();
1583 static void init_row2(), init_ROW();
1584 
1585 int
1586 pro_dna(const unsigned char *prot_seq,  /* array with prot. seq. numbers*/
1587 	int len_prot,    /* length of prot. seq */
1588 	const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
1589 	int len_dna_prot,   /* length trans. seq. */
1590 	int **pam_matrix,   /* scoring matrix */
1591 	int gopen, int gext, /* gap open, gap extend penalties */
1592 	int gshift,         /* frame-shift penalty */
1593 	struct f_struct *f_str,
1594 	int max_res,
1595 	struct a_res_str *a_res) /* alignment info */
1596 {
1597   match_ptr align, ap, aq;
1598   int x, y, ex, ey, i, score;
1599   int *alignment;
1600 
1601   f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1602   f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1603   f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1604 
1605   /*local alignment find the best local alignment x and y
1606     is the starting position of the best local alignment
1607     and ex ey is the ending position */
1608 
1609   score= local_align(&x, &y, &ex, &ey,
1610 		     pam_matrix, gopen, gext,
1611 		     dna_prot_seq, len_dna_prot,
1612 		     prot_seq, len_prot, f_str);
1613 
1614   f_str->up += 3; f_str->down += 3; f_str->tp += 3;
1615 
1616   /* x, y - start in prot, dna_prot */
1617   a_res->min0 = x;	/* prot */
1618   a_res->min1 = y;	/* DNA */
1619   a_res->max0 = ex;	/* prot */
1620   a_res->max1 = ey;	/* DNA */
1621 
1622   align = global(x, y, ex, ey,
1623 		 pam_matrix, gopen, gext,
1624 		 dna_prot_seq, prot_seq,
1625 		 0, 0, f_str);
1626 
1627   alignment = a_res->res;
1628 
1629   for (ap = align, i= 0; ap; i++) {
1630     if (i < max_res) alignment[i] = ap->l;
1631     aq = ap->next; free(ap); ap = aq;
1632   }
1633   if (i >= max_res)
1634     fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
1635 
1636   /*   up = &up[-3]; down = &down[-3]; tp = &tp[-3]; */
1637   free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
1638 
1639   a_res->nres = i;
1640   return score;
1641 }
1642 
1643 static void
1644 swap(void **a, void **b)
1645 {
1646     void *t = *a;
1647     *a = *b;   *b = t;
1648 }
1649 
1650 /*
1651    local alignment find the best local alignment x and y
1652    is the starting position of the best local alignment
1653    and ex ey is the ending position
1654 */
1655 static int
1656 local_align(int *x, int *y, int *ex, int *ey,
1657 	    int **wgts, int gop, int gext,
1658 	    const unsigned char *dnap, int ld,
1659 	    const unsigned char *pro, int lp,
1660 	    struct f_struct *f_str)
1661 {
1662   int i, j,  score, x1,x2,x3,x4, e1 = 0, e2 = 0, e3,
1663     sc, del,  e, best = 0,  cd, ci, c;
1664   struct wgt *wt, *ww;
1665   state_ptr cur_st, last_st, cur_i_st;
1666   st_ptr cur, last;
1667   const unsigned char *dp;
1668   int *cur_d_st, *st_up;
1669 
1670   /*
1671 	  Array rowiC stores the best scores of alignment ending at a position
1672 	  Arrays rowiD and rowiI store the best scores of alignment ending
1673 	  at a position with a deletion or insrtion
1674 	  Arrays sti stores the starting position of the best alignment whose
1675 	  score stored in the corresponding row array.
1676 	  The program stores two rows to complete the computation, same is
1677 	  for the global alignment routine.
1678   */
1679 
1680 
1681   st_up = (int *) ckalloc(sizeof(int)*(ld+10));
1682   init_row2(st_up, ld+5);
1683 
1684   ld += 2;
1685 
1686   init_ROW(f_str->up, ld+1);
1687   init_ROW(f_str->down, ld+1);
1688   cur = f_str->up+1;
1689   last = f_str->down+1;
1690 
1691   cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1692   last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1693   cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1694   cur_d_st = st_up;
1695   dp = dnap-2;
1696   for (i = 0; i < lp; i++) {
1697     wt = f_str->weight1[pro[i]];  e2 =0; e1 = last[0].C;
1698     for (j = 0; j < 2; j++) {
1699       cur_st[j].i = i+1;
1700       cur_st[j].j = j+1;
1701     }
1702     for (j = 2; j < ld; j++) {
1703       ww = &wt[(unsigned char) dp[j]];
1704       del = -1;
1705       if (j >= 3) {
1706 	sc = 0;
1707 	e3 = e2; e2 = e1;
1708 	e1 = last[j-2].C;
1709 	if ((e=e2+ww->iii) > sc) {sc = e; del = 3;}
1710 	if ((e=e1+ww->ii) > sc) {sc = e; del = 2;}
1711 	if ((e = e3+ww->iv) > sc) {sc = e; del = 4;}
1712       } else {
1713 	sc = e2  = 0;
1714 	if (ww->iii > 0) {sc = ww->iii; del = 3;}
1715       }
1716       if (sc < (ci=last[j].I)) {
1717 	sc = ci; del = 0;
1718       }
1719       if (sc < (cd=cur[j].D)) {
1720 	sc = cd; del = 5;
1721       }
1722       cur[j].C = sc;
1723       e = sc  - gop;
1724       if (e > cd) {
1725 	cur[j+3].D = e-gext;
1726 	cur_d_st[j+3] = 3;
1727       } else {
1728 	cur[j+3].D = cd-gext;
1729 	cur_d_st[j+3] = cur_d_st[j]+3;
1730       }
1731       switch(del) {
1732       case 5:
1733 	c = cur_d_st[j];
1734 	cur_st[j].i = cur_st[j-c].i;
1735 	cur_st[j].j = cur_st[j-c].j;
1736 	break;
1737       case 0:
1738 	cur_st[j].i = cur_i_st[j].i;
1739 	cur_st[j].j = cur_i_st[j].j;
1740 	break;
1741       case 2:
1742       case 3:
1743       case 4:
1744 	if (i) {
1745 	  if (j-del >= 0) {
1746 	    cur_st[j].i = last_st[j-del].i;
1747 	    cur_st[j].j = last_st[j-del].j;
1748 	  } else {
1749 	    cur_st[j].i = i;
1750 	    cur_st[j].j = 0;
1751 	  }
1752 	} else {
1753 	  cur_st[j].i = 0;
1754 	  cur_st[j].j = max(0, j-del+1);
1755 	}
1756 	break;
1757       case -1:
1758 	cur_st[j].i = i+1;
1759 	cur_st[j].j = j+1;
1760 	break;
1761       }
1762       if (e > ci) {
1763 	cur[j].I  = e -gext;
1764 	cur_i_st[j].i = cur_st[j].i;
1765 	cur_i_st[j].j = cur_st[j].j;
1766       } else {
1767 	cur[j].I  = ci- gext;
1768       }
1769       if (sc > best) {
1770 	x1 = cur_st[j].i;
1771 	x2 = cur_st[j].j;
1772 	best =sc;
1773 	x3 = i;
1774 	x4 = j;
1775       }
1776     }
1777     swap((void *)&last, (void *)&cur);
1778     swap((void *)&cur_st, (void *)&last_st);
1779   }
1780   /*	printf("The best score is %d\n", best);*/
1781   *x = x1; *y = x2; *ex = x3; *ey = x4;
1782   free(cur_st); free(last_st); free(cur_i_st);
1783   free(st_up);
1784   return best;
1785 }
1786 
1787 /*
1788    Both global_up and global_down do linear space score only global
1789    alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
1790    global_up do the algorithm upwards, from row x towards row y.
1791    global_down do the algorithm downwards, from row y towards x.
1792 */
1793 
1794 static void
1795 global_up(st_ptr *row1, st_ptr *row2,
1796 	  int x, int y, int ex, int ey,
1797 	  int **wgts, int gop, int gext,
1798 	  unsigned char *dnap, unsigned char *pro,
1799 	  int N, struct f_struct *f_str)
1800 {
1801   int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
1802   struct wgt *wt, *ww;
1803   st_ptr cur, last;
1804 
1805   cur = *row1; last = *row2;
1806   sc = -gop;
1807   for (j = 0; j <= ey-y+1; j++) {
1808     if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
1809     else { last[j].I = last[j].C = -10000;}
1810   }
1811   last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
1812   last[0].D = last[1].D = last[2].D = -10000;
1813   if (N) last[0].I = -gext;
1814   for (i = 1; i <= ex-x+1; i++) {
1815     wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
1816     for (j = 0; j <= ey-y+1; j++) {
1817       t = j+y;
1818       sc = -10000;
1819       ww = &wt[(unsigned char) dnap[t-3]];
1820       if (j < 4) {
1821 	if (j == 3) {
1822 	  sc = e2+ww->iii;
1823 	} else if (j == 2) {
1824 	  sc = e2 + ww->ii;
1825 	}
1826       } else {
1827 	e3 = e2; e2 = e1;
1828 	e1 = last[j-2].C;
1829 	sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
1830       }
1831       sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
1832       cur[j].C = sc;
1833       cur[j+3].D = max(cd, sc-gop)-gext;
1834       cur[j].I = max(ci, sc-gop)-gext;
1835     }
1836     swap((void *)&last, (void *)&cur);
1837   }
1838   /*printf("global up score =%d\n", last[ey-y+1].C);*/
1839   for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
1840   if (*row1 != last) swap((void *)row1, (void *)row2);
1841 }
1842 
1843 static void
1844 global_down(st_ptr *row1, st_ptr *row2,
1845 	    int x, int y, int ex, int ey,
1846 	    int **wgts, int gop, int gext,
1847 	    unsigned char *dnap, unsigned char *pro,
1848 	    int N, struct f_struct *f_str)
1849 {
1850   int i, j, k, sc, del, *tmp, e,  t, e1,e2,e3, ci,cd, score;
1851   struct wgt *wt, *w1, *w2, *w3;
1852   st_ptr cur, last;
1853 
1854   cur = (*row1); last = *row2;
1855   sc = -gop;
1856   for (j = ey-y+1; j >= 0; j--) {
1857     if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
1858     else  last[j].I =  last[j].C = -10000;
1859     cur[j].I = -10000;
1860   }
1861   last[ey-y+1].C = 0;
1862   if (N) last[ey-y+1].I = -gext;
1863   cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
1864   last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
1865   for (i = ex-x; i >= 0; i--) {
1866     wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C;
1867     e1 = -10000;
1868     w3 = &wt[(unsigned char) dnap[ey]];
1869     w2 = &wt[(unsigned char) dnap[ey-1]];
1870     for (j = ey-y+1; j >= 0; j--) {
1871       t = j+y;
1872       w1 = &wt[(unsigned char) dnap[t-1]];
1873       sc = -10000;
1874       if (t+3 > ey) {
1875 	if (t+2 == ey) {
1876 	  sc = e2+w2->iii;
1877 	} else if (t+1 == ey) {
1878 	  sc = e2+w1->ii;
1879 	}
1880       } else {
1881 	e3 = e2; e2 = e1;
1882 	e1 = last[j+2].C;
1883 	sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
1884       }
1885       if (sc < (cd= cur[j].D)) {
1886 	sc = cd;
1887 	cur[j-3].D = cd-gext;
1888       } else cur[j-3].D =max(cd, sc-gop)-gext;
1889       if (sc < (ci= last[j].I)) {
1890 	sc = ci;
1891 	cur[j].I = ci - gext;
1892       } else cur[j].I = max(sc-gop,ci)-gext;
1893       cur[j].C = sc;
1894       w3 = w2; w2 = w1;
1895     }
1896     swap((void *)&last, (void *)&cur);
1897   }
1898   for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
1899   if (*row1 != last) swap((void *)row1, (void *)row2);
1900 }
1901 
1902 static void
1903 init_row2(int *row, int ld) {
1904   int i;
1905   for (i = 0; i < ld; i++) row[i] = 0;
1906 }
1907 
1908 static void init_ROW(st_ptr row, int ld) {
1909   int i;
1910   for (i = 0; i < ld; i++) row[i].I = row[i].D = row[i].C = 0;
1911 }
1912 
1913 static match_ptr
1914 combine(match_ptr x1, match_ptr x2, int st) {
1915   match_ptr x;
1916 
1917   if (x1 == NULL) return x2;
1918   for (x = x1; x->next; x = x->next);
1919   x->next = x2;
1920   if (st) {
1921     for (x = x2; x; x = x->next) {
1922       x->j++;
1923       if (x->l == 3 || x->l == 4) break;
1924     }
1925     x->l--;
1926   }
1927   return x1;
1928 }
1929 
1930 /*
1931    global use the two upwards and downwards score only linear
1932    space global alignment subroutine to recursively build the
1933    alignment.
1934 */
1935 
1936 match_ptr
1937 global(int x, int y, int ex, int ey,
1938        int **wgts, int gop, int gext,
1939        unsigned char *dnap, unsigned char *pro, int N1, int N2,
1940        struct f_struct *f_str)
1941 {
1942   int m;
1943   int m1, m2;
1944   match_ptr x1, x2, mm1, mm2;
1945 
1946   /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
1947   /*
1948     if the space required is limited, we can do a quadratic space
1949     algorithm to find the alignment.
1950   */
1951 
1952   if (ex <= x) {
1953     mm1  = NULL;
1954     for (m = y+3; m <= ey; m+=3) {
1955       x1 = (match_ptr) ckalloc(sizeof(match_node));
1956       x1->l = 5; x1->next = mm1;
1957       if (mm1== NULL) mm2 = x1;
1958       mm1 = x1;
1959     }
1960     if (ex == x) {
1961       if ((ey-y) % 3 != 0) {
1962 	x1  = (match_ptr) ckalloc(sizeof(match_node));
1963 	x1->l = ((ey-y) % 3) +1; x1->next = NULL;
1964 	if (mm1) mm2->next = x1; else mm1 = x1;
1965       } else mm2->l = 4;
1966     }
1967     return mm1;
1968   }
1969   if (ey <= y) {
1970     mm1  = NULL;
1971     for (m = x; m <= ex; m++) {
1972       x1 = (match_ptr) ckalloc(sizeof(match_node));
1973       x1->l = 0; x1->next = mm1; mm1 = x1;
1974     }
1975     return mm1;
1976   }
1977   if (ex -x < SGW1 && ey-y < SGW2)
1978     return small_global(x,y,ex,ey,wgts, gop, gext,  dnap, pro, N1, N2,f_str);
1979   m = (x+ex)/2;
1980   /*
1981 	 Do the score only global alignment from row x to row m, m is
1982 	 the middle row of x and ex. Store the information of row m in
1983 	 upC, upD, and upI.
1984   */
1985   global_up(&f_str->up, &f_str->tp,  x, y, m, ey,
1986 	    wgts, gop, gext,
1987 	    dnap, pro, N1, f_str);
1988   /*
1989      Do the score only global alignment downwards from row ex
1990      to row m+1, store information of row m+1 in downC downI and downD
1991   */
1992   global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
1993 	      wgts, gop, gext,
1994 	      dnap, pro, N2, f_str);
1995 
1996   /*
1997     Use this information for row m and m+1 to find the crossing
1998     point of the best alignment with the middle row. The crossing
1999     point is given by m1 and m2. Then we recursively call global
2000     itself to compute alignments in two smaller regions found by
2001     the crossing point and combine the two alignments to form a
2002     whole alignment. Return that alignment.
2003   */
2004   if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {
2005     x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
2006     x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
2007     if (m1 == m2) x1 = combine(x1,x2,1);
2008     else x1 = combine(x1, x2,0);
2009   } else {
2010     x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
2011     x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
2012     mm1 = (match_ptr) ckalloc(sizeof(match_node));
2013     mm1->i = m; mm1->l = 0; mm1->j = m1;
2014     mm2 = (match_ptr) ckalloc(sizeof(match_node));
2015     mm2->i = m+1; mm2->l = 0; mm2->j = m1;
2016     mm1->next = mm2; mm2->next = x2;
2017     x1 = combine(x1, mm1, 0);
2018   }
2019   return x1;
2020 }
2021 
2022 static int
2023 find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
2024 
2025   int i, best = -1000, j = 0, s1, s2, s3, s4, st;
2026 
2027   for (i = 1; i < ld; i++) {
2028     s2 = up[i].C + down[i].C;
2029     s4 = up[i].I + down[i].I + gop;
2030     if (best < s2) {
2031       best = s2; j = i; st = 1;
2032     }
2033     if (best < s4) {
2034       best = s4; j = i; st = 0;
2035     }
2036   }
2037   *m1 = j-1+y;
2038   *m2 = j+y;
2039   /*printf("score=%d\n", best);*/
2040   return st;
2041 }
2042 
2043 /*
2044    An alignment is represented as a linked list whose element
2045    is of type match_node. Each element represent an edge in the
2046    path of the alignment graph. The fields of match_node are
2047    l ---  gives the type of the edge.
2048    i, j --- give the end position.
2049 */
2050 
2051 static match_ptr
2052 small_global(int x, int y, int ex, int ey,
2053 	     int **wgts, int gop, int gext,
2054 	     unsigned char *dnap, unsigned char *pro,
2055 	     int N1, int N2, struct f_struct *f_str) {
2056 
2057   /* int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; */
2058   int i, j, e, sc, score, del, k, t,  ci, cd;
2059   int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
2060   match_ptr mp, first;
2061   struct wgt *wt, *ww;
2062 
2063   /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
2064   sc = -gop-gext; f_str->smgl_s.C[0][0] = 0;
2065 
2066   cI = f_str->smgl_s.I;
2067   if (N1) cI[0] = -gext; else cI[0] = sc;
2068 
2069   for (j = 1; j <= ey-y+1; j++) {
2070     if (j % 3== 0) {
2071       f_str->smgl_s.C[0][j] = sc;
2072       sc -= gext;
2073       cI[j] = sc-gop;
2074     }
2075     else {
2076       cI[j] = f_str->smgl_s.C[0][j] = -10000;
2077     }
2078     f_str->smgl_s.st[0][j] = 5;
2079   }
2080 
2081   lC = &f_str->smgl_s.C[0][0];
2082   cD = f_str->smgl_s.D; cD[0] = cD[1] = cD[2] = -10000;
2083   for (i = 1; i <= ex-x+1; i++) {
2084     cC = &f_str->smgl_s.C[i][0];
2085     wt = f_str->weight1[pro[i+x-1]]; cst = &f_str->smgl_s.st[i][0];
2086     for (j = 0; j <=ey-y+1; j++) {
2087       ci = cI[j];
2088       cd= cD[j];
2089       t = j+y;
2090       ww = &wt[(unsigned char) dnap[t-3]];
2091       if (j >= 4) {
2092 	sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;
2093 	e4 = lC[j-4]+ww->iv; del = 3;
2094 	if (e2 > sc) { sc = e2; del = 2;}
2095 	if (e4 >= sc) { sc = e4; del = 4;}
2096       } else {
2097 	if (j == 3) {
2098 	  sc = lC[0]+ww->iii; del =3;
2099 	} else if (j == 2) {
2100 	  sc = lC[0]+ww->ii; del = 2;
2101 	} else {sc = -10000; del = 0;}
2102       }
2103       if (sc < ci) {
2104 	sc = ci; del = 0;
2105       }
2106       if (sc <= cd) {
2107 	sc = cd;
2108 	del = 5;
2109       }
2110       cC[j] = sc;
2111       sc -= gop;
2112       if (sc <= cd) {
2113 	del += 10;
2114 	cD[j+3] = cd - gext;
2115       } else cD[j+3] = sc -gext;
2116       if (sc < ci) {
2117 	del += 20;
2118 	cI[j] = ci-gext;
2119       } else cI[j] = sc-gext;
2120       *(cst++) = del;
2121     }
2122     lC = cC;
2123   }
2124   /*printf("small global score =%d\n", f_str->smgl_s.C[ex-x+1][ey-y+1]);*/
2125   if (N2 && cC[ey-y+1] <  ci+gop) f_str->smgl_s.st[ex-x+1][ey-y+1] =0;
2126   first = NULL; e = 1;
2127   for (i = ex+1, j = ey+1; i > x || j > y; i--) {
2128     mp = (match_ptr) ckalloc(sizeof(match_node));
2129     mp->i = i-1;
2130     k  = (t=f_str->smgl_s.st[i-x][j-y])%10;
2131     mp->j = j-1;
2132     if (e == 5 && (t/10)%2 == 1) k = 5;
2133     if (e == 0 && (t/20)== 1) k = 0;
2134     if (k == 5) { j -= 3; i++; e=5;}
2135     else {j -= k;if (k==0) e= 0; else e = 1;}
2136     mp->l = k;
2137     mp->next = first;
2138     first = mp;
2139   }
2140 
2141   /*	for (i = 0; i <= ex-x; i++) {
2142 	for (j = 0; j <= ey-y; j++)
2143 	printf("%d ", C[i][j]);
2144 	printf("\n");
2145 	}
2146   */
2147   return first;
2148 }
2149 
2150 #define XTERNAL
2151 #include "upam.h"
2152 
2153 void
2154 display_alig(int *a, unsigned char *dna, unsigned char *pro,
2155 	     int length, int ld, struct f_struct *f_str)
2156 {
2157   int len = 0, i, j, x, y, lines, k, iaa;
2158   static char line1[100], line2[100], line3[100],
2159     tmp[10] = "         ", *st;
2160   char *dna1, c1, c2, c3;
2161 
2162   line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
2163 
2164   printf("\n%5d\n%5d", y+3, x);
2165   for (len = 0, j = 2, lines = 0; j < length; j++) {
2166     i = a[j];
2167     line3[len] = ' ';
2168     switch (i) {
2169     case 3:
2170       y += 3;
2171       line2[len] = NCBIstdaa[iaa=pro[x++]];
2172       line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
2173       if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
2174 	line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
2175       break;
2176     case 2:
2177       y += 2;
2178       line1[len] = '\\';
2179       line2[len++] = ' ';
2180       line2[len] = NCBIstdaa[iaa=pro[x++]];
2181       line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
2182       line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
2183       break;
2184     case 4:
2185       y += 4;
2186       line1[len] = '/';
2187       line2[len++] = ' ';
2188       line2[len] = NCBIstdaa[iaa=pro[x++]];
2189       line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
2190       line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
2191       break;
2192     case 5:
2193       y += 3;
2194       line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
2195       line2[len] = '-';
2196       break;
2197     case 0:
2198       line1[len] = '-';
2199       line2[len] = NCBIstdaa[pro[x++]];
2200       break;
2201     }
2202     len++;
2203     line1[len] = line2[len]  = line3[len]  = '\0';
2204     if (len >= WIDTH) {
2205       for (k = 10; k <= WIDTH; k+=10)
2206 	printf("    .    :");
2207       if (k-5 < WIDTH) printf("    .");
2208       c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
2209       line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
2210       printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
2211       line1[WIDTH] = c1; line2[WIDTH] = c2;
2212       strncpy(line1, &line1[WIDTH], sizeof(line1)-1);
2213       strncpy(line2, &line2[WIDTH], sizeof(line2)-1);
2214       strncpy(line3, &line3[WIDTH], sizeof(line3)-1);
2215       len = len - WIDTH;
2216       printf("\n%5d\n%5d", y+3, x);
2217     }
2218   }
2219   for (k = 10; k < len; k+=10)
2220     printf("    .    :");
2221   if (k-5 < len) printf("    .");
2222   printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
2223 }
2224 
2225 
2226 /* alignment store the operation that align the protein and dna sequence.
2227    The code of the number in the array is as follows:
2228    0:     delete of an amino acid.
2229    2:     frame shift, 2 nucleotides match with an amino acid
2230    3:     match an  amino acid with a codon
2231    4:     the other type of frame shift
2232    5:     delete of a codon
2233 
2234 
2235    Also the first two element of the array stores the starting point
2236    in the protein and dna sequences in the local alignment.
2237 
2238    Display looks like where WIDTH is assumed to be divisible by 10.
2239 
2240     0    .    :    .    :    .    :    .    :    .    :    .    :
2241      AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
2242           I S   G S  V F   N R Q L A     G S  V F   N R Q L A
2243      AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
2244 
2245    60    .    :    .    :    .    :    .    :    .    :    .    :
2246      /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
2247       G S  V F   N R Q L A     G S  V F   N R Q L A     G S  V F
2248       G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
2249 
2250 For frame shift, the middle row show the letter in the original sequence,
2251 and the letter in the top row is the amino acid that is chose by the
2252 alignment (translated codon chosen from 4 nucleotides, or 2+1).
2253 */
2254 
2255 /* fatal - print message and die */
2256 void
2257 fatal(msg)
2258      char *msg;
2259 {
2260   fprintf(stderr, "%s\n", msg);
2261   exit(1);
2262 }
2263 
2264 /* 10-Feb-2010 - fz_walign modified to ensure that the final alignment
2265    overlaps the initial lz_band() region.  In earlier versions, the
2266    final alignment (using pam2p[0]) might have been outside the band
2267    region */
2268 
2269 void
2270 fz_walign (const unsigned char *aa0, int n0,
2271 	   const unsigned char *aa1, int n1,
2272 	   int frame, int max_res,
2273 	   struct pstruct *ppst,
2274 	   struct f_struct *f_str,
2275 	   struct a_res_str *a_res,
2276 	   int score_thresh)
2277 {
2278   int score;
2279   int i, last_n1, itemp, n10;
2280   int hoff, nt_min, nt_max, n_nt, n_aa, w_fact;
2281   int l_min, l_max, window;
2282   unsigned char *fs, *fd;
2283   /*
2284   unsigned char *aa1_min_s, aa1_max_s;
2285   */
2286   unsigned char *local_aa1;
2287   int optflag_s;
2288   int itx;
2289   unsigned char *aa1x;
2290   struct score_count_s s_info = {0,0,0,0};;
2291 
2292 #ifndef TFAST
2293   do_fastz (f_str->aa0x, n0, aa1, n1, f_str->aa0v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
2294 #else
2295   /* make translated sequence */
2296   last_n1 = 0;
2297   aa1x = f_str->aa1x;
2298   for (itx= frame*3; itx< frame*3+3; itx++) {
2299     n10  = saatran(aa1,&aa1x[last_n1],n1,itx);
2300     last_n1 += n10+1;
2301   }
2302   n10 = last_n1-1;
2303 
2304   /* do_fastz (lz_band) also needs a pre-computed number series */
2305   if (frame == 0) {
2306     pre_com(aa1, n1, f_str->aa1v);
2307   }
2308   else {
2309     pre_com_r(aa1, n1, f_str->aa1v);
2310   }
2311 
2312   do_fastz (aa0, n0, f_str->aa1x, n10, f_str->aa1v, ppst, f_str, &a_res->rst, &hoff, 1, &s_info);
2313 #endif
2314 
2315   if (a_res->rst.score[ppst->score_ix] < score_thresh) {
2316     a_res->sw_score = 0;
2317     a_res->n1 = n1;
2318     return;
2319   }
2320 
2321 #ifndef TFAST
2322   window = min(n1, ppst->param_u.fa.optwid);
2323   l_min = max(0, -window - hoff);
2324   l_max = min(n1, n0-hoff+window);
2325   local_aa1 = (unsigned char *)aa1;
2326   if (l_min > 0 || l_max < n1 -1 ) {
2327     local_aa1 = (unsigned char *)calloc(l_max - l_min + 2,sizeof(unsigned char));
2328     local_aa1++;
2329     memcpy(local_aa1,aa1+l_min, l_max - l_min);
2330   }
2331 
2332   /*
2333   if (l_min > 0) {
2334     aa1_min_s = aa1[l_min-1];
2335     aa1[l_min-1] = '\0';
2336   }
2337   if (l_max < n1 - 1) {
2338     aa1_max_s = aa1[l_max];
2339     aa1[l_max] = '\0';
2340   }
2341   */
2342 #else
2343   window = min(n0, ppst->param_u.fa.optwid);
2344   if (frame==0) {
2345     l_min = max(0,(hoff-window)*3);
2346     l_max = min((hoff+window+n0)*3,n1);
2347 
2348     local_aa1 = (unsigned char *)aa1;
2349     if (l_min > 0 || l_max < n1 -1 ) {
2350       local_aa1 = (unsigned char *)calloc(l_max - l_min + 2,sizeof(unsigned char));
2351       local_aa1++;
2352       memcpy(local_aa1,aa1+l_min, l_max - l_min);
2353     }
2354 
2355     /*
2356     if (l_min > 0) {
2357       aa1_min_s = aa1[l_min-1];
2358       aa1[l_min-1] = '\0';
2359     }
2360     if (l_max < n1-1) {
2361       aa1_max_s = aa1[l_max];
2362       aa1[l_max] = '\0';
2363     }
2364     */
2365     /* re-do precomputed codon number series for limited region */
2366     pre_com(local_aa1, l_max - l_min, f_str->aa1v);
2367   }
2368   else {
2369     /* things are more complicated here because the mapping of hoff is
2370        with respect to the reversed aa1 */
2371 
2372     l_max = n1 - max(0,(hoff-window)*3);
2373     l_min = n1 - min((hoff+window+n0)*3,n1);
2374 
2375     local_aa1 = (unsigned char *)aa1;
2376     if (l_min > 0 || l_max < n1 -1 ) {
2377       local_aa1 = (unsigned char *)calloc(l_max - l_min + 2,sizeof(unsigned char));
2378       local_aa1++;
2379       memcpy(local_aa1,aa1+l_min, l_max - l_min);
2380     }
2381 
2382     /*
2383     if (l_min > 0) {
2384       aa1_min_s = aa1[l_min-1];
2385       aa1[l_min-1] = '\0';
2386     }
2387     if (l_max < n1-1) {
2388       aa1_max_s = aa1[l_max];
2389       aa1[l_max] = '\0';
2390     }
2391     */
2392 
2393     pre_com_r(local_aa1, l_max - l_min, f_str->aa1v);
2394   }
2395 #endif
2396 
2397   a_res->sw_score =
2398     pro_dna(
2399 #ifndef TFAST
2400 	    aa1+l_min, l_max - l_min,
2401 	    f_str->aa0v, n0-2,
2402 #else
2403 	    aa0, n0,
2404 	    f_str->aa1v, l_max - l_min-2,
2405 #endif
2406 	    ppst->pam2[0],
2407 	    -ppst->gdelval, -ppst->ggapval, -ppst->gshift,
2408 	    f_str, f_str->max_res, a_res);
2409 
2410   if (l_min > 0 || l_max < n1 - 1) { free(--local_aa1); }
2411   /*
2412   if (l_min > 0) {
2413     aa1[l_min-1] = aa1_min_s;
2414   }
2415   if (l_max < n1 - 1) {
2416     aa1[l_max] = aa1_max_s;
2417   }
2418   */
2419 #ifndef TFAST
2420   a_res->min0 += l_min;
2421   a_res->max0 += l_min;
2422 #else
2423   if (frame==1) {
2424     a_res->min1 += n1 - l_max;
2425     a_res->max1 += n1 - l_max;
2426   }
2427   else {
2428     a_res->min1 += l_min;
2429     a_res->max1 += l_min;
2430   }
2431 #endif
2432 
2433   /* display_alig(f_str->res,f_str->aa0v+2,aa1,*nres,n0-2,f_str); */
2434 }
2435 
2436 /*
2437   fz_malign is a recursive interface to fz_walign() that is called
2438   from do_walign(). fz_malign() first does an alignment, then checks
2439   to see if the score is greater than the threshold. If so, it tries
2440   doing a left and right alignment.
2441 
2442   In this implementation, the DNA sequence is preserved as DNA for
2443   TFAST, so that it can be sub-setted and translated correctly.  Thus,
2444   the translation required for f_str->aa1x and f_str->aa1v is done at
2445   each recursive level (in fz_walign).
2446  */
2447 
2448 struct a_res_str *
2449 fz_malign (const unsigned char *aa0, int n0,
2450 	   const unsigned char *aa1, int n1,
2451 	   int frame,
2452 	   int score_thresh, int max_res,
2453 	   struct pstruct *ppst,
2454 	   struct f_struct *f_str,
2455 	   struct a_res_str *cur_ares,
2456 	   int first_align)
2457 {
2458   struct a_res_str *tmpl_ares, *tmpr_ares, *this_ares;
2459   struct a_res_str *mtmpl_ares, *mtmpr_ares, *mt_next;
2460   int sq_start, sq_end, sq_save;
2461   int hoff, score_ix;
2462   int min_alen;
2463   struct rstruct rst;
2464   unsigned char *local_aa1;
2465   /*   char save_res; */
2466   int iphase, i;
2467   unsigned char *fd;
2468   int max_sub_score = -1;
2469 
2470   score_ix = ppst->score_ix;
2471 
2472 #ifdef TFAST
2473   min_alen = min(n0,MIN_LOCAL_LEN)*3;	/* n0 in aa, min_alen in nt */
2474 #else
2475   min_alen = min(n0/3,MIN_LOCAL_LEN);	/* no in nt, min_alen in aa */
2476 #endif
2477 
2478   /* now we need alignment storage - get it */
2479   if ((cur_ares->res = (int *)calloc((size_t)max_res,sizeof(int)))==NULL) {
2480     fprintf(stderr," *** cannot allocate alignment results array %d\n",max_res);
2481     exit(1);
2482   }
2483 
2484   cur_ares->next = NULL;
2485 
2486   fz_walign(aa0, n0, aa1, n1, frame, max_res, ppst, f_str, cur_ares, (first_align ? 1 : score_thresh));
2487 
2488   /* in cur_ares, min0,max0 are always protein, min1,max1 are always
2489      DNA, but n0 could be protein or DNA, depending on
2490      FASTY/TFASTY */
2491 
2492   if (!ppst->do_rep || cur_ares->rst.score[ppst->score_ix] < score_thresh) {
2493     return cur_ares;
2494   }
2495 
2496   /* have a score >= threshold - try left and right */
2497 
2498   /* in code below, cur_ares->min0/max0 always refers to aa
2499      cur_ares->min1/max1 always refers to nt
2500 
2501      however, things are more complex because if frame==1, then
2502      offsets are from the end (n1), not the beginning.  There is no
2503      frame==1 for fasty, only for TFASTY
2504   */
2505   cur_ares->v_start = sq_start = 0;
2506 #ifdef TFAST
2507   if (frame == 0) {sq_end = cur_ares->min1-1;}   /* aa1[sq_start --> sq_end] */
2508   else {sq_end = n1 - cur_ares->max1;}
2509   sq_save = sq_end;
2510 #else
2511   sq_save = sq_end = cur_ares->min0;
2512 #endif
2513   cur_ares->v_len = sq_end - sq_start;
2514 
2515   if (cur_ares->v_len >= min_alen) { /* try the left  */
2516       /* allocate a_res */
2517       tmpl_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
2518       local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char));
2519       local_aa1++;
2520       memcpy(local_aa1, aa1, cur_ares->v_len);
2521 
2522       /*
2523       save_res = aa1[sq_save];
2524       aa1[sq_save] = '\0';
2525       */
2526       tmpl_ares = fz_malign(aa0, n0, local_aa1, cur_ares->v_len,
2527 			    frame, score_thresh, max_res,
2528 			    ppst, f_str, tmpl_ares,0);
2529 
2530       free(--local_aa1);
2531       /*       aa1[sq_save] = save_res; */
2532 
2533       if (tmpl_ares->rst.score[ppst->score_ix] > score_thresh) {
2534 	max_sub_score = tmpl_ares->rst.score[ppst->score_ix];
2535 #ifdef TFAST
2536 	if (frame == 1) {
2537 	  for (this_ares = tmpl_ares; this_ares; this_ares = this_ares->next) {
2538 	    this_ares->v_start += n1 - sq_end;
2539 	    this_ares->min1 += n1 - sq_end;
2540 	    this_ares->max1 += n1 - sq_end;
2541 	  }
2542 	}
2543 #endif
2544       }
2545       else {
2546 	if (tmpl_ares->res) free(tmpl_ares->res);
2547 	free(tmpl_ares);
2548 	tmpl_ares=NULL;
2549       }
2550     }
2551     else {tmpl_ares = NULL;}
2552 
2553   /* now the right end */
2554   /* for fasty -- max positions refer to the aa,codon, not the next
2555      residue, so they must be incremented */
2556 
2557     sq_end = n1;
2558 #if TFAST
2559     if (frame == 0) {sq_start = cur_ares->max1+1;}
2560     else {sq_start = n1 - cur_ares->min1;}
2561 #else
2562     sq_start = cur_ares->max0+1;
2563 #endif
2564     sq_save = sq_start-1;
2565     cur_ares->v_len = sq_end - sq_start;
2566 
2567   if (cur_ares->v_len >= min_alen) { /* try the right  */
2568       /* allocate a_res */
2569       tmpr_ares = (struct a_res_str *)calloc(1, sizeof(struct a_res_str));
2570 
2571       /* find boundaries */
2572       local_aa1 = (unsigned char *)calloc(cur_ares->v_len+2,sizeof(unsigned char));
2573       local_aa1++;
2574       memcpy(local_aa1,aa1+sq_start,cur_ares->v_len);
2575       /*
2576       save_res = aa1[sq_save];
2577       aa1[sq_save] = '\0';
2578       */
2579 
2580       tmpr_ares = fz_malign(aa0, n0,
2581 			    local_aa1, cur_ares->v_len,
2582 			    frame,
2583 			    score_thresh, max_res,
2584 			    ppst, f_str, tmpr_ares,0);
2585       free(--local_aa1);
2586       /*
2587       aa1[sq_save] = save_res;
2588       */
2589 
2590       if (tmpr_ares->rst.score[ppst->score_ix] > score_thresh) {
2591 	/* adjust the left boundary */
2592 	for (this_ares = tmpr_ares; this_ares; this_ares = this_ares->next) {
2593 #ifndef TFAST
2594 	  this_ares->min0 += sq_start;
2595 	  this_ares->max0 += sq_start;
2596 #else
2597 	  if (frame == 0) {
2598 	    this_ares->v_start += sq_start;
2599 	    this_ares->min1 += sq_start;
2600 	    this_ares->max1 += sq_start;
2601 	  }
2602 #endif
2603 	}
2604 	if (tmpr_ares->rst.score[ppst->score_ix] > max_sub_score) {
2605 	  max_sub_score = tmpr_ares->rst.score[ppst->score_ix];
2606 	}
2607       }
2608       else {
2609 	if (tmpr_ares->res) free(tmpr_ares->res);
2610 	free(tmpr_ares);
2611 	tmpr_ares=NULL;
2612       }
2613     }
2614     else {tmpr_ares = NULL;}
2615 
2616     if (max_sub_score < score_thresh) return cur_ares;
2617 
2618     cur_ares = merge_ares_chains(cur_ares, tmpl_ares, score_ix, "left");
2619     cur_ares = merge_ares_chains(cur_ares, tmpr_ares, score_ix, "right");
2620 
2621     return cur_ares;
2622 }
2623 
2624 /* do_walign() can be called with aa0,n0 as nt (FASTY) or
2625    aa0,n0 as aa (TFASTY).  if aa0 is nt, then f_str->aa0x,y have the
2626    translations already.  if aa0 is aa, then f_str->aa1x,y must be
2627    generated.
2628 */
2629 
2630 struct a_res_str *
2631 do_walign (const unsigned char *aa0, int n0,
2632 	   const unsigned char *aa1, int n1,
2633 	   int frame, int repeat_thresh,
2634 	   struct pstruct *ppst,
2635 	   struct f_struct *f_str,
2636 	   int *have_ares)
2637 {
2638   struct a_res_str *a_res, *tmp_a_res;
2639   int a_res_index;
2640   int hoff, use_E_thresholds_s, optflag_s, optcut_s, optwid_s, score;
2641   int last_n1, itx, itt, n10, iphase;
2642   unsigned char *fs, *fd;
2643   struct rstruct rst;
2644 #ifdef DEBUG
2645   unsigned long adler32_crc;
2646 #endif
2647 
2648   *have_ares = 0x3;	/* set 0x2 bit to indicate local copy */
2649 
2650   if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
2651     fprintf(stderr," [do_walign] Cannot allocate a_res");
2652     return NULL;
2653   }
2654 
2655 #ifdef DEBUG
2656   adler32_crc = adler32(1L,aa1,n1);
2657 #endif
2658 
2659   f_str->frame = frame;	/* need frame for later pre_cons() in calcons() */
2660 
2661   use_E_thresholds_s = ppst->param_u.fa.use_E_thresholds;
2662   optflag_s = ppst->param_u.fa.optflag;
2663   optcut_s = ppst->param_u.fa.optcut;
2664   optwid_s = ppst->param_u.fa.optwid;
2665   ppst->param_u.fa.use_E_thresholds = 0;
2666   ppst->param_u.fa.optflag = 1;
2667   if (!ppst->param_u.fa.optwid_set) {
2668     ppst->param_u.fa.optwid *= 2;
2669   }
2670 
2671   a_res = fz_malign(aa0, n0, aa1, n1, frame,
2672 		    repeat_thresh, f_str->max_res,
2673 		    ppst, f_str, a_res, 1);
2674 
2675 #ifdef DEBUG
2676   if (adler32(1L,aa1,n1) != adler32_crc) {
2677     fprintf(stderr,"*** error [%s:%d] adler32_crc mismatch n1: %d\n",__FILE__, __LINE__, n1);
2678   }
2679 #endif
2680 
2681   a_res_index = 0;
2682   for (tmp_a_res=a_res; tmp_a_res; tmp_a_res = tmp_a_res->next) {
2683     tmp_a_res->index = a_res_index++;
2684   }
2685 
2686   ppst->param_u.fa.use_E_thresholds = use_E_thresholds_s;
2687   ppst->param_u.fa.optflag = optflag_s;
2688   ppst->param_u.fa.optwid = optwid_s;
2689   return a_res;
2690 }
2691 
2692 void
2693 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
2694 
2695 #ifdef TFAST
2696   int i, last_n1, itemp, n10;
2697   unsigned char *fs, *fd;
2698   int itx;
2699 
2700   /* make a precomputed codon number series */
2701   if (frame==0) {
2702     pre_com(aa1, n1, f_str->aa1v);
2703   }
2704   else { /* must do things backwards */
2705     pre_com_r(aa1, n1, f_str->aa1v);
2706   }
2707 #endif
2708 }
2709 
2710 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
2711 /* call from calcons, calc_id, calc_code */
2712 void
2713 aln_func_vals(int frame, struct a_struct *aln) {
2714 
2715 #ifndef TFAST
2716   aln->llrev = 0;
2717   aln->llfact = 1;
2718   aln->llmult = 1;
2719   aln->qlfact = 3;
2720   aln->frame = frame;
2721   if (frame > 0) aln->qlrev = 1;
2722   else aln->qlrev = 0;
2723   aln->llrev = 0;
2724 #else	/* TFASTX */
2725   aln->qlfact = 1;
2726   aln->qlrev = 0;
2727   aln->llfact = 3;
2728   aln->llmult = 1;
2729   aln->frame = frame;
2730   if (frame > 0) aln->llrev = 1;
2731   else aln->llrev = 0;
2732   aln->qlrev=0;
2733 #endif	/* TFASTX */
2734 }
2735 
2736 #include "structs.h"
2737 #include "a_mark.h"
2738 
2739 extern int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim);
2740 
2741 extern int
2742 next_annot_match(int *itmp, int *pam2aa0v, long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
2743 	   int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
2744 	   void *annot_stack, int *have_push_features, int *v_delta,
2745 	   struct annot_entry **region_p, struct annot_entry *tmp_region_p, int init_score);
2746 
2747 extern void
2748 comment_var(long i0, char sp0, long i1, char sp1, char o_sp1, char sim_char,
2749 	    const char *ann_comment, struct dyn_string_str *annot_var_dyn, int target, int d_type);
2750 
2751 void
2752 display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
2753 		      long i0, char sp0, long i1, char sp1, char sym,
2754 		      struct annot_entry **region0_p,
2755 		      struct annot_entry **region1_p,
2756 		      int score, double comp, int n0, int n1,
2757 		      void *pstat_void, int d_type);
2758 
2759 #define DP_FULL_FMT 1	/* Region: score: bits: id: ... */
2760 #define Q_TARGET 0
2761 #define L_TARGET 1
2762 
2763 int seq_pos(int pos, int rev,int off);
2764 
2765 int
2766 calc_cons_a(const unsigned char *aa0, int n0,
2767 	    const unsigned char *aa1, int n1,
2768 	    int *nc,
2769 	    struct a_struct *aln,
2770 	    struct a_res_str *a_res,
2771 	    struct pstruct *ppst,
2772 	    char *seqc0, char *seqc1, char *seqca, int *cumm_seq_score,
2773 	    const unsigned char *ann_arr,
2774 	    const unsigned char *aa0a, const struct annot_str *annot0_p, char *seqc0a,
2775 	    const unsigned char *aa1a, const struct annot_str *annot1_p, char *seqc1a,
2776 	    int *score_delta,
2777 	    struct dyn_string_str *annot_var_dyn,
2778 	    struct f_struct *f_str,
2779 	    void *pstat_void
2780 	    )
2781 {
2782   int i0, i1;
2783   int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
2784   char *sp0, *sp0a, *sp1, *sp1a, *spa, t_spa;
2785   int *i_spa;
2786   const unsigned char *sq;
2787   unsigned char aap;
2788 
2789   const unsigned char *ap0, *ap1;
2790   const unsigned char *ap1a;	/* ap1 always points to protein, and
2791 				   only protein has annotations */
2792   int *rp, *rpmax;
2793   int have_ann = 0;
2794 
2795   /* variables for variant changes */
2796   char tmp_str[MAX_LSTR];
2797   int *annot_stack, annot_stack_n, annot_top=0;
2798   char *sim_sym = aln_map_sym[MX_ACC];
2799   struct annot_entry **s_annot1_arr_p, *region1_p, pre_annot1;
2800   struct annot_entry **s_annot0_arr_p, *region0_p, pre_annot0;
2801   struct annot_entry *this_annot_p;
2802   int  i1_annot, v_delta, v_tmp;
2803   int have_push_features, prev_match;
2804   long i0_offset, i1_offset;
2805 
2806   char *ann_comment;
2807 
2808   *score_delta = 0;
2809 
2810   if (ppst->ext_sq_set) {sq = ppst->sqx;}
2811   else {sq = ppst->sq;}
2812 
2813   /* res[0] has start of protein sequence */
2814   /* res[1] has start of translated DNA sequence */
2815 
2816 #ifndef TFAST
2817   aln->amin1 = aln->smin1 = a_res->min0;	/* start in protein sequence */
2818   aln->amin0 = aln->smin0 = a_res->min1;	/* start in DNA/codon sequence */
2819 
2820   i0_offset = aln->q_offset;
2821   i1_offset = aln->l_offset;
2822 
2823   ap0 = f_str->aa0v;		/* computed codons -> ap0*/
2824   ap1 = aa1;			/* protein sequence -> ap1 */
2825 
2826   sp0 = seqc0;	/* translated DNA */
2827   sp1 = seqc1;	/* protein */
2828 
2829   have_ann = (seqc0a != NULL && aa1a != NULL);
2830   ap1a = aa1a;
2831   sp1a = seqc1a;	/* protein library can have annotation */
2832   sp0a = seqc0a;	/* sp0a is always ' ' - no translated
2833 			   annotation */
2834 #else	/* TFASTYZ */
2835   if (aln->frame == 0) {
2836     pre_com(aa1, n1, f_str->aa1v);
2837   }
2838   else {
2839     pre_com_r(aa1, n1, f_str->aa1v);
2840   }
2841 
2842   aln->amin0 = aln->smin0 = a_res->min0;	/* start in protein sequence */
2843   aln->amin1 = aln->smin1 = a_res->min1;	/* start in codon sequence */
2844 
2845   i1_offset = aln->q_offset;
2846   i0_offset = aln->l_offset;
2847 
2848   ap1 = aa0;			/* protein sequence */
2849   ap0 = f_str->aa1v;		/* computed codons -> ap0*/
2850 
2851   sp0 = seqc1;	/* protein */
2852   sp1 = seqc0;	/* translated DNA */
2853 
2854   have_ann = (seqc0a != NULL && aa0a != NULL);
2855   ap1a = aa0a;
2856   sp1a = seqc0a;	/* protein query can have annotation */
2857   sp0a = seqc1a;	/* sp0a is always ' ' - no translated
2858 			   annotation */
2859 #endif
2860   spa = seqca;
2861   i_spa = cumm_seq_score;
2862 
2863   rp = a_res->res;			/* start of alignment info */
2864   rpmax = &a_res->res[a_res->nres];	/* end of alignment info */
2865 
2866   lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
2867   i0 = a_res->min1-3;	/* start of codon sequence */
2868   i1 = a_res->min0;	/* start of protein sequence */
2869 
2870   v_delta = 0;
2871   i1_annot = 0;
2872   region0_p = region1_p = NULL;
2873   s_annot0_arr_p = s_annot1_arr_p = NULL;
2874   annot_stack = NULL;
2875   have_push_features = prev_match = 0;
2876   if (have_ann) {
2877     if (annot1_p && annot1_p->n_annot > 0)  annot_stack = init_stack(64,64);
2878     if (annot1_p && annot1_p->n_annot > 0) {
2879       s_annot1_arr_p = annot1_p->s_annot_arr_p;
2880       while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1) {
2881 	if (s_annot1_arr_p[i1_annot]->label == '[') {
2882 	  memcpy(&pre_annot1,s_annot1_arr_p[i1_annot], sizeof(struct annot_entry));
2883 	  pre_annot1.pos = aln->amin1 + i1_offset;
2884 	  pre_annot1.a_pos = aln->amin0 + i0_offset;
2885 	  region1_p = &pre_annot1;
2886 	  region1_p->score = region1_p->n_aln = region1_p->n_ident = 0;
2887 	}
2888 	else if (s_annot1_arr_p[i1_annot]->label == ']') {
2889 	  region1_p = NULL;
2890 	}
2891 	i1_annot++;
2892       }
2893     }
2894   }
2895 
2896   while (rp < rpmax ) {
2897     switch (*rp++) {
2898     case 3:		/* match */
2899       i0 += 3;
2900 
2901       *sp1 = sq[aap=ap1[i1]];
2902       *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
2903       itmp = ppst->pam2[0][aap][pascii[*sp0]];
2904 
2905 
2906       if (have_ann) {
2907 	*sp1a = ann_arr[ap1a[i1]];
2908 	*sp0a = ' ';
2909 	if (s_annot1_arr_p) {
2910 	  if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
2911 	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
2912 					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
2913 					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
2914 					&ann_comment, annot_stack, &have_push_features, &v_delta,
2915 					&region1_p, &pre_annot1, 0);
2916 
2917 	    /* must be out of the loop to capture the last value */
2918 	    if (ppst->sq[ap1[i1]] != *sp1) {
2919 	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
2920 
2921 	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
2922 			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
2923 			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
2924 			  annot_var_dyn,1,1);
2925 	    }
2926 	  }
2927 	  prev_match = 1;
2928 	  if (region1_p) {region1_p->score += itmp;}
2929 	}
2930 	sp0a++; sp1a++;
2931       }
2932 
2933       *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
2934 
2935       if (region1_p) {
2936 	region1_p->n_aln++;
2937 	if (*spa == M_IDENT) {region1_p->n_ident++;}
2938       }
2939 
2940       if (cumm_seq_score) *i_spa++ = itmp;
2941 
2942       if (have_ann && have_push_features) {
2943 	display_push_features(annot_stack, annot_var_dyn,
2944 			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
2945 			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
2946 			      sim_sym[*spa], &region0_p, &region1_p,
2947 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
2948 	have_push_features = 0;
2949       }
2950 
2951       i1++;
2952       sp0++; sp1++; spa++;
2953       lenc++;
2954       break;
2955     case 2:		/* frame shift +2, then match */
2956       nfs++;
2957       i0 += 2;
2958       *sp0++ = '/';
2959       *sp1++ = '-';
2960       *spa++ = M_DEL;
2961       if (cumm_seq_score) *i_spa++ = ppst->gshift;
2962 
2963       if (have_ann) {*sp0a++ = *sp1a++ = ' ';}
2964       not_c++;
2965 
2966       *sp1 = sq[aap=ap1[i1]];
2967       *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
2968       itmp = ppst->pam2[0][pascii[*sp0]][aap];
2969 
2970       if (have_ann) {
2971 	*sp1a = ann_arr[ap1a[i1]];
2972 	*sp0a = ' ';
2973 	if (s_annot1_arr_p) {
2974 	  if (i1 + i1_offset == s_annot1_arr_p[i1_annot]->pos) {
2975 	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
2976 					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
2977 					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
2978 					&ann_comment, annot_stack, &have_push_features, &v_delta,
2979 					&region1_p, &pre_annot1, 0);
2980 
2981 	    /* must be out of the loop to capture the last value */
2982 	    if (ppst->sq[ap1[i1]] != *sp1) {
2983 	      t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
2984 
2985 	      comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
2986 			  i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
2987 			  sq[ap1[i1]], sim_sym[t_spa], ann_comment,
2988 			  annot_var_dyn,1,DP_FULL_FMT);
2989 	    }
2990 	  }
2991 	  if (region1_p) {
2992 	    region1_p->score += ppst->gshift;
2993 	    region1_p->score += itmp;
2994 	  }
2995 	  prev_match = 1;
2996 	}
2997 	sp0a++; sp1a++;
2998       }
2999 
3000       *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
3001 
3002       if (region1_p) {
3003 	region1_p->n_aln++;
3004 	if (*spa == M_IDENT) {region1_p->n_ident++;}
3005       }
3006 
3007       if (have_ann && have_push_features) {
3008 	display_push_features(annot_stack, annot_var_dyn,
3009 			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
3010 			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
3011 			      sim_sym[*spa], &region0_p, &region1_p,
3012 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
3013 	have_push_features = 0;
3014       }
3015 
3016       if (cumm_seq_score) *i_spa++ = itmp;
3017 
3018       i1++;
3019       sp0++; sp1++; spa++;
3020       lenc++;
3021       break;
3022     case 4:		/* frame shift, -1, then match */
3023       nfs++;
3024       i0 += 4;
3025       if (have_ann) {
3026 	*sp1a++ = *sp0a++ = ' ';
3027       }
3028       *sp0++ = '\\';
3029       *sp1++ = '-';
3030       *spa++ = M_DEL;
3031       if (cumm_seq_score) *i_spa++ = ppst->gshift;
3032 
3033       not_c++;
3034 
3035       *sp1 = sq[aap=ap1[i1]];
3036       *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
3037       itmp = ppst->pam2[0][pascii[*sp0]][aap];
3038 
3039       if (have_ann) {
3040 	*sp1a = ann_arr[ap1a[i1]];
3041 	*sp0a = ' ';
3042 	if (s_annot1_arr_p && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3043 	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[*sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
3044 				      i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
3045 				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, &ann_comment,
3046 				      annot_stack, &have_push_features, &v_delta, &region1_p, &pre_annot1, 0);
3047 
3048 	  /* must be out of the loop to capture the last value */
3049 	  if (ppst->sq[ap1[i1]] != *sp1) {
3050 	    t_spa = align_type(itmp, *sp0, *sp1, 0, NULL, ppst->pam_x_id_sim);
3051 	    comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
3052 			i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
3053 			sq[ap1[i1]], sim_sym[t_spa], ann_comment,
3054 			annot_var_dyn,1,DP_FULL_FMT);
3055 	  }
3056 	  prev_match = 1;
3057 	  if (region1_p) {region1_p->score += itmp;}
3058 	}
3059 	sp0a++; sp1a++;
3060       }
3061 
3062       *spa = align_type(itmp, *sp0, *sp1, 0, aln, ppst->pam_x_id_sim);
3063       if (region1_p) {
3064 	region1_p->n_aln++;
3065 	if (*spa == M_IDENT) {region1_p->n_ident++;}
3066       }
3067 
3068       if (cumm_seq_score) *i_spa++ = itmp;
3069 
3070       if (have_ann && have_push_features) {
3071 	display_push_features(annot_stack, annot_var_dyn,
3072 			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
3073 			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
3074 			      sim_sym[*spa], &region0_p, &region1_p,
3075 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
3076 	have_push_features = 0;
3077       }
3078 
3079       i1++;
3080       sp0++; sp1++; spa++;
3081       lenc++;
3082       break;
3083     case 5:		/* insertion in 0 */
3084       if (have_ann) {
3085 	*sp1a++ = *sp0a++ = ' ';
3086       }
3087       i0 += 3;
3088       *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
3089       *sp1++ = '-';
3090       *spa++ = M_DEL;
3091       lenc++;
3092       ngap_p++;
3093       if (cumm_seq_score) *i_spa++ = ppst->gdelval;
3094       break;
3095     case 0:		/* insertion in 1 */
3096       *sp0++ = '-';
3097       *sp1++ = sq[ap1[i1]];
3098       *spa++ = M_DEL;
3099       if (cumm_seq_score) {
3100 	if (prev_match) *i_spa = ppst->gdelval;
3101 	*i_spa++ += ppst->gdelval;
3102       }
3103 
3104       if (have_ann) {
3105 	*sp0a = ' ';
3106 	*sp1a = ann_arr[ap1a[i1]];
3107 
3108 	if (s_annot1_arr_p) {
3109 	  /* coordiates are much more complex for next_annot_match,
3110 	     and comment_var, because they may need to be reversed */
3111 
3112 	  if (i1 + i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3113 	    i1_annot = next_annot_match(&itmp, ppst->pam2[0][ap0[i0]], i1_offset+seq_pos(i1,aln->llrev,0),
3114 					i0_offset+seq_pos(i0,aln->qlrev,0), sp1, sp1a, sq,
3115 					i1_annot, annot1_p->n_annot, s_annot1_arr_p,
3116 					&ann_comment, annot_stack, &have_push_features, &v_delta,
3117 					&region1_p, &pre_annot1, 0);
3118 	  }
3119 
3120 	  if (region1_p) {
3121 	    if (prev_match) region1_p->score += ppst->gdelval;
3122 	    region1_p->score += ppst->ggapval;
3123 	    region1_p->n_aln++;
3124 	  }
3125 	  prev_match = 0;
3126 	}
3127 	sp0a++;  sp1a++;
3128       }
3129 
3130       if (have_ann && have_push_features) {
3131 	display_push_features(annot_stack, annot_var_dyn,
3132 			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
3133 			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
3134 			      sim_sym[*spa], &region0_p, &region1_p,
3135 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
3136 	have_push_features = 0;
3137       }
3138 
3139       i1++;
3140       lenc++;
3141       ngap_d++;
3142       break;
3143     }
3144   }
3145 
3146   if (have_ann) {
3147     *sp0a = '\0';
3148     if (s_annot1_arr_p) {
3149       have_push_features = 0;
3150       while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < n1) {
3151 	if (s_annot1_arr_p[i1_annot]->label == '[') break;
3152 	if (s_annot1_arr_p[i1_annot]->label == ']') {
3153 	  push_stack(annot_stack, s_annot1_arr_p[i1_annot]);
3154 	  have_push_features = 1;
3155 	}
3156 	i1_annot++;
3157       }
3158       if (have_push_features) {
3159 	display_push_features(annot_stack, annot_var_dyn,
3160 			      i0_offset+seq_pos(i0,aln->qlrev,0), *sp0,
3161 			      i1_offset+seq_pos(i1,aln->llrev,0), *sp1,
3162 			      sim_sym[*spa],&region0_p, &region1_p,
3163 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, DP_FULL_FMT);
3164 	have_push_features = 0;
3165       }
3166     }
3167   }
3168   *spa = '\0';
3169 
3170 #ifndef TFAST
3171   aln->amax0 = i0+3;	/* end of codon sequence */
3172   aln->amax1 = i1;	/* end of protein sequence */
3173   aln->ngap_q = ngap_d;
3174   aln->ngap_l = ngap_p;
3175 #else
3176   aln->amax1 = i0+3;	/* end of codon sequence */
3177   aln->amax0 = i1;	/* end of protein sequence */
3178   aln->ngap_q = ngap_p;
3179   aln->ngap_l = ngap_d;
3180 #endif
3181   aln->calc_last_set = 1;
3182   aln->nfs = nfs;
3183   aln->amin0 = aln->smin0;
3184   aln->amin1 = aln->smin1;
3185 
3186   *score_delta = v_delta;
3187 
3188   free_stack(annot_stack);
3189 
3190   if (lenc < 0) lenc = 1;
3191 
3192   *nc = lenc;
3193 /*	now we have the middle, get the right end */
3194 
3195   return lenc+not_c;
3196 }
3197 
3198 void
3199 calc_astruct(struct a_struct *aln_p, struct a_res_str *a_res_p, struct f_struct *f_str) {
3200 
3201   aln_p->calc_last_set = 0;
3202 
3203 #ifndef TFAST	/* FASTX */
3204   aln_p->amin1 = a_res_p->min0;	/* prot */
3205   aln_p->amin0 = a_res_p->min1;	/* DNA */
3206   aln_p->amax1 = a_res_p->max0;	/* prot */
3207   aln_p->amax0 = a_res_p->max1;	/* DNA */
3208 #else		/* TFASTX */
3209   aln_p->amin0 = a_res_p->min0;	/* DNA */
3210   aln_p->amin1 = a_res_p->min1;	/* prot */
3211   aln_p->amax0 = a_res_p->max0;	/* DNA */
3212   aln_p->amax1 = a_res_p->max1;	/* prot */
3213 #endif
3214 }
3215 
3216 /* build an array of match/ins/del - length strings */
3217 
3218 /* modified 10-June-2014 to distinguish matches from mismatches, op=1
3219    (previously unused) indicates an aligned non-identity */
3220 
3221 /* op_codes are:  0 - aa insertion
3222    		  1 - (now) aligned non-identity
3223    		  2 - -1 frameshift
3224    		  3 - aligned identity
3225    		  4 - +1 frameshift
3226    		  5 - codon insertion
3227 */
3228 
3229 static struct update_code_str *
3230 init_update_data(show_code) {
3231 
3232   struct update_code_str *update_data_p;
3233 
3234   if ((update_data_p = (struct update_code_str *)calloc(1,sizeof(struct update_code_str)))==NULL) {
3235     fprintf(stderr,"*** error [%s:%d] - init_update_data(): cannot allocate update_code_str\n",
3236 	      __FILE__, __LINE__);
3237     return NULL;
3238   }
3239 
3240   update_data_p->p_op_cnt = 0;
3241   update_data_p->show_code = show_code;
3242 
3243   if ((show_code & SHOW_CODE_MASK) == SHOW_CODE_CIGAR) {
3244     update_data_p->op_map = cigar_code;
3245     update_data_p->cigar_order = 1;
3246   }
3247   else {
3248     update_data_p->op_map = ori_code;
3249     update_data_p->cigar_order = 0;
3250   }
3251 
3252   if ((show_code & SHOW_CODE_EXT) == SHOW_CODE_EXT) {
3253     update_data_p->show_ext = 1;
3254   }
3255   else {
3256     update_data_p->show_ext = 0;
3257   }
3258 
3259   return update_data_p;
3260 }
3261 
3262 static void
3263 close_update_data(char *al_str, int al_str_max,
3264 		  struct update_code_str *up_dp) {
3265   char tmp_cnt[MAX_SSTR];
3266 
3267   if (!up_dp) return;
3268   sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx, up_dp->p_op_cnt);
3269   strncat(al_str,tmp_cnt,al_str_max);
3270 
3271   free(up_dp);
3272 }
3273 
3274 /* update_indel_code() has been modified to work more correctly with
3275    ggsearch/glsearch, which, because alignments can start with either
3276    insertions or deletions, can produce an initial code of "0=".  When
3277    that happens, it is ignored and no code is added.
3278 
3279    *al_str - alignment string [al_str_max] - not dynamic
3280    op -- encoded operation, currently 0=match, 1-delete, 2-insert, 3-term-match, 4-mismatch
3281    op_cnt -- length of run
3282    show_code -- SHOW_CODE_CIGAR uses cigar_code, otherwise legacy
3283 */
3284 
3285 /* update_indel_code() is called for insertions and deletions
3286    update_match_code() is called for every match
3287 */
3288 
3289 static void
3290 sprintf_code(char *tmp_str, struct update_code_str *up_dp, int op_idx, int op_cnt) {
3291 
3292   if (op_cnt == 0) return;
3293 
3294   if (up_dp->cigar_order) {
3295     sprintf(tmp_str,"%d%c",op_cnt,up_dp->op_map[op_idx]);
3296   }
3297   else {
3298     sprintf(tmp_str,"%c%d",up_dp->op_map[op_idx],op_cnt);
3299   }
3300 }
3301 
3302 static void
3303 update_code(char *al_str, int al_str_max,
3304 	    struct update_code_str *up_dp, int op,
3305 	    int sim_code,  unsigned char sp0, unsigned char sp1)
3306 {
3307   char tmp_cnt[MAX_SSTR];
3308 
3309   /* there are two kinds of "op's", one time and accumulating */
3310   /* op == 2, 4 are one-time: */
3311 
3312   switch (op) {
3313   case 2:
3314   case 4:
3315     sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
3316     strncat(al_str,tmp_cnt,al_str_max);
3317     sprintf_code(tmp_cnt,up_dp, op, 1);
3318     strncat(al_str,tmp_cnt,al_str_max);
3319     up_dp->p_op_cnt = 0;
3320     break;
3321   case 0:
3322   case 5:
3323     if (op == up_dp->p_op_idx) {
3324       up_dp->p_op_cnt++;
3325     }
3326     else {
3327       sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
3328       strncat(al_str,tmp_cnt,al_str_max);
3329       up_dp->p_op_idx = op;
3330       up_dp->p_op_cnt = 1;
3331     }
3332     break;
3333   case 1:
3334   case 3:
3335     if (sp0 != '*' && sp1 != '*') {	/* default case, not termination */
3336       if (up_dp->show_ext) {
3337 	if (sim_code != M_IDENT) { op = 1;}
3338       }
3339     }
3340     else {	/* have a termination codon, output for !SHOW_CODE_CIGAR */
3341       if (!up_dp->cigar_order) {
3342 	if (sp0 == '*' || sp1 == '*') { op = 6;}
3343       }
3344       else if (up_dp->show_ext && (sp0 != sp1)) { op = 1;}
3345     }
3346 
3347     if (up_dp->p_op_cnt == 0) {
3348       up_dp->p_op_idx = op;
3349       up_dp->p_op_cnt = 1;
3350     }
3351     else if (op != up_dp->p_op_idx) {
3352       sprintf_code(tmp_cnt,up_dp, up_dp->p_op_idx,up_dp->p_op_cnt);
3353       strncat(al_str,tmp_cnt,al_str_max);
3354       up_dp->p_op_idx = op;
3355       up_dp->p_op_cnt = 1;
3356     }
3357     else {
3358       up_dp->p_op_cnt++;
3359     }
3360     break;
3361   }
3362   return;
3363 }
3364 
3365 int calc_code(const unsigned char *aa0, int n0,
3366 	      const unsigned char *aa1, int n1,
3367 	      struct a_struct *aln,
3368 	      struct a_res_str *a_res,
3369 	      struct pstruct *ppst,
3370 	      char *al_str, int al_str_n,
3371 	      const unsigned char *ann_arr,
3372 	      const unsigned char *aa0a,
3373 	      const struct annot_str *annot0_p,
3374 	      const unsigned char *aa1a,
3375 	      const struct annot_str *annot1_p,
3376 	      struct dyn_string_str *annot_code_dyn,
3377 	      int *score_delta,
3378 	      struct f_struct *f_str,
3379 	      void *pstat_void,
3380 	      int display_code)
3381 {
3382   int i0, i1;
3383   int lenc, not_c, ngap_d, ngap_p, nfs;
3384   char sp0, sp1;
3385   struct update_code_str *update_data_p;
3386   char op_char[10], ann_ch0, ann_ch1;
3387   unsigned char aap;
3388   const unsigned char *ap0, *ap1, *ap1a;
3389   int *rp, *rpmax;
3390   const unsigned char *sq;
3391   int have_ann = 0;
3392   char tmp_astr[MAX_STR];
3393   int sim_code, t_spa;
3394   int show_code, annot_fmt;
3395   char *sim_sym= aln_map_sym[MX_ACC];
3396   /* variables for variant changes */
3397   void *annot_stack;
3398   struct annot_entry **s_annot1_arr_p, *region1_p, pre_annot1;
3399   struct annot_entry **s_annot0_arr_p, *region0_p, pre_annot0;
3400   int  itmp, i1_annot, v_delta, v_tmp;
3401   int have_push_features, prev_match;
3402   long i0_offset, i1_offset;
3403 
3404   *score_delta = 0;
3405 
3406   show_code = (display_code & SHOW_CODE_MASK + SHOW_CODE_EXT);
3407   annot_fmt = 2;
3408   if (display_code & SHOW_ANNOT_FULL) {
3409     annot_fmt = 1;
3410   }
3411 
3412   if (ppst->ext_sq_set) {sq = ppst->sqx;}
3413   else {sq = ppst->sq;}
3414 
3415   /* don't fill in the ends */
3416 #ifndef TFAST
3417   ap0 = f_str->aa0v;		/* computed codons -> ap0*/
3418   ap1 = aa1;			/* protein sequence -> ap1 */
3419   aln->smin1 = a_res->min0;	/* start in protein sequence */
3420   aln->smin0 = a_res->min1;		/* start in DNA/codon sequence */
3421 
3422   i0_offset = aln->q_offset;
3423   i1_offset = aln->l_offset;
3424 
3425   have_ann = (ann_arr[0] != '\0' && aa1a != NULL);
3426   ap1a = aa1a;
3427 #else	/* TFASTYZ */
3428   if (aln->frame == 0) { pre_com(aa1, n1, f_str->aa1v);}
3429   else { pre_com_r(aa1, n1, f_str->aa1v);}
3430 
3431   ap0 = f_str->aa1v;		/* computed codons -> ap0*/
3432   ap1 = aa0;			/* protein sequence */
3433   aln->smin0 = a_res->min0;	/* start in protein sequence */
3434   aln->smin1 = a_res->min1;		/* start in codon sequence */
3435 
3436   i1_offset = aln->q_offset;
3437   i0_offset = aln->l_offset;
3438 
3439   have_ann = (ann_arr[0] != '\0' && aa0a != NULL);
3440   ap1a = aa0a;
3441 #endif
3442 
3443   rp = a_res->res;			/* start of alignment info */
3444   rpmax = &a_res->res[a_res->nres];		/* end of alignment info */
3445 
3446 /* now get the middle */
3447 
3448   lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
3449   update_data_p = init_update_data(show_code);
3450 
3451   i0 = a_res->min1-3;	/* start of codon sequence */
3452   i1 = a_res->min0;	/* start of protein sequence */
3453 
3454   v_delta = 0;
3455   i1_annot = 0;
3456   have_push_features = prev_match = 0;
3457   region0_p = region1_p = NULL;
3458   s_annot0_arr_p = s_annot1_arr_p = NULL;
3459   annot_stack = NULL;
3460   if (have_ann) {
3461     if (annot1_p && annot1_p->n_annot > 0) annot_stack = init_stack(64,64);
3462     if (annot1_p && annot1_p->n_annot > 0) {
3463       s_annot1_arr_p = annot1_p->s_annot_arr_p;
3464       while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1) {
3465 	if (s_annot1_arr_p[i1_annot]->label == '[') {
3466 	  memcpy(&pre_annot1,s_annot1_arr_p[i1_annot], sizeof(struct annot_entry));
3467 	  pre_annot1.pos = aln->amin1 + i1_offset;
3468 	  pre_annot1.a_pos = aln->amin0 + i0_offset;
3469 	  region1_p = &pre_annot1;
3470 	  region1_p->score = region1_p->n_aln = region1_p->n_ident = 0;
3471 	}
3472 	else if (s_annot1_arr_p[i1_annot]->label == ']') {
3473 	  region1_p = NULL;
3474 	}
3475 	i1_annot++;
3476       }
3477     }
3478   }
3479 
3480   while (rp < rpmax ) {
3481     switch (*rp++) {
3482     case 0:		/* insert in 0 */
3483       sim_code = 5; /* indel code */
3484       update_code(al_str, al_str_n-strlen(al_str), update_data_p, 0, sim_code,'-','-');
3485 
3486       if (s_annot1_arr_p) {
3487 	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3488 	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
3489 				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3490 				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3491 				      annot_stack, &have_push_features, &v_delta,
3492 				      &region1_p, &pre_annot1, 0);
3493 	}
3494 
3495 	if (region1_p) {
3496 	  if (prev_match) region1_p->score += ppst->gdelval;
3497 	  region1_p->score += ppst->ggapval;
3498 	  region1_p->n_aln++;
3499 	}
3500 	prev_match = 0;
3501 
3502 	if (have_push_features) {
3503 	  display_push_features(annot_stack, annot_code_dyn,
3504 				i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3505 				i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3506 				sim_sym[sim_code], &region0_p, &region1_p,
3507 				a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
3508 	  have_push_features = 0;
3509 	}
3510       }
3511 
3512       i1++;
3513       lenc++;
3514       ngap_d++;
3515       break;
3516 
3517     case 2:			/* -1 frame shift */
3518       update_code(al_str, al_str_n-strlen(al_str), update_data_p, 2, sim_code,'-','-');
3519 
3520       nfs++;
3521       i0 += 2;
3522       not_c++;
3523 
3524       sp1 = ppst->sq[aap=ap1[i1]];
3525       sp0 = f_str->weight_c[aap][ap0[i0]].c2;
3526       itmp = ppst->pam2[0][pascii[sp0]][aap];
3527 
3528       if (s_annot1_arr_p) {
3529 	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3530 	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
3531 				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3532 				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3533 				      annot_stack, &have_push_features, &v_delta,
3534 				      &region1_p, &pre_annot1, 0);
3535 	}
3536 
3537 	if (sq[aap] != sp1) {
3538 	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
3539 
3540 	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3541 		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3542 		      sq[aap], sim_sym[t_spa], NULL,
3543 		      annot_code_dyn,1,annot_fmt);
3544 	}
3545 
3546 	if (region1_p) {
3547 	  region1_p->score += ppst->gshift;
3548 	  region1_p->score += itmp;
3549 	}
3550 	prev_match = 1;
3551       }
3552 
3553       sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3554       if (region1_p) {
3555 	region1_p->n_aln++;
3556 	if (sim_code == M_IDENT) {region1_p->n_ident++;}
3557       }
3558 
3559       /* check for an annotation */
3560       if (have_ann && !(ann_arr[aa1a[i1]] == ' ' || ann_arr[aa1a[i1]] == '[' || ann_arr[aa1a[i1]] == ']')) {
3561 #ifndef TFAST
3562 	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
3563 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
3564 #else
3565 	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
3566 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
3567 
3568 #endif
3569 	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
3570 	dyn_strcat(annot_code_dyn, tmp_astr);
3571       }
3572 
3573       if (s_annot1_arr_p && have_push_features) {
3574 	display_push_features(annot_stack, annot_code_dyn,
3575 			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3576 			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3577 			      sim_sym[sim_code], &region0_p, &region1_p,
3578 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
3579 	have_push_features = 0;
3580       }
3581 
3582       i1++;
3583       lenc++;
3584       break;
3585 
3586     case 3:			/* match */
3587       i0 += 3;
3588 
3589       sp1 = ppst->sq[aap=ap1[i1]];
3590       sp0 = f_str->weight_c[aap][ap0[i0]].c5;
3591       itmp = ppst->pam2[0][aap][pascii[sp0]];
3592 
3593       if (s_annot1_arr_p) {
3594 	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3595 	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
3596 				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3597 				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3598 				      annot_stack, &have_push_features, &v_delta,
3599 				      &region1_p, &pre_annot1, 0);
3600 	}
3601 
3602 	if (sq[aap] != sp1) {
3603 	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
3604 
3605 	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3606 		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3607 		      sq[aap], sim_sym[t_spa], NULL, annot_code_dyn,
3608 		      1,annot_fmt);
3609 	}
3610 
3611 	if (region1_p) {region1_p->score += itmp;}
3612 	prev_match = 1;
3613       }
3614 
3615       sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3616       if (region1_p) {
3617 	region1_p->n_aln++;
3618 	if (sim_code == M_IDENT) {region1_p->n_ident++;}
3619       }
3620 
3621       /* check for an annotation */
3622       if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
3623 #ifndef TFAST
3624 	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
3625 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
3626 #else
3627 	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
3628 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
3629 
3630 #endif
3631 	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
3632 	dyn_strcat(annot_code_dyn, tmp_astr);
3633       }
3634 
3635       if (s_annot1_arr_p && have_push_features) {
3636 	display_push_features(annot_stack, annot_code_dyn,
3637 			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3638 			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3639 			      sim_sym[sim_code], &region0_p, &region1_p,
3640 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
3641 	have_push_features = 0;
3642       }
3643 
3644       update_code(al_str, al_str_n-strlen(al_str), update_data_p, 3, sim_code,sp0,sp1);
3645 
3646       i1++;
3647       lenc++;
3648       break;
3649 
3650     case 4:		/* +1 frame shift */
3651       /* finish previous run */
3652       update_code(al_str, al_str_n-strlen(al_str), update_data_p, 4, sim_code,'-','-');
3653       /* mark frameshift */
3654 
3655       nfs++;
3656       i0 += 4;
3657       not_c++;
3658 
3659       sp1 = ppst->sq[aap=ap1[i1]];
3660       sp0 = f_str->weight_c[aap][ap0[i0]].c2;
3661       itmp = ppst->pam2[0][aap][pascii[sp0]];
3662 
3663       if (s_annot1_arr_p) {
3664 	if (i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3665 	  i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0),
3666 				      i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3667 				      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3668 				      annot_stack, &have_push_features, &v_delta, &region1_p, &pre_annot1, 0);
3669 	}
3670 
3671 	if (sq[aap] != sp1) {
3672 	  t_spa = align_type(itmp, sp0, sp1, 0, NULL, ppst->pam_x_id_sim);
3673 
3674 	  comment_var(i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3675 		      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3676 		      sq[aap], sim_sym[t_spa], NULL, annot_code_dyn,
3677 		      1,annot_fmt);
3678 	}
3679 
3680 	if (region1_p) {
3681 	  region1_p->score += ppst->gshift;
3682 	  region1_p->score += itmp;
3683 	}
3684 	prev_match = 1;
3685       }
3686 
3687       sim_code = align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3688       if (region1_p) {
3689 	region1_p->n_aln++;
3690 	if (sim_code == M_IDENT) {region1_p->n_ident++;}
3691       }
3692 
3693       if (have_ann && !(ann_arr[ap1a[i1]] == ' ' || ann_arr[ap1a[i1]]=='[' || ann_arr[ap1a[i1]]==']')) {
3694 #ifndef TFAST
3695 	sprintf(tmp_astr, "|X%c:%ld%c%c%ld%c",
3696 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i0,aln->qlrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i1,aln->llrev,0)+1,sp1);
3697 #else
3698 	sprintf(tmp_astr, "|%cX:%ld%c%c%ld%c",
3699 		ann_arr[ap1a[i1]],i0_offset+seq_pos(i1,aln->llrev,0)+1,sp0,sim_sym[sim_code],i1_offset+seq_pos(i0,aln->qlrev,0)+1,sp1);
3700 
3701 #endif
3702 	/* SAFE_STRNCAT(annot_code_s, tmp_astr, n_annot_code_s); */
3703 	dyn_strcat(annot_code_dyn, tmp_astr);
3704       }
3705 
3706       if (s_annot1_arr_p && have_push_features) {
3707 	display_push_features(annot_stack, annot_code_dyn,
3708 			      i0_offset+seq_pos(i0,aln->qlrev,0), sp0,
3709 			      i1_offset+seq_pos(i1,aln->llrev,0), sp1,
3710 			      sim_sym[sim_code], &region0_p, &region1_p,
3711 			      a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
3712 	have_push_features = 0;
3713       }
3714 
3715       i1++;
3716       lenc++;
3717       break;
3718 
3719     case 5:		/* insert in 1 */
3720       sim_code = 5;
3721       update_code(al_str, al_str_n-strlen(al_str), update_data_p, 5, sim_code,'-','-');
3722 
3723       i0 += 3;
3724       lenc++;
3725       ngap_p++;
3726       break;
3727     }
3728   }
3729 
3730   close_update_data(al_str, al_str_n-strlen(al_str), update_data_p);
3731 
3732 #ifndef TFAST
3733   aln->amax0 = i0+3;	/* end of codon sequence */
3734   aln->amax1 = i1;	/* end of protein sequence */
3735   aln->ngap_q = ngap_d;
3736   aln->ngap_l = ngap_p;
3737 #else
3738   aln->amax1 = i0+3;	/* end of codon sequence */
3739   aln->amax0 = i1;	/* end of protein sequence */
3740   aln->ngap_q = ngap_p;
3741   aln->ngap_l = ngap_d;
3742 #endif
3743   aln->calc_last_set = 1;
3744   aln->nfs = nfs;
3745   aln->amin0 = aln->smin0;
3746   aln->amin1 = aln->smin1;
3747 
3748   *score_delta = v_delta;
3749 
3750   if (have_ann) {
3751     have_push_features = 0;
3752     if (s_annot1_arr_p) {
3753       /* also check for regions after alignment */
3754       while (i1_annot < annot1_p->n_annot && s_annot1_arr_p[i1_annot]->pos < i1_offset+n1) {
3755 	if (s_annot1_arr_p[i1_annot]->label == '[') break;
3756 	if (s_annot1_arr_p[i1_annot]->label == ']') {
3757 	  push_stack(annot_stack, s_annot1_arr_p[i1_annot]);
3758 	  have_push_features = 1;
3759 	}
3760 	i1_annot++;
3761       }
3762     }
3763 
3764     if (have_push_features) {
3765       display_push_features(annot_stack, annot_code_dyn,
3766 			    i0_offset+a_res->max0-1, sp0,
3767 			    i1_offset+a_res->max1-1, sp1,
3768 			    sim_sym[sim_code], &region0_p, &region1_p,
3769 			    a_res->rst.score[ppst->score_ix], a_res->rst.comp, n0, n1, pstat_void, annot_fmt);
3770     }
3771 
3772     if (!annot_stack) free_stack(annot_stack);
3773   }
3774 
3775 
3776   if (lenc < 0) lenc = 1;
3777 
3778 /*	now we have the middle, get the right end */
3779 
3780   return lenc;
3781 }
3782 
3783 int calc_id(const unsigned char *aa0, int n0,
3784 	    const unsigned char *aa1, int n1,
3785 	    struct a_struct *aln,
3786 	    struct a_res_str *a_res,
3787 	    struct pstruct *ppst,
3788 	    const struct annot_str *annot0_p,
3789 	    const struct annot_str *annot1_p,
3790 	    int *score_delta,
3791 	    struct dyn_string_str *annot_var_dyn,
3792 	    struct f_struct *f_str)
3793 {
3794   int i0, i1;
3795   int lenc, not_c, ngap_d, ngap_p, nfs;
3796   char sp0, sp1;
3797   unsigned char aap;
3798   const unsigned char *ap0, *ap1;
3799   int *rp, *rpmax;
3800 
3801   int aa1c;
3802   /* variables for variant changes */
3803   struct annot_entry **s_annot1_arr_p;
3804   int  itmp, i1_annot, v_delta, v_tmp;
3805   long i0_offset, i1_offset;
3806 
3807   char tmp_str[MAX_SSTR];
3808   const unsigned char *sq;
3809 
3810   *score_delta = 0;
3811   NULL_dyn_string(annot_var_dyn);
3812 
3813   if (ppst->ext_sq_set) {sq = ppst->sqx;}
3814   else {sq = ppst->sq;}
3815 
3816   /* don't fill in the ends */
3817 #ifndef TFAST	/* FASTYZ */
3818   ap0 = f_str->aa0v;		/* computed codons -> ap0*/
3819   ap1 = aa1;			/* protein sequence -> ap1 */
3820   aln->smin1 = a_res->min0;	/* start in protein sequence */
3821   aln->smin0 = a_res->min1;		/* start in DNA/codon sequence */
3822 
3823   i0_offset = aln->q_offset;
3824   i1_offset = aln->l_offset;
3825 #else	/* TFASTYZ */
3826 
3827   if (aln->frame == 0) {
3828     pre_com(aa1, n1, f_str->aa1v);
3829   }
3830   else {
3831     pre_com_r(aa1, n1, f_str->aa1v);
3832   }
3833 
3834   ap0 = f_str->aa1v;		/* computed codons -> ap0*/
3835   ap1 = aa0;			/* protein sequence */
3836   aln->smin0 = a_res->min0;	/* start in protein sequence */
3837   aln->smin1 = a_res->min1;	/* start in codon sequence */
3838 
3839   i1_offset = aln->q_offset;
3840   i0_offset = aln->l_offset;
3841 #endif
3842 
3843   rp = a_res->res;			/* start of alignment info */
3844   rpmax = &a_res->res[a_res->nres];		/* end of alignment info */
3845 
3846 /* now get the middle */
3847 
3848   lenc = not_c = aln->nident = aln->nmismatch = aln->nsim = aln->npos = ngap_d = ngap_p = nfs = 0;
3849   i0 = a_res->min1-3;	/* start of codon sequence */
3850   i1 = a_res->min0;	/* start of protein sequence */
3851 
3852   v_delta = 0;
3853   i1_annot = 0;
3854   s_annot1_arr_p = NULL;
3855   if (annot1_p && annot1_p->n_annot > 0) s_annot1_arr_p = annot1_p->s_annot_arr_p;
3856 
3857   while (rp < rpmax ) {
3858     switch (*rp++) {
3859     case 3:	/* match */
3860       i0 += 3;
3861       sp1 = ppst->sq[aap=ap1[i1]];
3862       sp0 = f_str->weight_c[aap][ap0[i0]].c5;
3863 
3864       itmp = ppst->pam2[0][pascii[sp0]][aap];
3865 
3866       if (s_annot1_arr_p && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3867 	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3868 			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3869 			      NULL, NULL, &v_delta,NULL, NULL, 0);
3870 
3871 	if (ppst->sq[aap] != sp1) {
3872 	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
3873 	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
3874 	  dyn_strcat(annot_var_dyn, tmp_str);
3875 	}
3876       }
3877 
3878       align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3879 
3880       i1++;
3881       lenc++;
3882       break;
3883     case 2:
3884       nfs++;
3885       i0 += 2;
3886       not_c++;
3887       sp1 = ppst->sq[aap=ap1[i1]];
3888       sp0 = f_str->weight_c[aap][ap0[i0]].c2;
3889 
3890       itmp = ppst->pam2[0][aap][pascii[sp0]];
3891 
3892       if (s_annot1_arr_p && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3893 	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3894 			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3895 			      NULL, NULL, &v_delta,NULL, NULL, 0);
3896 
3897 	if (ppst->sq[aap] != sp1) {
3898 	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
3899 	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
3900 	  dyn_strcat(annot_var_dyn, tmp_str);
3901 	}
3902       }
3903 
3904       align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3905 
3906       i1++;
3907       lenc++;
3908       break;
3909     case 4:
3910       nfs++;
3911       i0 += 4;
3912       not_c++;
3913       sp1 = ppst->sq[aap=ap1[i1]];
3914       sp0 = f_str->weight_c[aap][ap0[i0]].c4;
3915       itmp = ppst->pam2[0][pascii[sp0]][aap];
3916 
3917       if (s_annot1_arr_p && i1+i1_offset == s_annot1_arr_p[i1_annot]->pos) {
3918 	i1_annot = next_annot_match(&itmp, ppst->pam2[0][pascii[sp0]], i1_offset+seq_pos(i1,aln->llrev,0), i0_offset+seq_pos(i0,aln->qlrev,0), &sp1, NULL, sq,
3919 			      i1_annot, annot1_p->n_annot, s_annot1_arr_p, NULL,
3920 			      NULL, NULL, &v_delta,NULL, NULL, 0);
3921 
3922 	if (ppst->sq[aap] != sp1) {
3923 	  sprintf(tmp_str,"%c%d%c;",ppst->sq[aap],i1+1,sp1);
3924 	  /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
3925 	  dyn_strcat(annot_var_dyn, tmp_str);
3926 	}
3927       }
3928 
3929       align_type(itmp, sp0, sp1, 0, aln, ppst->pam_x_id_sim);
3930 
3931       i1++;
3932       lenc++;
3933       break;
3934     case 5:
3935       i0 += 3;
3936       lenc++;
3937       ngap_p++;
3938       break;
3939     case 0:
3940       i1++;
3941       lenc++;
3942       ngap_d++;
3943       break;
3944     }
3945   }
3946 
3947 #ifndef TFAST
3948   aln->amax0 = i0+3;	/* end of codon sequence */
3949   aln->amax1 = i1;	/* end of protein sequence */
3950   aln->ngap_q = ngap_d;
3951   aln->ngap_l = ngap_p;
3952 #else
3953   aln->amax1 = i0+3;	/* end of codon sequence */
3954   aln->amax0 = i1;	/* end of protein sequence */
3955   aln->ngap_q = ngap_p;
3956   aln->ngap_l = ngap_d;
3957 #endif
3958   aln->calc_last_set = 1;
3959   aln->nfs = nfs;
3960   aln->amin0 = aln->smin0;
3961   aln->amin1 = aln->smin1;
3962 
3963   if (lenc < 0) lenc = 1;
3964 
3965 /*	now we have the middle, get the right end */
3966 
3967   return lenc;
3968 }
3969