1 /* $Id: dropff2.c 989 2012-07-24 19:37:38Z wrp $ */
2 
3 /* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
4    Vistors of the University of Virginia */
5 
6 /* Licensed under the Apache License, Version 2.0 (the "License");
7    you may not use this file except in compliance with the License.
8    You may obtain a copy of the License at
9 
10    http://www.apache.org/licenses/LICENSE-2.0
11 
12    Unless required by applicable law or agreed to in writing,
13    software distributed under this License is distributed on an "AS
14    IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
15    express or implied.  See the License for the specific language
16    governing permissions and limitations under the License.
17 */
18 
19 /* this code implements the "fastf" algorithm, which is designed to
20    deconvolve mixtures of protein sequences derived from mixed-peptide
21    Edman sequencing.  The expected input is:
22 
23    >test | 40001 90043 | mgstm1
24    MGCEN,
25    MIDYP,
26    MLLAY,
27    MLLGY
28 
29    Where the ','s indicate the length/end of the sequencing cycle
30    data.  Thus, in this example, the sequence is from a mixture of 4
31    peptides, M was found in the first position, G,I, and L(2) at the second,
32    C,D, L(2) at the third, etc.
33 
34    Because the sequences are derived from mixtures, there need not be
35    any partial sequence "MGCEN", the actual deconvolved sequence might be
36    "MLDGN".
37 */
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <math.h>
44 #include <ctype.h>
45 
46 #include "defs.h"
47 #include "param.h"
48 #include "structs.h"
49 #include "tatstats.h"
50 
51 #define EOSEQ 0
52 #define ESS 59
53 #define MAXHASH 32
54 #define NMAP MAXHASH+1
55 #define NMAP_X 23	/* re-code NMAP for 'X' */
56 #define NMAP_Z 24	/* re-code NMAP for '*' */
57 
58 #ifndef MAXSAV
59 #define MAXSAV 10
60 #endif
61 
62 #define DROP_INTERN
63 #include "drop_func.h"
64 
65 static char *verstr="4.21 May 2006 (ajm/wrp)";
66 
67 int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
68 
69 #ifdef TFAST
70 extern int aatran(const unsigned char *ntseq, unsigned char *aaseq,
71 		  const int maxs, const int frame);
72 #endif
73 
74 struct hlstr { int next, pos;};
75 
76 void savemax(struct dstruct *, struct f_struct *);
77 
78 static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
79 	    int **, struct f_struct *);
80 static int m1_spam(unsigned char *, int,
81 		   const unsigned char *, int,
82 		   struct savestr *, int **, int, struct f_struct *);
83 
84 int sconn(struct savestr **v, int nsave, int cgap,
85 	  struct f_struct *, struct rstruct *, const struct pstruct *,
86 	  const unsigned char *aa0, int n0,
87 	  const unsigned char *aa1, int n1,
88 	  int opt_prob);
89 
90 void kpsort(struct savestr **, int);
91 void kssort(struct savestr **, int);
92 void kpsort(struct savestr **, int);
93 
94 int
95 sconn_a(unsigned char *, int, int, struct f_struct *,
96 	struct a_res_str *);
97 
98 /* initialize for fasta */
99 
100 void
init_work(unsigned char * aa0,int n0,struct pstruct * ppst,struct f_struct ** f_arg)101 init_work (unsigned char *aa0, int n0,
102 	   struct pstruct *ppst,
103 	   struct f_struct **f_arg)
104 {
105    int mhv, phv;
106    int hmax;
107    int i0, ii0, hv;
108    struct f_struct *f_str;
109 
110    int maxn0;
111    int i, j, q;
112    struct savestr *vmptr;
113    int *res;
114    int nsq;
115 
116    nsq = ppst->nsqx;
117 
118    f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
119    if(f_str == NULL) {
120      fprintf(stderr, "Couldn't calloc f_str\n");
121      exit(1);
122    }
123 
124    ppst->sw_flag = 0;
125 
126    /* fastf3 cannot work with lowercase symbols as low complexity;
127       thus, NMAP must be disabled; this depends on aascii['X']  */
128    if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
129    if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
130 
131    /*   this does not work for share ppst structs, as in threads */
132    /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
133 
134    for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
135       if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
136 
137    if (mhv <= 0) {
138       fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
139       exit (1);
140    }
141 
142    for (f_str->kshft = 0; mhv > 0; mhv /= 2)
143       f_str->kshft++;
144 
145 /*      kshft = 2;	*/
146    hmax = hv = (1 << f_str->kshft);
147    f_str->hmask = (hmax >> f_str->kshft) - 1;
148 
149    if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
150      fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
151      exit (1);
152    }
153    for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
154    aa0 = f_str->aa0;
155 
156    if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
157      fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
158      exit (1);
159    }
160    f_str->aa0ix = 0;
161 
162    if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
163      fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
164 	      hmax,f_str->hmask);
165      exit (1);
166    }
167    if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
168      fprintf (stderr, " cannot allocate pamh1 array\n");
169      exit (1);
170    }
171    if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
172      fprintf (stderr, " cannot allocate pamh2 array\n");
173      exit (1);
174    }
175    if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
176      fprintf (stderr, " cannot allocate hash link array");
177      exit (1);
178    }
179 
180    for (i0 = 0; i0 < hmax; i0++) {
181       f_str->harr[i0].next = -1;
182       f_str->harr[i0].pos = -1;
183    }
184 
185    for (i0 = 0; i0 < n0; i0++) {
186       f_str->link[i0].next = -1;
187       f_str->link[i0].pos = -1;
188    }
189 
190    /* encode the aa0 array */
191    /*
192      this code has been modified to allow for mixed peptide sequences
193       aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
194       the 'NULL' character resets the hash position counter, to indicate that
195       any of several residues can be in the same position.
196       We also need to keep track of the number of times this has happened, so that
197       we can redivide the sequence later
198 
199       i0 counts through the sequence
200       ii0 counts through the hashed sequence
201 
202       */
203 
204    f_str->nm0 = 1;
205    f_str->nmoff = -1;
206    phv = hv = 0;
207    for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
208      /* reset the counter and start hashing again */
209      if (aa0[i0] == ESS || aa0[i0] == 0) {
210        aa0[i0] = 0;	/* set ESS to 0 */
211        /*       fprintf(stderr," converted ',' to 0\n");*/
212        i0++;	/* skip over the blank */
213        f_str->nm0++;
214        if (f_str->nmoff < 0) f_str->nmoff = i0;
215        phv = hv = 0;
216        ii0 = 0;
217      }
218      hv = ppst->hsq[aa0[i0]];
219      f_str->link[i0].next = f_str->harr[hv].next;
220      f_str->link[i0].pos = f_str->harr[hv].pos;
221      f_str->harr[hv].next = i0;
222      f_str->harr[hv].pos = ii0;
223      f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
224    }
225    if (f_str-> nmoff < 0) f_str->nmoff = n0;
226 
227 
228 #ifdef DEBUG
229    /*
230    fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
231    */
232 #endif
233 
234 /*
235 #ifdef DEBUG
236    fprintf(stderr," hmax: %d\n",hmax);
237    for ( hv=0; hv<hmax; hv++)
238        fprintf(stderr,"%2d %c %3d %3d\n",hv,
239 	       (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
240 	       f_str->harr[hv].pos,f_str->harr[hv].next);
241    fprintf(stderr,"----\n");
242    for ( hv=0; hv<n0; hv++)
243        fprintf(stderr,"%2d: %3d %3d\n",hv,
244 	       f_str->link[hv].pos,f_str->link[hv].next);
245 #endif
246 */
247 
248    f_str->maxsav = MAXSAV;
249    if ((f_str->vmax = (struct savestr *)
250 	calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
251      fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
252      exit(1);
253    }
254 
255    if ((f_str->vptr = (struct savestr **)
256 	calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
257      fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
258      exit(1);
259    }
260 
261    for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
262      vmptr->used = (int *) calloc(n0, sizeof(int));
263      if(vmptr->used == NULL) {
264        fprintf(stderr, "Couldn't alloc vmptr->used\n");
265        exit(1);
266      }
267    }
268 
269 /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
270    pam2[0][0] is now undefined for consistency with blast
271 */
272 
273    for (i0 = 1; i0 <= ppst->nsq; i0++)
274      f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
275 
276    ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
277    if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
278      ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
279 
280    f_str->ndo = 0;
281    f_str->noff = n0-1;
282    if (f_str->diag==NULL)
283      f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
284 					      sizeof (struct dstruct));
285 
286    if (f_str->diag == NULL)
287    {
288       fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
289 	      (long) MAXDIAG * (long) (sizeof (struct dstruct)));
290       exit (1);
291    }
292 
293 #ifdef TFAST
294    if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
295 					     sizeof(unsigned char)))
296        == NULL) {
297      fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
298      exit (1);
299    }
300    f_str->aa1x++;
301 #endif
302 
303    /* allocate space for the scoring arrays */
304    maxn0 = n0 + 4;
305 
306    maxn0 = max(3*n0/2,MIN_RES);
307    if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
308      fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
309      exit(1);
310    }
311    f_str->res = res;
312    f_str->max_res = maxn0;
313 
314    /* Tatusov Statistics Setup */
315 
316    /* initialize priors array. */
317    if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
318      fprintf(stderr, "Couldn't allocate priors array.\n");
319      exit(1);
320    }
321    calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
322 
323    f_str->dotat = 0;
324    f_str->shuff_cnt = ppst->shuff_node;
325 
326    /* End of Tatusov Statistics Setup */
327 
328    *f_arg = f_str;
329 }
330 
331 
332 /* pstring1 is a message to the manager, currently 512 */
333 /* pstring2 is the same information, but in a markx==10 format */
334 void
get_param(const struct pstruct * ppstr,char ** pstring1,char * pstring2,struct score_count_s * s_info)335 get_param (const struct pstruct *ppstr,
336 	   char **pstring1, char *pstring2,
337 	   struct score_count_s *s_info)
338 {
339 #ifndef TFAST
340   char *pg_str="FASTF";
341 #else
342   char *pg_str="TFASTF";
343 #endif
344 
345   sprintf (pstring1[0], "%s (%s)",pg_str,verstr);
346   sprintf (pstring1[1], "%s matrix (%d:%d), join: %d",
347 	   ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,ppstr->param_u.fa.cgap);
348 
349   if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");
350 
351   if (pstring2 != NULL) {
352     sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)\n\
353 ; pg_join: %d\n",
354 	     pg_str,verstr, ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,
355 	     ppstr->param_u.fa.cgap);
356    }
357 }
358 
359 void
close_work(const unsigned char * aa0,const int n0,struct pstruct * ppst,struct f_struct ** f_arg)360 close_work (const unsigned char *aa0, const int n0,
361 	    struct pstruct *ppst,
362 	    struct f_struct **f_arg)
363 {
364   struct f_struct *f_str;
365   struct savestr *vmptr;
366 
367   f_str = *f_arg;
368 
369   if (f_str != NULL) {
370 
371     for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
372       free(vmptr->used);
373 
374     free(f_str->res);
375 #ifdef TFAST
376     free(f_str->aa1x - 1); /* allocated, then aa1x++'ed */
377 #endif
378     free(f_str->diag);
379     free(f_str->link);
380     free(f_str->pamh2);
381     free(f_str->pamh1);
382     free(f_str->harr);
383     free(f_str->aa0t);
384     free(f_str->aa0);
385     free(f_str->priors);
386     free(f_str->vmax);
387     free(f_str->vptr);
388     free(f_str);
389     *f_arg = NULL;
390   }
391 }
392 
do_fastf(unsigned char * aa0,int n0,const unsigned char * aa1,int n1,const struct pstruct * ppst,struct f_struct * f_str,struct rstruct * rst,int * hoff,int opt_prob)393 int do_fastf (unsigned char *aa0, int n0,
394 	      const unsigned char *aa1, int n1,
395 	      const struct pstruct *ppst, struct f_struct *f_str,
396 	      struct rstruct *rst, int *hoff, int opt_prob)
397 {
398    int     nd;		/* diagonal array size */
399    int     lhval;
400    int     kfact;
401    register struct dstruct *dptr;
402    register int tscor;
403    register struct dstruct *diagp;
404    struct dstruct *dpmax;
405    register int lpos;
406    int     tpos, npos;
407    struct savestr *vmptr;
408    int     scor, tmp;
409    int     im, ib, nsave;
410    int     cmps ();		/* comparison routine for ksort */
411    const int *hsq;
412 
413    hsq = ppst->hsq;
414 
415    if (n1 < 1) {
416      rst->score[0] = rst->score[1] = rst->score[2] = 0;
417      rst->escore = 1.0;
418      rst->segnum = 0;
419      rst->seglen = 0;
420      return 1;
421    }
422 
423    if (n0+n1+1 >= MAXDIAG) {
424      fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
425      rst->score[0] = rst->score[1] = rst->score[2] = -1;
426      rst->escore = 2.0;
427      rst->segnum = 0;
428      rst->seglen = 0;
429      return -1;
430    }
431 
432    nd = n0 + n1;
433 
434    dpmax = &f_str->diag[nd];
435    for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {
436       dptr->stop = -1;
437       dptr->dmax = NULL;
438       dptr++->score = 0;
439    }
440 
441    /* initialize the saved segment structures */
442    for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
443       vmptr->score = 0;
444       memset(vmptr->used, 0, n0 * sizeof(int));
445    }
446 
447    f_str->lowmax = f_str->vmax;
448    f_str->lowscor = 0;
449 
450    /* start hashing */
451 
452    diagp = &f_str->diag[f_str->noff];
453    for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
454      if (hsq[aa1[lpos]]>=NMAP) {
455        lpos++ ; diagp++;
456        while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
457        if (lpos >= n1) break;
458        lhval = 0;
459      }
460      lhval = hsq[aa1[lpos]];
461      for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
462 	  tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {
463        /* tscor gets position of end of current lpos diag run */
464        if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
465 	 tscor++;		 /* move forward one */
466 	 if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
467 	                             /* includes implicit -1 mismatch penalty */
468 	   scor = dptr->score;	     /* current score of this run */
469 	   if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
470 	       f_str->lowscor < scor)	/* if updating tscor makes run worse, */
471 	     savemax (dptr, f_str);     /* save it */
472 
473 	   if ((tscor += scor) >= kfact) {  /* add to current run if continuing */
474 	     				    /* is better than restart (kfact) */
475 	       dptr->score = tscor;
476 	       dptr->stop = lpos;
477 	     }
478 	     else {
479 	       dptr->score = kfact;	/* starting over is better */
480 	       dptr->start = (dptr->stop = lpos);
481 	     }
482 	 }
483 	 else {				/* continue current run */
484 	   dptr->score += f_str->pamh1[aa0[tpos]];
485 	   dptr->stop = lpos;
486 	 }
487        }
488        else {				/* no diagonal run yet */
489 	 dptr->score = f_str->pamh2[lhval];
490 	 dptr->start = (dptr->stop = lpos);
491        }
492      }				/* end tpos */
493    }				/* end lpos */
494 
495    for (dptr = f_str->diag; dptr < dpmax;) {
496      if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
497      dptr->stop = -1;
498      dptr->dmax = NULL;
499      dptr++->score = 0;
500    }
501    f_str->ndo = nd;
502 
503 /*
504         at this point all of the elements of aa1[lpos]
505         have been searched for elements of aa0[tpos]
506         with the results in diag[dpos]
507 */
508 
509    /* set up pointers for sorting */
510 
511    for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
512      if (vmptr->score > 0) {
513        vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
514        f_str->vptr[nsave++] = vmptr;
515      }
516    }
517 
518    /* sort them */
519    kssort (f_str->vptr, nsave);
520 
521 
522 #ifdef DEBUG
523    /*
524    for (ib=0; ib<nsave; ib++) {
525      fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
526   	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
527   	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
528   	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
529   	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
530        for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
531          fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
532 		 ppst->sq[aa1[im]]);
533        fputc('\n',stderr);
534    }
535    fprintf(stderr,"---\n");
536    */
537    /* now use m_spam to re-evaluate */
538    /*
539    for (tpos = 0; tpos < n0; tpos++) {
540      fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
541      if (tpos %10 == 9) fputc('\n',stderr);
542    }
543    fputc('\n',stderr);
544    */
545 #endif
546 
547    f_str->aa0ix = 0;
548    for (ib=0; ib < nsave; ib++) {
549      if ((vmptr=f_str->vptr[ib])->score > 0) {
550        vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
551 			       ppst->pam2[0], ppst->pam_l, f_str);
552      }
553    }
554    /* reset aa0 - modified by m1_spam */
555    for (tpos = 0; tpos < n0; tpos++) {
556      if (aa0[tpos] >= 32) aa0[tpos] -= 32;
557    }
558 
559    kssort(f_str->vptr,nsave);
560 
561    for ( ; nsave > 0; nsave--)
562      if (f_str->vptr[nsave-1]->score >0) break;
563 
564    if (nsave <= 0) {
565      f_str->nsave = 0;
566      rst->score[0] = rst->score[1] = rst->score[2] = 0;
567      rst->escore = 1.0;
568 
569      return 1;
570    }
571    else f_str->nsave = nsave;
572 
573 
574 #ifdef DEBUG
575    /*
576    fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
577    for (ib=0; ib<nsave; ib++) {
578      fprintf(stderr,"0: %4d-%4d  1: %4d-%4d  dp: %d score: %d\n",
579   	     f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
580   	     f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
581   	     f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
582   	     f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
583      for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
584        fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
585   	       ppst->sq[aa1[im]]);
586      fputc('\n',stderr);
587    }
588 
589    fprintf(stderr,"---\n");
590    */
591 #endif
592 
593    scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
594 		 rst, ppst, aa0, n0, aa1, n1, opt_prob);
595 
596    for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
597      if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
598 
599    rst->score[1] = vmptr->score;
600    rst->score[0] = rst->score[2] = max (scor, vmptr->score);
601 
602    return 1;
603 }
604 
do_work(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,const struct pstruct * ppst,struct f_struct * f_str,int qr_flg,int shuff_flg,struct rstruct * rst,struct score_count_s * s_info)605 void do_work (const unsigned char *aa0, int n0,
606 	      const unsigned char *aa1, int n1,
607 	      int frame,
608 	      const struct pstruct *ppst, struct f_struct *f_str,
609 	      int qr_flg, int shuff_flg, struct rstruct *rst,
610 	      struct score_count_s *s_info)
611 {
612   int opt_prob;
613   int hoff, n10, i;
614 
615   if (qr_flg==1 && f_str->shuff_cnt <= 0) {
616     rst->escore = 2.0;
617     rst->score[0]=rst->score[1]=rst->score[2]= -1;
618     rst->valid_stat = 0;
619     return;
620   }
621 
622   s_info->s_cnt[ppst->score_ix]++;
623   s_info->tot_scores++;
624 
625   rst->valid_stat = 1;
626   if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1;
627   else opt_prob = 0;
628   if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0;
629   if (qr_flg) {
630     opt_prob=1;
631     /*    if (frame==1) */
632       f_str->shuff_cnt--;
633   }
634 
635   if (n1 < 1) {
636     rst->score[0] = rst->score[1] = rst->score[2] = -1;
637     rst->escore = 2.0;
638     return;
639   }
640 
641 #ifdef TFAST
642   n10=aatran(aa1,f_str->aa1x,n1,frame);
643   if (ppst->debug_lib)
644     for (i=0; i<n10; i++)
645       if (f_str->aa1x[i]>ppst->nsq) {
646 	fprintf(stderr,
647 		"residue[%d/%d] %d range (%d)\n",i,n1,
648 		f_str->aa1x[i],ppst->nsq);
649 	f_str->aa1x[i]=0;
650 	n10=i-1;
651       }
652 
653   do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
654 #else	/* FASTF */
655   do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
656 #endif
657 
658   rst->comp = rst->H = -1.0;
659 
660 }
661 
do_opt(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,struct pstruct * ppst,struct f_struct * f_str,struct rstruct * rst)662 void do_opt (const unsigned char *aa0, int n0,
663 	     const unsigned char *aa1, int n1,
664 	     int frame,
665 	     struct pstruct *ppst,
666 	     struct f_struct *f_str,
667 	     struct rstruct *rst)
668 {
669   int optflag, tscore, hoff, n10;
670 
671   optflag = ppst->param_u.fa.optflag;
672   ppst->param_u.fa.optflag = 1;
673 
674 #ifdef TFAST
675   n10=aatran(aa1,f_str->aa1x,n1,frame);
676   do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1);
677 #else	/* FASTA */
678   do_fastf(f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1);
679 #endif
680   ppst->param_u.fa.optflag = optflag;
681 }
682 
683 void
savemax(dptr,f_str)684 savemax (dptr, f_str)
685   register struct dstruct *dptr;
686   struct f_struct *f_str;
687 {
688    register int dpos;
689    register struct savestr *vmptr;
690    register int i;
691 
692    dpos = (int) (dptr - f_str->diag);
693 
694 /* check to see if this is the continuation of a run that is already saved */
695 
696    if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
697 	 vmptr->start == dptr->start)
698    {
699       vmptr->stop = dptr->stop;
700       if ((i = dptr->score) <= vmptr->score)
701 	 return;
702       vmptr->score = i;
703       if (vmptr != f_str->lowmax)
704 	 return;
705    }
706    else
707    {
708       i = f_str->lowmax->score = dptr->score;
709       f_str->lowmax->dp = dpos;
710       f_str->lowmax->start = dptr->start;
711       f_str->lowmax->stop = dptr->stop;
712       dptr->dmax = f_str->lowmax;
713    }
714 
715    for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
716       if (vmptr->score < i)
717       {
718 	 i = vmptr->score;
719 	 f_str->lowmax = vmptr;
720       }
721    f_str->lowscor = i;
722 }
723 
724 /* this version of spam() is designed to work with a collection of
725    subfragments, selecting the best amino acid at each position so
726    that, from each subfragment, each position is only used once.
727 
728    As a result, m_spam needs to know the number of fragments.
729 
730    In addition, it now requires a global alignment to the fragment
731    and resets the start and stop positions
732 
733    */
734 
735 static int
m1_spam(unsigned char * aa0,int n0,const unsigned char * aa1,int n1,struct savestr * dmax,int ** pam2,int pam_l,struct f_struct * f_str)736 m1_spam (unsigned char *aa0, int n0,
737 	 const unsigned char *aa1, int n1,
738 	 struct savestr *dmax, int **pam2, int pam_l,
739 	 struct f_struct *f_str)
740 {
741   int     tpos, lpos, im, ii, nm, ci;
742   int     tot, ctot, pv;
743 
744   struct {
745     int     start, stop, score;
746   } curv, maxv;
747   unsigned char *aa0p;
748   const unsigned char *aa1p;
749 
750   lpos = dmax->start;                   /* position in library sequence */
751    tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
752    /* force global alignment, reset start*/
753    if (tpos < lpos) {
754      lpos = dmax->start -= tpos;
755      tpos = 0;
756    }
757    else {
758      tpos -= lpos;
759      lpos = dmax->start = 0;
760    }
761 
762    dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
763    if (dmax->stop > n1) dmax->stop = n1;
764 
765    /*
766    if (dmax->start < 0) {
767      tpos = -dmax->start;
768      lpos = dmax->start=0;
769    }
770    else tpos = 0;
771    */
772 
773    aa1p = &aa1[lpos];
774    aa0p = &aa0[tpos];
775 
776    nm = f_str->nm0;
777 
778    tot = curv.score = maxv.score = 0;
779    for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
780      ctot = pam_l;
781      ci = -1;
782      for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
783        if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
784 	 ctot = pv;
785 	 ci = ii;
786 /*  	 fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
787        }
788      }
789      tot += ctot;
790      if (ci >= 0 && aa0p[ci] < 32) {
791 #ifdef DEBUG
792 /*         fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
793 #endif
794        aa0p[ci] +=  32;
795        dmax->used[&aa0p[ci] - aa0] = 1;
796      }
797    }
798    return tot;
799 }
800 
ma_spam(unsigned char * aa0,int n0,const unsigned char * aa1,struct savestr * dmax,struct pstruct * ppst,struct f_struct * f_str)801 int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
802 	     struct savestr *dmax, struct pstruct *ppst,
803 	     struct f_struct *f_str)
804 {
805   int **pam2;
806   int     tpos, lpos, im, ii, nm, ci, lp0;
807   int     tot, ctot, pv;
808   struct {
809     int     start, stop, score;
810   } curv, maxv;
811    const unsigned char *aa1p;
812    unsigned char *aa0p, *aa0pt;
813    int aa0t_flg;
814 
815    pam2 = ppst->pam2[0];
816    aa0t_flg = 0;
817 
818    lpos = dmax->start;			/* position in library sequence */
819    tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
820    lp0 = lpos = dmax->start;
821    aa1p = &aa1[lpos];
822    aa0p = &aa0[tpos];			/* real aa0 sequence */
823 
824 			/* the destination aa0 sequence (without nulls) */
825    aa0pt = &f_str->aa0t[f_str->aa0ix];
826 
827    curv.start = lpos;
828    nm = f_str->nm0;
829 
830    /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
831    if (lpos == 0 && tpos > 0)
832      for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31;  /* filler character */
833 
834    tot = curv.score = maxv.score = 0;
835    for (; lpos <= dmax->stop; lpos++) {
836      ctot = ppst->pam_l;
837      ci = -1;
838      for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
839        if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
840 	 ctot = pv;
841 	 ci = ii;
842        }
843      }
844      tot += ctot;
845      if (ci >= 0) {
846        if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
847        else {
848 	 *aa0pt++ = aa0p[ci];
849 	 aa0p[ci] +=  32;
850 	 aa0t_flg=1;
851        }
852      }
853      aa0p++; aa1p++;
854    }
855 
856    if (aa0t_flg) {
857      dmax->dp -= f_str->aa0ix;		/* shift ->dp for aa0t */
858      if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
859        fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
860      }
861      else
862        *aa0pt++ = 0;			/* skip over NULL */
863 
864      aa0pt = &f_str->aa0t[f_str->aa0ix];
865      aa1p = &aa1[lp0];
866 
867      /*
868      for (im = 0; im < f_str->nmoff; im++)
869        fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
870      fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
871      */
872 
873      f_str->aa0ix += f_str->nmoff;	/* update offset into aa0t */
874    }
875    /*
876       fprintf(stderr," ma_spam returning: %d\n",tot);
877    */
878    return tot;
879 }
880 
881 static int
m0_spam(unsigned char * aa0,const unsigned char * aa1,int n1,struct savestr * dmax,int ** pam2,struct f_struct * f_str)882 m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
883 	 struct savestr *dmax, int **pam2,
884 	 struct f_struct *f_str)
885 {
886    int tpos, lpos, lend, im, ii, nm;
887    int     tot, ctot, pv;
888    struct {
889      int     start, stop, score;
890    } curv, maxv;
891    const unsigned char *aa0p, *aa1p;
892 
893    lpos = dmax->start;			/* position in library sequence */
894    tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
895    if (tpos > 0) {
896      if (lpos-tpos >= 0) {
897        lpos = dmax->start -= tpos;    /* force global alignment, reset start*/
898        tpos = 0;
899      }
900      else {
901        tpos -= lpos;
902        lpos = dmax->start = 0;
903      }
904    }
905 
906    nm = f_str->nm0;
907    lend = dmax->stop;
908    if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
909      lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
910      if (lend >= n1) lend = n1-1;
911    }
912 
913    aa1p = &aa1[lpos];
914    aa0p = &aa0[tpos];
915 
916    curv.start = lpos;
917 
918    tot = curv.score = maxv.score = 0;
919    for (; lpos <= lend; lpos++) {
920      ctot = -10000;
921      for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
922        if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
923 	 ctot = pv;
924        }
925      }
926      tot += ctot;
927      aa0p++; aa1p++;
928    }
929 
930    /* reset dmax if necessary */
931 
932    return tot;
933 }
934 
935 /* sconn links up non-overlapping alignments and calculates the score */
936 
sconn(struct savestr ** v,int n,int cgap,struct f_struct * f_str,struct rstruct * rst,const struct pstruct * ppst,const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int opt_prob)937 int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
938 	   struct rstruct *rst, const struct pstruct *ppst,
939 	   const unsigned char *aa0, int n0,
940 	   const unsigned char *aa1, int n1,
941 	   int opt_prob)
942 {
943    int     i, si, cmpp ();
944    struct slink *start, *sl, *sj, *so, sarr[MAXSAV];
945    int     lstart, plstop;
946    double tatprob;
947 
948    /* sarr[] saves each alignment score/position, and provides a link
949       back to the previous alignment that maximizes the score */
950 
951    /*	sort the score left to right in lib pos */
952    kpsort (v, n);
953 
954    start = NULL;
955 
956    /* for the remaining runs, see if they fit */
957    for (i = 0, si = 0; i < n; i++) {
958 
959      /* if the score is less than the gap penalty, it never helps */
960      if (!opt_prob && (v[i]->score < cgap) ){ continue; }
961 
962      lstart = v[i]->start;
963 
964      /* put the run in the group */
965      sarr[si].vp = v[i];
966      sarr[si].score = v[i]->score;
967      sarr[si].next = NULL;
968      sarr[si].prev = NULL;
969      sarr[si].tat = NULL;
970 
971      if(opt_prob) {
972        sarr[si].tatprob =
973 	 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
974 		      ppst->pam2[0],ppst->nsq, f_str,
975 		      ppst->pseudocts, opt_prob,ppst->zsflag);
976        sarr[si].tat = sarr[si].newtat;
977      }
978 
979      /* if it fits, then increase the score */
980      for (sl = start; sl != NULL; sl = sl->next) {
981        plstop = sl->vp->stop;
982        /* if end < start or start > end, add score */
983        if (plstop < lstart ) {
984 	 if(!opt_prob) {
985 	   sarr[si].score = sl->score + v[i]->score;
986 	   sarr[si].prev = sl;
987 	   /*
988 	     fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
989 	     i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
990 	   */
991 	   break;
992 	 } else {
993 	   tatprob =
994 	     calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
995 			  ppst->pam2[0], ppst->nsq, f_str,
996 			  ppst->pseudocts, opt_prob, ppst->zsflag);
997 	   /* if our tatprob gets worse when we add this, forget it */
998 	   if(tatprob > sarr[si].tatprob) {
999 	     free(sarr[si].newtat->probs); /* get rid of new tat struct */
1000 	     free(sarr[si].newtat);
1001 	     continue;
1002 	   } else {
1003 	     sarr[si].tatprob = tatprob;
1004 	     free(sarr[si].tat->probs); /* get rid of old tat struct */
1005 	     free(sarr[si].tat);
1006 	     sarr[si].tat = sarr[si].newtat;
1007 	     sarr[si].prev = sl;
1008 	     sarr[si].score = sl->score + v[i]->score;
1009 	     /*
1010 	       fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
1011 	       i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
1012 	     */
1013 	     break;
1014 	   }
1015 	 }
1016        }
1017      }
1018 
1019      /*	now recalculate where the score fits - resort the scores */
1020      if (start == NULL) {
1021        start = &sarr[si];
1022      } else {
1023        if(!opt_prob) { /* sort by scores */
1024 	 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1025 	   if (sarr[si].score > sj->score) { /* if new score > best score */
1026 	     sarr[si].next = sj;	     /* previous best linked to best */
1027 	     if (so != NULL)
1028 	       so->next = &sarr[si];	     /* old best points to new best */
1029 	     else
1030 	       start = &sarr[si];
1031 	     break;
1032 	   }
1033 	   so = sj;			     /* old-best saved in so */
1034 	 }
1035        } else { /* sort by tatprobs */
1036 	 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1037 	   if ( sarr[si].tatprob < sj->tatprob ||
1038 		((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
1039 	     sarr[si].next = sj;
1040 	     if (so != NULL)
1041 	       so->next = &sarr[si];
1042 	     else
1043 	       start = &sarr[si];
1044 	     break;
1045 	   }
1046 	   so = sj;
1047 	 }
1048        }
1049      }
1050      si++;
1051    }
1052 
1053    if(opt_prob) {
1054      for (i = 0 ; i < si ; i++) {
1055        free(sarr[i].tat->probs);
1056        free(sarr[i].tat);
1057      }
1058    }
1059 
1060    if (start != NULL) {
1061 
1062      if(opt_prob)
1063        rst->escore = start->tatprob;
1064      else
1065        rst->escore = 2.0;
1066 
1067      rst->segnum = rst->seglen = 0;
1068      for(sj = start ; sj != NULL; sj = sj->prev) {
1069        rst->segnum++;
1070        rst->seglen += sj->vp->stop - sj->vp->start + 1;
1071      }
1072      return (start->score);
1073    } else {
1074 
1075      if(opt_prob)
1076        rst->escore = 1.0;
1077      else
1078        rst->escore = 2.0;
1079 
1080      rst->segnum = rst->seglen = 0;
1081      return (0);
1082    }
1083 }
1084 
1085 void
kssort(struct savestr ** v,int n)1086 kssort (struct savestr **v, int n)
1087 {
1088    int     gap, i, j;
1089    struct savestr *tmp;
1090 
1091    for (gap = n / 2; gap > 0; gap /= 2)
1092       for (i = gap; i < n; i++)
1093 	 for (j = i - gap; j >= 0; j -= gap)
1094 	 {
1095 	    if (v[j]->score >= v[j + gap]->score)
1096 	       break;
1097 	    tmp = v[j];
1098 	    v[j] = v[j + gap];
1099 	    v[j + gap] = tmp;
1100 	 }
1101 }
1102 void
kpsort(v,n)1103 kpsort (v, n)
1104 struct savestr *v[];
1105 int     n;
1106 {
1107    int     gap, i, j;
1108    struct savestr *tmp;
1109 
1110    for (gap = n / 2; gap > 0; gap /= 2)
1111       for (i = gap; i < n; i++)
1112 	 for (j = i - gap; j >= 0; j -= gap)
1113 	 {
1114 	    if (v[j]->start <= v[j + gap]->start)
1115 	       break;
1116 	    tmp = v[j];
1117 	    v[j] = v[j + gap];
1118 	    v[j + gap] = tmp;
1119 	 }
1120 }
1121 
1122 /* sorts alignments from right to left (back to front) based on stop */
1123 
1124 void
krsort(v,n)1125 krsort (v, n)
1126 struct savestr *v[];
1127 int     n;
1128 {
1129    int     gap, i, j;
1130    struct savestr *tmp;
1131 
1132    for (gap = n / 2; gap > 0; gap /= 2)
1133       for (i = gap; i < n; i++)
1134 	 for (j = i - gap; j >= 0; j -= gap)
1135 	 {
1136 	    if (v[j]->stop > v[j + gap]->stop)
1137 	       break;
1138 	    tmp = v[j];
1139 	    v[j] = v[j + gap];
1140 	    v[j + gap] = tmp;
1141 	 }
1142 }
1143 
1144 struct a_res_str *
do_walign(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,int repeat_thresh,struct pstruct * ppst,struct f_struct * f_str,int * have_ares)1145 do_walign (const unsigned char *aa0, int n0,
1146 	   const unsigned char *aa1, int n1,
1147 	   int frame, int repeat_thresh,
1148 	   struct pstruct *ppst,
1149 	   struct f_struct *f_str,
1150 	   int *have_ares)
1151 {
1152   struct a_res_str *a_res;
1153   int hoff, n10;
1154   int ib;
1155   unsigned char *aa0t;
1156   const unsigned char *aa1p;
1157 
1158   *have_ares = 0x2;	/* set 0x2 bit to indicate local copy */
1159 
1160   if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
1161     fprintf(stderr," [do_walign] Cannot allocate a_res");
1162     return NULL;
1163   }
1164 
1165 #ifdef TFAST
1166   f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
1167   aa1p = f_str->aa1x;
1168 #else
1169   n10 = n1;
1170   aa1p = aa1;
1171 #endif
1172 
1173   do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &a_res->rst, &hoff, 1);
1174 
1175   /* the alignment portion takes advantage of the information left
1176      over in f_str after do_fastf is done.  in particular, it is
1177      easy to run a modified sconn() to produce the alignments.
1178 
1179      unfortunately, the alignment display routine wants to have
1180      things encoded as with bd_align and sw_align, so we need to do that.
1181      */
1182 
1183   if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
1184     fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
1185     exit(1);
1186   }
1187 
1188    kssort (f_str->vptr, f_str->nsave);
1189    f_str->aa0ix = 0;
1190    if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
1191    for (ib=0; ib < f_str->nm0; ib++) {
1192      if (f_str->vptr[ib]->score > 0) {
1193        f_str->vptr[ib]->score =
1194 	 ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
1195      }
1196    }
1197 
1198    /* after ma_spam is over, we need to reset aa0 */
1199    for (ib = 0; ib < n0; ib++) {
1200      if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
1201    }
1202 
1203    kssort(f_str->vptr,f_str->nsave);
1204 
1205    for ( ; f_str->nsave > 0; f_str->nsave--)
1206      if (f_str->vptr[f_str->nsave-1]->score >0) break;
1207 
1208   a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
1209   free(aa0t);
1210 
1211   a_res->res = f_str->res;
1212   a_res->sw_score = a_res->rst.score[0];
1213   return a_res;
1214 }
1215 
1216 /* this version of sconn is modified to provide alignment information */
1217 
sconn_a(unsigned char * aa0,int n0,int cgap,struct f_struct * f_str,struct a_res_str * a_res)1218 int sconn_a (unsigned char *aa0, int n0, int cgap,
1219 	     struct f_struct *f_str,
1220 	     struct a_res_str *a_res)
1221 {
1222    int     i, si, cmpp (), n;
1223    unsigned char *aa0p;
1224    int sx, dx, doff;
1225 
1226    struct savestr **v;
1227    struct slink {
1228      int     score;
1229      struct savestr *vp;
1230      struct slink *snext;
1231      struct slink *aprev;
1232    } *start, *sl, *sj, *so, sarr[MAXSAV];
1233    int     lstop, plstart;
1234    int *res, nres, tres;
1235 
1236 /*	sort the score left to right in lib pos */
1237 
1238    v = f_str->vptr;
1239    n = f_str->nsave;
1240 
1241    krsort (v, n);	/* sort from left to right in library */
1242 
1243    start = NULL;
1244 
1245 /*	for each alignment, see if it fits */
1246 
1247    for (i = 0, si = 0; i < n; i++) {
1248 
1249 /*	if the score is less than the join threshold, skip it */
1250      if (v[i]->score < cgap) continue;
1251 
1252      lstop = v[i]->stop;		/* have right-most lstart */
1253 
1254 /*	put the alignment in the group */
1255 
1256      sarr[si].vp = v[i];
1257      sarr[si].score = v[i]->score;
1258      sarr[si].snext = NULL;
1259      sarr[si].aprev = NULL;
1260 
1261 /* 	if it fits, then increase the score */
1262 /* start points to a sorted (by total score) list of candidate
1263    overlaps */
1264 
1265      for (sl = start; sl != NULL; sl = sl->snext) {
1266        plstart = sl->vp->start;
1267        if (plstart > lstop ) {
1268 	 sarr[si].score = sl->score + v[i]->score;
1269 	 sarr[si].aprev = sl;
1270 	 break;		/* quit as soon as the alignment has been added */
1271        }
1272      }
1273 
1274 /* now recalculate the list of best scores */
1275      if (start == NULL)
1276        start = &sarr[si];	/* put the first one in the list */
1277      else
1278        for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
1279 	 if (sarr[si].score > sj->score) { /* new score better than old */
1280 	   sarr[si].snext = sj;		/* snext best after new score */
1281 	   if (so != NULL)
1282 	     so->snext = &sarr[si];	/* prev_best->snext points to best */
1283 	   else  start = &sarr[si];	/* start points to best */
1284 	   break;			/* stop looking */
1285 	 }
1286 	 so = sj;		/* previous candidate best */
1287        }
1288      si++;				/* increment to snext alignment */
1289    }
1290 
1291    /* we have the best set of alignments, write them to *res */
1292    if (start != NULL) {
1293      res = f_str->res;	/* set a destination for the alignment ops */
1294      tres = nres = 0;	/* alignment op length = 0 */
1295      aa0p = aa0;	/* point into query (needed for calcons later) */
1296      a_res->min1 = start->vp->start;	/* start in library */
1297      a_res->min0 = 0;			/* start in query */
1298      for (sj = start; sj != NULL; sj = sj->aprev ) {
1299        doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
1300        /*
1301        fprintf(stderr,"doff: %3d\n",doff);
1302        */
1303        for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
1304 	    dx <= sj->vp->stop; dx++) {
1305 	 *aa0p++ = f_str->aa0t[sx++];	/* copy residue into aa0 */
1306 	 tres++;			/* bump alignment counter */
1307 	 res[nres++] = 0;		/* put 0-op in res */
1308        }
1309        sj->vp->dp -= doff;
1310        if (sj->aprev != NULL) {
1311 	 if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
1312 	 /* put an insert op into res to get to next aligned block */
1313 	   tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
1314        }
1315        /*
1316        fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
1317 	       sj->vp->start - sj->vp->dp + f_str->noff,
1318 	       sj->vp->stop - sj->vp->dp + f_str->noff,
1319 	       sj->vp->start,sj->vp->stop,sj->vp->dp,
1320 	       f_str->noff,sj->vp->score);
1321        fprintf(stderr,"%3d - %3d: %3d\n",
1322 	       sj->vp->start,sj->vp->stop,sj->vp->score);
1323        */
1324        a_res->max1 = sj->vp->stop;
1325        a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
1326      }
1327 
1328      /*
1329      fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
1330      a_res->min0,a_res->max0,a_res->min1,a_res->max1);
1331      */
1332 
1333      /* now replace f_str->aa0t with aa0 */
1334      for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
1335 
1336      return tres;
1337    }
1338    else return (0);
1339 }
1340 
1341 /* calculate the 100% identical score */
1342 int
shscore(unsigned char * aa0,int n0,int ** pam2,int nsq)1343 shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
1344 {
1345   int i, sum;
1346   for (i=0,sum=0; i<n0; i++)
1347     if (aa0[i]!=0 && aa0[i]<=nsq) sum += pam2[aa0[i]][aa0[i]];
1348   return sum;
1349 }
1350 
1351 void
pre_cons(const unsigned char * aa1,int n1,int frame,struct f_struct * f_str)1352 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
1353 
1354 #ifdef TFAST
1355   f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
1356 #endif
1357 }
1358 
1359 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
1360 /* call from calcons, calc_id, calc_code */
1361 void
aln_func_vals(int frame,struct a_struct * aln)1362 aln_func_vals(int frame, struct a_struct *aln) {
1363 
1364 #ifdef TFAST
1365   aln->qlrev = 0;
1366   aln->qlfact = 1;
1367   aln->llfact = aln->llmult = 3;
1368   aln->frame = 0;
1369   if (frame > 3) aln->llrev = 1;
1370 #else	/* FASTF */
1371   aln->llfact = aln->qlfact = aln->llmult = 1;
1372   aln->llrev = aln->qlrev = 0;
1373   aln->frame = 0;
1374 #endif
1375 }
1376 
aa0shuffle(unsigned char * aa0,int n0,struct f_struct * f_str)1377 void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
1378 
1379   int i, j, k;
1380   unsigned char tmp;
1381 
1382   for (i = f_str->nmoff-1 ; --i ; ) {
1383 
1384     /* j = nrand(i); if (i == j) continue;*/       /* shuffle columns */
1385     j = (f_str->nmoff - 2) - i; if (i <= j) break; /* reverse columns */
1386 
1387     /* swap all i'th column residues for all j'th column residues */
1388     for(k = 0 ; k < f_str->nm0 ; k++) {
1389       tmp = aa0[(k * (f_str->nmoff)) + i];
1390       aa0[(k * (f_str->nmoff)) + i] = aa0[(k * (f_str->nmoff)) + j];
1391       aa0[(k * (f_str->nmoff)) + j] = tmp;
1392     }
1393   }
1394 }
1395