1 /* $Id: dropff2.c 989 2012-07-24 19:37:38Z wrp $ */
2
3 /* copyright (c) 1998, 1999, 2014 by William R. Pearson and The Rector &
4 Vistors of the University of Virginia */
5
6 /* Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing,
13 software distributed under this License is distributed on an "AS
14 IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
15 express or implied. See the License for the specific language
16 governing permissions and limitations under the License.
17 */
18
19 /* this code implements the "fastf" algorithm, which is designed to
20 deconvolve mixtures of protein sequences derived from mixed-peptide
21 Edman sequencing. The expected input is:
22
23 >test | 40001 90043 | mgstm1
24 MGCEN,
25 MIDYP,
26 MLLAY,
27 MLLGY
28
29 Where the ','s indicate the length/end of the sequencing cycle
30 data. Thus, in this example, the sequence is from a mixture of 4
31 peptides, M was found in the first position, G,I, and L(2) at the second,
32 C,D, L(2) at the third, etc.
33
34 Because the sequences are derived from mixtures, there need not be
35 any partial sequence "MGCEN", the actual deconvolved sequence might be
36 "MLDGN".
37 */
38
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <math.h>
44 #include <ctype.h>
45
46 #include "defs.h"
47 #include "param.h"
48 #include "structs.h"
49 #include "tatstats.h"
50
51 #define EOSEQ 0
52 #define ESS 59
53 #define MAXHASH 32
54 #define NMAP MAXHASH+1
55 #define NMAP_X 23 /* re-code NMAP for 'X' */
56 #define NMAP_Z 24 /* re-code NMAP for '*' */
57
58 #ifndef MAXSAV
59 #define MAXSAV 10
60 #endif
61
62 #define DROP_INTERN
63 #include "drop_func.h"
64
65 static char *verstr="4.21 May 2006 (ajm/wrp)";
66
67 int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
68
69 #ifdef TFAST
70 extern int aatran(const unsigned char *ntseq, unsigned char *aaseq,
71 const int maxs, const int frame);
72 #endif
73
74 struct hlstr { int next, pos;};
75
76 void savemax(struct dstruct *, struct f_struct *);
77
78 static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
79 int **, struct f_struct *);
80 static int m1_spam(unsigned char *, int,
81 const unsigned char *, int,
82 struct savestr *, int **, int, struct f_struct *);
83
84 int sconn(struct savestr **v, int nsave, int cgap,
85 struct f_struct *, struct rstruct *, const struct pstruct *,
86 const unsigned char *aa0, int n0,
87 const unsigned char *aa1, int n1,
88 int opt_prob);
89
90 void kpsort(struct savestr **, int);
91 void kssort(struct savestr **, int);
92 void kpsort(struct savestr **, int);
93
94 int
95 sconn_a(unsigned char *, int, int, struct f_struct *,
96 struct a_res_str *);
97
98 /* initialize for fasta */
99
100 void
init_work(unsigned char * aa0,int n0,struct pstruct * ppst,struct f_struct ** f_arg)101 init_work (unsigned char *aa0, int n0,
102 struct pstruct *ppst,
103 struct f_struct **f_arg)
104 {
105 int mhv, phv;
106 int hmax;
107 int i0, ii0, hv;
108 struct f_struct *f_str;
109
110 int maxn0;
111 int i, j, q;
112 struct savestr *vmptr;
113 int *res;
114 int nsq;
115
116 nsq = ppst->nsqx;
117
118 f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
119 if(f_str == NULL) {
120 fprintf(stderr, "Couldn't calloc f_str\n");
121 exit(1);
122 }
123
124 ppst->sw_flag = 0;
125
126 /* fastf3 cannot work with lowercase symbols as low complexity;
127 thus, NMAP must be disabled; this depends on aascii['X'] */
128 if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
129 if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
130
131 /* this does not work for share ppst structs, as in threads */
132 /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
133
134 for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
135 if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
136
137 if (mhv <= 0) {
138 fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
139 exit (1);
140 }
141
142 for (f_str->kshft = 0; mhv > 0; mhv /= 2)
143 f_str->kshft++;
144
145 /* kshft = 2; */
146 hmax = hv = (1 << f_str->kshft);
147 f_str->hmask = (hmax >> f_str->kshft) - 1;
148
149 if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
150 fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
151 exit (1);
152 }
153 for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
154 aa0 = f_str->aa0;
155
156 if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
157 fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
158 exit (1);
159 }
160 f_str->aa0ix = 0;
161
162 if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
163 fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
164 hmax,f_str->hmask);
165 exit (1);
166 }
167 if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
168 fprintf (stderr, " cannot allocate pamh1 array\n");
169 exit (1);
170 }
171 if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
172 fprintf (stderr, " cannot allocate pamh2 array\n");
173 exit (1);
174 }
175 if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
176 fprintf (stderr, " cannot allocate hash link array");
177 exit (1);
178 }
179
180 for (i0 = 0; i0 < hmax; i0++) {
181 f_str->harr[i0].next = -1;
182 f_str->harr[i0].pos = -1;
183 }
184
185 for (i0 = 0; i0 < n0; i0++) {
186 f_str->link[i0].next = -1;
187 f_str->link[i0].pos = -1;
188 }
189
190 /* encode the aa0 array */
191 /*
192 this code has been modified to allow for mixed peptide sequences
193 aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
194 the 'NULL' character resets the hash position counter, to indicate that
195 any of several residues can be in the same position.
196 We also need to keep track of the number of times this has happened, so that
197 we can redivide the sequence later
198
199 i0 counts through the sequence
200 ii0 counts through the hashed sequence
201
202 */
203
204 f_str->nm0 = 1;
205 f_str->nmoff = -1;
206 phv = hv = 0;
207 for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
208 /* reset the counter and start hashing again */
209 if (aa0[i0] == ESS || aa0[i0] == 0) {
210 aa0[i0] = 0; /* set ESS to 0 */
211 /* fprintf(stderr," converted ',' to 0\n");*/
212 i0++; /* skip over the blank */
213 f_str->nm0++;
214 if (f_str->nmoff < 0) f_str->nmoff = i0;
215 phv = hv = 0;
216 ii0 = 0;
217 }
218 hv = ppst->hsq[aa0[i0]];
219 f_str->link[i0].next = f_str->harr[hv].next;
220 f_str->link[i0].pos = f_str->harr[hv].pos;
221 f_str->harr[hv].next = i0;
222 f_str->harr[hv].pos = ii0;
223 f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
224 }
225 if (f_str-> nmoff < 0) f_str->nmoff = n0;
226
227
228 #ifdef DEBUG
229 /*
230 fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
231 */
232 #endif
233
234 /*
235 #ifdef DEBUG
236 fprintf(stderr," hmax: %d\n",hmax);
237 for ( hv=0; hv<hmax; hv++)
238 fprintf(stderr,"%2d %c %3d %3d\n",hv,
239 (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
240 f_str->harr[hv].pos,f_str->harr[hv].next);
241 fprintf(stderr,"----\n");
242 for ( hv=0; hv<n0; hv++)
243 fprintf(stderr,"%2d: %3d %3d\n",hv,
244 f_str->link[hv].pos,f_str->link[hv].next);
245 #endif
246 */
247
248 f_str->maxsav = MAXSAV;
249 if ((f_str->vmax = (struct savestr *)
250 calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
251 fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
252 exit(1);
253 }
254
255 if ((f_str->vptr = (struct savestr **)
256 calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
257 fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
258 exit(1);
259 }
260
261 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
262 vmptr->used = (int *) calloc(n0, sizeof(int));
263 if(vmptr->used == NULL) {
264 fprintf(stderr, "Couldn't alloc vmptr->used\n");
265 exit(1);
266 }
267 }
268
269 /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
270 pam2[0][0] is now undefined for consistency with blast
271 */
272
273 for (i0 = 1; i0 <= ppst->nsq; i0++)
274 f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
275
276 ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
277 if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
278 ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
279
280 f_str->ndo = 0;
281 f_str->noff = n0-1;
282 if (f_str->diag==NULL)
283 f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
284 sizeof (struct dstruct));
285
286 if (f_str->diag == NULL)
287 {
288 fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
289 (long) MAXDIAG * (long) (sizeof (struct dstruct)));
290 exit (1);
291 }
292
293 #ifdef TFAST
294 if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
295 sizeof(unsigned char)))
296 == NULL) {
297 fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
298 exit (1);
299 }
300 f_str->aa1x++;
301 #endif
302
303 /* allocate space for the scoring arrays */
304 maxn0 = n0 + 4;
305
306 maxn0 = max(3*n0/2,MIN_RES);
307 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
308 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
309 exit(1);
310 }
311 f_str->res = res;
312 f_str->max_res = maxn0;
313
314 /* Tatusov Statistics Setup */
315
316 /* initialize priors array. */
317 if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
318 fprintf(stderr, "Couldn't allocate priors array.\n");
319 exit(1);
320 }
321 calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
322
323 f_str->dotat = 0;
324 f_str->shuff_cnt = ppst->shuff_node;
325
326 /* End of Tatusov Statistics Setup */
327
328 *f_arg = f_str;
329 }
330
331
332 /* pstring1 is a message to the manager, currently 512 */
333 /* pstring2 is the same information, but in a markx==10 format */
334 void
get_param(const struct pstruct * ppstr,char ** pstring1,char * pstring2,struct score_count_s * s_info)335 get_param (const struct pstruct *ppstr,
336 char **pstring1, char *pstring2,
337 struct score_count_s *s_info)
338 {
339 #ifndef TFAST
340 char *pg_str="FASTF";
341 #else
342 char *pg_str="TFASTF";
343 #endif
344
345 sprintf (pstring1[0], "%s (%s)",pg_str,verstr);
346 sprintf (pstring1[1], "%s matrix (%d:%d), join: %d",
347 ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,ppstr->param_u.fa.cgap);
348
349 if (ppstr->param_u.fa.iniflag) strcat(pstring1[0]," init1");
350
351 if (pstring2 != NULL) {
352 sprintf (pstring2, "; pg_name_alg: %s\n; pg_ver_rel: %s\n; pg_matrix: %s (%d:%d)\n\
353 ; pg_join: %d\n",
354 pg_str,verstr, ppstr->pam_name, ppstr->pam_h,ppstr->pam_l,
355 ppstr->param_u.fa.cgap);
356 }
357 }
358
359 void
close_work(const unsigned char * aa0,const int n0,struct pstruct * ppst,struct f_struct ** f_arg)360 close_work (const unsigned char *aa0, const int n0,
361 struct pstruct *ppst,
362 struct f_struct **f_arg)
363 {
364 struct f_struct *f_str;
365 struct savestr *vmptr;
366
367 f_str = *f_arg;
368
369 if (f_str != NULL) {
370
371 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
372 free(vmptr->used);
373
374 free(f_str->res);
375 #ifdef TFAST
376 free(f_str->aa1x - 1); /* allocated, then aa1x++'ed */
377 #endif
378 free(f_str->diag);
379 free(f_str->link);
380 free(f_str->pamh2);
381 free(f_str->pamh1);
382 free(f_str->harr);
383 free(f_str->aa0t);
384 free(f_str->aa0);
385 free(f_str->priors);
386 free(f_str->vmax);
387 free(f_str->vptr);
388 free(f_str);
389 *f_arg = NULL;
390 }
391 }
392
do_fastf(unsigned char * aa0,int n0,const unsigned char * aa1,int n1,const struct pstruct * ppst,struct f_struct * f_str,struct rstruct * rst,int * hoff,int opt_prob)393 int do_fastf (unsigned char *aa0, int n0,
394 const unsigned char *aa1, int n1,
395 const struct pstruct *ppst, struct f_struct *f_str,
396 struct rstruct *rst, int *hoff, int opt_prob)
397 {
398 int nd; /* diagonal array size */
399 int lhval;
400 int kfact;
401 register struct dstruct *dptr;
402 register int tscor;
403 register struct dstruct *diagp;
404 struct dstruct *dpmax;
405 register int lpos;
406 int tpos, npos;
407 struct savestr *vmptr;
408 int scor, tmp;
409 int im, ib, nsave;
410 int cmps (); /* comparison routine for ksort */
411 const int *hsq;
412
413 hsq = ppst->hsq;
414
415 if (n1 < 1) {
416 rst->score[0] = rst->score[1] = rst->score[2] = 0;
417 rst->escore = 1.0;
418 rst->segnum = 0;
419 rst->seglen = 0;
420 return 1;
421 }
422
423 if (n0+n1+1 >= MAXDIAG) {
424 fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
425 rst->score[0] = rst->score[1] = rst->score[2] = -1;
426 rst->escore = 2.0;
427 rst->segnum = 0;
428 rst->seglen = 0;
429 return -1;
430 }
431
432 nd = n0 + n1;
433
434 dpmax = &f_str->diag[nd];
435 for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {
436 dptr->stop = -1;
437 dptr->dmax = NULL;
438 dptr++->score = 0;
439 }
440
441 /* initialize the saved segment structures */
442 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
443 vmptr->score = 0;
444 memset(vmptr->used, 0, n0 * sizeof(int));
445 }
446
447 f_str->lowmax = f_str->vmax;
448 f_str->lowscor = 0;
449
450 /* start hashing */
451
452 diagp = &f_str->diag[f_str->noff];
453 for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
454 if (hsq[aa1[lpos]]>=NMAP) {
455 lpos++ ; diagp++;
456 while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
457 if (lpos >= n1) break;
458 lhval = 0;
459 }
460 lhval = hsq[aa1[lpos]];
461 for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
462 tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {
463 /* tscor gets position of end of current lpos diag run */
464 if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
465 tscor++; /* move forward one */
466 if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
467 /* includes implicit -1 mismatch penalty */
468 scor = dptr->score; /* current score of this run */
469 if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
470 f_str->lowscor < scor) /* if updating tscor makes run worse, */
471 savemax (dptr, f_str); /* save it */
472
473 if ((tscor += scor) >= kfact) { /* add to current run if continuing */
474 /* is better than restart (kfact) */
475 dptr->score = tscor;
476 dptr->stop = lpos;
477 }
478 else {
479 dptr->score = kfact; /* starting over is better */
480 dptr->start = (dptr->stop = lpos);
481 }
482 }
483 else { /* continue current run */
484 dptr->score += f_str->pamh1[aa0[tpos]];
485 dptr->stop = lpos;
486 }
487 }
488 else { /* no diagonal run yet */
489 dptr->score = f_str->pamh2[lhval];
490 dptr->start = (dptr->stop = lpos);
491 }
492 } /* end tpos */
493 } /* end lpos */
494
495 for (dptr = f_str->diag; dptr < dpmax;) {
496 if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
497 dptr->stop = -1;
498 dptr->dmax = NULL;
499 dptr++->score = 0;
500 }
501 f_str->ndo = nd;
502
503 /*
504 at this point all of the elements of aa1[lpos]
505 have been searched for elements of aa0[tpos]
506 with the results in diag[dpos]
507 */
508
509 /* set up pointers for sorting */
510
511 for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
512 if (vmptr->score > 0) {
513 vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
514 f_str->vptr[nsave++] = vmptr;
515 }
516 }
517
518 /* sort them */
519 kssort (f_str->vptr, nsave);
520
521
522 #ifdef DEBUG
523 /*
524 for (ib=0; ib<nsave; ib++) {
525 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
526 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
527 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
528 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
529 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
530 for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
531 fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
532 ppst->sq[aa1[im]]);
533 fputc('\n',stderr);
534 }
535 fprintf(stderr,"---\n");
536 */
537 /* now use m_spam to re-evaluate */
538 /*
539 for (tpos = 0; tpos < n0; tpos++) {
540 fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
541 if (tpos %10 == 9) fputc('\n',stderr);
542 }
543 fputc('\n',stderr);
544 */
545 #endif
546
547 f_str->aa0ix = 0;
548 for (ib=0; ib < nsave; ib++) {
549 if ((vmptr=f_str->vptr[ib])->score > 0) {
550 vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
551 ppst->pam2[0], ppst->pam_l, f_str);
552 }
553 }
554 /* reset aa0 - modified by m1_spam */
555 for (tpos = 0; tpos < n0; tpos++) {
556 if (aa0[tpos] >= 32) aa0[tpos] -= 32;
557 }
558
559 kssort(f_str->vptr,nsave);
560
561 for ( ; nsave > 0; nsave--)
562 if (f_str->vptr[nsave-1]->score >0) break;
563
564 if (nsave <= 0) {
565 f_str->nsave = 0;
566 rst->score[0] = rst->score[1] = rst->score[2] = 0;
567 rst->escore = 1.0;
568
569 return 1;
570 }
571 else f_str->nsave = nsave;
572
573
574 #ifdef DEBUG
575 /*
576 fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
577 for (ib=0; ib<nsave; ib++) {
578 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
579 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
580 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
581 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
582 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
583 for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
584 fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
585 ppst->sq[aa1[im]]);
586 fputc('\n',stderr);
587 }
588
589 fprintf(stderr,"---\n");
590 */
591 #endif
592
593 scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
594 rst, ppst, aa0, n0, aa1, n1, opt_prob);
595
596 for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
597 if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
598
599 rst->score[1] = vmptr->score;
600 rst->score[0] = rst->score[2] = max (scor, vmptr->score);
601
602 return 1;
603 }
604
do_work(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,const struct pstruct * ppst,struct f_struct * f_str,int qr_flg,int shuff_flg,struct rstruct * rst,struct score_count_s * s_info)605 void do_work (const unsigned char *aa0, int n0,
606 const unsigned char *aa1, int n1,
607 int frame,
608 const struct pstruct *ppst, struct f_struct *f_str,
609 int qr_flg, int shuff_flg, struct rstruct *rst,
610 struct score_count_s *s_info)
611 {
612 int opt_prob;
613 int hoff, n10, i;
614
615 if (qr_flg==1 && f_str->shuff_cnt <= 0) {
616 rst->escore = 2.0;
617 rst->score[0]=rst->score[1]=rst->score[2]= -1;
618 rst->valid_stat = 0;
619 return;
620 }
621
622 s_info->s_cnt[ppst->score_ix]++;
623 s_info->tot_scores++;
624
625 rst->valid_stat = 1;
626 if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1;
627 else opt_prob = 0;
628 if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0;
629 if (qr_flg) {
630 opt_prob=1;
631 /* if (frame==1) */
632 f_str->shuff_cnt--;
633 }
634
635 if (n1 < 1) {
636 rst->score[0] = rst->score[1] = rst->score[2] = -1;
637 rst->escore = 2.0;
638 return;
639 }
640
641 #ifdef TFAST
642 n10=aatran(aa1,f_str->aa1x,n1,frame);
643 if (ppst->debug_lib)
644 for (i=0; i<n10; i++)
645 if (f_str->aa1x[i]>ppst->nsq) {
646 fprintf(stderr,
647 "residue[%d/%d] %d range (%d)\n",i,n1,
648 f_str->aa1x[i],ppst->nsq);
649 f_str->aa1x[i]=0;
650 n10=i-1;
651 }
652
653 do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
654 #else /* FASTF */
655 do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
656 #endif
657
658 rst->comp = rst->H = -1.0;
659
660 }
661
do_opt(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,struct pstruct * ppst,struct f_struct * f_str,struct rstruct * rst)662 void do_opt (const unsigned char *aa0, int n0,
663 const unsigned char *aa1, int n1,
664 int frame,
665 struct pstruct *ppst,
666 struct f_struct *f_str,
667 struct rstruct *rst)
668 {
669 int optflag, tscore, hoff, n10;
670
671 optflag = ppst->param_u.fa.optflag;
672 ppst->param_u.fa.optflag = 1;
673
674 #ifdef TFAST
675 n10=aatran(aa1,f_str->aa1x,n1,frame);
676 do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1);
677 #else /* FASTA */
678 do_fastf(f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1);
679 #endif
680 ppst->param_u.fa.optflag = optflag;
681 }
682
683 void
savemax(dptr,f_str)684 savemax (dptr, f_str)
685 register struct dstruct *dptr;
686 struct f_struct *f_str;
687 {
688 register int dpos;
689 register struct savestr *vmptr;
690 register int i;
691
692 dpos = (int) (dptr - f_str->diag);
693
694 /* check to see if this is the continuation of a run that is already saved */
695
696 if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
697 vmptr->start == dptr->start)
698 {
699 vmptr->stop = dptr->stop;
700 if ((i = dptr->score) <= vmptr->score)
701 return;
702 vmptr->score = i;
703 if (vmptr != f_str->lowmax)
704 return;
705 }
706 else
707 {
708 i = f_str->lowmax->score = dptr->score;
709 f_str->lowmax->dp = dpos;
710 f_str->lowmax->start = dptr->start;
711 f_str->lowmax->stop = dptr->stop;
712 dptr->dmax = f_str->lowmax;
713 }
714
715 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
716 if (vmptr->score < i)
717 {
718 i = vmptr->score;
719 f_str->lowmax = vmptr;
720 }
721 f_str->lowscor = i;
722 }
723
724 /* this version of spam() is designed to work with a collection of
725 subfragments, selecting the best amino acid at each position so
726 that, from each subfragment, each position is only used once.
727
728 As a result, m_spam needs to know the number of fragments.
729
730 In addition, it now requires a global alignment to the fragment
731 and resets the start and stop positions
732
733 */
734
735 static int
m1_spam(unsigned char * aa0,int n0,const unsigned char * aa1,int n1,struct savestr * dmax,int ** pam2,int pam_l,struct f_struct * f_str)736 m1_spam (unsigned char *aa0, int n0,
737 const unsigned char *aa1, int n1,
738 struct savestr *dmax, int **pam2, int pam_l,
739 struct f_struct *f_str)
740 {
741 int tpos, lpos, im, ii, nm, ci;
742 int tot, ctot, pv;
743
744 struct {
745 int start, stop, score;
746 } curv, maxv;
747 unsigned char *aa0p;
748 const unsigned char *aa1p;
749
750 lpos = dmax->start; /* position in library sequence */
751 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
752 /* force global alignment, reset start*/
753 if (tpos < lpos) {
754 lpos = dmax->start -= tpos;
755 tpos = 0;
756 }
757 else {
758 tpos -= lpos;
759 lpos = dmax->start = 0;
760 }
761
762 dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
763 if (dmax->stop > n1) dmax->stop = n1;
764
765 /*
766 if (dmax->start < 0) {
767 tpos = -dmax->start;
768 lpos = dmax->start=0;
769 }
770 else tpos = 0;
771 */
772
773 aa1p = &aa1[lpos];
774 aa0p = &aa0[tpos];
775
776 nm = f_str->nm0;
777
778 tot = curv.score = maxv.score = 0;
779 for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
780 ctot = pam_l;
781 ci = -1;
782 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
783 if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
784 ctot = pv;
785 ci = ii;
786 /* fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
787 }
788 }
789 tot += ctot;
790 if (ci >= 0 && aa0p[ci] < 32) {
791 #ifdef DEBUG
792 /* fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
793 #endif
794 aa0p[ci] += 32;
795 dmax->used[&aa0p[ci] - aa0] = 1;
796 }
797 }
798 return tot;
799 }
800
ma_spam(unsigned char * aa0,int n0,const unsigned char * aa1,struct savestr * dmax,struct pstruct * ppst,struct f_struct * f_str)801 int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
802 struct savestr *dmax, struct pstruct *ppst,
803 struct f_struct *f_str)
804 {
805 int **pam2;
806 int tpos, lpos, im, ii, nm, ci, lp0;
807 int tot, ctot, pv;
808 struct {
809 int start, stop, score;
810 } curv, maxv;
811 const unsigned char *aa1p;
812 unsigned char *aa0p, *aa0pt;
813 int aa0t_flg;
814
815 pam2 = ppst->pam2[0];
816 aa0t_flg = 0;
817
818 lpos = dmax->start; /* position in library sequence */
819 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
820 lp0 = lpos = dmax->start;
821 aa1p = &aa1[lpos];
822 aa0p = &aa0[tpos]; /* real aa0 sequence */
823
824 /* the destination aa0 sequence (without nulls) */
825 aa0pt = &f_str->aa0t[f_str->aa0ix];
826
827 curv.start = lpos;
828 nm = f_str->nm0;
829
830 /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
831 if (lpos == 0 && tpos > 0)
832 for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31; /* filler character */
833
834 tot = curv.score = maxv.score = 0;
835 for (; lpos <= dmax->stop; lpos++) {
836 ctot = ppst->pam_l;
837 ci = -1;
838 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
839 if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
840 ctot = pv;
841 ci = ii;
842 }
843 }
844 tot += ctot;
845 if (ci >= 0) {
846 if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
847 else {
848 *aa0pt++ = aa0p[ci];
849 aa0p[ci] += 32;
850 aa0t_flg=1;
851 }
852 }
853 aa0p++; aa1p++;
854 }
855
856 if (aa0t_flg) {
857 dmax->dp -= f_str->aa0ix; /* shift ->dp for aa0t */
858 if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
859 fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
860 }
861 else
862 *aa0pt++ = 0; /* skip over NULL */
863
864 aa0pt = &f_str->aa0t[f_str->aa0ix];
865 aa1p = &aa1[lp0];
866
867 /*
868 for (im = 0; im < f_str->nmoff; im++)
869 fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
870 fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
871 */
872
873 f_str->aa0ix += f_str->nmoff; /* update offset into aa0t */
874 }
875 /*
876 fprintf(stderr," ma_spam returning: %d\n",tot);
877 */
878 return tot;
879 }
880
881 static int
m0_spam(unsigned char * aa0,const unsigned char * aa1,int n1,struct savestr * dmax,int ** pam2,struct f_struct * f_str)882 m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
883 struct savestr *dmax, int **pam2,
884 struct f_struct *f_str)
885 {
886 int tpos, lpos, lend, im, ii, nm;
887 int tot, ctot, pv;
888 struct {
889 int start, stop, score;
890 } curv, maxv;
891 const unsigned char *aa0p, *aa1p;
892
893 lpos = dmax->start; /* position in library sequence */
894 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
895 if (tpos > 0) {
896 if (lpos-tpos >= 0) {
897 lpos = dmax->start -= tpos; /* force global alignment, reset start*/
898 tpos = 0;
899 }
900 else {
901 tpos -= lpos;
902 lpos = dmax->start = 0;
903 }
904 }
905
906 nm = f_str->nm0;
907 lend = dmax->stop;
908 if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
909 lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
910 if (lend >= n1) lend = n1-1;
911 }
912
913 aa1p = &aa1[lpos];
914 aa0p = &aa0[tpos];
915
916 curv.start = lpos;
917
918 tot = curv.score = maxv.score = 0;
919 for (; lpos <= lend; lpos++) {
920 ctot = -10000;
921 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
922 if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
923 ctot = pv;
924 }
925 }
926 tot += ctot;
927 aa0p++; aa1p++;
928 }
929
930 /* reset dmax if necessary */
931
932 return tot;
933 }
934
935 /* sconn links up non-overlapping alignments and calculates the score */
936
sconn(struct savestr ** v,int n,int cgap,struct f_struct * f_str,struct rstruct * rst,const struct pstruct * ppst,const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int opt_prob)937 int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
938 struct rstruct *rst, const struct pstruct *ppst,
939 const unsigned char *aa0, int n0,
940 const unsigned char *aa1, int n1,
941 int opt_prob)
942 {
943 int i, si, cmpp ();
944 struct slink *start, *sl, *sj, *so, sarr[MAXSAV];
945 int lstart, plstop;
946 double tatprob;
947
948 /* sarr[] saves each alignment score/position, and provides a link
949 back to the previous alignment that maximizes the score */
950
951 /* sort the score left to right in lib pos */
952 kpsort (v, n);
953
954 start = NULL;
955
956 /* for the remaining runs, see if they fit */
957 for (i = 0, si = 0; i < n; i++) {
958
959 /* if the score is less than the gap penalty, it never helps */
960 if (!opt_prob && (v[i]->score < cgap) ){ continue; }
961
962 lstart = v[i]->start;
963
964 /* put the run in the group */
965 sarr[si].vp = v[i];
966 sarr[si].score = v[i]->score;
967 sarr[si].next = NULL;
968 sarr[si].prev = NULL;
969 sarr[si].tat = NULL;
970
971 if(opt_prob) {
972 sarr[si].tatprob =
973 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
974 ppst->pam2[0],ppst->nsq, f_str,
975 ppst->pseudocts, opt_prob,ppst->zsflag);
976 sarr[si].tat = sarr[si].newtat;
977 }
978
979 /* if it fits, then increase the score */
980 for (sl = start; sl != NULL; sl = sl->next) {
981 plstop = sl->vp->stop;
982 /* if end < start or start > end, add score */
983 if (plstop < lstart ) {
984 if(!opt_prob) {
985 sarr[si].score = sl->score + v[i]->score;
986 sarr[si].prev = sl;
987 /*
988 fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
989 i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
990 */
991 break;
992 } else {
993 tatprob =
994 calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
995 ppst->pam2[0], ppst->nsq, f_str,
996 ppst->pseudocts, opt_prob, ppst->zsflag);
997 /* if our tatprob gets worse when we add this, forget it */
998 if(tatprob > sarr[si].tatprob) {
999 free(sarr[si].newtat->probs); /* get rid of new tat struct */
1000 free(sarr[si].newtat);
1001 continue;
1002 } else {
1003 sarr[si].tatprob = tatprob;
1004 free(sarr[si].tat->probs); /* get rid of old tat struct */
1005 free(sarr[si].tat);
1006 sarr[si].tat = sarr[si].newtat;
1007 sarr[si].prev = sl;
1008 sarr[si].score = sl->score + v[i]->score;
1009 /*
1010 fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
1011 i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
1012 */
1013 break;
1014 }
1015 }
1016 }
1017 }
1018
1019 /* now recalculate where the score fits - resort the scores */
1020 if (start == NULL) {
1021 start = &sarr[si];
1022 } else {
1023 if(!opt_prob) { /* sort by scores */
1024 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1025 if (sarr[si].score > sj->score) { /* if new score > best score */
1026 sarr[si].next = sj; /* previous best linked to best */
1027 if (so != NULL)
1028 so->next = &sarr[si]; /* old best points to new best */
1029 else
1030 start = &sarr[si];
1031 break;
1032 }
1033 so = sj; /* old-best saved in so */
1034 }
1035 } else { /* sort by tatprobs */
1036 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1037 if ( sarr[si].tatprob < sj->tatprob ||
1038 ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
1039 sarr[si].next = sj;
1040 if (so != NULL)
1041 so->next = &sarr[si];
1042 else
1043 start = &sarr[si];
1044 break;
1045 }
1046 so = sj;
1047 }
1048 }
1049 }
1050 si++;
1051 }
1052
1053 if(opt_prob) {
1054 for (i = 0 ; i < si ; i++) {
1055 free(sarr[i].tat->probs);
1056 free(sarr[i].tat);
1057 }
1058 }
1059
1060 if (start != NULL) {
1061
1062 if(opt_prob)
1063 rst->escore = start->tatprob;
1064 else
1065 rst->escore = 2.0;
1066
1067 rst->segnum = rst->seglen = 0;
1068 for(sj = start ; sj != NULL; sj = sj->prev) {
1069 rst->segnum++;
1070 rst->seglen += sj->vp->stop - sj->vp->start + 1;
1071 }
1072 return (start->score);
1073 } else {
1074
1075 if(opt_prob)
1076 rst->escore = 1.0;
1077 else
1078 rst->escore = 2.0;
1079
1080 rst->segnum = rst->seglen = 0;
1081 return (0);
1082 }
1083 }
1084
1085 void
kssort(struct savestr ** v,int n)1086 kssort (struct savestr **v, int n)
1087 {
1088 int gap, i, j;
1089 struct savestr *tmp;
1090
1091 for (gap = n / 2; gap > 0; gap /= 2)
1092 for (i = gap; i < n; i++)
1093 for (j = i - gap; j >= 0; j -= gap)
1094 {
1095 if (v[j]->score >= v[j + gap]->score)
1096 break;
1097 tmp = v[j];
1098 v[j] = v[j + gap];
1099 v[j + gap] = tmp;
1100 }
1101 }
1102 void
kpsort(v,n)1103 kpsort (v, n)
1104 struct savestr *v[];
1105 int n;
1106 {
1107 int gap, i, j;
1108 struct savestr *tmp;
1109
1110 for (gap = n / 2; gap > 0; gap /= 2)
1111 for (i = gap; i < n; i++)
1112 for (j = i - gap; j >= 0; j -= gap)
1113 {
1114 if (v[j]->start <= v[j + gap]->start)
1115 break;
1116 tmp = v[j];
1117 v[j] = v[j + gap];
1118 v[j + gap] = tmp;
1119 }
1120 }
1121
1122 /* sorts alignments from right to left (back to front) based on stop */
1123
1124 void
krsort(v,n)1125 krsort (v, n)
1126 struct savestr *v[];
1127 int n;
1128 {
1129 int gap, i, j;
1130 struct savestr *tmp;
1131
1132 for (gap = n / 2; gap > 0; gap /= 2)
1133 for (i = gap; i < n; i++)
1134 for (j = i - gap; j >= 0; j -= gap)
1135 {
1136 if (v[j]->stop > v[j + gap]->stop)
1137 break;
1138 tmp = v[j];
1139 v[j] = v[j + gap];
1140 v[j + gap] = tmp;
1141 }
1142 }
1143
1144 struct a_res_str *
do_walign(const unsigned char * aa0,int n0,const unsigned char * aa1,int n1,int frame,int repeat_thresh,struct pstruct * ppst,struct f_struct * f_str,int * have_ares)1145 do_walign (const unsigned char *aa0, int n0,
1146 const unsigned char *aa1, int n1,
1147 int frame, int repeat_thresh,
1148 struct pstruct *ppst,
1149 struct f_struct *f_str,
1150 int *have_ares)
1151 {
1152 struct a_res_str *a_res;
1153 int hoff, n10;
1154 int ib;
1155 unsigned char *aa0t;
1156 const unsigned char *aa1p;
1157
1158 *have_ares = 0x2; /* set 0x2 bit to indicate local copy */
1159
1160 if ((a_res = (struct a_res_str *)calloc(1, sizeof(struct a_res_str)))==NULL) {
1161 fprintf(stderr," [do_walign] Cannot allocate a_res");
1162 return NULL;
1163 }
1164
1165 #ifdef TFAST
1166 f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
1167 aa1p = f_str->aa1x;
1168 #else
1169 n10 = n1;
1170 aa1p = aa1;
1171 #endif
1172
1173 do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &a_res->rst, &hoff, 1);
1174
1175 /* the alignment portion takes advantage of the information left
1176 over in f_str after do_fastf is done. in particular, it is
1177 easy to run a modified sconn() to produce the alignments.
1178
1179 unfortunately, the alignment display routine wants to have
1180 things encoded as with bd_align and sw_align, so we need to do that.
1181 */
1182
1183 if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
1184 fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
1185 exit(1);
1186 }
1187
1188 kssort (f_str->vptr, f_str->nsave);
1189 f_str->aa0ix = 0;
1190 if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
1191 for (ib=0; ib < f_str->nm0; ib++) {
1192 if (f_str->vptr[ib]->score > 0) {
1193 f_str->vptr[ib]->score =
1194 ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
1195 }
1196 }
1197
1198 /* after ma_spam is over, we need to reset aa0 */
1199 for (ib = 0; ib < n0; ib++) {
1200 if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
1201 }
1202
1203 kssort(f_str->vptr,f_str->nsave);
1204
1205 for ( ; f_str->nsave > 0; f_str->nsave--)
1206 if (f_str->vptr[f_str->nsave-1]->score >0) break;
1207
1208 a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
1209 free(aa0t);
1210
1211 a_res->res = f_str->res;
1212 a_res->sw_score = a_res->rst.score[0];
1213 return a_res;
1214 }
1215
1216 /* this version of sconn is modified to provide alignment information */
1217
sconn_a(unsigned char * aa0,int n0,int cgap,struct f_struct * f_str,struct a_res_str * a_res)1218 int sconn_a (unsigned char *aa0, int n0, int cgap,
1219 struct f_struct *f_str,
1220 struct a_res_str *a_res)
1221 {
1222 int i, si, cmpp (), n;
1223 unsigned char *aa0p;
1224 int sx, dx, doff;
1225
1226 struct savestr **v;
1227 struct slink {
1228 int score;
1229 struct savestr *vp;
1230 struct slink *snext;
1231 struct slink *aprev;
1232 } *start, *sl, *sj, *so, sarr[MAXSAV];
1233 int lstop, plstart;
1234 int *res, nres, tres;
1235
1236 /* sort the score left to right in lib pos */
1237
1238 v = f_str->vptr;
1239 n = f_str->nsave;
1240
1241 krsort (v, n); /* sort from left to right in library */
1242
1243 start = NULL;
1244
1245 /* for each alignment, see if it fits */
1246
1247 for (i = 0, si = 0; i < n; i++) {
1248
1249 /* if the score is less than the join threshold, skip it */
1250 if (v[i]->score < cgap) continue;
1251
1252 lstop = v[i]->stop; /* have right-most lstart */
1253
1254 /* put the alignment in the group */
1255
1256 sarr[si].vp = v[i];
1257 sarr[si].score = v[i]->score;
1258 sarr[si].snext = NULL;
1259 sarr[si].aprev = NULL;
1260
1261 /* if it fits, then increase the score */
1262 /* start points to a sorted (by total score) list of candidate
1263 overlaps */
1264
1265 for (sl = start; sl != NULL; sl = sl->snext) {
1266 plstart = sl->vp->start;
1267 if (plstart > lstop ) {
1268 sarr[si].score = sl->score + v[i]->score;
1269 sarr[si].aprev = sl;
1270 break; /* quit as soon as the alignment has been added */
1271 }
1272 }
1273
1274 /* now recalculate the list of best scores */
1275 if (start == NULL)
1276 start = &sarr[si]; /* put the first one in the list */
1277 else
1278 for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
1279 if (sarr[si].score > sj->score) { /* new score better than old */
1280 sarr[si].snext = sj; /* snext best after new score */
1281 if (so != NULL)
1282 so->snext = &sarr[si]; /* prev_best->snext points to best */
1283 else start = &sarr[si]; /* start points to best */
1284 break; /* stop looking */
1285 }
1286 so = sj; /* previous candidate best */
1287 }
1288 si++; /* increment to snext alignment */
1289 }
1290
1291 /* we have the best set of alignments, write them to *res */
1292 if (start != NULL) {
1293 res = f_str->res; /* set a destination for the alignment ops */
1294 tres = nres = 0; /* alignment op length = 0 */
1295 aa0p = aa0; /* point into query (needed for calcons later) */
1296 a_res->min1 = start->vp->start; /* start in library */
1297 a_res->min0 = 0; /* start in query */
1298 for (sj = start; sj != NULL; sj = sj->aprev ) {
1299 doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
1300 /*
1301 fprintf(stderr,"doff: %3d\n",doff);
1302 */
1303 for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
1304 dx <= sj->vp->stop; dx++) {
1305 *aa0p++ = f_str->aa0t[sx++]; /* copy residue into aa0 */
1306 tres++; /* bump alignment counter */
1307 res[nres++] = 0; /* put 0-op in res */
1308 }
1309 sj->vp->dp -= doff;
1310 if (sj->aprev != NULL) {
1311 if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
1312 /* put an insert op into res to get to next aligned block */
1313 tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
1314 }
1315 /*
1316 fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
1317 sj->vp->start - sj->vp->dp + f_str->noff,
1318 sj->vp->stop - sj->vp->dp + f_str->noff,
1319 sj->vp->start,sj->vp->stop,sj->vp->dp,
1320 f_str->noff,sj->vp->score);
1321 fprintf(stderr,"%3d - %3d: %3d\n",
1322 sj->vp->start,sj->vp->stop,sj->vp->score);
1323 */
1324 a_res->max1 = sj->vp->stop;
1325 a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
1326 }
1327
1328 /*
1329 fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
1330 a_res->min0,a_res->max0,a_res->min1,a_res->max1);
1331 */
1332
1333 /* now replace f_str->aa0t with aa0 */
1334 for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
1335
1336 return tres;
1337 }
1338 else return (0);
1339 }
1340
1341 /* calculate the 100% identical score */
1342 int
shscore(unsigned char * aa0,int n0,int ** pam2,int nsq)1343 shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
1344 {
1345 int i, sum;
1346 for (i=0,sum=0; i<n0; i++)
1347 if (aa0[i]!=0 && aa0[i]<=nsq) sum += pam2[aa0[i]][aa0[i]];
1348 return sum;
1349 }
1350
1351 void
pre_cons(const unsigned char * aa1,int n1,int frame,struct f_struct * f_str)1352 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
1353
1354 #ifdef TFAST
1355 f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
1356 #endif
1357 }
1358
1359 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
1360 /* call from calcons, calc_id, calc_code */
1361 void
aln_func_vals(int frame,struct a_struct * aln)1362 aln_func_vals(int frame, struct a_struct *aln) {
1363
1364 #ifdef TFAST
1365 aln->qlrev = 0;
1366 aln->qlfact = 1;
1367 aln->llfact = aln->llmult = 3;
1368 aln->frame = 0;
1369 if (frame > 3) aln->llrev = 1;
1370 #else /* FASTF */
1371 aln->llfact = aln->qlfact = aln->llmult = 1;
1372 aln->llrev = aln->qlrev = 0;
1373 aln->frame = 0;
1374 #endif
1375 }
1376
aa0shuffle(unsigned char * aa0,int n0,struct f_struct * f_str)1377 void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
1378
1379 int i, j, k;
1380 unsigned char tmp;
1381
1382 for (i = f_str->nmoff-1 ; --i ; ) {
1383
1384 /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */
1385 j = (f_str->nmoff - 2) - i; if (i <= j) break; /* reverse columns */
1386
1387 /* swap all i'th column residues for all j'th column residues */
1388 for(k = 0 ; k < f_str->nm0 ; k++) {
1389 tmp = aa0[(k * (f_str->nmoff)) + i];
1390 aa0[(k * (f_str->nmoff)) + i] = aa0[(k * (f_str->nmoff)) + j];
1391 aa0[(k * (f_str->nmoff)) + j] = tmp;
1392 }
1393 }
1394 }
1395