1 /* fffasta.c 12-Feb-1984, 11-Mar-1985, 14-Oct-1985
2 copyright (c) 1985,1986,1987,1988,1991,1992 William R. Pearson
3 */
4 /*
5 Sept-1991 release 1.6 uses W. Miller l_band, g_band for optimization
6 25-Sept-1991 fixed RLOCAL_ALIGN() call in zzlgmata.c
7
8 14-Mar-87 added BIGMEM for large memory machines
9 Mar-87 added TFASTA for translation of DNA
10 1-Jun-87 added LFASTA for lfasta generation from same file
11 3-Jun-87 added SFASTA for sfasta generation from same file
12 28-Jun-87 made modifications for speed a la Warren Gish
13 changed name to flfasta.c
14 24-Aug-87 no more modulus counting in dhash(), 2X as
15 fast on the SUN, now MAXDIAG = MAXTST + MAXLIB;
16 (ALLOCN0 for modulus counting in LFASTA)
17 19-Oct-87 modified behavior for nbest > MAXBEST, now
18 resorts nbest and throws out bottom 25%
19 also modifying bestcut in the process
20 12-Nov-87 Added automatic detection of protein/DNA sequences
21 28-Feb-88 added getopt for options
22 21-Mar-88 added PAM scores for kfact
23 25-Mar-88 flag -k for using PAM scores for kfact
24 30-Mar-88 added menu for libraries
25 30-Mar-88 combine fasta/fastgb, tfasta/tfastgb
26 19-May-88 added options for fasta mail processing
27 -Q - quiet, does not ask for any input
28 with -Q, has heuristic for number of scores
29 to display, that number will be less than mshow,
30 which is set with the -o option.
31
32 4-Feb-89 modified libchoice for a variety of different
33 library formats - no more automatic library setting
34
35 20-Nov-89 1.3a fixed bug in pamfact that prevented -k option
36 from working by default with DNA
37
38 5-Jun-90 added optall option
39 3-Sept-90 use select() instead of sortbest()
40 allow individual library letters to be concatenated
41 13-Dec-90 fixed bug in select() due to lack of sentinel.
42 23-Jan-91 fixed bug in aatran() for short sequences
43 27-Jan-91 made certain than showbest() is called with nbest>0.
44 20-May-91 fixed ashow to reflect nshow
45 7-Dec-92 added -h option to suppress histogram
46 13-Dec-92 added -e option to scale scores by length
47 26-Aug-95 added -O option to provide filename for output
48
49 */
50 /* fastn is a derivative of dfastp for DNA sequences
51
52 fastn is designed to search a DNA sequence database
53 very rapidly. It first looks for diagonals with homology
54 using a hashing algorithm.
55
56 General structure:
57
58 Read in the test DNA sequence, build a hash table using
59 a hash length of ktup. DNA bases will be given values
60 between 0 and 3 (2 bits) and ktup of 1 to 6 will be allowed
61 (hash table of 4 to 1024 entries, on larger machines, 4096
62 (ktup=6) would be acceptable).
63
64 Start reading the library. Reading the library, looking up
65 the hash table and accumulating diagonals will all be done
66 in one loop. The max n diagonals will be identified and
67 scanned with the PAM matrix for each library sequence.
68
69 The max PAM score and diagonal positions are then saved
70 for comparison against the whole library. After all sequences
71 have been examined, a mean and s.d. are calculated and the
72 max matches displayed.
73 */
74
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <ctype.h>
79 #include <math.h>
80
81 char *refstr="\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n";
82 char *verstr="v2.1u00 Mar, 2001";
83 #ifdef TFASTX
84 char *progstr = "TFASTX";
85 #else
86 #ifdef TFASTA
87 char *progstr = "TFASTA";
88 #else
89 #ifdef FASTX
90 char *progstr = "FASTX";
91 #else
92 char *progstr = "FASTA";
93 #endif
94 #endif
95 #endif
96
97 #ifdef __MWERKS__
98 #include <Types.h>
99 #include <StandardFile.h>
100 StandardFileReply freply;
101 Point wpos;
102 int tval;
103 char prompt[256];
104 #define getenv mgetenv
105 #include <sioux.h>
106 #endif
107
108 #define YES 1
109 #define NO 0
110
111 #define max(a,b) (((a)>(b))?(a):(b))
112 #define min(a,b) (((a)<(b))?(a):(b))
113
114 #ifndef BIGMEM
115 #define BIGNUM 32000
116 #define QFILE_SIZE 40
117 #define LFILE_SIZE 80
118 #if defined(TFASTA) || defined(FASTX)
119 #define MAXTRN 5000 /* MAXTRN must be (MAXTST*3+MAXLIB)/3 */
120 #endif
121 #ifdef TFASTX
122 #define MAXTRN 15000
123 #endif
124 #if defined(TFASTA) || defined(TFASTX)
125 #define MAXTST 1000 /* longest test sequence */
126 #define MAXLIB 8000
127 #define MAXDIAG (MAXTST+MAXTRN)
128 #else
129 #define MAXTST 2000 /* longest test sequence */
130 #define MAXLIB 8000
131 #define MAXDIAG (MAXTST+MAXLIB)
132 #endif
133 #else
134 #define BIGNUM 1000000000
135 #define QFILE_SIZE 256
136 #define LFILE_SIZE 256
137 #define MAXTST 10000
138 #define MAXLIB 50000
139 #define MAXDIAG (MAXTST+MAXLIB)
140 #if defined(TFASTA) || defined(FASTX)
141 #define MAXTRN 30000
142 #endif
143 #ifdef TFASTX
144 #define MAXTRN 90000
145 #endif
146 #endif
147
148 #ifdef LFASTA
149 #define MAXSAV 250
150 #endif
151 #ifndef MAXSAV
152 #define MAXSAV 10 /* number of best diagonals saved */
153 #endif
154
155 #define MAXHIST 51 /* number of histogram divisions */
156
157 #ifndef BIGMEM
158 #define MAXBEST 5000 /* number of good matches remembered */
159 #else
160 #define MAXBEST 20000
161 #endif
162
163 FILE *outfd; /* fd for output file */
164 int smark[4];
165
166 FILE *tmpfd;
167 char tmpfname[LFILE_SIZE];
168 int dataflg=0;
169 int revflg = 0;
170 char *compstr="\0";
171 int lnsflg=0;
172 int optwid=32, optwidf=0; /* width for band optimization */
173 /* changed in initparm() */
174 /* globals for matching */
175
176 long lmark; /* position in library file from ftell() */
177 long nlib, onlib;
178 long ntt, ontt; /* number of library sequences, number of
179 residues scanned*/
180 #ifdef LFASTA
181 int have_stats = 0;
182 int oneseq;
183 #endif
184
185 #ifdef NCBIBL13
186 #define LASTLIB NCBIBL13+1 /* this must agree with altlib.h */
187 #else
188 #define LASTLIB 10
189 #endif
190
191 #ifndef LFASTA
192 extern int (*getlib)(), (*ranlib)();
193 extern int sfnum;
194 #define GETLIB (*getlib)
195 #define RANLIB (*ranlib)
196 #else
197 #define GETLIB getlib
198 #define RANLIB ranlib
199 #endif
200
201 char libstr[21]; /* partial title from library sequence */
202 char name0[20], name1[20]; /* for labeling output */
203 int ixstat; /* >0 if annotations displayed */
204
205 #define MAXLF 200 /* number of library names */
206 #ifdef BIGMEM
207 #define MAXLN QFILE_SIZE /* size of a library name */
208 #else
209 #define MAXLN QFILE_SIZE
210 #endif
211
212 char *lbnarr; /* name array of libraries to be opened in list */
213 char *lbnames[MAXLF]; /* names of libraries to be opened */
214 int nln; /* number of library files */
215
216 #ifndef LFASTA
217 int deftype=0; /* default library type */
218 #endif
219
220 int libfn; /* current library file being searched */
221 char ldname[LFILE_SIZE];
222
223 unsigned char *aa0, *aa1; /* amino acid sequence data */
224 #if defined(TFASTA)
225 unsigned char *aa10;
226 int nframe=6;
227 #endif
228 #ifdef FASTX
229 #define XFACT 10
230 unsigned char *aa0x, *aa0y;
231 int n0x, n0x31, n0x32;
232 #else
233 #ifdef TFASTX
234 #define XFACT 10
235 unsigned char *aa10, *aa1y;
236 int nframe=2;
237 int n1x31, n1x32;
238 #else
239 #define XFACT 0
240 #endif
241 #endif
242
243 int maxn, maxt; /* max space for lib sequence */
244 int n0, n1, nd, noff; /* length of aa0, length of aa1, n0+n1,
245 diagonal offset */
246 long sq0off=1, sq1off=1;
247 long loffset = 0l; /* offset into sequence */
248
249 struct dstruct { /* diagonal structure for saving current run */
250 int score; /* hash score of current match */
251 int start; /* start of current match */
252 int stop; /* end of current match */
253 struct beststr *dmax; /* location in vmax[] where best score data saved */
254 } *diag;
255
256 #ifdef I86BUG
257 #define L2DSTR 3 /* log(2) of sizeof dstruct for I86 bug in many 'C'
258 compilers */
259 #endif
260
261 struct beststr {
262 int score; /* pam score with segment optimization*/
263 int score0; /* pam score of best single segment */
264 int gscore; /* opt score */
265 int sscore; /* score used for sort */
266 float zscore;
267 float escore;
268 int n1; /* length of library sequence */
269 long lseek; /* position in library file */
270 int dp; /* diagonal of match */
271 int start; /* start of match in lib seq */
272 int stop; /* end of match in lib seq */
273 int cont; /* offset into sequence */
274 int frame;
275 int lib; /* library for current sequence */
276 }
277 #ifndef FAR_PTR
278 *bbp, /* pointer for fbest */
279 *bestptr, /* temp pointer */
280 **bptr, /* array of pointers for sorting */
281 *best, /* array of best score data */
282 #else
283 huge * bbp,
284 huge * best,
285 huge * bestptr, /* temp pointer */
286 huge * huge * bptr,
287 #endif
288 vmax[MAXSAV], /* best matches saved for one sequence */
289 *vptr[MAXSAV];
290
291 /* delete this: float *Evalue; */
292
293 #ifdef LFASTA
294 int lcrc0[2*MAXSAV];
295 int lcrc1[2*MAXSAV];
296 int maxcrc=2*MAXSAV;
297 int ncrc;
298 #endif
299
300 int iscore, gscore; /* for displaying scores without showbest */
301
302 int cgap; /* gap threshold */
303 int pgap; /* gap penalty for optimized alignment of diagonals */
304
305 int nbest; /* number of sequences better than bestcut in best */
306 int bestcut=1; /* cut off for getting into MAXBEST */
307 int optcut=0; /* cut off for optimization */
308
309 /* the following are defaults for values that are read by
310 pam.c from *.mat if SMATRIX is defined */
311
312 int histint;
313 int bestscale=300; /* values for calculating bestcut */
314 int bestoff=36; /* these values are for BLOSUM50 */
315 int bkfact=6;
316 int scfact=3;
317 int bktup=2;
318 int ktmax=2;
319 int bestmax=50;
320 int pamfact= 1; /* flag for using pam values for kfact */
321 int dnaseq = 0; /* true if DNA query sequence */
322 int dnasw_flg = 0;
323 int ldnaseq = 0;
324
325 int bestfull=0;
326 int igncnt=0;
327 int optall=1;
328 long optcount=0l;
329 int init1flg=0;
330
331 int nsav, lowscor; /* number of saved runs, worst saved
332 run, score of worst saved run */
333
334 long *hist; /* histogram of all score */
335 int min_hist, max_hist, maxh;
336 int histflg=1;
337 int long_info=0; /* long display of library sequence definition */
338 int dohist=0;
339 #ifndef LFASTA
340 int zsflag=1;
341 extern long num_db_entries, z_calls;
342 float zs_to_E(float, int);
343 float zs_to_Ec(float);
344 float find_zm(int, int), find_z(int, int);
345 extern float ks_dev;
346 extern int ks_df;
347 #else
348 int zsflag=0;
349 long num_db_entries;
350 #endif
351
352 static int fact, gm; /* scoring factors */
353 static int hmask, hmax; /* hash constants */
354 static int *pamh2; /* pam based kfact array */
355 static int *link, *harr; /* hash arrays */
356 static int ktup, kshft, kt1; /* ktuple constants */
357
358 int nshow=20, mshow=50, ashow= -1;
359 int mshow_flg=0;
360 #ifndef FASTX
361 float e_cut=10.0; /* threshold for E value display */
362 #else
363 float e_cut=5.0;
364 #endif
365 int e_cut_set=0; /* flag for set */
366 char rline[20],sline[20];
367 char resfile[QFILE_SIZE];
368
369 /* output options */
370 int showall, llen, markx; /* show all of both sequences */
371
372 char ttitle[60];
373 char ltitle[60];
374
375 long tstart, tscan, tdone, sstime();
376
377 extern int optind;
378
379 #ifndef LFASTA
380 int outtty;
381 #else
382 extern int outtty;
zs_to_E(float zs,int n)383 float zs_to_E(float zs, int n) {};
zs_to_Ec(float zs)384 float zs_to_Ec(float zs) {};
385 #endif
386
387 char *libenv, *aaenv, *smptr;
388 char smstr[QFILE_SIZE], sdstr[QFILE_SIZE];
389 char flstr[QFILE_SIZE];
390 #ifdef TPLOT
391 char lvstr[QFILE_SIZE];
392 #endif
393
394 #ifdef __MWERKS__
395 /* short ouvRef, q0vRef, q1vRef; */
396 FSSpec ouSpec, q0Spec, q1Spec;
397 OSErr error;
398 #define IntroDID 400 /* LFASTA */
399 #endif
400
401 #ifdef LFASTA
402 #define PgmDID 403
403 char *iprompt0=" LFASTA compares two sequences\n";
404 char *iprompt1=" first sequence file name: ";
405 char *iprompt2=" second sequence file name: ";
406 #else
407 char *iprompt1=" query sequence file name: ";
408 #ifdef TFASTA
409 #define PgmDID 402
410 char *iprompt0=" TFASTA translates and searches a DNA sequence data bank\n";
411 #else
412 #ifdef FASTX
413 #define PgmDID 405
414 char *iprompt0=" FASTX compares a DNA sequence to a protein data bank\n";
415 #else
416 #ifdef TFASTX
417 #define PgmDID 406
418 char *iprompt0=" TFASTX translates and searches a DNA sequence data bank\n";
419 #else
420 #define PgmDID 401
421 char *iprompt0=" FASTA searches a protein or DNA sequence data bank\n";
422 #endif
423 #endif
424 #endif
425 #endif
426
427 #include "upam.gbl" /* includes pam array */
428
main(argc,argv)429 main(argc, argv)
430 int argc; char **argv;
431 {
432 char tname[QFILE_SIZE], lname[LFILE_SIZE], llname[LFILE_SIZE], qline[QFILE_SIZE];
433 int itemp, iln,i;
434 #ifdef FASTX
435 int last_n0;
436 unsigned char *fs, *fd;
437 #endif
438 char *getenv(), *cptr, *bp;
439
440 #ifdef __MWERKS__
441 SIOUXSettings.showstatusline=FALSE;
442 #ifdef TPLOT
443 InitGraf(&qd.thePort);
444 SIOUXSettings.asktosaveonclose=FALSE;
445 #else
446 SIOUXSettings.asktosaveonclose=TRUE;
447 #endif
448 SIOUXSettings.autocloseonquit=TRUE;
449
450 argc = ccommand(&argv);
451 if (GetResource('DLOG',PgmDID)==(Handle)0 &&
452 OpenResFile("\pFASTA.rsrc")<0) {
453 SysBeep(100);
454 fprintf(stderr," WARNING FASTA.rsrc file could not be found\n");
455 }
456 InitEvent();
457 /* GetVol((unsigned char *)prompt,&ouvRef); */
458 error=HGetVol(NULL,&ouSpec.vRefNum, &ouSpec.parID);
459 if (error != noErr) {
460 fprintf(stderr," cannot get current directory\n");
461 exit(1);
462 }
463 wpos.h=50; wpos.v=100;
464 #endif
465
466 #ifndef LFASTA
467 #ifdef UNIX
468 outtty=isatty(1);
469 #else
470 outtty=1;
471 #endif
472 #endif
473
474 initenv(argc,argv);
475 #ifdef __MWERKS__
476 if (!outtty) SIOUXSettings.asktosaveonclose=FALSE;
477 #endif
478 if (dataflg && (tmpfd=fopen(tmpfname,"w"))==NULL) {
479 fprintf(stderr," cannot open temp file: %s\n",tmpfname);
480 dataflg=0;
481 }
482
483 #if defined(TFASTA) || defined(TFASTX)
484 aainit();
485 dnaseq = -1; /* force to protein */
486 ldnaseq = 1;
487 if (sqtype[0]=='D') {
488 fprintf(stderr," tfasta compares a protein to a translated\n\
489 DNA sequence library. Do not use a DNA scoring matrix.\n");
490 exit(1);
491 }
492 #endif
493
494 if ((aa0=calloc((size_t)MAXTST+MAXLIB,sizeof(char)))==0) {
495 fprintf(stderr," cannot allocate sequence array\n");
496 exit(1);
497 }
498 maxn = MAXTST+MAXLIB;
499
500 #ifdef FASTX
501 if ((aa0x=calloc((size_t)MAXTRN,sizeof(char)))==0) {
502 fprintf(stderr," cannot allocate FASTX translation array I\n");
503 exit(1);
504 }
505
506 if ((aa0y=calloc((size_t)MAXTRN,sizeof(char)))==0) {
507 fprintf(stderr," cannot allocate FASTX translation array II\n");
508 exit(1);
509 }
510 #endif
511 #ifdef TFASTX
512 if ((aa1y=calloc((size_t)MAXTRN,sizeof(char)))==0) {
513 fprintf(stderr," cannot allocate TFASTX translation array II\n");
514 exit(1);
515 }
516 #endif
517
518 #if defined(TFASTA) || defined(TFASTX)
519 if ((aa1=calloc((size_t)MAXTRN,sizeof(char)))==0) {
520 fprintf(stderr," cannot allocate translation array\n");
521 exit(1);
522 }
523 #endif
524
525 if (argc-optind < 3) {
526 #ifndef LFASTA
527 if (!outtty) {
528 fprintf(stderr," too few command line arguments with -q\n");
529 exit(1);
530 }
531 #endif
532
533 #ifndef __MWERKS__
534 fputs(iprompt0,stdout);
535 fprintf(stdout," %s%s\n",verstr,refstr);
536 l1: fputs(iprompt1,stdout);
537 fflush(stdout);
538 if (fgets(tname,sizeof(tname),stdin)==NULL) exit(0);
539 if (tname[strlen(tname)-1]=='\n') tname[strlen(tname)-1]='\0';
540 if (tname[0]=='\0') goto l1;
541 #else
542 /* NIntroDlog(IntroDID,iprompt0,verstr,refstr,"\0"); */
543 NIntroDlog(PgmDID,verstr,"\0","\0","\0");
544
545 l1: SFileDlog(iprompt1,&freply);
546 if (freply.sfGood==TRUE) {
547 PtoCstr((unsigned char *)freply.sfFile.name);
548 strcpy(tname,(char *)freply.sfFile.name);
549 /* q0vRef=freply.vRefNum;
550 SetVol(NULL,q0vRef);
551 */
552 q1Spec.vRefNum = q0Spec.vRefNum = freply.sfFile.vRefNum;
553 q1Spec.parID = q0Spec.parID = freply.sfFile.parID;
554 HSetVol(NULL,q0Spec.vRefNum,q0Spec.parID);
555 }
556 else exit(0);
557 #endif
558 #ifdef FASTX
559 if ((n0=getntseq(tname,aa0,maxn,&dnaseq))==0) {
560 fprintf(stderr," %s : %s sequence not found\n",tname,sqtype);
561 goto l1;
562 }
563 #else
564 if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
565 fprintf(stderr," %s : %s sequence not found\n",tname,sqtype);
566 goto l1;
567 }
568 #endif
569
570 #ifndef TFASTX
571 if (revflg) {
572 if (sqtype[0]=='D') revcomp(aa0,n0);
573 else {
574 fprintf(stderr," can only reverse complement DNA\n");
575 compstr="\0"; revflg = 0;
576 }
577 }
578 #endif
579
580 #ifndef FASTX
581 resetp(dnaseq);
582 #else
583 reseta(dnaseq);
584 #endif
585
586 #ifndef TFASTA
587 if (dnaseq==1 && n0>(MAXTST+MAXLIB)/3) {
588 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
589 exit(1);
590 }
591 else if (dnaseq==0 && n0>(MAXTST+MAXLIB)/2) {
592 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
593 exit(1);
594 }
595 #else
596 if (n0 > MAXTST) {
597 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
598 exit(1);
599 }
600 #endif
601
602 #ifdef LFASTA
603 #ifndef __MWERKS__
604 l2: fputs(iprompt2,stdout);
605 fflush(stdout);
606 if (fgets(lname,sizeof(lname),stdin)==NULL) exit(1);
607 if (lname[strlen(lname)-1]=='\n') lname[strlen(lname)-1]='\0';
608 if (*lname==0) goto l2;
609 #else
610 l2: SFileDlog(iprompt2,&freply);
611 if (freply.sfGood==TRUE) {
612 PtoCstr((unsigned char *)freply.sfFile.name);
613 strcpy(lname,(char *)freply.sfFile.name);
614 /* q1vRef=freply.vRefNum;
615 SetVol(NULL,q1vRef);
616 */
617 q1Spec.vRefNum = freply.sfFile.vRefNum;
618 q1Spec.parID = freply.sfFile.parID;
619 HSetVol(NULL,q1Spec.vRefNum,q1Spec.parID);
620 }
621 else exit(0);
622 #endif
623 #else
624 #ifdef __MWERKS__
625 HSetVol(NULL,ouSpec.vRefNum,ouSpec.parID);
626 /* SetVol(NULL,ouvRef); */
627 #endif
628 libchoice(lname,sizeof(lname),aaenv);
629 #endif
630 libselect(lname);
631
632 fprintf(stderr," ktup? (1 to %d) [%d] ",ktmax,ktmax);
633 if (fgets(qline,sizeof(qline),stdin)==NULL) exit(0);
634 ktup = ktmax;
635 if (qline[0]!='\0' && qline[0]!='\n') {
636 sscanf(qline,"%d",&ktup);
637 if (ktup < 1 || ktup>ktmax ) {
638 printf(" warning ktup = %d out of range, reset to %d\n",ktup,ktmax);
639 ktup = ktmax;
640 }
641 }
642 #ifndef LFASTA
643 fprintf(stderr," use optimized scores? [%s]: ",(optall?"yes":"no"));
644 if (fgets(qline,sizeof(qline),stdin)==NULL) exit(0);
645 if (optall==1 && (*qline=='n' || *qline=='N')) optall = 0;
646 if (optall==0 && (*qline=='y' || *qline=='Y')) optall = 1;
647 #endif
648 }
649 else { /* all arguments from the command line */
650 #ifdef TPLOT
651 fputs(iprompt0,stderr);
652 fprintf(stderr," %s%s\n",verstr,refstr);
653 #else
654 fputs(iprompt0,stdout);
655 fprintf(stdout," %s%s\n",verstr,refstr);
656 #endif
657 strncpy(tname,argv[optind+1],sizeof(tname));
658 #ifndef FASTX
659 if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) {
660 fprintf(stderr," %s : %s sequence not found\n",tname,sqtype);
661 exit(1);
662 #else
663 if ((n0=getntseq(tname,aa0,maxn,&dnaseq))==0) {
664 fprintf(stderr," %s : %s sequence not found\n",tname,sqtype);
665 exit(1);
666 #endif
667 }
668
669 #ifndef TFASTX
670 if (revflg) {
671 if (dnaseq) revcomp(aa0,n0);
672 else {
673 fprintf(stderr," cannot reverse complement protein sequence\n");
674 compstr="\0"; revflg = 0;
675 }
676 }
677 #endif
678
679 #ifndef FASTX
680 resetp(dnaseq);
681 #else
682 reseta(dnaseq);
683 #endif
684
685 #ifndef TFASTA
686 if (dnaseq==1 && n0>(MAXTST+MAXLIB)/3) {
687 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
688 exit(1);
689 }
690 else if (dnaseq==0 && n0>(MAXTST+MAXLIB)/2) {
691 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
692 exit(1);
693 }
694 #else
695 if (n0 > MAXTST) {
696 fprintf(stderr," query sequence is too long %d %s\n",n0,qsqnam);
697 exit(1);
698 }
699 #endif
700 strncpy(lname,argv[optind+2],sizeof(lname));
701 #ifndef LFASTA
702 libselect(lname);
703 #else
704 addfile(lname,"");
705 #endif
706
707 if (argc-optind==4) sscanf(argv[optind+3],"%d",&ktup);
708 else ktup=ktmax;
709 if (ktup < 1 || ktup>ktmax ) {
710 fprintf(stderr," warning ktup = %d out of range, reset to %d\n",
711 ktup,ktmax);
712 ktup = ktmax;
713 }
714 }
715
716 #ifndef LFASTA
717 if (!outtty)
718 #ifndef TFASTX
719 fprintf(stdout," %s%s : %4d %-s\n",tname, compstr, n0, qsqnam);
720 #else
721 fprintf(stdout," %s : %4d %-s\n",tname, n0, qsqnam);
722 #endif /* TFASTX */
723 #endif /* LFASTA */
724 #ifdef __MWERKS__
725 HSetVol(NULL,q0Spec.vRefNum,q0Spec.parID);
726 /* SetVol(NULL,q0vRef); */
727 #endif
728 gettitle(tname,ttitle,50);
729 if (strlen(ttitle)>0)
730 if (*ttitle=='>') strncpy(name0,&ttitle[1],6);
731 else strncpy(name0,ttitle,6);
732 else
733 strncpy(name0,tname,6);
734 name0[6]='\0';
735 if (revflg) name0[5]='-';
736
737 #ifdef __MWERKS__
738 HSetVol(NULL,ouSpec.vRefNum,ouSpec.parID);
739 /* SetVol(NULL,ouvRef); */
740 #endif
741 #ifdef LFASTA
742 if (strcmp(tname,lname)==0 && revflg==0) oneseq=1;
743 #else
744 if (strlen(ttitle)>0)
745 #ifndef TFASTX
746 printf(" %s%s: %d %s\n vs %s library\n",ttitle,compstr,n0,qsqnam,ltitle);
747 #else
748 printf(" %s: %d %s\n vs %s library %s\n",ttitle,n0,qsqnam,ltitle,compstr);
749 #endif
750 else
751 #ifndef TFASTX
752 printf(" %s%s: %d %s vs %s library\n",tname,compstr,n0,qsqnam,ltitle);
753 #else
754 printf(" %s: %d %s vs %s library %s\n",tname,n0,qsqnam,ltitle,compstr);
755 #endif
756 if (dataflg) {
757 if (strlen(ttitle)>0)
758 fprintf(tmpfd,"; %s%s: %d %s\n; vs %s library\n",
759 ttitle,compstr,n0,qsqnam,ltitle);
760 else
761 fprintf(tmpfd,"; %s%s: %d %s vs %s library\n",
762 tname,compstr,n0,qsqnam,ltitle);
763 }
764
765 #endif
766
767 #if !defined(TFASTA) && !defined(TFASTX)
768 aa1 = aa0 + n0 + 2;
769 #else
770 aa10 = aa0 + n0 + 2;
771 #endif
772
773 maxn -= n0 + 3;
774
775 initpam2(); /* convert 1-d pam to 2-d pam2 */
776
777 initparm();
778 if (dataflg) {
779 fprintf(tmpfd,"; ktup = %d\n",ktup);
780 fprintf(tmpfd,"; cutoff = %d; optcut = %d; ggapval %d gdelval %d cgap %d\n",
781 bestcut,optcut,ggapval,gdelval,cgap);
782 if (lnsflg) fprintf(tmpfd,"; ln scaling");
783 }
784
785 tstart = sstime();
786
787 #ifdef FASTX
788 if (check_nt(aa0,n0,&i)==0)
789 {
790 fprintf(stderr," error - %s has non-nucleotide residues at: %d\n",tname,i);
791 exit(1);
792 }
793
794 aainit();
795 last_n0 = 0;
796 for (itemp=0; itemp<3; itemp++) {
797 n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
798 /*
799 for (i=0; i<n0x; i++) {
800 fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
801 if ((i%60)==59) fprintf(stderr,"\n");
802 }
803 fprintf(stderr,"\n");
804 */
805 last_n0 += n0x+1;
806 }
807
808 /* fprintf(stderr,"\n"); */
809 n0x = n0;
810 n0x31 = (n0-2)/3;
811 n0x32 = n0x31+1+(n0-n0x31-1)/2;
812 /* we also need a rearranged version,
813 111111222222233333333 becomes 123123123123123 */
814
815 for (fs=aa0x,itemp=0; itemp <3; itemp++,fs++) {
816 for (fd=&aa0y[itemp]; *fs !=EOSEQ; fd += 3, fs++) *fd = *fs;
817 *fd=EOSEQ;
818 }
819
820 /*
821 for (i=0; i<n0x; i++) {
822 fprintf(stderr,"%c",aa[aa0y[i]]);
823 if ((i%60)==59) fprintf(stderr,"\n");
824 }
825 fprintf(stderr,"\n");
826 */
827 #endif
828
829 fact = ktup*scfact;
830 #ifndef FASTX
831 hashaa(aa0,n0,ktup); /* hash test sequence */
832 #else
833 hashaa(aa0x,n0x,ktup);
834 #endif
835
836 #ifndef ALLOCN0
837 allocdiag(MAXDIAG);
838 #else
839 allocdiag(n0);
840 #endif
841
842 /* inithist(); */ /* initialize histogram, mean, sd */
843
844 initbest(MAXBEST+1); /* +1 required for select() */
845 for (nbest=0; nbest<MAXBEST+1; nbest++)
846 bptr[nbest] = &best[nbest];
847 bptr++; best++;
848 best[-1].score=best[-1].score0=
849 best[-1].gscore= best[-1].sscore = BIGNUM;
850 best[-1].zscore = (float)BIGNUM;
851
852 nlib = onlib = 0;
853 ntt = ontt = 0l;
854 nbest = 0;
855
856 #ifdef LFASTA
857 /* initialize crc array */
858 for (ncrc=0; ncrc<MAXSAV; ncrc++) lcrc0[ncrc]=lcrc1[ncrc]= -1;
859 ncrc = 0;
860 #ifdef __MWERKS__
861 HSetVol(NULL,q1Spec.vRefNum,q1Spec.parID);
862 /* SetVol(NULL,q1vRef); */
863 #endif
864 gettitle(lname,ltitle,50);
865 #ifdef __MWERKS__
866 HSetVol(NULL,ouSpec.vRefNum,ouSpec.parID);
867 /* SetVol(NULL,ouvRef); */
868 #endif
869 #endif
870
871 for (iln=0; iln<nln; iln++) {
872 libfn = iln;
873 #ifdef __MWERKS__
874 HSetVol(NULL,q1Spec.vRefNum,q1Spec.parID);
875 /* SetVol(NULL,q1vRef); */
876 #endif
877 if ((itemp=openlib(lbnames[iln],"\0"))>0) {
878 #ifndef TPLOT
879 fprintf(stdout," searching %s library\n",lbnames[iln]);
880 #endif
881 dhash();
882 }
883 if (itemp== -9) {
884 printf(" %8ld %s in %5ld sequences\n",ntt-ontt,
885 sqnam,nlib-onlib);
886 ontt=ntt; onlib=nlib; continue;
887 }
888 if (itemp<0) break;
889 #ifndef LFASTA
890 closelib();
891 #endif
892 }
893
894 tscan = sstime();
895
896 #ifdef PROGRESS
897 #ifdef UNIX
898 if (outtty)
899 #endif
900 if (nlib >= 200) fprintf(stderr," Done!\n");
901 #endif
902
903 #ifndef LFASTA
904 if (!dohist) {
905 if (nbest < 20) {
906 zsflag = 0;
907 histflg = 0;
908 }
909 else if (zsflag) process_hist(n0,bptr,nbest);
910 }
911
912 if (dataflg) {
913 fprintf(tmpfd,"; %8ld %s in %5ld sequences; scan time: ",
914 ntt,sqnam,nlib);
915 ptime(tmpfd,tscan-tstart);
916 fputs("\n",tmpfd);
917 if (optall)
918 fprintf(tmpfd,"; %ld optimizations performed\n",optcount);
919 }
920 prhist(stdout); /* print histogram, statistics */
921 #endif
922 #ifdef __MWERKS__
923 HSetVol(NULL,ouSpec.vRefNum,ouSpec.parID);
924 /* SetVol(NULL,ouvRef); */
925 #endif
926
927 free(diag);
928 freehash();
929
930 #ifndef LFASTA
931 outfd = stdout;
932 l3: if (outtty) {printf(" Enter filename for results [%s]: ",resfile); fflush(stdout);}
933 rline[0]='\0';
934 if (outtty && fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
935 if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0';
936 if (rline[0]!='\0') strncpy(resfile,rline,sizeof(resfile));
937 if (resfile[0]!='\0') {
938 if ((outfd=fopen(resfile,"w"))==0) {
939 printf(" could not open %s\n",resfile);
940 goto l3;
941 }
942 fprintf(outfd," %s%s, %d %s vs %s library\n",
943 tname, compstr, n0, qsqnam, lname);
944 prhist(outfd);
945 }
946 #else
947 #ifdef TPLOT
948 outfd = stderr;
949 #else
950 outfd = stdout;
951 #endif /* LFASTA */
952 fprintf(outfd," Comparison of:\n(A) %-10s %-50s%s - %d %s\n",
953 tname,ttitle,compstr,n0,qsqnam);
954 fprintf(outfd,"(B) %-10s %-50s - %ld %s\n",lname,ltitle,ntt,sqnam);
955 if (strlen(smptr)>0) fprintf(outfd," using matrix file %s\n",smptr);
956 else fprintf(outfd," using %s matrix\n",sqtype);
957
958 if (nbest<=0) {
959 fprintf(outfd,
960 " No similar regions with scores greater %d than found\n",optcut);
961 exit(0);}
962 #endif
963
964 if (zsflag) {
965 sortbestz();
966 for (itemp=0; itemp<nbest; itemp++)
967 bptr[itemp]->escore = zs_to_E(bptr[itemp]->zscore,bptr[itemp]->n1);
968 sortbeste();
969 }
970 else sortbest();
971
972 #ifdef LFASTA
973 nshow = nbest;
974 #ifdef TPLOT
975 openplt((long)n0,(long)ntt);
976 if (oneseq) drawdiag((long)n0,(long)ntt);
977 showlocal(nshow);
978 closeplt();
979 #else
980 if (markx==10) {
981 fprintf(outfd,"\n>>>%s%s, %d %s vs %s, %d %s\n",
982 tname, compstr, n0, qsqnam, lname, ntt,sqnam);
983 fprintf(outfd,"; pg_name: %s\n",progstr);
984 fprintf(outfd,"; pg_ver: %s\n",verstr);
985 fprintf(outfd,"; pg_matrix: %s\n",smptr);
986 fprintf(outfd,"; pg_gap-pen: %d %d\n",gdelval,ggapval);
987 fprintf(outfd,"; pg_ktup: %d\n",ktup);
988 fprintf(outfd,"; pg_optcut: %d\n",optcut);
989 fprintf(outfd,"; pg_cgap: %d\n",cgap);
990 }
991 showlocal(nshow);
992 #endif
993
994 #else /* !LFASTA */
995
996 if (nbest <= 0) {
997 fprintf(outfd," no sequences with scores greater than %d found\n",bestcut);
998 if (outfd != stdout)
999 fprintf(outfd," no sequences with scores greater than %d found\n",bestcut);
1000 exit(0);
1001 }
1002 showbest(); /* display best matches */
1003
1004 rline[0]='Y';
1005 if (outtty) {
1006 printf(" Display alignments also? "); fflush(stdout);
1007 if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
1008 }
1009
1010 if (markx==10) {
1011 fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
1012 tname, compstr, n0, qsqnam, lname);
1013 fprintf(outfd,"; pg_name: %s\n",progstr);
1014 fprintf(outfd,"; pg_ver: %s\n",verstr);
1015 fprintf(outfd,"; pg_matrix: %s\n",smptr);
1016 fprintf(outfd,"; pg_gap-pen: %d %d\n",gdelval,ggapval);
1017 fprintf(outfd,"; pg_ktup: %d\n",ktup);
1018 fprintf(outfd,"; pg_optcut: %d\n",optcut);
1019 fprintf(outfd,"; pg_cgap: %d\n",cgap);
1020 }
1021
1022 if (toupper(rline[0])=='Y') {
1023 if (outtty) {
1024 printf(" number of alignments [%d]? ",nshow);
1025 fflush(stdout);
1026 if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
1027 if (rline[0]!=0) sscanf(rline,"%d",&nshow);
1028 ashow=nshow;
1029 }
1030 showalign(nshow);
1031 }
1032 tdone = sstime();
1033 if (markx<10) {
1034 printf("Library scan: "); ptime(stdout,tscan-tstart);
1035 printf(" total CPU time: "); ptime(stdout,tdone-tstart);
1036 printf("\n");
1037 if (outfd!=stdout) {
1038 fprintf(outfd,"Library scan: "); ptime(outfd,tscan-tstart);
1039 fprintf(outfd," total CPU time: "); ptime(outfd,tdone-tstart);
1040 fprintf(outfd,"\n");
1041 }
1042 }
1043 #endif
1044 exit(0);
1045 }
1046
1047 extern int *sascii, nascii[], aascii[];
1048
initenv(argc,argv)1049 initenv(argc,argv)
1050 int argc;
1051 char **argv;
1052 {
1053 char *cptr, *getenv();
1054 int copt, getopt();
1055 extern char *optarg;
1056 int i;
1057
1058 libenv="\0";
1059 if ((aaenv=getenv("AABANK"))==NULL) aaenv="\0";
1060 if ((cptr=getenv("FASTLIBS"))!=NULL) strncpy(flstr,cptr,sizeof(flstr));
1061 else flstr[0]='\0';
1062
1063 sascii = aascii;
1064 strncpy(sdstr,"BLOSUM50",sizeof(sdstr));
1065 smptr = sdstr;
1066 pam = abl50;
1067 sq = aa;
1068 hsq = haa;
1069 nsq = naa;
1070 dnaseq = 0;
1071 ldnaseq = 0;
1072
1073 #ifdef TFASTA
1074 gdelval = -16;
1075 ggapval = -4;
1076 #endif
1077 #ifdef FASTX
1078 gdelval = -15;
1079 ggapval = -3;
1080 gshift = -30;
1081 #endif
1082 #ifdef TFASTX
1083 gdelval = -15;
1084 ggapval = -3;
1085 gshift = -30;
1086 #endif
1087
1088 showall = 0;
1089
1090 if ((cptr=getenv("SHOWALL"))!=NULL)
1091 if (sscanf(cptr,"%d",&showall)!=1) showall = 1;
1092
1093 if ((cptr=getenv("LINLEN"))!=NULL) sscanf(cptr,"%d",&llen);
1094 else llen = 60;
1095 if (llen>=200) llen=200;
1096 markx=0;
1097 if ((cptr=getenv("MARKX"))==NULL) markx=0;
1098 else sscanf(cptr,"%d",&markx);
1099
1100 if ((cptr=getenv("GAPCUT"))!=NULL) sscanf(cptr,"%d",&cgap);
1101 else cgap=0;
1102
1103 if ((cptr=getenv("OPTCUT"))!=NULL) sscanf(cptr,"%d",&optcut);
1104 else optcut = 0;
1105
1106 if ((cptr=getenv("PAMFACT"))!=NULL) {
1107 sscanf(cptr,"%d",&pamfact);
1108 if (pamfact==1) pamfact= -2; else pamfact = 0;
1109 }
1110
1111 #ifndef LFASTA
1112 if ((cptr=getenv("LIBTYPE"))!=NULL) sscanf(cptr,"%d",&deftype);
1113 if (deftype<0 || deftype>LASTLIB) deftype= 0;
1114 #endif
1115
1116 while ((copt=
1117 getopt(argc,argv,"QqaAb:c:d:eE:f:g:h:Hik:l:Lm:noO:r:s:v:w:x:y:z13"))!=EOF)
1118 switch(copt) {
1119 #ifndef LFASTA
1120 case 'q':
1121 case 'Q': outtty=0; break;
1122 case 'a': showall=1; break;
1123 case 'A': dnasw_flg = 1; break;
1124 #endif
1125 case 'b': sscanf(optarg,"%d",&mshow);
1126 mshow_flg = 1;
1127 if (mshow<1) mshow=1;
1128 break;
1129 case 'c': sscanf(optarg,"%d",&optcut); break;
1130 case 'd': sscanf(optarg,"%d",&ashow);
1131 if (ashow<0) ashow=1;
1132 break;
1133 case 'e': /* lnsflg = 1; */
1134 fprintf(stderr," ln() normalization not available\n");
1135 break;
1136 case 'E': sscanf(optarg,"%g",&e_cut);
1137 e_cut_set=1;
1138 break;
1139 case 'f': sscanf(optarg,"%d",&gdelval); del_set=1;
1140 if (gdelval > 0) gdelval = -gdelval;
1141 break;
1142 case 'g': sscanf(optarg,"%d",&ggapval); gap_set=1;
1143 if (ggapval > 0) ggapval = - ggapval;
1144 break;
1145 case 'h': sscanf(optarg,"%d",&gshift); shift_set=1;
1146 break;
1147 case 'k': sscanf(optarg,"%d",&cgap); break;
1148 case 'H': histflg = 0; break;
1149 case 'i': revflg = 1; compstr=" (rev-comp)"; break;
1150 case 'l': strncpy(flstr,optarg,sizeof(flstr));
1151 break;
1152 case 'L': long_info = 1; break;
1153 case 'm': sscanf(optarg,"%d",&markx);
1154 if (markx == 4) llen=50;
1155 if (markx > 5 && markx != 10 ) markx = 0;
1156 break;
1157 #ifndef FASTX
1158 case 'n': dnaseq=1;
1159 sascii = nascii;
1160 sq = nt;
1161 nsq = nnt;
1162 hsq = hnt;
1163 strcpy(qsqnam,"nt");
1164 strcpy(sqnam,"nt");
1165 strcpy(sqtype,"DNA");
1166 resetp(dnaseq);
1167 break;
1168 #endif
1169 case 's':
1170 strncpy(smstr,optarg,sizeof(smstr));
1171 if (initpam(smstr)) {
1172 dnaseq= -1;
1173 ldnaseq = (sqtype[0]=='D')?1:0;
1174 smptr = smstr;
1175 }
1176 break;
1177 case 'w': sscanf(optarg,"%d",&llen); break;
1178 case 'x': sscanf(optarg,"%ld %ld",&sq0off,&sq1off);
1179 break;
1180 case 'y': sscanf(optarg,"%d",&optwid);
1181 optwidf = 1;
1182 break;
1183 case 'z': zsflag = 0; histflg = 0;
1184 break;
1185 case 'O': strncpy(resfile,optarg,sizeof(resfile));
1186 break;
1187 #ifndef LFASTA
1188 case '1': init1flg=1; break;
1189 case 'o': optall=0; break;
1190 case 'r': dataflg=1;
1191 strncpy(tmpfname,optarg,sizeof(tmpfname));
1192 break;
1193 #else
1194 case '1':
1195 case 'Q':
1196 case 'o':
1197 case 'R':fprintf(stderr," illegal option -%c\n",copt);
1198 break;
1199 #endif
1200 #ifdef TPLOT
1201 case 'v': strncpy(lvstr,optarg,sizeof(lvstr));
1202 break;
1203 #endif
1204 #if defined(TFASTA) || defined(TFASTX)
1205 #ifdef TFASTA
1206 case '3': nframe=3; break;
1207 #else
1208 case '3': nframe=1; break;
1209 #endif
1210 #else
1211 case '3':
1212 #endif
1213 default : fprintf(stderr," illegal option -%c\n",copt);
1214 }
1215
1216 optind--;
1217
1218 if (dnaseq>=0) {
1219 if ((smptr=getenv("SMATRIX"))!=NULL && initpam(smptr)) {
1220 dnaseq = -1;
1221 ldnaseq = (sqtype[0]=='D')?1:0;
1222 }
1223 else smptr = sdstr;
1224 }
1225
1226 ktmax = bktup;
1227
1228 if (dnaseq<0 && strlen(smptr)>0)
1229 fprintf(stderr," matrix file reset to %s\n",smptr);
1230 }
1231
reseta(dnaseq)1232 reseta(dnaseq)
1233 int dnaseq;
1234 {
1235 strcpy(qsqnam,"nt");
1236 strcpy(sqnam,"aa");
1237 strcpy(sqtype,"protein");
1238 }
1239
resetp(dnaseq)1240 resetp(dnaseq)
1241 int dnaseq;
1242 {
1243 if (dnaseq==1) {
1244 bestscale=80;
1245 bkfact=5;
1246 scfact=1;
1247 bktup=6;
1248 ktmax=6;
1249 bestmax=80;
1250 bestoff=45;
1251 pam = npam;
1252 if (!del_set) gdelval = -16;
1253 if (!gap_set) ggapval = -4;
1254 if (strlen(smstr)>0)
1255 fprintf(stderr," resetting matrix to DNA\n");
1256 strncpy(sdstr,"DNA",sizeof(smstr));
1257 smptr = sdstr;
1258 ldnaseq=1;
1259 if (pamfact>=0) pamfact = 0;
1260 if (!e_cut_set) e_cut = 2.0;
1261 }
1262 }
1263
initparm()1264 initparm()
1265 {
1266 char *getenv(), *cptr;
1267 int itemp, btemp;
1268
1269 if (!optwidf) {
1270 if (dnaseq!=1 && ktup==1) optwid = 32;
1271 else optwid = 16;
1272 }
1273
1274 btemp = 2*bestoff/3 + n0/bestscale + bkfact*(bktup-ktup);
1275 if (btemp>bestmax) btemp = bestmax;
1276 if (btemp > 3*n0) btemp = 3*shscore(aa0,n0)/5;
1277
1278 bestfull = 0;
1279
1280 if (cgap<=0) cgap=btemp+bestoff/3;
1281 if (optcut<=0) optcut=btemp;
1282 #ifdef TFASTA
1283 optcut = (optcut*3)/2;
1284 #endif
1285 pgap = gdelval+ggapval;
1286 }
1287
1288 /* hashaa - hash sequence 0 for rapid lookup of seq 1 (library) */
1289
hashaa(aa0,n0,ktup)1290 hashaa(aa0, n0, ktup)
1291 char *aa0; int n0, ktup;
1292 {
1293 int mhv,phv;
1294 int i0, hv;
1295
1296 if (pamfact == -1) pamfact=0;
1297 else if (pamfact== -2) pamfact=1;
1298
1299 for (i0=0, mhv= -1; i0<nsq; i0++)
1300 if (hsq[i0]>mhv) mhv=hsq[i0];
1301
1302 if (mhv<=0) {
1303 fprintf(stderr," maximum hsq <=0 %d\n",mhv);
1304 exit(1);
1305 }
1306
1307 for (kshft=0; mhv>0; mhv/=2) {
1308 kshft++;
1309 }
1310
1311 /* kshft = 2; */
1312 kt1 = ktup-1;
1313
1314 hv = 1;
1315 for (i0 = 0; i0<ktup; i0++)
1316 hv = hv<<kshft;
1317 hmax = hv;
1318 hmask = (hmax>>kshft)-1;
1319
1320 allochash(n0,hmax);
1321
1322 for (i0=0; i0<hmax; i0++) harr[i0]= -1;
1323 for (i0=0; i0<n0; i0++) link[i0]= -1;
1324
1325 /* encode the aa0 array */
1326
1327 phv=hv=0;
1328 for (i0=0; i0<kt1; i0++) {
1329 hv= (hv<<kshft)+hsq[aa0[i0]];
1330 phv += pam2[aa0[i0]][aa0[i0]]*ktup;
1331 }
1332
1333 for (; i0<n0; i0++) {
1334 hv=((hv&hmask)<<kshft) + hsq[aa0[i0]];
1335 link[i0]=harr[hv];
1336 harr[hv]=i0;
1337 if (pamfact) {
1338 pamh2[hv] = (phv += pam2[aa0[i0]][aa0[i0]]*ktup);
1339 phv -= pam2[aa0[i0-kt1]][aa0[i0-kt1]]*ktup;
1340 }
1341 else pamh2[hv]=fact*ktup;
1342 }
1343
1344 if (pamfact)
1345 for (i0=0; i0<nsq; i0++) pamh1[i0]=pam2[i0][i0]*ktup;
1346 else
1347 for (i0=0; i0<nsq; i0++) pamh1[i0]=fact;
1348 }
1349
allochash(n0,hmax)1350 allochash(n0, hmax)
1351 int n0, hmax;
1352 {
1353
1354 if ((harr=(int *)calloc((size_t)hmax,sizeof(int)))== NULL) {
1355 fprintf(stderr," cannot allocate hash array\n");
1356 exit(1);
1357 }
1358 if ((pamh2=(int *)calloc((size_t)hmax,sizeof(int)))==NULL) {
1359 fprintf(stderr," cannot allocate pamh2 array\n");
1360 exit(1);
1361 }
1362 if ((link=(int *)calloc((size_t)n0,sizeof(int)))== NULL) {
1363 fprintf(stderr," cannot allocate hash link array");
1364 exit(1);
1365 }
1366 }
1367
freehash()1368 freehash()
1369 {
1370 free(harr); free(link); free(pamh2);
1371 }
1372
1373 /* this is the main loop. First zero the diagonal arrays,
1374 then go through the sequence ktup at a time updating the
1375 diagonals appropriately. Finally, scan the diagonals,
1376 looking for the max score, and use the pam matrix
1377 */
1378
dhash()1379 dhash()
1380 {
1381 int nd, ndo, n00; /* diagonal array size */
1382 int lhval;
1383 int kfact;
1384 register struct dstruct *dptr;
1385 register int tscor;
1386 #ifndef ALLOCN0
1387 register struct dstruct *diagp;
1388 #else
1389 register int dpos;
1390 int lposn0;
1391 #endif
1392 struct dstruct *dpmax;
1393 register int lpos;
1394 int tpos, tlim;
1395 struct beststr *vmptr;
1396 int scor, gscor, cscor, scor0;
1397 float zscor;
1398 int im, ib, nsave;
1399 int cmps(); /* comparison routine for ksort */
1400 unsigned char *aa1ptr;
1401 float lnscale;
1402 #if defined(TFASTA) || defined(TFASTX)
1403 int n10, i;
1404 #endif
1405 #ifdef TFASTX
1406 int last_n1;
1407 unsigned char *fs, *fd;
1408 #endif
1409 int itt,itx,lcont, ocont, loff;/* lcont is returned by getlib to
1410 indicate there is more sequence
1411 remaining. ocont is the previous
1412 value of lcont, for going back later.
1413 loff corrects maxn for the modified
1414 size of aa1 for continued sequences
1415 */
1416
1417
1418 tlim = 0;
1419 #ifdef FKFACT
1420 kfact = ktup*fact;
1421 #endif
1422 ndo = 0;
1423 #ifdef FASTX
1424 n00 = n0x;
1425 #else
1426 n00 = n0;
1427 #endif
1428
1429 noff = n00-1;
1430
1431 #ifdef ALLOCN0
1432 nd = n00;
1433 #endif
1434
1435 /*
1436 these initializations have been added to deal with reading
1437 sequences in chunks
1438 */
1439
1440 #if defined(TFASTA) || defined(TFASTX)
1441 aa1ptr=aa10;
1442 #else
1443 aa1ptr=aa1;
1444 #endif
1445
1446 lcont=0;
1447 ocont=0;
1448 loff = 0;
1449 #if !defined(TFASTA) && !defined(TFASTX)
1450 while ((n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont))>0) {
1451 nlib++;
1452 ntt += n1;
1453 if (n1==1 || n1 < ktup) {
1454 /* if (igncnt++ <10)
1455 fprintf(stderr,"Ignoring: %s\n",libstr); */
1456 goto loop;
1457 }
1458 if (aa1!=aa1ptr) {n1 += n00; nlib--;}
1459
1460 #ifdef LFASTA
1461 if (n1 == n0) {
1462 for (itt=0; itt<n0; itt++) if (aa0[itt]!=aa1[itt]) break;
1463 if (itt==n0) oneseq = 1;
1464 }
1465 #endif
1466
1467 #else
1468 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
1469 while ((n10=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) {
1470 nlib++;
1471 ntt += n10;
1472 if (n10==1 || n10 < 3*ktup) {
1473 /* if (igncnt++ <10)
1474 fprintf(stderr,"Ignoring: %s\n",libstr); */
1475 goto loop;
1476 }
1477 if (aa10!=aa1ptr) {n10 += 3*n00; nlib--;}
1478 #endif
1479
1480 #ifdef PROGRESS
1481 #ifdef UNIX
1482 if (outtty)
1483 #endif
1484 if (nlib % 200 == 199) {
1485 fputc('.',stderr);
1486 if (nlib % 10000 == 9999) fputc('\n',stderr);
1487 else if (nlib % 1000 == 999) fputc(' ',stderr);
1488 }
1489 #endif
1490 /*
1491 if (check_nt(aa10,n10,&i)==0) {
1492 fprintf(stderr," error - non-nucleotide in sequence %s\n",libstr);
1493 i = max(0,i-10);
1494 for (itx=i; itx<i+20; itx++) fputc(sq[aa10[itx]],stderr);
1495 fputc('\n',stderr);
1496 exit(1);
1497 }
1498 */
1499 #ifdef TFASTA
1500 for (itt=0; itt<nframe; itt++) {
1501 n1=aatran(aa10,aa1,n10,itt);
1502 if (n1 < ktup) continue;
1503 #else
1504 #ifdef TFASTX
1505 for (itt=revflg; itt<nframe; itt++) {
1506 last_n1 = 0;
1507 for (itx=3*itt; itx<3+3*itt; itx++) {
1508 n1= aatran(aa10,&aa1[last_n1],n10,itx);
1509 /*
1510 fprintf(stderr,"n1x? %d\n",n1);
1511 for (i=0; i<n1; i++) {
1512 fprintf(stderr,"%c",aa[aa1[last_n1+i]]);
1513 if ((i%60)==59) fprintf(stderr,"\n");
1514 }
1515 fprintf(stderr,"\n");
1516 */
1517 last_n1 += n1 + 1;
1518 }
1519
1520 /* fprintf(stderr,"---\n"); */
1521
1522 n1 = n10;
1523 n1x31 = (n1-2)/3;
1524 n1x32 = n1x31+1 + (n1-n1x31-1)/2;
1525 /* we also need a rearranged version,
1526 111111222222233333333 becomes 123123123123123 */
1527
1528 for (itx = 0, fs=aa1; itx < 3; itx++,fs++) {
1529 for (fd=&aa1y[itx]; *fs !=EOSEQ; fd += 3, fs++) *fd = *fs;
1530 *fd=EOSEQ;
1531 }
1532 /*
1533 fprintf(stderr,"\n");
1534 for (i=0; i<n1; i++) {
1535 fprintf(stderr,"%c",aa[aa1y[i]]);
1536 if ((i%60)==59) fprintf(stderr,"\n");
1537 }
1538 fprintf(stderr,"\n");
1539 */
1540 #else
1541 itt=0;
1542 #endif
1543 #endif
1544
1545 #ifdef __MWERKS__
1546 ChkEvent();
1547 #endif
1548 #ifndef ALLOCN0
1549 nd = n00+n1;
1550 #endif
1551 if (lnsflg && n1 > 5)
1552 lnscale = (log((float)n0)/log((float)n1));
1553 else lnscale = 1.0;
1554
1555
1556 dpmax = &diag[nd];
1557 for (dptr= &diag[ndo]; dptr<dpmax;) {
1558 dptr->stop = -1;
1559 dptr->dmax = NULL;
1560 dptr++->score = 0;
1561 }
1562
1563 for (vmptr=vmax; vmptr<&vmax[MAXSAV]; vmptr++)
1564 vmptr->score = 0;
1565 lowscor = 0;
1566
1567 /* start hashing */
1568 lhval = 0;
1569 for (lpos=0; lpos<kt1;)
1570 lhval= ((lhval&hmask)<<kshft)+hsq[aa1[lpos++]];
1571
1572 #ifndef ALLOCN0
1573 diagp = &diag[noff + kt1];
1574 for ( ; lpos<n1; lpos++,diagp++) {
1575 lhval = ((lhval&hmask)<<kshft) + hsq[aa1[lpos]];
1576 for (tpos=harr[lhval]; tpos>=tlim; tpos=link[tpos]) {
1577 if ((tscor = (dptr = &diagp[-tpos])->stop)>=0) {
1578 #else
1579 lposn0 = noff + lpos;
1580 for ( ; lpos<n1; lpos++,lposn0++) {
1581 lhval = ((lhval&hmask)<<kshft) + hsq[aa1[lpos]];
1582 for (tpos=harr[lhval]; tpos>=tlim; tpos=link[tpos]) {
1583 dpos = lposn0 - tpos;
1584 if ((tscor = (dptr = &diag[dpos%nd])->stop)>=0) {
1585 #endif
1586 tscor += ktup;
1587 if ((tscor -=lpos)<=0) {
1588 scor = dptr->score;
1589 #ifdef FKFACT
1590 if ((tscor += kfact)<0 && lowscor<scor)
1591 #else
1592 if ((tscor += (kfact=pamh2[lhval]))<0
1593 && lowscor < scor)
1594 #endif
1595 #ifdef ALLOCN0
1596 savemax(dptr,dpos);
1597 #else
1598 savemax(dptr);
1599 #endif
1600 if ((tscor += scor)>=kfact) {
1601 dptr->score = tscor;
1602 dptr->stop=lpos;
1603 }
1604 else {
1605 dptr->score = kfact;
1606 dptr->start = (dptr->stop = lpos) - kt1;
1607 }
1608 }
1609 else {
1610 #ifdef FKFACT
1611 dptr->score += fact;
1612 #else
1613 dptr->score += pamh1[aa0[tpos]];
1614 #endif
1615 dptr->stop = lpos;
1616 }
1617 }
1618 else {
1619 #ifdef FKFACT
1620 dptr->score = kfact;
1621 #else
1622 dptr->score = pamh2[lhval];
1623 #endif
1624 dptr->start = (dptr->stop=lpos) - kt1;
1625 }
1626 } /* end tpos */
1627
1628 #ifdef LFASTA
1629 if (oneseq) tlim++;
1630 #endif
1631 #ifdef ALLOCN0
1632 /* reinitialize diag structure */
1633
1634 if ((dptr= &diag[lpos%nd])->score>lowscor)
1635 savemax(dptr,lpos);
1636 dptr->stop = -1;
1637 dptr->dmax = NULL;
1638 dptr->score = 0;
1639 #endif
1640 } /* end lpos */
1641
1642 #ifdef ALLOCN0
1643 for (tpos=0, dpos = noff+n1-1; tpos < n00; tpos++,dpos--) {
1644 if ((dptr= &diag[dpos%nd])->score>lowscor) savemax(dptr,dpos);
1645 }
1646 #else
1647 for (dptr=diag; dptr < dpmax; ) {
1648 if (dptr->score>lowscor) savemax(dptr);
1649 dptr->stop = -1;
1650 dptr->dmax = NULL;
1651 dptr++->score = 0;
1652 }
1653 ndo = nd;
1654 #endif
1655
1656 /*
1657 at this point all of the elements of aa1[lpos]
1658 have been searched for elements of aa0[tpos]
1659 with the results in diag[dpos]
1660 */
1661 for (ib= nsave =0,vmptr=vmax; vmptr < &vmax[MAXSAV]; vmptr++) {
1662 #ifdef LFASTA
1663 if (oneseq && vmptr->start==0 && vmptr->stop==n1-1) continue;
1664 #endif
1665 if (vmptr->score>0) {
1666 vmptr->score=spam(vmptr);
1667 vptr[ib++]= vmptr;
1668 nsave++;
1669 }
1670 }
1671
1672 if (nsave>0) {
1673
1674 #ifndef LFASTA
1675
1676 #ifdef FASTX
1677 /* FASTX code here to modify the start, stop points for
1678 the three phases of the translated protein sequence
1679 */
1680
1681
1682 /*
1683 fprintf(stderr,"n0x: %d; n0x31: %d; n0x32 %d\n",n0x,n0x31,n0x32);
1684 for (ib=0; ib<nsave; ib++) {
1685 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
1686 noff+vptr[ib]->start-vptr[ib]->dp,
1687 noff+vptr[ib]->stop-vptr[ib]->dp,
1688 vptr[ib]->start,vptr[ib]->stop,
1689 vptr[ib]->dp,vptr[ib]->score);
1690 }
1691
1692 fprintf(stderr,"---\n");
1693 */
1694
1695 for (ib=0; ib<nsave; ib++) {
1696 if (noff - vptr[ib]->dp + vptr[ib]->start >= n0x32) {
1697 vptr[ib]->dp += n0x32;
1698 }
1699 if (noff - vptr[ib]->dp +vptr[ib]->start >= n0x31) {
1700 vptr[ib]->dp += n0x31;
1701 }
1702 }
1703
1704 /*
1705 for (ib=0; ib<nsave; ib++) {
1706 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
1707 noff+vptr[ib]->start-vptr[ib]->dp,
1708 noff+vptr[ib]->stop-vptr[ib]->dp,
1709 vptr[ib]->start,vptr[ib]->stop,
1710 vptr[ib]->dp,vptr[ib]->score);
1711 }
1712 */
1713 #endif /* FASTX */
1714 #ifdef TFASTX
1715 /* TFASTX code here to modify the start, stop points for
1716 the three phases of the translated protein sequence
1717 TFASTX modifies library start points, not query start points
1718 */
1719
1720
1721 /*
1722 fprintf(stderr,"n1: %d; n1x31: %d; n1x32: %d\n",n1,n1x31,n1x32);
1723 for (ib=0; ib<nsave; ib++) {
1724 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
1725 noff+vptr[ib]->start-vptr[ib]->dp,
1726 noff+vptr[ib]->stop-vptr[ib]->dp,
1727 vptr[ib]->start,vptr[ib]->stop,
1728 vptr[ib]->dp,vptr[ib]->score);
1729 }
1730 fprintf(stderr,"---\n");
1731 */
1732
1733 for (ib=0; ib<nsave; ib++) {
1734 if (vptr[ib]->start >= n1x32) {
1735 vptr[ib]->start -= n1x32;
1736 vptr[ib]->stop -= n1x32;
1737 vptr[ib]->dp -= n1x32;
1738 }
1739 if (vptr[ib]->start >= n1x31) {
1740 vptr[ib]->start -= n1x31;
1741 vptr[ib]->stop -= n1x31;
1742 vptr[ib]->dp -= n1x31;
1743 }
1744 }
1745
1746 /*
1747 for (ib=0; ib<nsave; ib++) {
1748 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
1749 noff+vptr[ib]->start-vptr[ib]->dp,
1750 noff+vptr[ib]->stop-vptr[ib]->dp,
1751 vptr[ib]->start,vptr[ib]->stop,
1752 vptr[ib]->dp,vptr[ib]->score);
1753 }
1754 */
1755
1756 #endif /* TFASTX */
1757
1758 scor = sconn(vptr,nsave);
1759 for (vmptr=vptr[0],ib=1; ib<nsave; ib++)
1760 if (vptr[ib]->score > vmptr->score) vmptr=vptr[ib];
1761
1762 /* kssort(vptr,nsave); */
1763
1764 scor = max(scor,vmptr->score);
1765 cscor = scor = (int)((float)scor*lnscale +0.5);
1766 scor0 = (int)((float)vmptr->score*lnscale + 0.5);
1767 if (init1flg) cscor = scor0;
1768 #else
1769 kssort(vptr,nsave);
1770 #endif
1771
1772 #ifdef LFASTA
1773 for (ib=0; ib<nsave; ib++)
1774 if ((scor0 = scor=vptr[ib]->score) > optcut) {
1775 vmptr=vptr[ib];
1776 #else
1777 ib=0;
1778
1779 gscor = scor;
1780 if (optall && scor>optcut) {
1781 #ifdef __MWERKS__
1782 ChkEvent();
1783 #endif
1784 #ifndef TFASTX
1785 cscor = gscor = dmatch(noff-vmptr->dp,NO);
1786 #else
1787 cscor = gscor = dmatch(vmptr->dp-noff,NO);
1788 #endif
1789 optcount++;
1790 }
1791
1792 if (dohist) addhistz(zscor=find_zm(cscor,n1),n1);
1793 else zscor = (float)cscor;
1794
1795
1796 if (dataflg)
1797 fprintf(tmpfd,"%-12s %4d %4d %4d %4d %4d %8ld\n",
1798 libstr,sfnum,n1,gscor,scor,scor0,lmark);
1799
1800 if ((int)zscor > bestcut) {
1801 #endif /* not LFASTA */
1802 if (nbest >= MAXBEST) {
1803 #ifndef LFASTA
1804 if (!dohist) {
1805 process_hist(n0,bptr,nbest);
1806 addhistz(zscor=find_zm(cscor,n1),n1);
1807 dohist=1;
1808 }
1809 #endif
1810 bestfull = nbest-MAXBEST/4;
1811 selectz(bestfull-1,nbest);
1812 bestcut = (int)(bptr[bestfull-1]->zscore + 0.5);
1813 nbest = bestfull;
1814 }
1815 bestptr = bptr[nbest];
1816 bestptr->dp = vmptr->dp;
1817 bestptr->start = vmptr->start;
1818 bestptr->stop = vmptr->stop;
1819 bestptr->score0 = scor0;
1820 bestptr->score = scor;
1821 bestptr->sscore = cscor;
1822 bestptr->zscore = zscor;
1823 bestptr->gscore = gscor;
1824 bestptr->lseek = lmark;
1825 bestptr->cont = ocont;
1826 bestptr->lib = libfn;
1827 bestptr->n1 = n1;
1828 bestptr->frame = itt;
1829 nbest++;
1830 }
1831 }
1832 #ifndef LFASTA
1833 else { /* nsave <= 0 */
1834 if (dataflg) fprintf(tmpfd,"%-12s %4d %4d %4d %4d %8ld\n",
1835 libstr,sfnum,0,0,0,lmark);
1836 }
1837 #endif
1838 #if defined(TFASTA) || defined(TFASTX)
1839 } /* end of for (itt ... ) */
1840 #endif
1841 loop:
1842 if (lcont) {
1843 #if !defined(TFASTA) && !defined(TFASTX)
1844 loff = n0;
1845 memcpy(aa1,&aa1[n1-n0],n0);
1846 aa1ptr= &aa1[loff];
1847 #else
1848 loff = 3*n0;
1849 memcpy(aa10,&aa10[n10-loff],loff);
1850 aa1ptr= &aa10[loff];
1851 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
1852 #endif
1853 ocont = lcont;
1854 }
1855 else {
1856 loff = 0;
1857 #if !defined(TFASTA) && !defined(TFASTX)
1858 aa1ptr=aa1;
1859 #else
1860 aa1ptr = aa10;
1861 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
1862 #endif
1863 ocont = lcont;
1864 }
1865 }
1866 }
1867
1868 #ifdef ALLOCN0
1869 savemax(dptr,dpos)
1870 register struct dstruct *dptr; int dpos;
1871 {
1872 register struct beststr *vmptr;
1873 register int i;
1874
1875 #else
1876 savemax(dptr)
1877 register struct dstruct *dptr;
1878 {
1879 register int dpos;
1880 register struct beststr *vmptr;
1881 register int i;
1882 #endif
1883 register struct beststr *lmp;
1884 static struct beststr *lowmax=vmax;
1885
1886 #ifndef ALLOCN0
1887 dpos = (int)(dptr-diag);
1888 #endif
1889
1890 /* check to see if this is the continuation of a run that is already saved */
1891
1892 if ((vmptr=dptr->dmax)!=NULL && vmptr->dp== dpos &&
1893 vmptr->start==dptr->start) {
1894 vmptr->stop = dptr->stop;
1895 if ((i=dptr->score)<=vmptr->score) return;
1896 vmptr->score = i;
1897 if (vmptr!=lowmax) return;
1898 }
1899 else {
1900 i=lowmax->score = dptr->score;
1901 lowmax->dp = dpos;
1902 lowmax->start = dptr->start;
1903 lowmax->stop = dptr->stop;
1904 dptr->dmax = lowmax;
1905 }
1906
1907 lmp = lowmax;
1908
1909 for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++)
1910 if (vmptr->score < i) {
1911 i = vmptr->score;
1912 lmp = vmptr;
1913 }
1914 lowscor = i;
1915 lowmax = lmp;
1916 }
1917
1918 initpam2()
1919 {
1920 int i, j, k;
1921
1922 /* now need to allocate **pam2 */
1923
1924 if ((pam2 = (int **)calloc(MAXSQ, sizeof(int *)))==NULL) {
1925 fprintf(stderr, "Cannot allocate pam2[%d]\n",MAXSQ+1);
1926 exit(1);
1927 }
1928
1929 if ((pam2[0] = (int *)calloc((MAXSQ)*(MAXSQ), sizeof(int)))==NULL) {
1930 fprintf(stderr, "Cannot allocate pam2[%d][%d]\n",MAXSQ+1);
1931 exit(1);
1932 }
1933
1934 for (i=1; i<MAXSQ; i++) {
1935 pam2[i] = pam2[i-1]+MAXSQ;
1936 }
1937
1938 k=0;
1939 for (i=0; i<nsq; i++)
1940 for (j=0; j<=i; j++)
1941 pam2[j][i] = pam2[i][j] = pam[k++];
1942
1943 /* make certain that the inclusion of EOS never produces a positive value */
1944
1945 for (i=0; i<EOSEQ; i++) pam2[i][EOSEQ] = pam2[EOSEQ][i] = -BIGNUM;
1946 pam2[EOSEQ][EOSEQ]=-BIGNUM;
1947 }
1948
1949 spam(dmax)
1950 struct beststr *dmax;
1951 {
1952 int lpos;
1953 int tot, mtot;
1954 struct {int start, stop, score;} curv, maxv;
1955 register unsigned char *aa0p, *aa1p;
1956
1957 aa1p= &aa1[lpos = dmax->start];
1958 #ifndef FASTX
1959 aa0p= &aa0[lpos - dmax->dp + noff];
1960 #else
1961 aa0p= &aa0x[lpos - dmax->dp + noff];
1962 #endif
1963 curv.start = lpos;
1964
1965 tot = curv.score = maxv.score = 0;
1966 for ( ; lpos <= dmax->stop; lpos++) {
1967 tot += pam2[*aa0p++][*aa1p++];
1968 if (tot > curv.score) {
1969 curv.stop = lpos;
1970 curv.score = tot;
1971 }
1972 else if (tot < 0) {
1973 if (curv.score > maxv.score) {
1974 maxv.start = curv.start;
1975 maxv.stop = curv.stop;
1976 maxv.score = curv.score;
1977 }
1978 tot = curv.score = 0;
1979 curv.start = lpos;
1980 }
1981 }
1982
1983 if (curv.score > maxv.score) {
1984 maxv.start = curv.start;
1985 maxv.stop = curv.stop;
1986 maxv.score = curv.score;
1987 }
1988
1989 /* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
1990 printf(" new region: %3d %3d %3d %3d\n",maxv.start,
1991 dmax->start,maxv.stop,dmax->stop);
1992 */
1993 dmax->start = maxv.start;
1994 dmax->stop = maxv.stop;
1995
1996 return maxv.score;
1997 }
1998
1999 sconn(v,n)
2000 struct beststr *v[];
2001 int n;
2002 {
2003 int i,si,cmpp();
2004 struct slink {
2005 int score;
2006 struct beststr *vp;
2007 struct slink *next;
2008 } *start, *sl, *sj, *so, sarr[MAXSAV];
2009 int lstart, tstart, plstop, ptstop;
2010
2011 /* sort the score left to right in lib pos */
2012
2013 kpsort(v,n);
2014
2015 start = NULL;
2016
2017 /* for the remaining runs, see if they fit */
2018
2019 for (i=0,si=0; i<n; i++) {
2020
2021 /* if the score is less than the gap threshold, it never helps */
2022 if (v[i]->score < cgap) continue;
2023 lstart=v[i]->start;
2024 tstart=lstart-v[i]->dp+noff;
2025
2026 /* put the run in the group */
2027 sarr[si].vp = v[i];
2028 sarr[si].score = v[i]->score;
2029 sarr[si].next = NULL;
2030
2031 /* if it fits, then increase the score */
2032 for (sl=start; sl!= NULL; sl = sl->next) {
2033 plstop = sl->vp->stop;
2034 ptstop = plstop - sl->vp->dp + noff;
2035 if (plstop<lstart+XFACT && ptstop<tstart+XFACT) {
2036 sarr[si].score = sl->score+v[i]->score+pgap;
2037 break;
2038 }
2039 }
2040
2041 /* now recalculate where the score fits */
2042 if (start==NULL) start= &sarr[si];
2043 else for (sj=start, so=NULL; sj!=NULL; sj = sj->next) {
2044 if (sarr[si].score>sj->score) {
2045 sarr[si].next = sj;
2046 if (so!=NULL) so->next= &sarr[si];
2047 else start= &sarr[si];
2048 break;
2049 }
2050 so=sj;
2051 }
2052 si++;
2053 }
2054
2055 if (start!=NULL) return (start->score);
2056 else return (0);
2057 }
2058
2059 shscore(aa0,n0) /* calculate the 100% identical score */
2060 char *aa0; int n0;
2061 {
2062 int i, sum;
2063 for (i=0,sum=0; i<n0; i++)
2064 sum += pam2[aa0[i]][aa0[i]];
2065 return sum;
2066 }
2067
2068
2069 prhist(fd)
2070 FILE *fd;
2071 {
2072 int i,j;
2073 long hl,hll, el, ell, ev, maxval, maxvalt;
2074 char hline[80], pch, *dstr;
2075 int mh1, mht;
2076 int dotsiz, ddotsiz,doinset;
2077 long cum_hl;
2078 float cur_e, prev_e, f_int;
2079 float max_dev, x_tmp;
2080 int n_chi_sq, max_i;
2081
2082 mh1 = maxh-1;
2083 mht = (3*maxh-3)/4 - 1;
2084
2085 fprintf(fd,"\n");
2086
2087 if (nbest < 20) {
2088 fprintf(fd,
2089 "%7ld residues in %5ld sequences\n",
2090 ntt,nlib);
2091 }
2092 else {
2093 if (histflg) {
2094 for (i=0,maxval=0,maxvalt=0; i<maxh; i++) {
2095 if (hist[i] > maxval) maxval = hist[i];
2096 if (i >= mht && hist[i]>maxvalt) maxvalt = hist[i];
2097 }
2098 max_dev = 0.0;
2099 n_chi_sq = 0;
2100
2101 if (zsflag) cum_hl = -hist[0];
2102
2103 dotsiz = (maxval-1)/60+1;
2104 ddotsiz = (maxvalt-1)/50+1;
2105 doinset = (ddotsiz < dotsiz && dotsiz > 2);
2106
2107 if (mh1 > 0) {
2108 fprintf(fd,"\n one = represents %d library sequences\n",dotsiz);
2109 if (doinset) fprintf(fd," for inset = represents %d library sequences\n",ddotsiz);
2110
2111 if (optall) dstr="z-opt";
2112 else if (init1flg) dstr="z-init1";
2113 else dstr="z-initn";
2114
2115 fprintf(fd,"\n %s E()\n",dstr);
2116 }
2117
2118 prev_e = 0.0;
2119 for (i=0; i<=mh1; i++) {
2120 pch = (i==mh1) ? '>' : ' ';
2121 pch = (i==0) ? '<' : pch;
2122 hll = hl = hist[i];
2123 if (zsflag) {
2124 f_int = (float)(i*histint + min_hist) + (float)histint/2.0;
2125 cur_e = zs_to_Ec(f_int);
2126 cum_hl += hl;
2127 ev = el = ell = (long)(cur_e - prev_e + 0.5);
2128 if (hl > 0 && i>5 && i < (90-min_hist)/histint) {
2129 x_tmp = fabs((float)cum_hl - cur_e);
2130 if (x_tmp > max_dev) {
2131 max_dev = x_tmp;
2132 max_i = i;
2133 }
2134 n_chi_sq++;
2135 }
2136 if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
2137 else if (el < 1) el = 1;
2138 if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
2139 fprintf(fd,"%c%3d %5ld %5ld :",
2140 pch,(i<mh1)?(i)*histint+min_hist :
2141 mh1*histint+min_hist,hl,ev);
2142 }
2143 else fprintf(fd,"%c%3d %5ld :",
2144 pch,(i<mh1)?(i)*histint+min_hist :
2145 mh1*histint+min_hist,hl);
2146
2147 if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
2148 if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
2149 for (j=0; j<hl; j++) hline[j]='=';
2150 if (zsflag) {
2151 if (el <= hl ) {
2152 if (el > 0) hline[el-1]='*';
2153 hline[hl]='\0';
2154 }
2155 else {
2156 for (j = hl; j < el; j++) hline[j]=' ';
2157 hline[el-1]='*';
2158 hline[el]='\0';
2159 }
2160 }
2161 else hline[hl]='\0';
2162
2163 if (i >= mht&& doinset ) {
2164 for (j = max(el,hl); j < 10; j++) hline[j]=' ';
2165 hline[10]=':';
2166 for (j = 11; j<11+hll; j++) hline[j]='=';
2167 hline[11+hll]='\0';
2168 if (zsflag) {
2169 if (ell <= hll) hline[10+ell]='*';
2170 else {
2171 for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
2172 hline[10+ell] = '*';
2173 hline[11+ell] = '\0';
2174 }
2175 }
2176 }
2177
2178 fprintf(fd,"%s\n",hline);
2179 prev_e = cur_e;
2180 }
2181 }
2182 fprintf(fd,
2183 "%7ld residues in %5ld sequences\n",ntt,nlib);
2184 if (zsflag) {
2185 fprintf(fd," statistics extrapolated from %ld to %ld sequences\n",
2186 min(MAXBEST,num_db_entries),num_db_entries);
2187 /*
2188 if (histflg)
2189 fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%3d) at %d\n",
2190 max_dev/(float)(cum_hl),n_chi_sq,max_i*histint+min_hist);
2191 */
2192 }
2193 }
2194 if (optall)
2195 fprintf(fd," results sorted and z-values calculated from opt score\n");
2196 else if (init1flg)
2197 fprintf(fd," results sorted and z-values calculated from init1 score\n");
2198 else
2199 fprintf(fd," results sorted and z-values calculated from initn score\n");
2200
2201 if (pamfact) strncpy(hline,"variable pamfact",sizeof(hline));
2202 else sprintf(hline,"fact: %d",fact);
2203 if (lnsflg) fprintf(fd," scores scaled by ln(n0)/ln(n1)\n");
2204 fprintf(fd," %4d scores better than %d saved, ktup: %d, %s\n",
2205 nbest,bestcut,ktup,hline);
2206 fprintf(fd," %s matrix,",smptr);
2207 #if !defined(FASTX) && !defined(TFASTX)
2208 fprintf(fd," gap penalties: %d,%d\n",gdelval,ggapval);
2209 #else
2210 fprintf(fd," gap penalties: %d,%d frame-shift: %d\n",gdelval,ggapval,gshift);
2211 #endif
2212 if (optall)
2213 fprintf(fd,
2214 " joining threshold: %d, optimization threshold: %d, width: %d\n",
2215 cgap,optcut,optwid);
2216 else fprintf(fd," joining threshold: %d, opt. width: %d",cgap,optwid);
2217
2218 if (dataflg) {
2219 if (lnsflg) fprintf(fd,"; scores scaled by ln(n0)/ln(n1)\n");
2220 }
2221 fprintf(fd," scan time: "); ptime(fd,tscan-tstart); fprintf(fd,"\n");
2222
2223 fflush(fd);
2224 }
2225
2226 allocdiag(dsize) /* allocates diagonal structures */
2227 int dsize;
2228 {
2229
2230 #ifdef I86BUG
2231 if ((int)sizeof(struct dstruct) != (1<<L2DSTR)) {
2232 printf(" L2DSTR incorrect - program should be recompiled %3d %3d",
2233 sizeof(struct dstruct),(1<<L2DSTR));
2234 exit(1);
2235 }
2236 #endif
2237
2238 diag = (struct dstruct *)calloc((size_t)dsize,sizeof(struct dstruct));
2239
2240 if (diag==NULL) {
2241 printf(" cannot allocate diagonal arrays\n");
2242 exit(1);
2243 }
2244 }
2245
2246 #ifndef LFASTA
2247 #ifndef A_MARK
2248 #define A_MARK ">>"
2249 #endif
2250
2251 showalign(nshow)
2252 int nshow;
2253 {
2254 int ib, istart, istop, i, tmp_len;
2255 char bline[512], *bp, *bl_ptr;
2256 char fmt[40],fmt2[40];
2257 int lcont, ccont, loff;
2258 unsigned char *aa1ptr;
2259 int olib, tmp;
2260 double lnscale;
2261 int swscore;
2262 #ifdef TFASTA
2263 int n10;
2264 #endif
2265 #ifdef TFASTX
2266 int n10;
2267 int last_n1,itt;
2268 unsigned char *fd, *fs;
2269 #endif
2270
2271 olib = -1;
2272
2273 sprintf(fmt,"%s%%-%ds (%%d %%s)\n",A_MARK,llen-10);
2274 if (markx<10) fprintf(outfd,"\n");
2275
2276 if (ashow < 0) ashow = nshow;
2277 istart = 0; istop = min(min(nbest,ashow),nshow);
2278 for (ib=istart; ib<istop; ib++) {
2279 bbp = bptr[ib];
2280 if (bbp->lib!=olib) {
2281 closelib();
2282 if (openlib(lbnames[bbp->lib],"\0")<=0) exit(0);
2283 olib=bbp->lib;
2284 }
2285
2286 if (long_info) {
2287 RANLIB(bline,sizeof(bline),bbp->lseek);
2288 tmp_len = strlen(bline);
2289 bl_ptr = bline;
2290 if (markx < 10) while (tmp_len > llen) {
2291 for (i=llen; i>10; i--)
2292 if (bl_ptr[i]==' ') {
2293 bl_ptr[i]='\n';
2294 break;
2295 }
2296 if (i <= 10) break;
2297 tmp_len -= i;
2298 bl_ptr += i;
2299 }
2300 bline[sizeof(bline)-1]='\0';
2301 }
2302 else {
2303 RANLIB(bline,llen-5,bbp->lseek);
2304 bline[llen-4]='\0';
2305 }
2306
2307 if (strlen(bline)==0) {
2308 bline[0]='>';
2309 strncpy(&bline[1],lbnames[bbp->lib],llen-5);
2310 }
2311 #if !defined(TFASTA) && !defined(TFASTX)
2312 aa1ptr=aa1;
2313 #else
2314 aa1ptr = aa10;
2315 #endif
2316 loff=0; loffset = 0l; lcont=0;
2317 for (ccont=0; ccont<=bbp->cont; ccont++) {
2318 #if !defined(TFASTA) && !defined(TFASTX)
2319 n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont);
2320 if (aa1ptr!=aa1) n1 += n0;
2321 #else
2322 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2323 n10 = GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont);
2324 if (aa1ptr!=aa10) n10 += 3*n0;
2325 #endif
2326 if (lcont>bbp->cont) break;
2327 #if !defined(TFASTA) && !defined(TFASTX)
2328 if (lcont) {
2329 loff = n0;
2330 memcpy(aa1,&aa1[n1-n0],n0);
2331 aa1ptr= &aa1[loff];
2332 loffset += n1-n0;
2333 }
2334 else {
2335 loff = 0;
2336 aa1ptr=aa1;
2337 }
2338 #else
2339 if (lcont) {
2340 loff = 3*n0;
2341 memcpy(aa10,&aa10[n10-loff],loff);
2342 aa1ptr= &aa10[loff];
2343 loffset += n10-loff;
2344 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2345 }
2346 else {
2347 loff = 0;
2348 aa1ptr = aa10;
2349 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2350 }
2351 #endif
2352 }
2353 #ifdef TFASTA
2354 n1 = aatran(aa10,aa1,n10,bbp->frame);
2355 loffset /= 3;
2356 #endif
2357 #ifdef TFASTX
2358 last_n1 = 0;
2359 for (itt=3*bbp->frame; itt<3+3*bbp->frame; itt++) {
2360 n1 = aatran(aa10,&aa1[last_n1],n10,itt);
2361 /* for (i=0; i<n1; i++) {
2362 fprintf(stderr,"%c",aa[aa1[last_n1+i]]);
2363 if ((i%60)==59) fprintf(stderr,"\n");
2364 }
2365 fprintf(stderr,"\n"); */
2366 last_n1 += n1+1;
2367 }
2368
2369 /* fprintf(stderr,"\n"); */
2370 n1 = last_n1;
2371 /* we also need a rearranged version,
2372 111111222222233333333 becomes 123123123123123 */
2373
2374 for (fs=aa1, itt=0; itt <3; itt++,fs++) {
2375 for (fd=&aa1y[itt]; *fs !=EOSEQ; fd += 3, fs++) *fd = *fs;
2376 *fd=EOSEQ;
2377 }
2378 #endif
2379 if (markx != 4 && markx<10) {
2380 fprintf(outfd,fmt,bline,bbp->n1,sqnam);
2381 #if !defined(TFASTA) && !defined(TFASTX)
2382 if (zsflag)
2383 fprintf(outfd,
2384 "initn: %4d init1: %4d opt: %4d z-score: %4.1f E(): %6.2g\n",
2385 bbp->score,bbp->score0,bbp->gscore,bbp->zscore,
2386 bbp->escore);
2387 else
2388 fprintf(outfd,"initn: %4d init1: %4d opt: %4d\n",
2389 bbp->score,bbp->score0,bbp->gscore);
2390
2391 if (lnsflg) {
2392 if (n1 > 5) lnscale = log((double)n1)/log((double)n0);
2393 else lnscale = 1.0;
2394 fprintf(outfd," Unnormalized scores, initn: %4d init1: %4d opt %4d\n",
2395 (int)((double)bbp->score*lnscale+0.5),
2396 (int)((double)bbp->score0*lnscale+0.5),
2397 (int)((double)bbp->gscore*lnscale+0.5));
2398 }
2399 #else
2400 #ifdef TFASTA
2401 if (zsflag)
2402 fprintf(outfd,
2403 "Frame: %1d init1: %4d initn: %4d opt: %4d z-score: %4.1f E(): %6.2g\n",
2404 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore,
2405 bbp->zscore,bbp->escore);
2406 else
2407 fprintf(outfd,
2408 "Frame: %1d init1: %4d initn: %4d opt: %4d\n",
2409 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore);
2410 #else
2411 if (zsflag)
2412 fprintf(outfd,
2413 "Frame: %c init1: %4d initn: %4d opt: %4d z-score: %4.1f E(): %6.2g\n",
2414 (bbp->frame?'R':'F'),bbp->score,bbp->score0,bbp->gscore,
2415 bbp->zscore,bbp->escore);
2416 else
2417 fprintf(outfd,
2418 "Frame: %c init1: %4d initn: %4d opt: %4d\n",
2419 (bbp->frame?'R':'F'),bbp->score,bbp->score0,bbp->gscore);
2420 #endif
2421 #endif
2422 } /* if (markx!=4 && markx!=10) */
2423 strncpy(name1,bline,sizeof(name1));
2424 if (markx<=4) name1[6]='\0';
2425 if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
2426 if (markx==10) {
2427 fprintf(outfd,">>%s\n",bline);
2428 fprintf(outfd,"; fa_initn: %d\n",bbp->score);
2429 fprintf(outfd,"; fa_init1: %d\n",bbp->score0);
2430 fprintf(outfd,"; fa_opt: %d\n",bbp->gscore);
2431 fprintf(outfd,"; fa_z-score: %4.1f\n",bbp->zscore);
2432 fprintf(outfd,"; fa_expect: %6.2g\n",bbp->escore);
2433 }
2434 #if defined(FASTX) || defined(TFASTX)
2435 smark[0] = smark[1] = smark[2] = smark[3] = -BIGNUM;
2436 #ifdef FASTX
2437 swscore = pmatch(aa0,n0,aa0y,n0x,aa1,n1,YES);
2438 #endif
2439 #ifdef TFASTX
2440 swscore = pmatch(aa1,n1,aa1y,n1,aa0,n0,YES);
2441 #endif
2442 #else
2443 smark[2]=bbp->start;
2444 smark[3]=bbp->stop;
2445 smark[0]=noff+smark[2]-bbp->dp;
2446 smark[1]=noff+smark[3]-bbp->dp;
2447 iscore=bbp->score0;
2448 if (!ldnaseq || dnasw_flg) swscore=smatch(aa0,n0,aa1,n1,YES);
2449 else dmatch(noff-bbp->dp,YES);
2450 #endif
2451 if (markx !=4 && markx<10) fprintf(outfd,"\n");
2452 fflush(outfd);
2453 }
2454 }
2455 #endif
2456
2457 #ifdef LFASTA
2458 showlocal(nshow)
2459 int nshow;
2460 {
2461 int ib, istart, istop;
2462 char bline[200], *bp;
2463 int lcont, olcont, ccont, loff;
2464 unsigned char *aa1ptr;
2465 int olib;
2466
2467 olcont = -1;
2468
2469 istart = 0; istop = nbest;
2470
2471 bbp = bptr[0];
2472
2473 for (ib=istart; ib<istop; ib++) {
2474 bbp = bptr[ib];
2475 if (bbp->cont!=olcont) {
2476
2477 RANLIB(bline,llen-5,bbp->lseek);
2478 if (strlen(bline)==0) {
2479 bline[0]='>';
2480 strncpy(&bline[1],lbnames[bbp->lib],llen-5);
2481 }
2482
2483 aa1ptr=aa1;
2484
2485 loff=0; loffset = 0l; lcont=0;
2486 for (ccont=0; ccont<=bbp->cont; ccont++) {
2487 n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont);
2488 if (aa1ptr!=aa1) n1 += n0;
2489 if (lcont>bbp->cont) break;
2490 if (lcont) {
2491 loff = n0;
2492 memcpy(aa1,&aa1[n1-n0],n0);
2493 aa1ptr= &aa1[loff];
2494 loffset += n1-n0;
2495 }
2496 else {
2497 loff = 0;
2498 aa1ptr=aa1;
2499 }
2500 }
2501 olcont = bbp->cont;
2502 }
2503 strncpy(name1,bline,6);
2504 name1[6]='\0';
2505 if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
2506 smark[2]=bbp->start;
2507 smark[3]=bbp->stop;
2508 smark[0]=noff+smark[2]-bbp->dp;
2509 smark[1]=noff+smark[3]-bbp->dp;
2510 iscore=bbp->score0;
2511 #ifdef __MWERKS__
2512 ChkEvent();
2513 #endif
2514 #ifndef TPLOT
2515 if (dmatch(smark[1],smark[3],YES)>=0 && markx < 10)
2516 fprintf(outfd,"\n----------\n");
2517 #else /* !TPLOT */
2518 dmatch(smark[1],smark[3],NO);
2519 #endif /* TPLOT */
2520 }
2521 }
2522 #endif
2523
2524 #ifdef FAR_PTR
2525 #ifdef __TURBOC__
2526 #define FCALLOC farcalloc
2527 #define MTYPE unsigned long
2528 #define FFREE farfree
2529 #endif
2530 #endif
2531
2532 initbest(nbest) /* allocate arrays for best sort */
2533 int nbest;
2534 {
2535 #ifndef FAR_PTR
2536 if ((best=(struct beststr *)calloc((size_t)nbest,sizeof(struct beststr)))
2537 == NULL) {fprintf(stderr,"cannot allocate best struct\n"); exit(1);}
2538 if ((bptr=(struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *)))
2539 == NULL) {fprintf(stderr,"cannot allocate bptr\n"); exit(1);}
2540
2541 #else /* FAR_PTR */
2542 void far *FCALLOC();
2543 if ((best=(struct beststr huge *)
2544 FCALLOC((MTYPE)nbest,(MTYPE)sizeof(struct beststr)))== NULL) {
2545 fprintf(stderr,"cannot allocate best struct\n"); exit(1);}
2546 if ((bptr=(struct beststr huge * huge *)
2547 FCALLOC((MTYPE)nbest,(MTYPE)sizeof(struct beststr huge *)))==NULL) {
2548 fprintf(stderr,"cannot allocate bptr\n"); exit(1);}
2549 #endif /* FAR_PTR */
2550 }
2551
2552 freebest()
2553 {
2554 #ifndef BIGMEM
2555 #ifndef FAR_PTR
2556 free(bptr);
2557 free(best);
2558 #else /* FAR_PTR */
2559 FFREE(bptr);
2560 FFREE(best);
2561 #endif /* FAR_PTR */
2562 #endif /* BIGMEM */
2563 }
2564
2565
2566 #ifndef LFASTA
2567 showbest()
2568 {
2569 int ib, istart, istop;
2570 char bline[200], fmt[40], pad[200];
2571 int ntmp;
2572 int lcont, ccont, loff;
2573 unsigned char *aa1ptr;
2574 int olib;
2575 #ifdef TFASTA
2576 int n10;
2577 #endif
2578 #ifdef TFASTX
2579 int n10;
2580 int last_n1, itt;
2581 unsigned char *fs, *fd;
2582 #endif
2583
2584 if (nshow <= 0) return;
2585
2586 olib = -1;
2587
2588 #if !defined(TFASTA) && !defined(TFASTX)
2589 sprintf(fmt,"%%-%ds",llen-10);
2590 #else
2591 sprintf(fmt,"%%-%ds",llen-14);
2592 #endif
2593
2594 nshow = min(nshow,nbest);
2595 mshow = min(mshow,nbest);
2596 if (outtty) {
2597 printf(" How many scores would you like to see? [%d] ",nshow);
2598 fflush(stdout);
2599 if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
2600 if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow);
2601 if (nshow<=0) nshow = min(20,nbest);
2602 }
2603 else nshow=mshow;
2604
2605 memset(pad,' ',llen-25);
2606 #if !defined(TFASTA) && !defined(TFASTX)
2607 pad[llen-31]='\0';
2608 #else
2609 pad[llen-31]='\0';
2610 #endif
2611
2612 if (zsflag)
2613 fprintf(outfd,"The best scores are:%sinitn init1 opt z-sc E(%ld)\n",
2614 pad,num_db_entries);
2615 else
2616 fprintf(outfd,"The best scores are:%sinitn init1 opt\n",pad);
2617
2618 if (outfd != stdout)
2619 if (zsflag)
2620 printf("The best scores are:%sinitn init1 opt z-sc E(%ld)\n",
2621 pad,num_db_entries);
2622 else
2623 printf("The best scores are:%sinitn init1 opt\n",pad);
2624
2625 istart = 0;
2626 l1: istop = min(nbest,nshow);
2627 for (ib=istart; ib<istop; ib++) {
2628 bbp = bptr[ib];
2629
2630 if (!outtty && zsflag && bbp->escore > e_cut) {
2631 nshow = ib;
2632 goto done;
2633 }
2634
2635 if (bbp->lib!=olib) {
2636 closelib();
2637 if (openlib(lbnames[bbp->lib],"\0")<=0) exit(0);
2638 olib=bbp->lib;
2639 }
2640
2641 RANLIB(bline,llen-5,bbp->lseek);
2642 #if !defined(TFASTA) && !defined(TFASTX)
2643 bline[llen-11]='\0';
2644 #else
2645 bline[llen-15]='\0';
2646 #endif
2647
2648 if (!optall) {
2649 #if !defined(TFASTA) && !defined(TFASTX)
2650 aa1ptr=aa1;
2651 #else
2652 aa1ptr = aa10;
2653 #endif
2654 loff=0; lcont=0;
2655 for (ccont=0; ccont <= bbp->cont; ccont++) {
2656 #if !defined(TFASTA) && !defined(TFASTX)
2657 n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont);
2658 if (aa1ptr!=aa1) n1 += n0;
2659 #else
2660 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2661 n10=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont);
2662 if (aa1ptr!=aa10) n10 += 3*n0;
2663 #endif
2664 if (lcont>bbp->cont) break;
2665 #if !defined(TFASTA) && !defined(TFASTX)
2666 if (lcont) {
2667 loff = n0;
2668 memcpy(aa1,&aa1[n1-n0],n0);
2669 aa1ptr= &aa1[loff];
2670 }
2671 else {
2672 loff = 0;
2673 aa1ptr=aa1;
2674 }
2675 #else
2676 if (lcont) {
2677 loff = 3*n0;
2678 memcpy(aa10,&aa10[n10-loff],loff);
2679 aa1ptr= &aa10[loff];
2680 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2681 }
2682 else {
2683 aa1ptr=aa10;
2684 loff = 0;
2685 maxt = maxn-loff-3; maxt -= maxt%3; maxt++;
2686 }
2687 #endif
2688 }
2689
2690 /* if (check_nt(aa10,n10,&last_n1)==0) {
2691 fprintf(stderr," error - non-nucleotide in sequence %s\n",libstr);
2692 last_n1 = max(0,last_n1-10);
2693 for (itt=last_n1; itt<last_n1+20; itt++) fputc(sq[aa10[itt]],stderr);
2694 fputc('\n',stderr);
2695 exit(1);
2696 }
2697 */
2698 #ifdef TFASTA
2699 n1=aatran(aa10,aa1,n10,bbp->frame);
2700 #endif
2701 #ifdef TFASTX
2702 last_n1 = 0;
2703 for (itt=(revflg?3:0); itt<(revflg?6:3); itt++) {
2704 n1 = aatran(aa10,&aa1[last_n1],n10,itt);
2705 last_n1 += n1+1;
2706 }
2707 n1 = n10;
2708 /* we also need a rearranged version,
2709 111111222222233333333 becomes 123123123123123 */
2710 for (itt=0,fs=aa1; itt <3; itt++,fs++) {
2711 for (fd=&aa1y[itt]; *fs !=EOSEQ; fd += 3, fs++) *fd = *fs;
2712 *fd=EOSEQ;
2713 }
2714 bbp->gscore=dmatch(bbp->dp-noff,NO);
2715 #else
2716 bbp->gscore=dmatch(noff-bbp->dp,NO);
2717 #endif
2718 }
2719
2720 fprintf(outfd,fmt,bline);
2721 #if !defined(TFASTA) && !defined(TFASTX)
2722 if (zsflag) fprintf(outfd,"%4d %3d %4d %4.1f %6.2g\n",
2723 bbp->score,bbp->score0,bbp->gscore,
2724 bbp->zscore,bbp->escore);
2725 else fprintf(outfd,"%4d %3d %4d\n",
2726 bbp->score,bbp->score0,bbp->gscore);
2727
2728 #else
2729 #ifndef TFASTX
2730 if (zsflag) fprintf(outfd,"(%1d) %4d %3d %4d %4.1f %6.2g\n",
2731 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore,
2732 bbp->zscore,bbp->escore);
2733 else fprintf(outfd,"(%1d) %4d %3d %4d\n",
2734 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore);
2735 #else
2736 if (zsflag) fprintf(outfd,"(%c) %4d %3d %4d %4.1f %6.2g\n",
2737 (bbp->frame?'r':'f'),bbp->score,bbp->score0,bbp->gscore,
2738 bbp->zscore,bbp->escore);
2739 else fprintf(outfd,"(%c) %4d %3d %4d\n",
2740 (bbp->frame?'r':'f'),
2741 bbp->score,bbp->score0,bbp->gscore);
2742 #endif
2743 #endif
2744
2745 if (outfd!=stdout) {
2746 fprintf(stdout,fmt,bline);
2747 #if !defined(TFASTA) && !defined(TFASTX)
2748 if (zsflag) printf("%4d %3d %4d %4.1f %5.2g\n",
2749 bbp->score,bbp->score0,bbp->gscore,
2750 bbp->zscore,bbp->escore);
2751 else
2752 printf("%4d %3d %4d\n",bbp->score,bbp->score0,bbp->gscore);
2753 #else
2754 #ifndef TFASTX
2755 if (zsflag)
2756 printf("(%1d) %4d %3d %4d %4.1f %5.2g\n",
2757 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore,
2758 bbp->zscore,bbp->escore);
2759 else
2760 printf("(%1d) %4d %3d %4d\n",
2761 bbp->frame+1,bbp->score,bbp->score0,bbp->gscore);
2762 #else
2763 if (zsflag)
2764 printf("(%c) %4d %3d %4d %4.1f %5.2g\n",
2765 (bbp->frame?'r':'f'),bbp->score,bbp->score0,bbp->gscore,
2766 bbp->zscore,bbp->escore);
2767 else
2768 printf("(%c) %4d %3d %4d\n",
2769 (bbp->frame?'r':'f'),bbp->score,bbp->score0,bbp->gscore);
2770 #endif
2771 #endif
2772 }
2773 }
2774
2775 fflush(outfd); if (outfd!=stdout) fflush(stdout);
2776
2777 if (outtty) {
2778 printf(" More scores? [0] ");
2779 fflush(stdout);
2780 if (fgets(rline,sizeof(rline),stdin)==NULL) exit(0);
2781 ntmp = 0;
2782 if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp);
2783 if (ntmp<=0) ntmp = 0;
2784 if (ntmp>0) {
2785 istart = istop;
2786 nshow += ntmp;
2787 mshow += ntmp;
2788 goto l1;
2789 }
2790 }
2791 else if (zsflag && bbp->escore < e_cut) {
2792 if (mshow_flg && istop >= mshow) goto done;
2793 istart=istop;
2794 nshow += 10;
2795 goto l1;
2796 }
2797
2798 done:
2799 if (outfd!=stdout) fprintf(outfd,"\n");
2800 }
2801 #endif /* LFASTA */
2802
2803 selectz(k,n) /* k is rank in array */
2804 int k,n;
2805 {
2806 int t, i, j, l, r;
2807 float v;
2808 #ifndef FAR_PTR
2809 struct beststr *tmptr;
2810 #else
2811 struct beststr huge * tmptr;
2812 #endif
2813
2814 l=0; r=n-1;
2815
2816
2817 while ( r > l ) {
2818 i = l-1;
2819 j = r;
2820 v = bptr[r]->zscore;
2821 do {
2822 while (bptr[++i]->zscore > v ) ;
2823 while (bptr[--j]->zscore < v ) ;
2824 tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
2825 } while (j > i);
2826 bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
2827 if (i>=k) r = i-1;
2828 if (i<=k) l = i+1;
2829 }
2830 }
2831
2832 sortbest()
2833 {
2834 #ifndef FAR_PTR
2835 int cmps(), cmp1(), cmpa();
2836 if (init1flg) ksort(bptr,nbest,cmp1);
2837 else if (optall) ksort(bptr,nbest,cmpa);
2838 else ksort(bptr,nbest,cmps);
2839 #else
2840 int fcmps(), fcmp1(), fcmpa();
2841 if (init1flg) fksort(bptr,nbest,fcmp1);
2842 else if (optall) fksort(bptr,nbest,fcmpa);
2843 else fksort(bptr,nbest,fcmps);
2844 #endif
2845 }
2846
2847 sortbeste()
2848 {
2849 #ifndef FAR_PTR
2850 int cmpe();
2851
2852 ksort(bptr,nbest,cmpe);
2853 #else
2854 int fcmpe();
2855
2856 fksort(bptr,nbest,fcmpe);
2857 #endif
2858 }
2859
2860 sortbestz()
2861 {
2862 #ifndef FAR_PTR
2863 int cmpz();
2864
2865 ksort(bptr,nbest,cmpz);
2866 #else
2867 int fcmpz();
2868
2869 fksort(bptr,nbest,fcmpz);
2870 #endif
2871 }
2872
2873 cmps(ptr1,ptr2)
2874 struct beststr *ptr1, *ptr2;
2875 {
2876 if (ptr1->score < ptr2->score) return (1);
2877 else if (ptr1->score > ptr2->score) return (-1);
2878 else return (0);
2879 }
2880
2881 cmpa(ptr1,ptr2)
2882 struct beststr *ptr1, *ptr2;
2883 {
2884 if (ptr1->gscore < ptr2->gscore) return (1);
2885 else if (ptr1->gscore > ptr2->gscore) return (-1);
2886 else return (0);
2887 }
2888
2889 cmp1(ptr1,ptr2)
2890 struct beststr *ptr1, *ptr2;
2891 {
2892 if (ptr1->score0 < ptr2->score0) return (1);
2893 else if (ptr1->score0 > ptr2->score0) return (-1);
2894 else return (0);
2895 }
2896
2897 cmpz(ptr1,ptr2)
2898 struct beststr *ptr1, *ptr2;
2899 {
2900 if (ptr1->zscore < ptr2->zscore) return (1);
2901 else if (ptr1->zscore > ptr2->zscore) return (-1);
2902 else return (0);
2903 }
2904
2905 cmpe(ptr1,ptr2)
2906 struct beststr *ptr1, *ptr2;
2907 {
2908 if (ptr1->escore < ptr2->escore) return (-1);
2909 else if (ptr1->escore > ptr2->escore) return (1);
2910 else return (0);
2911 }
2912
2913 #ifdef FAR_PTR
2914 fcmps(ptr1,ptr2)
2915 struct beststr huge * ptr1, huge * ptr2;
2916 {
2917 if (ptr1->score < ptr2->score) return (1);
2918 else if (ptr1->score > ptr2->score) return (-1);
2919 else return (0);
2920 }
2921
2922 fcmp1(ptr1,ptr2)
2923 struct beststr huge * ptr1, huge * ptr2;
2924 {
2925 if (ptr1->score0 < ptr2->score0) return (1);
2926 else if (ptr1->score0 > ptr2->score0) return (-1);
2927 else return (0);
2928 }
2929
2930 fcmpa(ptr1,ptr2)
2931 struct beststr huge * ptr1, huge * ptr2;
2932 {
2933 if (ptr1->gscore < ptr2->gscore) return (1);
2934 else if (ptr1->gscore > ptr2->gscore) return (-1);
2935 else return (0);
2936 }
2937
2938 fcmpz(ptr1,ptr2)
2939 struct beststr huge * ptr1, huge * ptr2;
2940 {
2941 if (ptr1->zscore < ptr2->zscore) return (1);
2942 else if (ptr1->zscore > ptr2->zscore) return (-1);
2943 else return (0);
2944 }
2945
2946 fcmpe(ptr1,ptr2)
2947 struct beststr huge * ptr1, huge * ptr2;
2948 {
2949 if (ptr1->escore < ptr2->escore) return (-1);
2950 else if (ptr1->escore > ptr2->escore) return (1);
2951 else return (0);
2952 }
2953 #endif
2954
2955 cmpp(ptr1,ptr2)
2956 struct beststr *ptr1, *ptr2;
2957 {
2958 if (ptr1->start < ptr2->start) return (-1);
2959 else if (ptr1->start > ptr2->start) return (1);
2960 else return (0);
2961 }
2962
2963 kssort(v,n)
2964 struct beststr *v[]; int n;
2965 {
2966 int gap, i, j;
2967 struct beststr *tmp;
2968
2969 for (gap=n/2; gap>0; gap/=2)
2970 for (i=gap; i<n; i++)
2971 for (j=i-gap; j>=0; j -= gap) {
2972 if (v[j]->score >= v[j+gap]->score)
2973 break;
2974 tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
2975 }
2976 }
2977
2978 kpsort(v,n)
2979 struct beststr *v[]; int n;
2980 {
2981 int gap, i, j;
2982 struct beststr *tmp;
2983
2984 for (gap=n/2; gap>0; gap/=2)
2985 for (i=gap; i<n; i++)
2986 for (j=i-gap; j>=0; j -= gap) {
2987 if (v[j]->start <= v[j+gap]->start)
2988 break;
2989 tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
2990 }
2991 }
2992
2993
2994 ksort(v,n,comp)
2995 void *v[]; int n, (*comp)();
2996 {
2997 int gap, i, j;
2998 char *tmp;
2999
3000 for (gap=n/2; gap>0; gap/=2)
3001 for (i=gap; i<n; i++)
3002 for (j=i-gap; j>=0; j -= gap) {
3003 if ((*comp)(v[j],v[j+gap]) <=0)
3004 break;
3005 tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
3006 }
3007 }
3008
3009 #ifdef FAR_PTR
3010 fksort(v,n,comp)
3011 void huge * huge *v;
3012 int n, (*comp)();
3013 {
3014 int gap, i, j;
3015 void huge *tmp;
3016
3017 for (gap=n/2; gap>0; gap/=2)
3018 for (i=gap; i<n; i++)
3019 for (j=i-gap; j>=0; j -= gap) {
3020 if ((*comp)(v[j],v[j+gap]) <=0)
3021 break;
3022 tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp;
3023 }
3024 }
3025
3026 #endif
3027
3028 getlnames(tname) /* read in the library names */
3029 char *tname;
3030 {
3031 int i;
3032 char *bp;
3033 char lline[120];
3034 FILE *tptr;
3035
3036 if (*tname != '@') {addfile(tname,"\0"); return 1;}
3037 else tname++;
3038
3039 if ((bp=strchr(tname,' '))!=NULL) {
3040 *bp='\0';
3041 #ifndef LFASTA
3042 sscanf(bp+1,"%d",&deftype);
3043 if (deftype<0 || deftype>LASTLIB) {
3044 fprintf(stderr," default type error %d\n",deftype);
3045 deftype=0;
3046 }
3047 #endif
3048 }
3049
3050 if ((tptr=fopen(tname,"r"))==NULL) {
3051 fprintf(stderr," could not open file of names: %s\n",tname);
3052 return 0;
3053 }
3054
3055 while (fgets(lline,sizeof(lline),tptr)!=NULL) {
3056 if (lline[0]==';') continue;
3057 if ((bp=strchr(lline,'\n'))!=NULL) *bp='\0';
3058 if (lline[0]=='<') {
3059 if (ldname[0]!='\0' && strcmp(ldname,lline+1)!=0)
3060 fprintf(stderr,
3061 " changing default directory name from %s to %s\n",
3062 ldname,lline+1);
3063 strncpy(ldname,&lline[1],sizeof(ldname));
3064 ldname[sizeof(ldname)-1]='\0';
3065 libenv=ldname;
3066 }
3067 else addfile(lline,libenv);
3068 }
3069 fclose(tptr);
3070 return 1;
3071 }
3072
3073 /* modified Dec 13, 1989 requires different FASTLIBS */
3074
3075 #define MAXCHFIL 80
3076 #define MAXCH 40
3077
3078 libchoice(lname,nl,aaenv)
3079 char *lname, *aaenv;
3080 int nl;
3081 {
3082 FILE *fch;
3083 char line[120], *bp;
3084 char *chstr[MAXCH],*chfile[MAXCH];
3085 char *chtmp, *charr;
3086 int i,j,k,chlen;
3087
3088 charr = NULL;
3089 if (strlen(flstr)>0) {
3090 chlen = MAXCH*MAXCHFIL;
3091 if ((chtmp=charr=calloc((size_t)chlen,sizeof(char)))==NULL) {
3092 fprintf(stderr,"cannot allocate choice file array\n");
3093 goto l1;
3094 }
3095 chlen--;
3096 if ((fch=fopen(flstr,"r"))==NULL) {
3097 fprintf(stderr," cannot open choice file: %s\n",flstr);
3098 goto l1;
3099 }
3100 fprintf(stderr,"\n Choose sequence library:\n\n");
3101
3102 for (i=j=0; j<MAXCH; i++) {
3103 if (fgets(line,sizeof(line),fch)==NULL) break;
3104 if (line[0]==';') continue;
3105 if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
3106 if ((bp=strchr(line,'$'))==NULL) continue;
3107 *bp++='\0';
3108 if ((*bp++ -'0')!=ldnaseq) continue;
3109 if ((k=strlen(line))>chlen) break;
3110 strncpy(chstr[j]=chtmp,line,chlen);
3111 chtmp += k+1; chlen -= k+1;
3112 if ((k=strlen(bp))>chlen) break;
3113 strncpy(chfile[j]=chtmp,bp,chlen);
3114 chtmp += k+1; chlen -= k+1;
3115 fprintf(stderr," %c: %s\n",*chfile[j++],line);
3116 }
3117 l2: fprintf(stderr,"\n Enter library filename (e.g. %s), letter (e.g. P)\n",
3118 (ldnaseq==0)? "prot.lib" : "dna.lib");
3119 fprintf(stderr," or a %% followed by a list of letters (e.g. %%PN): ");
3120 fflush(stderr);
3121 if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
3122 if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
3123 if (strlen(line)==0) goto l2;
3124 strncpy(lname,line,nl);
3125 }
3126 else {
3127 l1: fprintf(stderr," library file name: [%s]",aaenv);
3128 fflush(stderr);
3129 if (fgets(line,sizeof(line),stdin)==NULL) exit(0);
3130 if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
3131 if (strlen(line)>0) strncpy(lname,line,nl);
3132 else strncpy(lname,aaenv,nl);
3133 }
3134 if (charr!=NULL) {
3135 fclose(fch);
3136 free(charr);
3137 }
3138 }
3139
3140 libselect(lname)
3141 char *lname;
3142 {
3143 char line[120], *bp, *ulindex();
3144 FILE *fch;
3145 int i;
3146
3147 if (strlen(lname)>1 && *lname != '%') getlnames(lname);
3148 else {
3149 if (*lname=='%')
3150 if (*flstr=='\0') {
3151 fprintf(stderr," FASTLIBS undefined, cannot use %s\n",lname);
3152 exit(1);
3153 }
3154 else lname++;
3155 if (strlen(flstr)>0) {
3156 if ((fch=fopen(flstr,"r"))==NULL) {
3157 fprintf(stderr," cannot open choice file: %s\n",flstr);
3158 return;
3159 }
3160 }
3161 else {
3162 fprintf(stderr," FASTLIBS undefined\n");
3163 addfile(lname,"\0");
3164 return;
3165 }
3166
3167 while (fgets(line,sizeof(line),fch)!=NULL) {
3168 if (line[0]==';') continue;
3169 if ((bp=strchr(line,'\n'))!=NULL) *bp='\0';
3170 if ((bp=strchr(line,'$'))==NULL) continue;
3171 *bp++='\0';
3172 if ((*bp++ -'0')!=ldnaseq) continue;
3173 if (ulindex(lname,*bp)!=NULL) {
3174 strncpy(ltitle,line,sizeof(ltitle));
3175 getlnames(bp+1);
3176 }
3177 }
3178 fclose(fch);
3179 }
3180 }
3181
3182 char *lbptr;
3183 int nnsize;
3184
3185 addfile(fname,env)
3186 char *fname, *env;
3187 {
3188 char tname[120];
3189 int len, lenv, i;
3190
3191 /* allocate some space for file names */
3192 if (lbnarr==NULL) {
3193 if ((lbnarr=calloc((size_t)MAXLF*MAXLN,sizeof(char)))==NULL) {
3194 fprintf(stderr," could not allocate name table\n");
3195 exit(1);
3196 }
3197
3198 nln = 0;
3199 nnsize = MAXLF*MAXLN;
3200 lbptr = lbnarr;
3201 }
3202
3203 if (env!=NULL) lenv = strlen(env)+1;
3204 else lenv = 0;
3205 len=strlen(fname)+1+lenv;
3206 if (nnsize > sizeof(tname)) {
3207 if (lenv > 1 && *fname != '#') {
3208 strncpy(tname,env,sizeof(tname));
3209 #ifdef UNIX
3210 strcat(tname,"/");
3211 #endif
3212 }
3213 else tname[0]='\0';
3214 strncat(tname,fname,sizeof(tname)-strlen(tname)-1);
3215 len=strlen(tname)+1;
3216 strncpy(lbptr,tname,nnsize);
3217 }
3218 else fprintf(stderr,"no more space for filenames: %s ignored\n",fname);
3219 if (nln< MAXLF) lbnames[nln++]=lbptr;
3220 else fprintf(stderr," no more file name slots: %s ignored\n",lbptr);
3221 lbptr += len;
3222 nnsize -= len;
3223 }
3224
3225 char *ulindex(str,chr)
3226 char *str, chr;
3227 {
3228 char c;
3229
3230 c = tolower(chr);
3231
3232 while (*str != '\0' && tolower(*str) !=c ) str++;
3233 if (*str=='\0') return NULL;
3234 else return str;
3235 }
3236
3237