1 /* May, June 1987 - modified for rapid read of database
2
3 June 2, 1987 - added TFASTA
4 March 30, 1988 - combined ffgetaa, fgetgb;
5 April 8, 1988 - added PIRLIB format for unix
6 Feb 4, 1989 - added universal subroutines for libraries
7 December, 1995 - added range option file.name:1-1000
8
9 copyright (c) 1987,1988,1989,1992,1995 William R. Pearson
10
11 getnt.c associated subroutines for matching sequences */
12
13 /*
14 8-April-88
15 The compile time #define PIRLIB allows this routine to be used
16 to read protein and DNA sequence libraries in the NBRF/PIR
17 VAX/VMS library format. That is:
18
19 >P1;LCBO
20 This is a line of description
21 GTYH ... the sequence starts on this line
22
23 This may ease conversion from UWGCG format libraries. It
24 has not been extensively tested.
25
26 In addition, sequence libraries with a '>' in the 4th position
27 are recognized as NBRF format libraries for consistency with
28 UWGCG
29
30 February 4, 1988 - this starts a major revision of the getaa
31 routines. The goal is to be able to search the following format
32 libraries:
33
34 0 - normal FASTA format
35 1 - full Genbank tape format
36 2 - NBRF/PIR CODATA format
37 3 - EMBL/Swiss-prot format
38 4 - Intelligenetics format
39 5 - NBRF/PIR VMS format
40 6 - GCG 2bit format
41
42 11 - NCBI setdb/blastp (1.3.2) AA
43
44 see file altlib.h to confirm numbers
45
46 This is done with a new global variable and a requirement for the
47 FASTLIBS file. The FASTLIBS file will now indicate both the sequence
48 type (protein = 0, DNA = 1) and the file format (the numbers shown
49 above, although intelligenetics may become an alternative to Pearson).
50 This will be done by always using a function pointer for getlib and
51 ranlib(), and setting up a bunch of different getlib() and ranlib()
52 functions. Openlib() will be substantially simplified.
53 */
54
55 /* Nov 12, 1987 - this version checks to see if the sequence
56 is DNA or protein by asking whether > 85% is A, C, G, T
57
58 May 5, 1988 - modify the DNA/PROTEIN checker by re-reading
59 DNA sequences in order to check for 'U'.
60 */
61
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65
66 #include "uascii.gbl"
67
68 #ifdef VMS
69 #define PIRLIB
70 #endif
71
72 #define XTERNAL
73 #include "upam.gbl"
74 #undef XTERNAL
75
/* Boolean constants and the size of the shared library line buffer. */
76 #define YES 1
77 #define NO 0
78 #define MAXLINE 512
79
/* SFCHAR separates the superfamily number from the rest of a header line;
   it may be overridden at compile time. */
80 #ifndef SFCHAR
81 #define SFCHAR ':'
82 #endif
83
/* NOTE: min() evaluates each argument twice - do not pass expressions
   with side effects. */
84 #define min(a,b) (((a) < (b)) ? (a) : (b))
85
86 #ifdef __MWERKS__
87
88 /* BIGBUFF allows the Mac to use setvbuf to set up a new buffer for reading
89 the library, that is 32K instead of 0.5k. This appears to increase the speed
90 of the program by about 33%. */
91
/* bigbuf is allocated by openlib(); fbufsize is its size in Kbytes
   (openlib() allocates fbufsize*1024 bytes). */
92 char *bigbuf=NULL;
93 long fbufsize=128L;
94
95 #define BIGBUF
96 extern char prompt[];
97 #include <StandardFile.h>
98 extern StandardFileReply freply;
99 /* extern int anvRef; */
100 extern FSpec q1Spec;
101 #endif
102
/* sq0off is set by getseq()/getntseq() from the start of a file:start-stop
   range. */
103 extern long sq0off;
104
/* use_stdin: 0 = reading files; set to 1 by getseq() when the query name is
   "-" or "@"; incremented by gettitle() after the first title is returned. */
105 static int use_stdin=0;
/* llibstr0: first '>' header line seen by getseq() while no header was
   pending; llibstr1: header used when getseq() continues reading stdin;
   o_line: header line read ahead by getseq() that ended the previous
   sequence. */
106 static char llibstr0[256];
107 static char llibstr1[256];
108 static char o_line[256];
109
110 void closelib();
111 void revcomp(char *, int);
112 int scanseq(char *seq, int n, char *str);
113
114 int
getseq(char * filen,char * seq,int maxs,int * dnaseq)115 getseq(char *filen, char *seq, int maxs, int *dnaseq) {
116 FILE *fptr;
117 char line[512],*bp;
118 int i, j, n;
119 int ic;
120 int sstart, sstop, sset;
121 int have_desc = 0;
122
123 sset=0;
124 sstart = sstop = -1;
125
126 #ifndef MSDOS
127 if ((bp=strchr(filen,':'))!=NULL) {
128 #else
129 if ((bp=strchr(filen+3,':'))!=NULL) {
130 #endif
131 *bp='\0';
132 if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
133 else sscanf(bp+1,"%d-%d",&sstart,&sstop);
134 sset=1;
135 }
136
137
138 if (!use_stdin) {
139 if (strcmp(filen,"-")==0 || strcmp(filen,"@")==0) {
140 fptr = stdin;
141 use_stdin = 1;
142 }
143 else if ((fptr=fopen(filen,"r"))==NULL) {
144 fprintf(stderr," could not open %s\n",filen);
145 return 0;
146 }
147 }
148 else {
149 fptr = stdin;
150 if (o_line[0]=='>') {
151 have_desc = 1;
152 strncpy(llibstr1,o_line,sizeof(llibstr1));
153 }
154 else while (fgets(line,sizeof(line),stdin)!=NULL) {
155 if (line[0]=='>' || line[0]==';') {
156 strncpy(llibstr1,line,sizeof(llibstr1));
157 have_desc = 1;
158 break;
159 }
160 }
161 }
162
163 if (sset==1) {
164 filen[strlen(filen)]=':';
165 if (sq0off==1 || sstart>1) sq0off = sstart;
166 }
167
168 n=0;
169 while(fgets(line,sizeof(line),fptr)!=NULL) {
170 if (line[0]!='>'&& line[0]!=';') {
171 for (i=0; (n<maxs)&&
172 ((ic=sascii[line[i]&AAMASK])<EL); i++)
173 if (ic<NA) seq[n++]= ic;
174 if (ic == ES) break;
175 }
176 else {
177 if (have_desc) {
178 strncpy(o_line,line,sizeof(o_line));
179 break;
180 }
181 else if (line[0]=='>') {
182 strncpy(llibstr0,line,sizeof(llibstr0));
183 have_desc=1;
184 }
185 }
186 }
187
188 if (n==maxs) {
189 fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
190 fflush(stderr);
191 }
192 seq[n]= EOSEQ;
193
194 if (!use_stdin
195 && *dnaseq ==0 && (float)scanseq(seq,n,"ACGT")/(float)n > 0.85) {
196 *dnaseq = 1;
197 /* convert from protein to DNA sequence */
198 sascii = nascii;
199 fseek(fptr,0l,0);
200 n=0;
201 while(fgets(line,sizeof(line),fptr)!=NULL) {
202 if (line[0]!='>'&& line[0]!=';') {
203 for (i=0; (n<maxs)&&
204 ((ic=sascii[line[i]&AAMASK])<EL); i++)
205 if (ic<NA) seq[n++]= ic;
206 if (ic == ES) break;
207 }
208 }
209 if (n==maxs) {
210 fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
211 fflush(stderr);
212 }
213 seq[n]= EOSEQ;
214 sq = nt;
215 nsq = nnt;
216 hsq = hnt;
217 pam = npam;
218 strcpy(qsqnam,"nt");
219 strcpy(sqnam,"nt");
220 strcpy(sqtype,"DNA");
221 }
222
223 if (!use_stdin) fclose(fptr);
224
225 if ((sstart != -1) || (sstop != -1)) {
226 if (sstart < 1 || sstart > n) sstart = 1;
227 if (sstop < 1 || sstop > n ) sstop = n;
228 sstart--;
229 sstop--;
230 for (i=0, j=sstart; j<=sstop; i++,j++)
231 seq[i] = seq[j];
232 n = sstop - sstart +1;
233 seq[n]=EOSEQ;
234 }
235
236 return n;
237 }
238
239 int
240 getntseq(char *filen,char *seq, int maxs, int *dnaseq)
241 {
242 FILE *fptr;
243 char line[512],*bp;
244 int i, j, n;
245 int ic;
246 int sstart, sstop, sset;
247
248 sset=0;
249 sstart = sstop = -1;
250 #ifndef MSDOS
251 if ((bp=strchr(filen,':'))!=NULL) {
252 #else
253 if ((bp=strchr(filen+3,':'))!=NULL) {
254 #endif
255 *bp='\0';
256 if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
257 else sscanf(bp+1,"%d-%d",&sstart,&sstop);
258 sset=1;
259 }
260
261 if (strcmp(filen,"@")!= 0) {
262 if ((fptr=fopen(filen,"r"))==NULL) {
263 fprintf(stderr," could not open %s\n",filen);
264 return 0;
265 }
266 }
267 else fptr = stdin;
268
269 if (sset==1) {
270 filen[strlen(filen)]=':';
271 if (sq0off==1 || sstart>1) sq0off = sstart;
272 }
273
274 n=0;
275 while(fgets(line,sizeof(line),fptr)!=NULL) {
276 #ifdef PIRLIB
277 if (line[0]=='>'&& (line[3]==';'||line[3]=='>'))
278 fgets(line,sizeof(line),fptr);
279 else
280 #endif
281 if (line[0]!='>'&& line[0]!=';') {
282 for (i=0; (n<maxs)&&
283 ((ic=nascii[line[i]&AAMASK])<EL); i++)
284 if (ic<NA) seq[n++]= ic;
285 if (ic == ES) break;
286 }
287 }
288 if (n==maxs) {
289 fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
290 fflush(stderr);
291 }
292 seq[n]= EOSEQ;
293
294 fclose(fptr);
295
296 if ((sstart != -1) || (sstop != -1)) {
297 if (sstart <= 0) sstart = 1;
298 if (sstop <= 0) sstop = n;
299 sstart--;
300 sstop--;
301 for (i=0, j=sstart; j<=sstop; i++,j++)
302 seq[i] = seq[j];
303 n = sstop - sstart +1;
304 seq[n]=EOSEQ;
305 }
306
307 *dnaseq = 1;
308 strcpy(qsqnam,"nt");
309 strcpy(sqnam,"nt");
310 return n;
311 }
312
313 int
314 gettitle(char *filen, char * title, int len)
315 {
316 FILE *fptr;
317 char line[512];
318 char *bp;
319 int ll,sset;
320 #ifdef MSDOS
321 char *strpbrk();
322 #endif
323
324 sset=0;
325 #ifndef MSDOS
326 if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
327 #else
328 if ((bp=strchr(filen+3,':'))!=NULL) { *bp='\0'; sset=1;}
329 #endif
330
331 if (use_stdin) {
332 if (use_stdin == 1) {
333 use_stdin++;
334 if (llibstr0[0]!='>')
335 strncpy(title,llibstr0,len);
336 else strncpy(title,&llibstr0[1],len);
337 }
338 else
339 if (llibstr1[0]!='>')
340 strncpy(title,llibstr1,len);
341 else strncpy(title,&llibstr1[1],len);
342 return strlen(title);
343 }
344
345 if ((fptr=fopen(filen,"r"))==NULL) {
346 fprintf(stderr," file %s was not found\n",filen);
347 fflush(stderr);
348 return 0;
349 }
350
351 if (sset==1) filen[strlen(filen)]=':';
352
353 while(fgets(line,sizeof(line),fptr)!=0) {
354 if (line[0]=='>'|| line[0]==';') goto found;
355 }
356 fclose(fptr);
357 title[0]='\0';
358 return 0;
359
360 found:
361 #ifdef PIRLIB
362 if (line[0]=='>'&&(line[3]==';'||line[3]=='>')) {
363 if ((bp = strchr(line,'\n'))!=NULL) *bp='\0';
364 ll=strlen(line); line[ll++]=' '; line[ll]='\0';
365 fgets(&line[ll],sizeof(line)-ll,fptr);
366 }
367 #endif
368 #ifdef MSDOS
369 bp = strpbrk(line,"\n\r");
370 #else
371 bp = strchr(line,'\n');
372 #endif
373 if (bp!=NULL) *bp = 0;
374 if (line[0]=='>') strncpy(title,line+1,len);
375 else strncpy(title,line,len);
376 title[len-1]='\0';
377 fclose(fptr);
378 return strlen(title);
379 }
380
/* libf: handle of the currently open library (set by openlib(), cleared
   by closelib()). */
381 #ifndef VMS
382 FILE *libf=NULL;
383 #else
384 int libf = -1;
385 #endif
/* leof (NOLIB builds only): set by getlib() when a read ends before the
   buffer is full; cleared by openlib() and ranlib(). */
386 #ifdef NOLIB
387 int leof = 0;
388 #endif
389
/* lpos: file offset recorded with ftell() before a line is read into lline;
   lline: the shared one-line look-ahead buffer used by all readers. */
390 long lpos;
391 char lline[MAXLINE];
392 int lfflag=0; /* flag for CRLF in EMBL CDROM files */
393 #define LFCHAR '\015' /* for MWC 5.5 */
394
395
/* Without NOLIB, getlib/ranlib are function pointers that openlib() selects
   from getliba[]/ranliba[] by library type, and the FASTA-format readers in
   this file are compiled as agetlib()/aranlib().  With NOLIB they are
   compiled directly as getlib()/ranlib(). */
396 #ifndef NOLIB
397 #include "altlib.h"
398 int (*getlib)();
399 void (*ranlib)();
400 extern int ldnaseq;
401 #define GETLIB agetlib
402 #define RANLIB aranlib
403 #else
404 void ranlib();
405 #define LASTLIB 10
406 #define GETLIB getlib
407 #define RANLIB ranlib
408 #endif
409
410 /* the following is from fgetgb.c */
411
412 #ifdef __MWERKS__
413 SFTypeList llist={'TEXT',0L,0L,0L};
414 #define LLN 1
415 #endif
416
/* O_RAW falls back to O_BINARY, or to 0 when neither is defined. */
417 #include <fcntl.h>
418 #ifndef O_RAW
419 #ifdef O_BINARY
420 #define O_RAW O_BINARY
421 #else
422 #define O_RAW 0
423 #endif /* O_BINARY */
424 #endif /* O_RAW */
425 int libfd= -1;
426 #ifndef NOLIB
427 extern int deftype; /* default library type */
428 extern int outtty; /* flag for no interaction */
429 #ifndef UNIX
430 #define RBSTR "rb" /* read file in binary mode */
431 #else
432 #define RBSTR "r"
433 #endif
434 #else
435 int deftype=0;
436 int outtty=1;
437 #endif
438 int libtype; /* current open library type */
439 int sfnum=0; /* superfamily number from types 0 and 5 */
440
441 /* a file name for openlib may now include a library type suffix */
442
443 int
444 openlib(char *lname, char *libenv)
445 {
446 char rline[10],libn[120], *bp;
447 long ftell();
448 int wcnt, ll, opnflg;
449
450 if (lname[0]=='#') return -9;
451 wcnt = 0;
452
453
454 if (use_stdin) {
455 libf = stdin;
456 return 1;
457 }
458
459 #ifndef NOLIB
460 if (strlen(libenv)!=0) {
461 strncpy(libn,libenv,sizeof(libn));
462 #ifdef UNIX
463 strncat(libn,"/",sizeof(libn));
464 #endif
465 strncat(libn,lname,sizeof(libn)-strlen(libn));
466 }
467 else strncpy(libn,lname,sizeof(libn));
468 #else
469 strncpy(libn,lname,120);
470 #endif
471
472 /* check for library type */
473 if ((bp=strchr(libn,' '))!=NULL) {
474 *bp='\0';
475 sscanf(bp+1,"%d",&libtype);
476 if (libtype<0 || libtype >= LASTLIB) {
477 fprintf(stderr," invalid library type: %d (>%d)- resetting\n%s\n",
478 libtype,LASTLIB,lname);
479 libtype=deftype;
480 }
481 }
482 else libtype=deftype;
483
484 #ifdef __MWERKS__
485 HSetVol(NULL,q1Spec.vRefNum,q1Spec.parID);
486 if (bigbuf==NULL) {
487 if ((bp=getenv("BUFSIZE"))!=NULL) sscanf(bp,"%ld",&fbufsize);
488 else fbufsize=128l;
489 if ((bigbuf=malloc((long)(fbufsize*1024l)))==NULL)
490 fprintf(stderr," cannot allocate %ld K buffer\n",fbufsize);
491 }
492 #endif
493
494 #ifndef NOLIB
495 getlib=getliba[libtype];
496 ranlib=ranliba[libtype];
497
498 if (libtype != INTELLIG)
499 sascii['0'] = sascii['1'] = sascii['2'] = NA;
500
501 l1: if (libtype<LASTTXT) opnflg=((libf=fopen(libn,"r"))!=NULL);
502 #ifndef MSDOS
503 else if (libtype==LASTTXT) opnflg=((libf=fopen(libn,"r"))!=NULL);
504 #else
505 else if (libtype==LASTTXT) opnflg=((libf=fopen(libn,"rb"))!=NULL);
506 #endif
507 #ifdef NCBIBL13
508 else if (libtype==NCBIBL13) opnflg=(ncbl_openlib(libn)!= -1);
509 #endif
510
511 if (!opnflg) {
512 #else
513 l1: if ((libf=fopen(libn,"r"))==NULL) {
514 #endif
515
516 #ifdef __MWERKS__
517 rline[0]='\0';
518 sprintf(prompt," cannot open %s\r Select library filename",libn);
519 STFileDlog(prompt,&freply,llist,LLN);
520 if (freply.sfGood==TRUE) {
521 strcpy(libenv,"\0");
522 PtoCstr((StringPtr)freply.sfFile.name);
523 strcpy(libn,(char *)freply.sfFile.name);
524 strcpy(lname,libn);
525 /* anvRef=freply.vRefNum;
526 SetVol(NULL,anvRef);
527 */
528 q1Spec.vRefNum = freply.sfFile.vRefNum;
529 q1Spec.parID = freply.sfFile.parID;
530 HSetVol(NULL,q1Spec.vRefNum,q1Spec.parID);
531
532 goto l1;
533 }
534 else return -1;
535 }
536 #else
537 if (outtty) {
538 fprintf(stderr," cannot open %s library\n",libn);
539 fprintf(stderr," enter new file name or <RET> to quit ");
540 fflush(stderr);
541 if (fgets(libn,120,stdin)==NULL) return -1;
542 if ((bp=strchr(libn,'\n'))!=0) *bp='\0';
543 if (strlen(libn)==0) return 0;
544 if (++wcnt > 10) return -1;
545 strcpy(lname,libn);
546 goto l1;
547 }
548 else return -1;
549 }
550 #endif /* __MWERKS__ */
551
552 #ifndef NOLIB
553 #ifdef BIGBUF
554 if (bigbuf!=NULL)
555 setvbuf(libf,bigbuf,_IOFBF,(size_t)(fbufsize*1024l));
556 #endif
557 if (libtype<=LASTTXT) {
558 lpos = ftell(libf);
559 if (fgets(lline,MAXLINE,libf)==NULL) return -1;
560 #ifdef __MWERKS__
561 if (libtype==EMBLSWISS || libtype==VMSPIR ||libtype==FULLGB) {
562 fgets(lline,MAXLINE,libf);
563 lfflag = (lline[0]==LFCHAR);
564 fseek(libf,0,0l);
565 lpos = 0;
566 fgets(lline,sizeof(lline)-1,libf);
567 if (lfflag) {
568 getc(libf);
569 /* fprintf(stderr," lfflag is set\n"); */
570 }
571 }
572 #endif
573 }
574 #else /* NOLIB */
575 lpos = ftell(libf);
576 if (fgets(lline,MAXLINE,libf)==NULL) return -1;
577 leof = 0;
578 #endif /* NOLIB */
579 return 1;
580 }
581
582 void
583 closelib()
584 {
585 if (libf!=NULL) {
586 fclose(libf);
587 libf = NULL;
588 }
589 #ifndef NOLIB
590 #ifdef NCBIBL13
591 if (libtype == NCBIBL13) ncbl_closelib();
592 #endif
593 #endif
594 }
595
/*
 * GETLIB (agetlib, or getlib with NOLIB) - read the next FASTA-format
 * library sequence, or the next piece of a long one, from libf.
 *
 * seq     buffer that receives the encoded residues, terminated by EOSEQ.
 * maxs    size of seq; reading stops 9 entries short of it.
 * libstr  receives the entry name (at most 12 characters) when a new
 *         entry is started.
 * libpos  receives lpos, the saved file position for the entry, when a
 *         new entry is started.
 * lcont   in: 0 to start a new entry, non-zero to continue the current
 *         one; out: incremented if the buffer filled before the entry
 *         ended, otherwise reset to 0.
 *
 * Returns 0 at end of library, otherwise the number of residues stored
 * (1 is returned when no residue was stored).
 *
 * Lines are read with fgets() directly into seq and translated in place
 * through ap (nascii for TFASTA/TFASTX, otherwise sascii).
 */
596 int
597 GETLIB(unsigned char *seq, int maxs,
598 char *libstr, long *libpos,int *lcont)
599 {
600 long ftell();
601 int ll;
602 int ic;
603 register unsigned char *cp, *seqp;
604 register int *ap;
605 unsigned char *seqm, *seqm1;
606 char *linep, *bp;
607
608 seqp = seq;
/* leave slack at the end of seq: the unrolled loop below stores up to 9
   entries between checks of seqp against seqm1 */
609 seqm = &seq[maxs-9];
610 seqm1 = seqm-1;
611 #if defined(TFASTA) || defined(TFASTX)
612 ap = nascii;
613 #else
614 ap = sascii;
615 #endif
616 if (*lcont==0) {
617 #ifndef NOLIB
/* advance to the next header line; lline may already hold it */
618 while (lline[0]!='>' && lline[0]!=';') {
619 lpos = ftell(libf);
620 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
621 }
622 #ifdef SUPERFAMNUM
623 if ((bp=strchr(lline,SFCHAR))!=NULL) {
624 *bp='\0';
625 sscanf(bp+1,"%d",&sfnum);
626 }
627 else sfnum=0;
628 #else
629 sfnum = 0;
630 #endif
631 if (use_stdin) {
632 strncpy(libstr,o_line+1,20);
633 }
634 else {
635 strncpy(libstr,lline+1,20);
636 }
/* NOTE(review): this writes libstr[20], so libstr must hold at least
   21 bytes - confirm against callers */
637 libstr[20]='\0';
638 if ((bp=strchr(libstr,' '))!=NULL) *bp = '\0';
639 if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
640
641 libstr[12]='\0';
642 *libpos = lpos;
643 #else /* NOLIB */
644 if (leof) return 0;
645 *libpos = lpos;
646 if (lline[0]=='>' || lline[0]==';') {
647 strncpy(libstr,lline+1,20);
648 if ((bp=strchr(libstr,' '))!=NULL) *bp = '\0';
649 if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
650 libstr[12]='\0';
651 }
652 else {
/* no header: the line already in lline is sequence data */
653 libstr[0]='\0';
654 strncpy((char *)seqp,lline,(size_t)(seqm-seqp));
655 for (cp=seqp; seqp<seqm1; ) {
656 if ((*seqp++=ap[*cp++])<NA) continue;
657 if (*--seqp>NA) break;
658 }
659 if (*seqp==ES) goto done;
660 }
661 #endif
662 }
663
664 lline[0]='\0';
665 while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),libf)!=NULL) {
/* a '>' line starts the next entry */
666 if (*seqp=='>') goto new;
/* ';' lines are skipped; one that did not fit in the remaining buffer
   is finished at cont: */
667 if (*seqp==';') {
668 if (strchr((char *)seqp,'\n')==NULL) goto cont;
669 continue;
670 }
/* translate in place: codes below NA are kept, a code equal to NA is
   overwritten by the next character, a code above NA ends the line */
671 for (cp=seqp; seqp<seqm1; ) {
672 if ((*seqp++=ap[*cp++])<NA &&
673 (*seqp++=ap[*cp++])<NA &&
674 (*seqp++=ap[*cp++])<NA &&
675 (*seqp++=ap[*cp++])<NA &&
676 (*seqp++=ap[*cp++])<NA &&
677 (*seqp++=ap[*cp++])<NA &&
678 (*seqp++=ap[*cp++])<NA &&
679 (*seqp++=ap[*cp++])<NA &&
680 (*seqp++=ap[*cp++])<NA) continue;
681 if (*(--seqp)>NA) break;
682 }
683 if (*seqp==ES) goto done;
684 lpos = ftell(libf);
685 }
686 goto done;
/* save the header of the next entry in lline, reading the rest of the
   line if only part of it fitted into seq */
687 new: strncpy(lline,(char *)seqp,MAXLINE);
688 lline[MAXLINE-1]='\0';
689 if (strchr((char *)seqp,'\n')==NULL)
690 fgets(&lline[strlen(lline)],sizeof(lline)-strlen(lline),libf);
691 goto done;
692
693 cont:
694 fgets(lline,sizeof(lline),libf);
/* makes the seqp>=seqm1 test at done: true, so *lcont is incremented */
695 seqm1 = seqp;
696
697 done: if (seqp>=seqm1) {
698 (*lcont)++;
699 }
700 else {
701 #ifdef NOLIB
702 leof = 1;
703 #endif
704 *lcont=0;
705 }
706
707
708 *seqp = EOSEQ;
709 if ((int)(seqp-seq)==0) return 1;
710 return (int)(seqp-seq);
711 }
712
713 void
714 RANLIB(char *str, int cnt, long seek)
715 {
716 char *bp;
717 int ll;
718
719 if (use_stdin) {
720 strncpy(str,o_line,cnt);
721 return;
722 }
723
724 fseek(libf, seek, 0);
725 fgets(lline,sizeof(lline),libf);
726
727 if (lline[0]=='>' || lline[0]==';') {
728 strncpy(str,lline+1,cnt);
729 str[cnt-1]='\0';
730 #ifdef SUPERFAMNUM
731 if ((bp = strchr(str,SFCHAR))!=NULL) *bp='\0';
732 else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
733 else str[cnt-1]='\0';
734 #else
735 if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
736 else str[cnt-1]='\0';
737 #endif
738 }
739 else {
740 str[0]='\0';
741 }
742 #ifdef NOLIB
743 leof=0;
744 #endif
745 }
746
747 #ifndef NOLIB
748 unsigned char *cpsave;
749
750 lgetlib(unsigned char *seq, int maxs,
751 char *libstr, long *libpos, int *lcont)
752 {
753 long ftell();
754 int i, n, ll;
755 int ic;
756 register unsigned char *cp, *seqp;
757 register int *ap;
758 unsigned char *seqm, *seqm1;
759 char *linep;
760
761 seqp = seq;
762 seqm = &seq[maxs-11];
763 seqm1 = seqm-1;
764 #if defined(TFASTA) || defined(TFASTX)
765 ap = nascii;
766 #else
767 ap = sascii;
768 #endif
769 if (*lcont==0) {
770 while (lline[0]!='L' || lline[1]!='O' ||
771 strncmp(lline,"LOCUS",5)) { /* find LOCUS */
772 lpos = ftell(libf);
773 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
774 if (lfflag) getc(libf);
775 }
776 strncpy(libstr,&lline[12],13);
777 libstr[12]='\0';
778 *libpos=lpos;
779 while (lline[0]!='O' || lline[1]!='R' ||
780 strncmp(lline,"ORIGIN",6)) { /* find ORIGIN */
781 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
782 if (lfflag) getc(libf);
783 }
784 }
785 else {
786 for (cp= cpsave; seqp<seqm1; ) {
787 if ((*seqp++=ap[*cp++])<NA) continue;
788 if (*(--seqp)>NA) break;
789 }
790 }
791
792 lline[0]='\0';
793 while (seqp<seqm1 && fgets(lline,sizeof(lline),libf)!=NULL) {
794 if (lfflag) getc(libf);
795 if (lline[0]=='/') goto new;
796 for (cp= (unsigned char *)&lline[10]; seqp<seqm1; ) {
797 if ((*seqp++=ap[*cp++])<NA &&
798 (*seqp++=ap[*cp++])<NA &&
799 (*seqp++=ap[*cp++])<NA &&
800 (*seqp++=ap[*cp++])<NA &&
801 (*seqp++=ap[*cp++])<NA &&
802 (*seqp++=ap[*cp++])<NA &&
803 (*seqp++=ap[*cp++])<NA &&
804 (*seqp++=ap[*cp++])<NA &&
805 (*seqp++=ap[*cp++])<NA &&
806 (*seqp++=ap[*cp++])<NA &&
807 (*seqp++=ap[*cp++])<NA) continue;
808 if (*(--seqp)>NA) break;
809 }
810 }
811 goto done;
812 new:
813 lpos = ftell(libf);
814 fgets(lline,sizeof(lline),libf);
815 if (lfflag) getc(libf);
816
817 done:
818 if (seqp>=seqm1) {
819 cpsave = cp;
820 (*lcont)++;
821 }
822 else *lcont=0;
823
824 *seqp = EOSEQ;
825 if ((int)(seqp-seq)==0) return 1;
826 return (int)(seqp-seq);
827 }
828
829 void
830 lranlib(char *str, int cnt, long seek)
831 {
832 char *bp;
833 int ll;
834
835 fseek(libf, seek, 0);
836 fgets(lline,sizeof(lline),libf);
837 if (lfflag) getc(libf);
838
839 strncpy(str,&lline[12],12);
840 str[12]='\0';
841 fgets(lline,sizeof(lline),libf);
842 if (lfflag) getc(libf);
843 while (lline[0]!='D' || lline[1]!='E' || strncmp(lline,"DEFINITION",10))
844 fgets(lline,sizeof(lline),libf);
845 strncpy(&str[10],&lline[11],cnt-10);
846 str[cnt-1]='\0';
847 if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
848
849 fseek(libf,seek,0);
850 fgets(lline,sizeof(lline),libf);
851 if (lfflag) getc(libf);
852 }
853
854 pgetlib(unsigned char *seq, int maxs,
855 char *libstr, long *libpos, int *lcont)
856 {
857 long ftell();
858 int i, n, ll;
859 int ic;
860 register unsigned char *cp, *seqp;
861 register int *ap;
862 unsigned char *seqm, *seqm1;
863 char *linep;
864
865 seqp = seq;
866 seqm = &seq[maxs-11];
867 seqm1 = seqm-1;
868 #if defined(TFASTA) || defined(TFASTX)
869 ap = nascii;
870 #else
871 ap = sascii;
872 #endif
873 if (*lcont==0) {
874 while (lline[0]!='E' || lline[1]!='N' || strncmp(lline,"ENTRY",5))
875 { /* find ENTRY */
876 lpos = ftell(libf);
877 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
878 }
879 strncpy(libstr,&lline[16],8);
880 libstr[8]='\0';
881 *libpos = lpos;
882 while (lline[0]!='S' || lline[2]!='Q' || strncmp(lline,"SEQUENCE",8))
883 { /* find SEQUENCE */
884 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
885 }
886 fgets(lline,sizeof(lline),libf); /* get the extra line */
887 }
888 else {
889 for (cp= cpsave; seqp<seqm1; ) {
890 if ((*seqp++=ap[*cp++])<NA) continue;
891 if (*(--seqp)>NA) break;
892 }
893 if (*seqp==ES) goto done;
894 }
895
896 lline[0]='\0';
897 while (seqp<seqm1 && fgets(lline,sizeof(lline),libf)!=NULL) {
898 if (lline[0]=='/') goto new;
899 for (cp= (unsigned char *)&lline[8]; seqp<seqm1; ) {
900 if ((*seqp++=ap[*cp++])<NA) continue;
901 if (*(--seqp)>NA) break;
902 };
903 if (*seqp==ES) goto done;
904 }
905 goto done;
906 new: lpos = ftell(libf);
907 fgets(lline,sizeof(lline),libf);
908
909 done: if (seqp>=seqm1) {
910 cpsave = cp;
911 (*lcont)++;
912 }
913 else *lcont=0;
914
915 *seqp = EOSEQ;
916 if ((int)(seqp-seq)==0) return 1;
917 return (int)(seqp-seq);
918 }
919
920 void
921 pranlib(char *str, int cnt, long seek)
922 {
923 char *bp;
924 int ll;
925
926 fseek(libf, seek, 0);
927 fgets(lline,sizeof(lline),libf);
928
929 strncpy(str,&lline[16],8);
930 str[8]='\0';
931 fgets(lline,sizeof(lline),libf);
932 while (lline[0]!='T' || lline[1]!='I' || strncmp(lline,"TITLE",5))
933 fgets(lline,sizeof(lline),libf);
934 strncpy(&str[8],&lline[16],cnt-9);
935 str[cnt-1]='\0';
936 if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
937
938 fseek(libf,seek,0);
939 fgets(lline,sizeof(lline),libf);
940 }
941
942 long seqsiz;
943
944 egetlib(unsigned char *seq, int maxs,
945 char *libstr, long *libpos, int *lcont)
946 {
947 long ftell();
948 int ll;
949 int ic;
950 register unsigned char *cp, *seqp;
951 register int *ap;
952 unsigned char *seqm, *seqm1;
953 char *linep;
954 char id[11]; /* Holds Identifier */
955
956 seqp = seq;
957 seqm = &seq[maxs-11];
958 seqm1 = seqm-1;
959 #if defined(TFASTA) || defined(TFASTX)
960 ap = nascii;
961 #else
962 ap = sascii;
963 #endif
964 if (*lcont==0) {
965 while (lline[0]!='I' || lline[1]!='D') { /* find ID */
966 lpos = ftell(libf);
967 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
968 if (lfflag) getc(libf);
969 }
970 sscanf(&lline[5],"%s",id);
971 sprintf(libstr,"%-10.10s",id);
972 *libpos = lpos;
973 while (lline[0]!='S' || lline[1]!='Q') { /* find ORIGIN */
974 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
975 if (lfflag) getc(libf);
976 }
977 sscanf(&lline[14],"%ld",&seqsiz);
978 }
979 else {
980 for (cp= cpsave; seqp<seqm1; ) {
981 if ((*seqp++=ap[*cp++])<NA) continue;
982 if (*(--seqp)>NA) break;
983 }
984 if (*seqp==ES) goto done;
985 }
986
987 lline[0]='\0';
988 while (seqp<seqm1 && fgets(lline,sizeof(lline),libf)!=NULL) {
989 if (lfflag) getc(libf);
990 if (lline[0]=='/') goto new;
991 lline[70]='\0';
992 for (cp= (unsigned char *)&lline[5]; seqp<seqm1; ) {
993 if ((*seqp++=ap[*cp++])<NA &&
994 (*seqp++=ap[*cp++])<NA &&
995 (*seqp++=ap[*cp++])<NA &&
996 (*seqp++=ap[*cp++])<NA &&
997 (*seqp++=ap[*cp++])<NA &&
998 (*seqp++=ap[*cp++])<NA &&
999 (*seqp++=ap[*cp++])<NA &&
1000 (*seqp++=ap[*cp++])<NA &&
1001 (*seqp++=ap[*cp++])<NA &&
1002 (*seqp++=ap[*cp++])<NA &&
1003 (*seqp++=ap[*cp++])<NA) continue;
1004 if (*(--seqp)>NA) break;
1005 }
1006 if (*seqp==ES) goto done;
1007 }
1008 goto done;
1009 new: lpos = ftell(libf);
1010 fgets(lline,sizeof(lline),libf);
1011 if (lfflag) getc(libf);
1012 goto done;
1013
1014 done: if (seqp>=seqm1) {
1015 cpsave = cp;
1016 (*lcont)++;
1017 seqsiz -= (long)(seqp-seq);
1018 }
1019 else *lcont=0;
1020
1021 *seqp = EOSEQ;
1022 if ((int)(seqp-seq)==0) return 1;
1023 /* if (*lcont==0 && (long)(seqp-seq)!=seqsiz)
1024 printf("%s read %d of %d\n",libstr,(int)(seqp-seq),seqsiz);
1025 */
1026 return (int)(seqp-seq);
1027 }
1028
1029 void
1030 eranlib(char *str, int cnt, long seek)
1031 {
1032 char *bp;
1033 char id[11]; /* Holds Identifier */
1034 int ll;
1035
1036 fseek(libf, seek, 0);
1037 fgets(lline,sizeof(lline),libf);
1038 if (lfflag) getc(libf);
1039
1040 sscanf(&lline[5],"%s",id);
1041 sprintf(str,"%-10.10s ",id);
1042 fgets(lline,sizeof(lline),libf);
1043 if (lfflag) getc(libf);
1044 while (lline[0]!='D' || lline[1]!='E') fgets(lline,sizeof(lline),libf);
1045 strncpy(&str[11],&lline[5],cnt-11);
1046 str[cnt-1]='\0';
1047 if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
1048 if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
1049
1050 fseek(libf,seek,0);
1051 fgets(lline,sizeof(lline),libf);
1052 if (lfflag) getc(libf);
1053 }
1054
1055 igetlib(seq,maxs,libstr,libpos,lcont)
1056 unsigned char *seq;
1057 int maxs;
1058 char *libstr;
1059 long *libpos;
1060 int *lcont;
1061 {
1062 long ftell();
1063 int i, n, ll;
1064 int ic;
1065 register unsigned char *cp, *seqp;
1066 register int *ap;
1067 unsigned char *seqm, *seqm1;
1068 char *linep, *bp;
1069
1070 seqp = seq;
1071 seqm = &seq[maxs-9];
1072 seqm1 = seqm-1;
1073 #if defined(TFASTA) || defined(TFASTX)
1074 ap = nascii;
1075 #else
1076 ap = sascii;
1077 #endif
1078 if (*lcont==0) {
1079 while (lline[0]!=';') {
1080 lpos = ftell(libf);
1081 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
1082 }
1083 *libpos = lpos;
1084 while (lline[0]==';') fgets(lline,sizeof(lline),libf);
1085 strncpy(libstr,lline+1,10);
1086 libstr[9]='\0';
1087 if((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
1088 }
1089
1090 lline[0]='\0';
1091 while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),libf)!=NULL) {
1092 if (*seqp=='>') goto new;
1093 if (*seqp==';') {
1094 if (strchr((char *)seqp,'\n')==NULL) goto cont;
1095 continue;
1096 }
1097 for (cp=seqp; seqp<seqm1; ) {
1098 if ((*seqp++=ap[*cp++])<NA &&
1099 (*seqp++=ap[*cp++])<NA &&
1100 (*seqp++=ap[*cp++])<NA &&
1101 (*seqp++=ap[*cp++])<NA &&
1102 (*seqp++=ap[*cp++])<NA &&
1103 (*seqp++=ap[*cp++])<NA &&
1104 (*seqp++=ap[*cp++])<NA &&
1105 (*seqp++=ap[*cp++])<NA &&
1106 (*seqp++=ap[*cp++])<NA) continue;
1107 if (*(--seqp)>NA) break;
1108 }
1109 if (*seqp==ES) goto done;
1110 lpos = ftell(libf);
1111 }
1112 goto done;
1113 new: strncpy(lline,(char *)seqp,MAXLINE);
1114 lline[MAXLINE-1]='\0';
1115 if (strchr((char *)seqp,'\n')==NULL)
1116 fgets(&lline[strlen(lline)],sizeof(lline)-strlen(lline),libf);
1117 goto done;
1118
1119 cont:
1120 fgets(lline,sizeof(lline),libf);
1121 seqm1 = seqp;
1122
1123 done: if (seqp>=seqm1) {
1124 (*lcont)++;
1125 }
1126 else {
1127 *lcont=0;
1128 }
1129
1130
1131 *seqp = EOSEQ;
1132 if ((int)(seqp-seq)==0) return 1;
1133 return (int)(seqp-seq);
1134 }
1135
1136 void
1137 iranlib(char *str, int cnt, long seek)
1138 {
1139 char *bp;
1140 int ll;
1141 char tline[120];
1142
1143 fseek(libf, seek, 0);
1144 fgets(lline,sizeof(lline),libf);
1145
1146 if (lline[0]=='>' || lline[0]==';') {
1147 strncpy(tline,lline+1,sizeof(tline));
1148 str[cnt-1]='\0';
1149 if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
1150 else str[cnt-1]='\0';
1151 }
1152 else {
1153 tline[0]='\0';
1154 }
1155
1156 while (lline[0]==';') fgets(lline,sizeof(lline),libf);
1157 if ((bp=strchr(lline,'\n'))!=NULL) *bp=0;
1158 if ((bp=strchr(lline,' '))!=NULL) *bp=0;
1159 strncpy(str,lline,cnt);
1160 strncat(str," ",cnt-strlen(str));
1161 strncat(str,tline,cnt-strlen(str));
1162 str[cnt-1]='\0';
1163
1164 fseek(libf,seek,0);
1165 fgets(lline,sizeof(lline),libf);
1166 }
1167
1168 vgetlib(unsigned char *seq, int maxs,
1169 char *libstr, long *libpos, int *lcont)
1170 {
1171 long ftell();
1172 int i, n, ll;
1173 int ic, ich;
1174 register unsigned char *cp, *seqp;
1175 register int *ap;
1176 unsigned char *seqm, *seqm1;
1177 char *linep, *bp;
1178
1179 seqp = seq;
1180 seqm = &seq[maxs-9];
1181 seqm1 = seqm-1;
1182 #if defined(TFASTA) || defined(TFASTX)
1183 ap = nascii;
1184 #else
1185 ap = sascii;
1186 #endif
1187 if (*lcont==0) {
1188 while (lline[0]!='>' && lline[0]!=';') {
1189 lpos = ftell(libf);
1190 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
1191 if (lfflag) getc(libf);
1192 }
1193 if ((bp=strchr(lline,SFCHAR))!=NULL) {
1194 *bp='\0';
1195 sscanf(bp+1,"%d",&sfnum);
1196 }
1197 else sfnum=0;
1198 if ((bp=strchr(lline,'\n'))!=NULL) *bp='\0';
1199 strncpy(libstr,&lline[4],20);
1200 if ((bp=strchr(libstr,' '))!=NULL) *bp = '\0';
1201 if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
1202 libstr[12]='\0';
1203
1204 fgets(lline,sizeof(lline),libf);
1205 if (lfflag) getc(libf);
1206 *libpos = lpos;
1207 }
1208
1209 lline[0]='\0';
1210 while (seqp<seqm1 && fgets((char *)seqp,(size_t)(seqm-seqp),libf)!=NULL) {
1211 if (lfflag && (ich=getc(libf))!=LFCHAR) ungetc(ich,libf);
1212 if (*seqp=='>') goto new;
1213 if (*seqp==';') {
1214 if (strchr((char *)seqp,'\n')==NULL) goto cont;
1215 continue;
1216 }
1217 for (cp=seqp; seqp<seqm1; ) {
1218 if ((*seqp++=ap[*cp++])<NA &&
1219 (*seqp++=ap[*cp++])<NA &&
1220 (*seqp++=ap[*cp++])<NA &&
1221 (*seqp++=ap[*cp++])<NA &&
1222 (*seqp++=ap[*cp++])<NA &&
1223 (*seqp++=ap[*cp++])<NA &&
1224 (*seqp++=ap[*cp++])<NA &&
1225 (*seqp++=ap[*cp++])<NA &&
1226 (*seqp++=ap[*cp++])<NA) continue;
1227 if (*(--seqp)>NA) break;
1228 }
1229 if (*seqp==ES) goto done;
1230 lpos = ftell(libf);
1231 }
1232 goto done;
1233 new: strncpy(lline,(char *)seqp,MAXLINE);
1234 lline[MAXLINE-1]='\0';
1235 if (strchr((char *)seqp,'\n')==NULL) {
1236 fgets(lline,sizeof(lline)-strlen(lline),libf);
1237 if (lfflag && (ich=getc(libf))!=LFCHAR) ungetc(ich,libf);
1238 }
1239 goto done;
1240
1241 cont:
1242 fgets(lline,sizeof(lline),libf);
1243 if (lfflag && (ich=getc(libf))!=LFCHAR) ungetc(ich,libf);
1244 seqm1 = seqp;
1245
1246 done: if (seqp>=seqm1) {
1247 (*lcont)++;
1248 }
1249 else {
1250
1251 *lcont=0;
1252 }
1253
1254
1255 *seqp = EOSEQ;
1256 if ((int)(seqp-seq)==0) return 1;
1257 return (int)(seqp-seq);
1258 }
1259
1260 void
1261 vranlib(char *str, int cnt, long seek)
1262 {
1263 char *bp;
1264 int ll;
1265
1266 fseek(libf, seek, 0);
1267 fgets(lline,sizeof(lline),libf);
1268 if (lfflag) getc(libf);
1269
1270 if (lline[0]=='>'&&(lline[3]==';'||lline[3]=='>')) {
1271 strncpy(str,&lline[4],cnt);
1272
1273 if ((bp = strchr(str,':'))!=NULL) *bp='\0';
1274 if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
1275 else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
1276 else str[cnt-1]='\0';
1277
1278 fgets(lline,sizeof(lline),libf);
1279 if (lfflag) getc(libf);
1280
1281 if ((bp=strchr(lline,'\r'))!=NULL) *bp=' ';
1282 if ((bp=strchr(lline,'\n'))!=NULL) *bp='\0';
1283 strncat(str," ",(size_t)cnt);
1284 strncat(str,lline,(size_t)cnt-strlen(str));
1285 }
1286 else {
1287 str[0]='\0';
1288 }
1289
1290 fseek(libf,seek,0);
1291 fgets(lline,sizeof(lline),libf);
1292 if (lfflag) getc(libf);
1293 }
1294
1295 static char gcg_type[10];
1296 static long gcg_len;
1297 static int gcg_bton[4]={1,3,0,2};
1298
1299 gcg_getlib(unsigned char *seq, int maxs,
1300 char *libstr,long *libpos, int *lcont)
1301 {
1302 long ftell();
1303 char dummy[20];
1304 char gcg_date[6];
1305 int i, n, ll;
1306 int ic, ich;
1307 register unsigned char *cp, *seqp, stmp;
1308 register int *ap;
1309 unsigned char *seqm, *seqm1;
1310 long r_block, b_block;
1311 char *linep, *bp;
1312
1313 seqp = seq;
1314 seqm = &seq[maxs-9];
1315 seqm1 = seqm-1;
1316 #if defined(TFASTA) || defined(TFASTX)
1317 ap = nascii;
1318 #else
1319 ap = sascii;
1320 #endif
1321 if (*lcont==0) {
1322 while (lline[0]!='>' && lline[0]!=';') {
1323 lpos = ftell(libf);
1324 if (fgets(lline,sizeof(lline),libf)==NULL) return 0;
1325 }
1326 sscanf(&lline[4],"%s %s %s %s %ld",
1327 libstr,gcg_date,gcg_type,dummy,&gcg_len);
1328 fgets(lline,sizeof(lline),libf);
1329 libstr[12]='\0';
1330 *libpos = lpos;
1331 }
1332
1333 lline[0]='\0';
1334
1335 r_block = b_block = min((size_t)(seqm-seqp),gcg_len);
1336 if (gcg_type[0]=='2') {
1337 r_block = (r_block+3)/4;
1338 }
1339
1340 fread((char *)seqp,(size_t)r_block,(size_t)1,libf);
1341 if (gcg_type[0]=='A')
1342 for (cp=seqp; seqp<seq+r_block; ) *seqp++ = ap[*cp++];
1343 else if (gcg_type[0]=='2') {
1344 seqp = seq + r_block;
1345 cp = seq + 4*r_block;
1346 while (seqp > seq) {
1347 stmp = *--seqp;
1348 *--cp = gcg_bton[stmp&3];
1349 *--cp = gcg_bton[(stmp >>= 2)&3];
1350 *--cp = gcg_bton[(stmp >>= 2)&3];
1351 *--cp = gcg_bton[(stmp >>= 2)&3];
1352 }
1353 }
1354 if (b_block == gcg_len) {
1355 fgets(lline,MAXLINE,libf);
1356 *lcont = 0;
1357 }
1358 else {
1359 if (gcg_type[0]=='2') b_block = 4*r_block;
1360 gcg_len -= b_block;
1361 (*lcont)++;
1362 }
1363
1364 seq[b_block] = EOSEQ;
1365 if (b_block==0) return 1;
1366 else return b_block;
1367 }
1368
1369 void
1370 gcg_ranlib(char *str, int cnt, long seek)
1371 {
1372 char *bp, *bp1, *bp2, *llp;
1373 int ll;
1374
1375 fseek(libf, seek, 0);
1376 fgets(lline,sizeof(lline),libf);
1377
1378 if (lline[0]=='>'&&(lline[3]==';'||lline[3]=='>')) {
1379 strncpy(str,&lline[4],cnt);
1380
1381 if ((bp = strchr(str,':'))!=NULL) *bp='\0';
1382 if ((bp = strchr(str,' '))!=NULL) *bp='\0';
1383 if ((bp=strchr(str,'\r'))!=NULL) *bp='\0';
1384 else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
1385 else str[cnt-1]='\0';
1386
1387 fgets(lline,sizeof(lline),libf);
1388
1389 for (llp=lline,bp=str; *llp == *bp; llp++, bp++);
1390 if ((int)(llp-lline)<5) llp=lline;
1391
1392 /* here we would like to skip over some species stuff */
1393 if ((bp1 = strchr(llp,';'))!=NULL && (int)(bp1-llp)<50) {
1394 if ((bp2 = strchr(bp1+1,';'))!=NULL && (int)(bp2-bp1)<50) {
1395 *(bp2+1)='\0'; bp1 = bp2+2;
1396 }
1397 else {bp1=llp;}
1398 }
1399 else if ((bp1=strchr(llp,'.'))!=NULL && *(bp1+1)==' ') {
1400 *(bp1+1) = '\0'; bp1 += 2;
1401 }
1402 else bp1 = llp;
1403
1404 if ((bp=strchr(bp1,'\r'))!=NULL) *bp='\0';
1405 if ((bp=strchr(bp1,'\n'))!=NULL) *bp='\0';
1406 strncat(str," ",(size_t)cnt);
1407 strncat(str,bp1,(size_t)cnt-strlen(str));
1408 if (bp1!=llp) strncat(str,llp,(size_t)cnt-strlen(str));
1409 }
1410 else {
1411 str[0]='\0';
1412 }
1413
1414 fseek(libf,seek,0);
1415 fgets(lline,sizeof(lline),libf);
1416 }
1417
1418 #endif /* NOLIB */
1419
1420 int
1421 scanseq(char *seq, int n, char *str)
1422 {
1423 int tot,i;
1424 char aaray[MAXSQ]; /* this must be set > nsq */
1425
1426 for (i=0; i<MAXSQ; i++) aaray[i]=0;
1427 for (i=0; i<strlen(str); i++) aaray[sascii[str[i]]]=1;
1428 for (i=tot=0; i<n; i++) tot += aaray[seq[i]];
1429 return tot;
1430 }
1431
1432 void
1433 revcomp(char *seq, int n)
1434 {
1435 char tmp;
1436 int i, ni;
1437
1438 for (i=0; i< n; i++)
1439 if (nt[seq[i]]=='A') seq[i] = nascii['T'];
1440 else if (nt[seq[i]]=='C') seq[i] = nascii['G'];
1441 else if (nt[seq[i]]=='G') seq[i] = nascii['C'];
1442 else if (nt[seq[i]]=='T') seq[i] = nascii['A'];
1443 else if (nt[seq[i]]=='R') seq[i] = nascii['Y'];
1444 else if (nt[seq[i]]=='Y') seq[i] = nascii['R'];
1445 else if (nt[seq[i]]=='M') seq[i] = nascii['K'];
1446 else if (nt[seq[i]]=='K') seq[i] = nascii['M'];
1447 else if (nt[seq[i]]=='D') seq[i] = nascii['H'];
1448 else if (nt[seq[i]]=='H') seq[i] = nascii['D'];
1449 else if (nt[seq[i]]=='V') seq[i] = nascii['B'];
1450 else if (nt[seq[i]]=='B') seq[i] = nascii['V'];
1451
1452 for (i=0, ni = n-1; i< n/2; i++,ni--) {
1453 tmp = seq[i];
1454 seq[i] = seq[ni];
1455 seq[ni] = tmp;
1456 }
1457 }
1458
1459