1 /*
2 * Copyright, 1991, The Regents of the University of California.
3 * This software was produced by the Los Alamos National Laboratory,
4 * which is operated by the University of California for the United
5 * States Department of Energy under contract W-7405-ENG-36. The
6 * U. S. Government is licensed to use, reproduce, and distribute
7 * this software. Permission is granted to the public to copy and
8 * use this software without charge, provided that this Notice and
9 * any statement of authorship are reproduced on all copies.
10 * Neither the Government nor the University makes any warranty,
11 * express or implied, or assumes any liability or responsibility
12 * for the use of this software.
13 */
14
15 #define PROGRAM "trnascan"
16 #define RELEASE "1.4 (Feb 96)"
17 #define DERIV_VERSION "1.3 (Oct 91)"
18 #define BRIEF "'Identification of tRNA genes in genomic DNA'"
19 #define CITATION "Fichant and Burks, J. Mol. Biol. (1991) 220:659-671."
20 #define MODIF "(modified & optimized for use in tRNAscan-SE package by T. Lowe 2/96)"
21
22 /* Modified by T. Lowe 11/95 */
23 /* Changes: 1) Search parameters named in #define constants
24 2) Print statements added to help trace progress
25 of search - VERBOSE constant must be defined
26 Trace statements sent to "tscan.verb.out"
27 3) Bug that caused program to crash on any non-ACGT
28 sequence characters fixed
29 4) fgetseq() modified to correctly read in fasta sequences
30 5) compstrand() modified to increase efficiency &
31 getseqsize() added to allow input sequences of any
32 length (memory allowing)
33 6) Numerous calls to strlen(sequence) eliminated for
34 efficiency
35 7) Calls to myindex() function eliminated - replaced
36 by in-line switch statements for efficiency
37 8) basepairings() function rewritten for efficiency
38 9) program ANSI-fied to allow compilation with gcc
39 10) Fixed bug: indexing out of 'ntab' array bounds
40 causing unpredictable side-effects
41
42
43 ** Modifications result in over 200-fold speed increase **
44
45 -T. Lowe 9/2000
46 added "-i" option to allow alternate start of sequence nuc numbering
47
48 */
49
50 /* #define NO_AMBIG -use this option to eliminate conservative
51 * calling of 'N's as base pairing matches
52 * in tRNAs (this gives more false positives)
53 */
54
55 static char banner[] = "trnascan: scan a sequence for tRNAs";
56
57 char usage[] = "\n\
58 Usage: trnascan [-options] <seqfile>\n\
59 where supported options are:\n\
60 -s : use original tRNAscan 1.3 parameters (default)\n\
61 -r : use relaxed search parameters (used with tRNAscan-SE)\n\
62 -a : use alternate (user-set) search parameters\n\
63 -c : suppress credits\n\
64 -o <outfile> : write results to <outfile>\n\
65 -i <integer> : start sequence numbering at <integer> (def=1)\n\
66 -h : print (this) short help message\n\n";
67
68 #include <string.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <ctype.h>
72
73 #ifdef MEMDEBUG
74 #include "dbmalloc.h"
75 #endif
76
77 #define TRUE 1
78 #define FALSE 0
79
80 #ifndef TSCANDIR
81 #define TSCANDIR "/usr/local/lib/trnascan"
82 #endif
83
84
85 #define MAXLINE 1000 /* max input seq line length */
86 #define BUF_SIZE 100 /* extra room added onto allocated sequence */
87 /* over that determined by getseqsize() */
88
89 #define MAX(x,y) (((x) > (y)) ? (x) : (y))
90
91 /* #define VERBOSE */
92
93 /* Original version 1.3 default parameters */
94 #define ST_SG_CUTOFF 5 /* general score (SG) cutoff */
95 #define ST_TPC_SIG_THRESH 0.40 /* TPC signal sequence matrix score */
96 /* cutoff */
97 #define ST_D_SIG_THRESH 0.40 /* D signal sequence matrix score cutoff */
98 #define ST_TPC_INV 2 /* Number of TPC matrix invariant bases */
99 /* allowed NOT to be invariant */
100 #define ST_TPC_INCSG 5 /* Number of base pairs required in TPC */
101 /* stem to increment general score */
102 #define ST_TPC_KEEP 4 /* Number of base pairs required in TPC */
103 /* stem to keep trna as a candidate */
104 #define ST_D_INV 1 /* Number of D matrix invariant bases */
105 /* allowed NOT to be invariant */
106 #define ST_LOOK_FOR_ACLOOP_SG 4 /* Minimum SG required to begin looking */
107 /* for anticodon loop */
108 #define ST_ACLOOP_MIN 4 /* Minimum base pairs required in */
109 /* anticodon loop */
110 #define ST_AA_INCSG 7 /* Number of base pairs in Amino acyl */
111 /* stem needed to increment SG */
112 #define ST_AA_KEEP 6 /* Number of base pairs in Amino acyl */
113 /* stem needed to keep tRNA candidate */
114
115 /* "Relaxed" parameters - used by default with */
116 /* tRNAscan-SE program. Makes tRNAscan into */
117 /* rough pre-filter for Covariance tRNA prediction */
118 /* program by S. Eddy */
119 #define RX_SG_CUTOFF 5
120 #define RX_TPC_SIG_THRESH 0.40
121 #define RX_D_SIG_THRESH 0.30
122 #define RX_TPC_INV 2
123 #define RX_TPC_INCSG 4
124 #define RX_TPC_KEEP 2
125 #define RX_D_INV 2
126 #define RX_LOOK_FOR_ACLOOP_SG 3
127 #define RX_ACLOOP_MIN 3
128 #define RX_AA_INCSG 5
129 #define RX_AA_KEEP 4
130
131 /* "Alternate" params - for experimenting */
132 /* with other param values */
133 #define ALT_SG_CUTOFF 4
134 #define ALT_TPC_SIG_THRESH 0.40
135 #define ALT_D_SIG_THRESH 0.30
136 #define ALT_TPC_INV 2
137 #define ALT_TPC_INCSG 4
138 #define ALT_TPC_KEEP 2
139 #define ALT_D_INV 2
140 #define ALT_LOOK_FOR_ACLOOP_SG 3
141 #define ALT_ACLOOP_MIN 3
142 #define ALT_AA_INCSG 6
143 #define ALT_AA_KEEP 4
144
145 #define MIN_VAR_LOOP 28 /* Minimum variable loop size, assumes min */
146 /* intron length = 8bp */
147 #define MAX_INTRON_LEN 60 /* Maximum allowable intron length */
148 #define MIN_SEQ_LEN 70 /* Minimum length of sequence that will be */
149 /* for tRNAs */
150
151 #define STRICT_PARAMS 1;
152 #define RELAXED_PARAMS 2;
153 #define ALT_PARAMS 3;
154
155
156 typedef struct pset {
157 float tpc_sig_thresh, d_sig_thresh;
158 int sg_cutoff,
159 tpc_inv, tpc_incsg, tpc_keep,
160 d_inv,
161 look_for_acloop_sg, acloop_min,
162 aa_incsg, aa_keep;
163 } Param_set_type;
164
165 Param_set_type ps;
166
167
168 void
169 set_search_params (Param_set_type *ps,
170 int params);
171
172 /* Subroutine that accomplishes the end of the test for the presence of a
173 tRNA gene */
174
175 void
176 following_search(long int pos, /* first position of the found T-Psi-C signal */
177 long int pos1, /* first position of the found D signal */
178 char *ptr1, /* pointer to the first position of the D signal */
179 char *ptr3, /* ptr3=ptr1+2 pointer to first position of the D arm */
180 int lpair,
181 int nloop, /*nloop=0 test of the sequence, */
182 /*nloop=1 test of the complementary sequence */
183 char *sequence,
184 long int seqlen,
185 FILE *fpo, /*pointer to the output file */
186 FILE *fpverb,
187 char *name,
188 int score, /* Value of the general score SG */
189 int match2, /* integer testing the presence of D */
190 /* arm with 3 base-pairings */
191 int *ntrna,
192 int *npred,
193 int *match,
194 long int sqoffset /* offset nucleotide numbering by this much (set with -i param) */
195 );
196
197
198 /* Subroutine to read the consensus matrix */
199
200 void
201 lectval(FILE *fp, /* pointer to the consensus matrix file */
202 float (*table_cons)[4], /* table containing the frequency of each base
203 at each position of the signal */
204 int (*table_inv)[2], /* table containing the position and the nature of
205 the invariant bases found in the signal. Code for
206 the bases: A=0, C=1, G=2 and T=3. */
207 int *lsig, /*lsig=length of the signal */
208 int *ktot, /* ktot= number of invariant bases */
209 float *maxtot /* maxtot= sum of the maximum frequencies */
210 );
211
212 /* Subroutine reading the sequence */
213 /* Modified to correctly read FASTA sequence files */
214
215 int fgetseq(char *name, /* string w/name of the sequence */
216 char *sequence, /* character string containing the sequence */
217 long int *seqlen, /* length of sequence */
218 FILE *fpi); /* input file pointer */
219
220
221 /* Subroutine reading & returning the sequence length */
222
223 int getseqsize(FILE *fpi); /* input file pointer */
224
225
226 /* Subroutine looking for the presence of a given signal, returns 1 if a
227 signal is found and 0 otherwise. It also return the table 'weight'
228 containing the frequencies of the oberved bases in the windowed
229 sequence and the number 'ninv' of invariant bases found in the windowed
230 sequence*/
231
232 int readsignal(char *ptr, /* pointer to the sequence */
233 int (*table_inv)[2], /* table containing the position and nature of the
234 invariant bases found in the consensus matrix */
235 int *lsig, /* lsig= length of the signal */
236 int *ktot, /* ktot= number of invariant bases in the consensus matrix */
237 float *weight, /* table containing the frequencies of the observed
238 base at each position of the
239 windowed sequence tested */
240 float (*table_cons)[4], /* table corresponding to the consensus matrix */
241 int *ninv, /* ninv= number of invariant */
242 /* bases in the windowed sequence */
243 int threshold_inv); /* Number of invariant bases */
244 /* allowed not to b e invariant */
245
246
247 /* Subroutine that calculates the similarity score on the potential signal
248 previously retained by the subroutine readsignal. This subroutine
249 returns 1 it the computed score is greater or equal to the defined
250 threshold and 0 otherwise. It returns also the value of the computed
251 score (score) */
252
253 int scoring(float *weight, /* table containing the frequencies of the observed base
254 at each position of the potential signal */
255 int lsig, /* length of the signal */
256 float max, /* sum of the maximum frequencies found in */
257 /* the consensus matrix */
258 int ktot, /* number of invariant bases found in the consensus matrix */
259 float *score, /* value of the computed score on the potential signal */
260 float ThresholdValue, /* defined threshold for the similarity score */
261 int ninv /* number of invariant bases found in the potential signal */
262 );
263
264 /* Subroutine looking for base-pairings between two parts of the sequence.
265 It returns the number of base-pairings found (ncomp) */
266
267 void
268 basepairing(char *ptr, /* pointer to the sequence */
269 int npair, /* number of base-pairings forming a given arm */
270 int lpair, /* number of nucleotides found between the first position of the
271 first part of the sequence involved in the stem and the last
272 position of the second part of the
273 sequence involved in the stem */
274 int *ncomp /* number of base-pairings observed between the two parts of
275 the sequence tested */
276 );
277
278
279 /* Subroutine that complements the sequence. It returns the complementary
280 sequence to the main */
281
282 void
283 compstrand(char **sequence, /* pointer to the sequence string */
284 long int seqlen /* sequence length */
285 );
286
287
288 /* Subroutine that codes the anticodon signal sequence by a number
289 comprised between 1 and 65. It returns this number. */
290
291 void
292 codage(char *anticodon, /* anticodon signal sequence */
293 int length1, /* length of the anticodon signal, lenght1=3 */
294 int *num /* number associated to the anticodon signal sequence */
295 );
296
297
298 /* Subroutine that determines the tRNA gene family */
299
300 void
301 corresaa(int num, /* Number coding the anticodon signal sequence */
302 char *type_trna /* tRNA gene family */
303 );
304
305
306 /* Subroutine that prints the results of the search */
307
308 void
309 printresult(FILE *fpo, /* output file pointer */
310 FILE *fpverb, /* character string for the name of the sequence */
311 char *name,
312 long int pos1, /* first position of the D signal */
313 long int pos, /* first position of the T-Psi-C signal */
314 int lpair, /* number of nucleotides between the */
315 /* first position of the D arm and the last one */
316 int lpair1, /* number of nucleotides between the first position of the
317 aminoacyl arm and the last one */
318 int lpair2, /* number of nucleotides between the first position of the
319 anticodon arm and the last one */
320 int nloop, /* nloop=0, scanning of the direct strand; nloop=1 scanning of
321 the complementary strand */
322 int *ntrna, /* number of tRNA genes predicted in the sequence */
323 char *chaine2, /* character string for the predicted tRNA gene sequence */
324 char *sequence, /* character string containing the sequence tested */
325 long int length, /* length of the sequence */
326 int *match, /* match=1 if at least one tRNA gene has been found on the
327 direct strand and 0 otherwise */
328 int ncomp, /* number of base-pairings in the anticodon arm of the
329 predicted tRNA gene */
330 char *type_trna, /* character string for the tRNA gene family */
331 char *anticodon, /* character string for the anticodon signal sequence */
332 long int sqoffset /* offset nucleotide numbering by this much (set with -i param) */
333 );
334
main(int argc,char ** argv)335 main(int argc, char **argv)
336 {
337 /* pointers to the different files fpi=input file, fpo=output file,
338 fpcons1= T-Psi-C matrix file, fpcons2= D matrix file */
339
340 extern Param_set_type ps; /* search parameters */
341
342 FILE *fpi,*fpo,*fpcons1,*fpcons2, *fpverb;
343
344 /* lsig1= length of the T-Psi-C signal, lsig2= length of the D signal
345 ktot1= number of invariant bases in the T-Psi-C signal
346 ktot2= number of invariant bases in the D signal */
347
348 int lsig1=0, lsig2=0, ktot1=0, ktot2=0;
349
350 long int pos,pos1, begin; /* start position of the T-Psi-C signal (pos)
351 and D signal (pos1) and of the search for the
352 D signal (begin) */
353 int npair=0, lpair=0; /* variables used to test the presence of a stem,
354 see definition further in the program */
355 float table_cons1[30][4]; /* table containing the frequency of each base
356 at each position of the T-Psi-C signal */
357 float table_cons2[30][4]; /* table containing the frequency of each base
358 at each position of the D signal */
359 float maxtot1=0, maxtot2=0; /* sum of the maximum frequencies found in
360 the T-Psi-C matrix (maxtot1) and in the D matrix
361 (maxtot2) */
362 float weight1[30]; /* table containing the frequency of the observed base
363 at each position of the windowed sequence tested
364 (T-Psi-C matrix) */
365 float weight2[30]; /* table containing the frequency of the observed base
366 at each position of the windowed sequence tested
367 (D matrix) */
368 int table_inv1[30][2]; /* table containing the position and the nature of
369 the invariant bases found in the T-Psi-C matrix */
370 int table_inv2[30][2]; /* table containing the position and the nature of
371 the invariant bases found in the D matrix */
372 int ntab[5][2]; /* table containing for each potential D arm found in the
373 windowed sequence, the number of base-pairings and the
374 number of nucleotides that separates the first position
375 of the D arm and the last one */
376 char *sequence; /* character string containing the sequence */
377 long int seqlen; /* keeps the length of the current sequence */
378 long int sqoffset=0; /* start numbering nucleotides by this offset (def=0) */
379 char *ptr,*ptr1,*ptr3, *ptrstart; /* pointers to the sequence, their
380 definitions are given at the place
381 they are used in the program */
382 char name[80]; /* character string containing the name of the sequence */
383 char tscan_dir[120]; /* holds name of directory of consensus files */
384
385 int ntrna; /* number of tRNA genes predicted in the sequence and its
386 complement */
387 int npred=0; /* Total number of tRNA genes predicted in the sequences of
388 the input file */
389 int nseq=0; /* Number of sequences tested (number of sequences of the
390 input file) */
391 long int lseq=0; /* Number of nucleotides tested */
392 int score; /* variable corresponding to the general score SG */
393 int ninv=0; /* number of invariant bases oberved in the windowed sequence
394 for each signal */
395 int ncomp=0; /* number of base-pairings observed in the sequence tested
396 for the presence of one of the four arms */
397 int threshold_inv; /* number of invariant bases that are allowed not to
398 be invariant */
399 int match; /* if at least one tRNA gene is predicted of the direct strand,
400 match=1, if no tRNA gene is found on the direct strand, match=0 */
401 int first_score=0,match2,h, nloop;
402
403 /* ThresholdValue1= threshold value of the similarity score used to retain
404 a T-Psi-C signal
405 ThresholdValue2= threshold value of the similarity score used to retain
406 a D signal
407 Changing the threshold value of the similarity score, MODIFY '0.4' in
408 the following line. */
409
410 float ThresholdValue1, ThresholdValue2;
411 float score1=0, score2=0;
412 int i,j;
413
414 int params;
415 int suppress_credits; /* flag for display of credits */
416 char *seqfile, /* name of input sequence file */
417 *outfile, /* name of output file (if not sent to stdout) */
418 *verbfile; /* name of file for verbose output */
419
420 int optc;
421 extern char *optarg; /* for getopt() */
422 extern int optind; /* for getopt() */
423
424 /***********************************************
425 * Parse command line
426 ***********************************************/
427
428 suppress_credits = FALSE;
429 params = STRICT_PARAMS;
430 outfile = NULL;
431 verbfile = NULL;
432
433 while ((optc = getopt(argc, argv, "csraho:v:i:")) != -1)
434 switch (optc) {
435
436 case 'c': suppress_credits = TRUE; break;
437 case 's': params = STRICT_PARAMS; break;
438 case 'r': params = RELAXED_PARAMS; break;
439 case 'a': params = ALT_PARAMS; break;
440 case 'o': outfile = optarg; break;
441 case 'v': verbfile = optarg; break;
442 case 'i': sqoffset = atoi(optarg)-1; break;
443
444 case 'h':
445 printf("%s\n version %s\n%s\n", banner, RELEASE, usage);
446 exit(0);
447 default:
448 fprintf(stderr,"unrecognized option %c\n", optc);
449 exit(1);
450 }
451
452 if (argc - optind != 1) {
453 fprintf(stderr,"%s\n", usage);
454 exit(1);
455 }
456
457 seqfile = argv[argc-1];
458
459
460 /* open the sequence file */
461
462 if ((fpi=fopen(seqfile,"r")) == NULL)
463 {
464 fprintf(stderr,"tRNAscan1.4: FATAL: Cannot open the input sequence file %s\n",seqfile);
465 exit(1);
466 }
467
468 /* open the output file */
469
470 if (outfile == NULL) {
471 fpo = stdout;
472 }
473 else if ((fpo=fopen(outfile,"w")) == NULL) {
474 fprintf(stderr,"tRNAscan1.4: FATAL: Cannot open the output file %s\n",outfile);
475 exit(1);
476 }
477
478 /* open Verbose output file */
479 if (verbfile != NULL)
480 if ((fpverb=fopen(verbfile,"w")) == NULL) {
481 fprintf(stderr,"tRNAscan1.4: Cannot open verbose output file %s\n",verbfile);
482 exit(1);
483 }
484
485 /* open the consensus matrix file for the T-Psi-C signal */
486
487 strcpy(tscan_dir,TSCANDIR);
488
489 if (((fpcons1=fopen("TPCsignal","r")) == NULL) &&
490 ((fpcons1=fopen(strcat(tscan_dir,"/TPCsignal"),"r")) == NULL))
491 {
492 fprintf(stderr,"tRNAscan1.4: main cannot open TPCsignal consensus file\n");
493 exit(1);
494 }
495
496 /* open the consensus matrix file for the D signal */
497
498 strcpy(tscan_dir,TSCANDIR);
499
500 if (((fpcons2=fopen("Dsignal","r")) == NULL) &&
501 ((fpcons2=fopen(strcat(tscan_dir,"/Dsignal"),"r")) == NULL))
502 {
503 fprintf(stderr,"tRNAscan1.4: main cannot open Dsignal consensus file\n");
504 exit(1);
505 }
506
507
508 /* Set search parameters */
509
510 set_search_params(&ps,params);
511
512 ThresholdValue1 = ps.tpc_sig_thresh;
513 ThresholdValue2 = ps.d_sig_thresh;
514
515 /* Credits */
516
517 if (!suppress_credits) {
518 printf("\n %s %s -- derived from version %s\n", PROGRAM, RELEASE,
519 DERIV_VERSION);
520 printf("\n Please cite: %s\n %s\n", BRIEF,CITATION);
521 printf(" %s\n\n",MODIF);
522 }
523
524 /* reading the two consensus file */
525
526 lectval(fpcons1,table_cons1,table_inv1,&lsig1,&ktot1,&maxtot1);
527 lectval(fpcons2,table_cons2,table_inv2,&lsig2,&ktot2,&maxtot2);
528
529
530 /* main loop for reading & analyzing one sequence at a time */
531
532 while (!feof(fpi)) {
533
534 /* find out sequence size before allocating memory & reading in */
535
536 seqlen = getseqsize(fpi);
537 sequence= (char *) calloc (seqlen+BUF_SIZE,sizeof(char));
538 if (sequence == NULL ) {
539 fprintf(stderr,"tRNAscan1.4: Can't malloc for sequence\n");
540 exit(-1);
541 }
542
543 /* reading the name of the sequence and the sequence from the input file */
544
545 if (fgetseq(name,sequence,&seqlen,fpi) == 0)
546 break;
547
548 nseq++;
549 lseq += seqlen;
550
551 match=0;
552 for (nloop=0; nloop <=1; nloop++)
553 {
554 /* if the sequence is shorter than 70 bp long it is skipped */
555
556 if (seqlen < MIN_SEQ_LEN)
557 break;
558
559 ntrna=0;
560
561 /* search of tRNA genes starts
562 pos= first position of the T-Psi-C signal
563 ptr= pointer to the first position of the T-Psi-C signal */
564
565 for (pos=44, ptr=(sequence+43); pos<seqlen-23;pos++, ptr++)
566 {
567 score=0;
568
569 #ifdef VERBOSE
570 fprintf(fpverb,"--- Moving TPC window (pos=%d)\n",pos);
571 #endif
572
573
574 /*change in the number of invariant bases that must be present in the
575 T-Psi-C signal, MODIFY the value of threshold_inv here */
576
577 threshold_inv= ps.tpc_inv;
578
579 /*search for the T-Psi-C signal */
580
581 if(readsignal(ptr,table_inv1,&lsig1,&ktot1,weight1,table_cons1,&ninv,
582 threshold_inv))
583 {
584
585 /*the general score SG (score) is incremented by 1, if the number of
586 invariant bases (ninv) in the found T-Psi-C signal >= 3 (ktot1=4)
587 change in the threshold of the T-Psi-C signal for the increment of SG,
588 MODIFY the following line */
589
590 if (ninv >= ktot1-1) {
591 score++;
592 #ifdef VERBOSE
593 fprintf(fpverb,"+ TPC invariant bp: %d. SG++\n",ninv); }
594 else {
595 fprintf(fpverb,"X TPC invariant bp: %d. NO SG++\n",ninv);
596 #endif
597 }
598
599 /* Computation of the score on the found T-Psi-C signal, signal retains if
600 the computed score is >= ThresholdValue1 */
601
602 if(scoring(weight1,lsig1,maxtot1,ktot1,&score1,ThresholdValue1,
603 ninv))
604 {
605 #ifdef VERBOSE
606 fprintf(fpverb,"+ TPC signal over thresh: %f\n",score1);
607 #endif
608
609
610 /* npair= number of base-pairings in the T-Psi-C stem
611 lpair= number of nucleotides between the first position of the T-Psi-C
612 arm and the last position of the T-Psi-C arm */
613
614 npair = 5;
615 lpair = 16;
616
617 /* Test for the presence of the T-Psi-C stem */
618
619 basepairing(ptr+1,npair,lpair,&ncomp);
620
621 /* If the number of base-pairings (ncomp) equal 5, SG (score) incremented
622 by 1. Change in the threshold of base-pairings in the T-Psi-C arm for
623 the increment of SG, MODIFY the value '5' in the following line */
624
625 if (ncomp >= ps.tpc_incsg) {
626 score++;
627 #ifdef VERBOSE
628 fprintf(fpverb,"+ TPC parings (%d) add to SG=%d\n",ncomp,score); }
629 else {
630 fprintf(fpverb,"X TPC parings (%d) NO add to SG=%d\n",ncomp,score);
631 #endif
632 }
633
634 /* Change in the threshold to retain a stem as a potential T-Psi-C arm,
635 MODIFY the value '4' in the following line */
636
637 #ifdef VERBOSE
638 if (ncomp < ps.tpc_keep) {
639 fprintf(fpverb,"X TPC parings retain loop: %d\n",ncomp); }
640 else {
641 fprintf(fpverb,"+ TPC parings retain loop: %d\n",ncomp);
642 #else
643 if (ncomp >= ps.tpc_keep) {
644 #endif
645
646
647 /* For the same found T-Psi-C region, different potential D region can be
648 found because of the possible different lengthes of the D loop. As the
649 search for the tRNA gene is applied for each potential D arm, the value
650 of SG computed on the T-Psi-C region is stored in 'first_score' */
651
652 first_score=score;
653
654 /*Change in the number of invariant bases to be present in the D signal,
655 MODIFY the value of threshold_inv here */
656
657 threshold_inv= ps.d_inv;
658
659 /* Search for the presence of a D region between -120 and -37 nucleotides
660 upstream from the found T-Psi-C region. 37 nucleotides is the observed
661 minimum length assuming no insertion in the D and variable loop. 120
662 nucleotides allows for an intron of up to 60 nucleotides.
663 Change in the length of the intron, MODIFY the value '120' in the
664 definition of begin and ptrstart.
665 ptr1= pointer to the first position of the D signal
666 pos1= first position of the D signal
667 begin= starting position for the search of the D signal
668 ptrsart= pointer to the starting position for the search of the D signal */
669
670 /* If the sequence does not have 127 nucleotides upstream of the T-Psi-C
671 signal, the search for the D signal starts at position 8 */
672
673 if (pos <=127)
674 {
675 begin = 8;
676 ptrstart = sequence+7;
677 }
678 else
679 {
680 begin = pos-(MAX_INTRON_LEN+60);
681 ptrstart = ptr-(MAX_INTRON_LEN+60);
682 }
683
684 for(pos1=begin,ptr1=ptrstart; pos1<=(pos-37);pos1++,
685 ptr1++)
686 {
687
688 /* Search for the D signal */
689
690 #ifdef VERBOSE
691 if(!readsignal(ptr1,table_inv2,&lsig2,&ktot2,weight2,
692 table_cons2,&ninv,threshold_inv))
693 {
694 fprintf(fpverb,"X D invariant bp: %d\n",ninv); }
695 else
696 {
697 fprintf(fpverb,"+ D invariant bp: %d\n",ninv);
698 #else
699 if(readsignal(ptr1,table_inv2,&lsig2,&ktot2,weight2,
700 table_cons2,&ninv,threshold_inv))
701 {
702 #endif
703
704
705 /* Number of invariant bases found in the windowed sequence equal to the
706 number of invariant bases of the matrix (ktot2=3), then SG (score) is
707 incremented by 1
708 Change in the increment of SG for the D signal, MODIFY the following
709 line */
710
711 if (ninv >= ktot2) {
712 score++;
713 #ifdef VERBOSE
714 fprintf(fpverb,"+ D invariant (%d) inc SG\n",ninv); }
715 else {
716 fprintf(fpverb,"X D invariant (%d) NO add to SG\n",ninv);
717 #endif
718 }
719
720 /* Computation of the score on the found D signal, signal retains if the
721 computed score is >= TresholdValue2 */
722
723 #ifdef VERBOSE
724 if(!scoring(weight2,lsig2,maxtot2,ktot2,&score2,
725 ThresholdValue2,ninv))
726 {
727 fprintf(fpverb,"X D signal threshold: %f\n",score2); }
728 else {
729 fprintf(fpverb,"+ D signal threshold: %f\n",score2);
730 #else
731 if(scoring(weight2,lsig2,maxtot2,ktot2,&score2,
732 ThresholdValue2,ninv))
733 {
734 #endif
735
736
737 /* ptr3= pointer pointing on the first position of the D arm
738 npair= number of base-pairings in the D stem
739 lpair= number of nucleotides between the first position of the D arm
740 and the last one. As the D loop presents some variation in length,
741 lpair can take different values:
742 lpair=14 smallest length of the D loop
743 lpair=18 greatest length of the D loop */
744
745 ptr3=ptr1+2;
746 npair = 3;
747 for (i=0; i < 5; i++)
748 for (j=0; j < 2; j++)
749 ntab[i][j]=0;
750
751 for (lpair=14,h=0; lpair <=18; lpair++,h++)
752 {
753
754 /* Search for the presence of the D arm */
755
756 basepairing(ptr3,npair,lpair,&ncomp);
757
758 /* For each potential stem found, the number of base-pairings (ncomp) and
759 lpair are stored in ntab */
760
761 ntab[h][0]=ncomp;
762 ntab[h][1]=lpair;
763 }
764
765 match2=0;
766 for (h=0; h < 5;h++)
767 {
768
769 /* If stems with 3 base-pairings are found for the D arm, the following
770 steps of the algorithm are applied on each of these potential stems.
771 The 2 base-pairings stems are discarded */
772
773 if (ntab[h][0] == 3)
774 {
775 lpair=ntab[h][1];
776 match2=1;
777 following_search(pos,pos1,ptr1,ptr3,lpair,nloop,
778 sequence,seqlen,fpo,fpverb,name,score,match2,&ntrna,&npred,&match,sqoffset);
779 }
780 }
781
782 /* If no stems with 3 base-pairings have been found, then the stems with 2
783 base-pairings are used in the following steps */
784
785 if (!match2)
786 {
787 for (h=0; h < 5;h++)
788 {
789 if (ntab[h][0] == 2)
790 {
791 lpair=ntab[h][1];
792 following_search(pos,pos1,ptr1,ptr3,lpair,nloop,
793 sequence,seqlen,fpo,fpverb,name,score,match2,&ntrna,&npred,&match,sqoffset);
794 }
795 }
796 }
797 }
798 score=first_score;
799 }
800 }
801 }
802 }
803 }
804 }
805
806 /* The sequence is complemented and the algorithm is applied on the
807 complementary strand */
808
809 compstrand(&sequence,seqlen);
810
811 #ifdef VERBOSE
812 if (nloop == 0)
813 fprintf(fpverb,"\n== Trying COMPLEMENTARY strand\n");
814 #endif
815
816 }
817
818 free(sequence); /* free up mem to ready for next sequence */
819
820 }
821 fprintf(fpo,"number of sequences= %d\n", nseq);
822 fprintf(fpo,"number of bases tested (one strand)=%ld\n", lseq);
823 lseq = 2* lseq;
824 fprintf(fpo,"number of bases tested (both strands)= %ld\n", lseq);
825 fprintf(fpo,"number of predicted tRNA=%d\n", npred);
826 exit(0);
827 }
828
829
830 /* Subroutine that accomplishes the end of the test for the presence of a
831 tRNA gene */
832
833 void
834 following_search(long int pos, /* first position of the found T-Psi-C signal */
835 long int pos1, /* first position of the found D signal */
836 char *ptr1, /* pointer to the first position of the D signal */
837 char *ptr3, /* ptr3=ptr1+2 pointer to first position of the D arm */
838 int lpair,
839 int nloop, /*nloop=0 test of the sequence, */
840 /*nloop=1 test of the complementary sequence */
841 char *sequence,
842 long int seqlen,
843 FILE *fpo, /*pointer to the output file */
844 FILE *fpverb,
845 char *name,
846 int score, /* Value of the general score SG */
847 int match2, /* integer testing the presence of D */
848 /* arm with 3 base-pairings */
849 int *ntrna,
850 int *npred,
851 int *match,
852 long int sqoffset /* offset nucleotide numbering by this much (set with -i param) */
853 )
854
855 {
856 extern Param_set_type ps;
857
858 char chaine2[300]; /* character string containing the predicted tRNA gene
859 sequence */
860 char anticodon[4]; /* character string containing the anticodon signal
861 sequence */
862 char type_trna[4]; /* character string containing the tRNA gene family */
863 char *ptr2,*ptr4,*ptr5; /*pointers to the sequence, see their definition
864 below*/
865 int score2, score1; /* variables for the general score SG */
866 int npair1=0; /* number of base-pairings in the aminoacyl arm */
867 int lpair1=0; /* number of nucleotides between the first position of
868 the aminoacyl arm and the last one */
869 int lpair2=0; /* number of nucleotides between the first position of
870 the anticodon arm and the last one */
871 int npair2=0; /* number of base-pairings in the anticodon arm */
872 int pos6=0; /* number of nucleotides found between the first position of
873 the anticodon arm and the first position of the T-Psi-C
874 signal. */
875 int pos4=0; /* number of nucleotides present in the variable loop.*/
876 int length1=3; /* length of the anticodon signal */
877 int num=0; /* variable that codes the anticodon signal sequence
878 (comprised between 1 and 65). */
879 int match1; /* match1=1 if a tRNA without intron has been predicted
880 and 0 otherwise*/
881 int ncomp=0; /* number of base-pairings observed in the sequence tested
882 for the presence of a given arm */
883 int i;
884
885 #ifdef VERBOSE
886 fprintf(fpverb,"IN following search...\n");
887 #endif
888
889 score1=score;
890
891 /* If match2=1, the found D arm present 3 base-pairings, the general score
892 SG (score1) is incremented by 1
893 Change in the threshold of base-pairings in the D arm for the increment
894 of SG, MODIFY the following line */
895
896
897 if (match2) {
898 score1++;
899 #ifdef VERBOSE
900 fprintf(fpverb,"+ D arm found 3 bp\n"); }
901 else {
902 fprintf(fpverb,"X D arm found less than 3 bp\n");
903 #endif
904 }
905 npair1=7; /* Number of base-pairings in the aminoacyl arm */
906 lpair1=pos-pos1+8+23; /* Number of nucleotides between the first
907 position of the aminoacyl arm and the last one */
908 ptr2=(ptr1-7); /* pointer to the first position of the amino acyl arm */
909
910 /* Test for the presence of the aminoacyl arm */
911
912 basepairing(ptr2,npair1,lpair1,&ncomp);
913
914 /* If the number of base-pairings (ncomp) equal 7, the general score SG
915 (score1) is incremented by 1 .
916 Change in the threshold of base-pairings in the aminoacyl arm for the
917 increment of SG, MODIFY the value '7' in the following line */
918
919 if (ncomp >= ps.aa_incsg) {
920 score1++;
921 #ifdef VERBOSE
922 fprintf(fpverb,"+ AA arm found %d base pairings\n",ncomp); }
923 else {
924 fprintf(fpverb,"X AA arm found %d base pairings\n",ncomp);
925 #endif
926 }
927
928 /* Change in the threshold to retain a stem as a potential aminoacyl arm
929 MODIFY the value '6' in the following line */
930
931 if (ncomp >= ps.aa_keep)
932 {
933 i = 0;
934 while (i< 300)
935 {
936 chaine2[i]='\0';
937 i++;
938 }
939
940 /* If the general score SG is >= 4, the algorithm looks for the presence of
941 an anticodon stem, otherwise the algorithm is initiated again on the
942 following windowed sequence */
943
944 if (score1 >= ps.look_for_acloop_sg)
945 {
946 #ifdef VERBOSE
947 fprintf(fpverb,"Looking for anticodon stem\n");
948 #endif
949
950 ptr4=ptr3+lpair+2; /* pointer to the first position of the anticodon arm */
951 match1=0;
952 npair2=5; /* Number of base-pairings in the anticodon arm */
953 lpair2=16; /* Number of nucleotides between the first position of the
954 anticodon arm and the last position */
955
956 /* Test for the presence of an anticodon arm */
957
958 basepairing(ptr4,npair2,lpair2,&ncomp);
959
960 /* If 4 or 5 base-pairings are observed, the windowed sequence is retained
961 as a potential anticodon arm */
962
963 if (ncomp >= ps.acloop_min)
964 {
965 if (nloop == 0)
966 *match=1;
967 i=0;
968 while (i< 4)
969 {
970 anticodon[i]='\0';
971 type_trna[i]='\0';
972 i++;
973 }
974
975 /* Test of the presence of the residue T at the position preceding the
976 anticodon signal. If the residue T is present, the general score is
977 incremented by 1.*/
978
979 if ((*(ptr4+6)) == 't') {
980 score1++;
981 #ifdef VERBOSE
982 fprintf(fpverb,"+ Invariant T found in anticodon. SG++\n"); }
983 else {
984 fprintf(fpverb,"X Invariant T NOT found in anticodon. No SG inc\n");
985 #endif
986 }
987
988 /* If SG >= 5, the windowed sequence is retained as a potential tRNA gene.
989 The tRNA gene predicted at that level is without intron */
990
991 #ifdef VERBOSE
992 if (score1 < ps.sg_cutoff) {
993 fprintf(fpverb,"X Under SG threshold: %d\n",score1); }
994 else {
995 fprintf(fpverb,"+ Over SG threshold: %d\n",score1);
996 #else
997 if (score1 >= ps.sg_cutoff) {
998 #endif
999
1000
1001 /* Identification of the tRNA gene family */
1002
1003 strncpy(anticodon,ptr4+7,3);
1004 codage(anticodon,length1,&num);
1005 corresaa(num,type_trna);
1006 strncpy(chaine2,(ptr1-7),lpair1+2);
1007
1008 (*ntrna)++;
1009 (*npred)++;
1010 match1=1; /* match1=1 indicates that a tRNA gene without
1011 intron has been predicted */
1012
1013 /* The results are printed in the output file */
1014
1015 printresult(fpo,fpverb,name,pos1,pos,lpair,lpair1,lpair2,nloop,ntrna,
1016 chaine2,sequence,seqlen,match,ncomp,type_trna,anticodon,sqoffset);
1017 }
1018 }
1019
1020 /* If no anticodon arm has been found for a value of lpair2=16, i. e. an
1021 anticodon loop of 7 nucleotides (without intron), the algorithm searchs
1022 for the presence of an anticodon arm assuming the presence of an intron
1023 in the anticodon loop */
1024
1025 /* pos6 corresponds to the number of nucleotides found between the first
1026 position of the anticodon arm and the first position of the T-Psi-C
1027 signal. */
1028
1029 #ifdef VERBOSE
1030 fprintf(fpverb,"+ Looking for anticodon stem WITH intron...\n");
1031 #endif
1032
1033 pos6=pos-(pos1+lpair+3);
1034
1035 /* The search for the presence of an anticodon stem is initiated only if an
1036 intron of at least 8 bases long is expected to be present. The value 28
1037 for pos6 corresponds to the number of nucleotides found for the smallest
1038 length of the variable loop and for an intron length of 8 nucleotides.
1039 Change in the smallest length of the intron, MODIFY the value of pos6
1040 in the following line and also the start value of lpair2 (lpair2=24)
1041 in the loop (lpair2 is the number of nucleotides found between the first
1042 position of the anticodon arm and the last position of the anticodon arm.
1043 lpair2=16 when no intron in the anticodon loop). */
1044
1045 if ((!match1) && (pos6 >=MIN_VAR_LOOP))
1046 {
1047 for (lpair2=MIN_VAR_LOOP-4;lpair2<pos6;lpair2++)
1048 {
1049 score2=score1;
1050
1051 /* Test for the presence of the anticodon arm for the different possible
1052 lengthes of the intron.
1053 ptr4 = pointer to the first base involved in the first part of the
1054 anticodon arm */
1055
1056 basepairing(ptr4,npair2,lpair2,&ncomp);
1057
1058 /* pos4 corresponds to the number of nucleotides present in the variable
1059 loop. The smallest size of the variable loop in the tRNA sequences
1060 is 3. Thus to retain the stem as a potential anticodon arm, pos4 has
1061 to be >= 3. */
1062
1063 pos4=pos-pos1-lpair-lpair2-4;
1064
1065 /* ptr5 = pointer to the position preceding the intron. */
1066
1067 ptr5=ptr4+10;
1068
1069 /* The anticodon arm is retained if 4 or 5 base-pairings are formed and if
1070 pos4 >= 3 */
1071
1072 if ((ncomp >=4) && (pos4 >= 3))
1073 {
1074
1075 /* Test for the presence of the residues A or G at the position preceding
1076 the intron */
1077
1078 if(((*ptr5) == 'a') || ((*ptr5) == 'g'))
1079 {
1080 if (nloop == 0)
1081 (*match)=1;
1082 i=0;
1083 while (i<4)
1084 {
1085 anticodon[i]='\0';
1086 type_trna[i]='\0';
1087 i++;
1088 }
1089
1090 /* Test for the presence of the residue T at the position preceding the
1091 anticodon signal. If the residue T is present, the general score is
1092 incremented by 1.*/
1093
1094 if ((*(ptr4+6)) == 't') {
1095 score2++;
1096 #ifdef VERBOSE
1097 fprintf(fpverb,"+ Found invariant T. SG= %d\n",score2); }
1098 else {
1099 fprintf(fpverb,"X Did NOT find invariant T. SG= %d\n",score2);
1100 #endif
1101 }
1102
1103 /* If SG >= 5, the windowed sequence is retained as a potential tRNA gene.
1104 The tRNA gene predicted at that level has one intron in
1105 the anticodon loop. */
1106
1107 if (score2 >= ps.sg_cutoff)
1108 {
1109
1110 /* Identification of the tRNA gene family */
1111
1112 strncpy(anticodon,ptr4+7,3);
1113 codage(anticodon,length1,&num);
1114 corresaa(num, type_trna);
1115 strncpy(chaine2,(ptr1-7),lpair1+2);
1116
1117 (*ntrna)++;
1118 (*npred)++;
1119
1120 /* The results are printed in the output file */
1121
1122 printresult(fpo,fpverb,name,pos1,pos,lpair,lpair1,lpair2,nloop,
1123 ntrna,chaine2,sequence,seqlen,match,ncomp,type_trna,anticodon,sqoffset);
1124 }
1125 }
1126 }
1127 }
1128 }
1129 }
1130 }
1131 }
1132
1133
1134
1135 /* Subroutine to read the consensus matrix */
1136
1137 void
1138 lectval(FILE *fp, /* pointer to the consensus matrix file */
1139 float (*table_cons)[4], /* table containing the frequency of each base
1140 at each position of the signal */
1141 int (*table_inv)[2], /* table containing the position and the nature of
1142 the invariant bases found in the signal. Code for
1143 the bases: A=0, C=1, G=2 and T=3. */
1144 int *lsig, /*lsig=length of the signal */
1145 int *ktot, /* ktot= number of invariant bases */
1146 float *maxtot /* maxtot= sum of the maximum frequencies */
1147 )
1148
1149 {
1150 int i=0,j, k=0,l,m;
1151 float max=0;
1152 int ret = 0;
1153
1154 for (l=0; l< 30; l++)
1155 for (m=0; m<4; m++)
1156 table_cons[l][m]=0;
1157
1158 for (l=0; l< 30; l++)
1159 for (m=0; m < 2; m++)
1160 table_inv[l][m]=0;
1161
1162 while(feof(fp) == 0)
1163 {
1164 for(j=0;j<4;j++)
1165 {
1166 ret = fscanf(fp,"%f",&table_cons[i][j]);
1167 }
1168 i++;
1169 }
1170
1171 *lsig=i-1;
1172 for (i=0; i<*lsig; i++)
1173 {
1174 for (j=0; j<4; j++)
1175 {
1176 if (table_cons[i][j] == 1.0)
1177 {
1178 k++;
1179 table_inv[k][1]=i;
1180 table_inv[k][2]=j;
1181 }
1182 }
1183 }
1184
1185 for (i=0; i<*lsig; i++)
1186 {
1187 max=table_cons[i][0];
1188 for (j=1; j<4; j++)
1189 max= MAX(max,table_cons[i][j]);
1190 *maxtot += max;
1191 }
1192
1193 *ktot=k;
1194 }
1195
1196 /* Subroutine reading the sequence */
1197 /* Modified to correctly read FASTA sequence files */
1198
1199 int fgetseq(char *name, /* string w/name of the sequence */
1200 char *sequence, /* character string containing the sequence */
1201 long int *seqlen, /* length of sequence */
1202 FILE *fpi) /* input file pointer */
1203
1204 {
1205 char line[MAXLINE]; /* character string used to read a line */
1206 char *ptr; /* pointer to the sequence */
1207 long int i,j,c;
1208 char *ptrRet;
1209
1210 line[0]='\0';
1211 *sequence='\0';
1212 *seqlen = 0;
1213
1214 /* Test the first character to choose between the two formats available */
1215
1216 if (fgets(line, MAXLINE, fpi) == NULL) {
1217 return 0;
1218 }
1219
1220 else if (line[0] == ';') {
1221
1222 /* File in the non-GenBank format */
1223
1224 if (line[1] != ' ')
1225 {
1226 for (i=0, ptr= &(line[1]); *ptr != ' ' && *ptr !='\n';i++)
1227 name[i]= *ptr++;
1228 name[i] = '\0';
1229 while ((c = getc(fpi)) == ';')
1230 ptrRet = fgets(line, MAXLINE, fpi);
1231 ungetc(c, fpi);
1232 ptr = sequence;
1233
1234 while ((c= getc(fpi)) != ';' && c != EOF)
1235 if (isalpha(c)) {
1236 *ptr = tolower(c);
1237 ptr++;
1238 (*seqlen)++;
1239 }
1240
1241 if (c != EOF)
1242 ungetc(c, fpi);
1243 *ptr= '\0';
1244 }
1245 else if (line[1] == ' ')
1246 {
1247 /* Intelligenetics format */
1248
1249 while ((c = getc(fpi)) == ';')
1250 ptrRet = fgets(line,MAXLINE, fpi);
1251 ungetc(c, fpi);
1252 ptrRet = fgets(line, MAXLINE, fpi);
1253 for (i=0, ptr= &(line[0]); *ptr != ' ' && *ptr !='\n';i++)
1254 name[i]= *ptr++;
1255 name[i] = '\0';
1256
1257 ptr = sequence;
1258
1259 while ((c= getc(fpi)) != ';' && c != EOF)
1260 if (isalpha(c)){
1261 *ptr = tolower(c);
1262 ptr++;
1263 (*seqlen)++;
1264 }
1265
1266 if (c != EOF)
1267 ungetc(c, fpi);
1268 *ptr= '\0';
1269 }
1270 }
1271 else if (line[0] == '>')
1272 {
1273 /* Fasta Format */
1274
1275 for (i=1; line[i] == ' '; i++)
1276 ;
1277 for (j = 0, ptr = &line[i]; *ptr != ' '; ptr++, j++)
1278 name[j] = *ptr;
1279 name[j] = '\0';
1280
1281 ptr = sequence;
1282 while ((c= getc(fpi)) != '>' && c != EOF)
1283 if (isalpha(c)) {
1284 *ptr = tolower(c);
1285 *seqlen += 1;
1286 ptr++;
1287 }
1288
1289 if (c != EOF)
1290 ungetc(c, fpi);
1291 *ptr= '\0';
1292 }
1293 else
1294 {
1295 /* File in GenBank format */
1296
1297 while (strncmp(line, "LOCUS", 5) != 0)
1298 if (fgets(line, MAXLINE, fpi) == NULL)
1299 exit(1);
1300
1301 for (i = 0, ptr = &line[12]; *ptr != ' '; ptr++, i++)
1302 name[i] = *ptr;
1303 name[i] = '\0';
1304
1305 while (strncmp(line, "ORIGIN", 6) != 0)
1306 if (fgets(line, MAXLINE, fpi) == NULL)
1307 exit(1);
1308
1309 ptr = sequence;
1310 *seqlen=0;
1311 ptrRet = fgets(line, MAXLINE, fpi);
1312 while (strncmp(line, "//", 2) != 0) {
1313 for (i = 0; line[i] != '\n'; i++)
1314 if (isalpha(line[i])) {
1315 *ptr++ = tolower(line[i]);
1316 (*seqlen)++;
1317 }
1318 ptrRet = fgets(line, MAXLINE, fpi);
1319 }
1320 *ptr = '\0';
1321
1322 }
1323 return (1);
1324 }
1325
1326 /* Subroutine reading & returning the sequence length */
1327
1328 int getseqsize(FILE *fpi /* input file pointer */
1329 )
1330
1331 {
1332
1333 char line[MAXLINE]; /* character string used to read a line */
1334 long int i,c, seqlen, fpi_save_pos;
1335 char* ptrRet;
1336
1337 line[0]='\0';
1338 seqlen = 0;
1339
1340 fpi_save_pos = ftell(fpi); /* save current position in file */
1341
1342 /* Test the first character to choose between the two formats available */
1343
1344 if (fgets(line, MAXLINE, fpi) == NULL) {
1345 return 0;
1346 }
1347
1348 else if (line[0] == ';') {
1349
1350 /* File in the non-GenBank format */
1351
1352 if (line[1] != ' ')
1353 {
1354 while ((c = getc(fpi)) == ';')
1355 ptrRet = fgets(line, MAXLINE, fpi);
1356 ungetc(c, fpi);
1357
1358 while ((c= getc(fpi)) != ';' && c != EOF)
1359 if (isalpha(c)) {
1360 seqlen++;
1361 }
1362
1363 }
1364 else if (line[1] == ' ')
1365 {
1366 /* Intelligenetics format */
1367
1368 while ((c = getc(fpi)) == ';')
1369 ptrRet = fgets(line,MAXLINE, fpi);
1370 ungetc(c, fpi);
1371 ptrRet = fgets(line, MAXLINE, fpi);
1372
1373 while ((c= getc(fpi)) != ';' && c != EOF)
1374 if (isalpha(c)){
1375 seqlen++;
1376 }
1377 }
1378 }
1379 else if (line[0] == '>')
1380 {
1381 /* Fasta Format */
1382
1383 while ((c= getc(fpi)) != '>' && c != EOF)
1384 if (isalpha(c))
1385 seqlen++;
1386 }
1387 else
1388 {
1389 /* File in GenBank format */
1390
1391 while (strncmp(line, "LOCUS", 5) != 0)
1392 if (fgets(line, MAXLINE, fpi) == NULL)
1393 exit(1);
1394
1395 while (strncmp(line, "ORIGIN", 6) != 0)
1396 if (fgets(line, MAXLINE, fpi) == NULL)
1397 exit(1);
1398
1399 ptrRet = fgets(line, MAXLINE, fpi);
1400 while (strncmp(line, "//", 2) != 0) {
1401 for (i = 0; line[i] != '\n'; i++)
1402 if (isalpha(line[i])) {
1403 seqlen++;
1404 }
1405 ptrRet = fgets(line, MAXLINE, fpi);
1406 }
1407
1408 }
1409
1410 fseek(fpi,fpi_save_pos,0); /* reposition file pointer */
1411 return seqlen;
1412 }
1413
1414
1415
1416 /* Subroutine that returns the position in the string s where the string t
1417 begins or -1 if s does not contain t */
1418
1419 /* Calls to this function eliminated for efficiency T. Lowe 11/95 */
1420
1421 myindex (char *s, char *t)
1422 {
1423 int i, j, k;
1424 for (i=0; s[i] != '\0'; i++) {
1425 for (j=i, k=0; t[k] != '\0' && s[j] == t[k]; j++, k++)
1426 ;
1427 if (t[k] == '\0')
1428 return(i);
1429 }
1430 return(-1);
1431 }
1432
1433
1434 /* Subroutine looking for the presence of a given signal, returns 1 if a
1435 signal is found and 0 otherwise. It also return the table 'weight'
1436 containing the frequencies of the oberved bases in the windowed
1437 sequence and the number 'ninv' of invariant bases found in the windowed
1438 sequence*/
1439
1440 int readsignal(char *ptr, /* pointer to the sequence */
1441 int (*table_inv)[2], /* table containing the position and nature of the
1442 invariant bases found in the consensus matrix */
1443 int *lsig, /* lsig= length of the signal */
1444 int *ktot, /* ktot= number of invariant bases in the consensus matrix */
1445 float *weight, /* table containing the frequencies of the observed
1446 base at each position of the
1447 windowed sequence tested */
1448 float (*table_cons)[4], /* table corresponding to the consensus matrix */
1449 int *ninv, /* ninv= number of invariant */
1450 /* bases in the windowed sequence */
1451 int threshold_inv) /* Number of invariant bases */
1452 /* allowed not to b e invariant */
1453
1454
1455 {
1456 extern Param_set_type ps;
1457
1458 int k = 1, i=0, j,match1;
1459 int l;
1460 int tab[30];
1461
1462 (*ninv)=0;
1463 for (l=0; l< 30; l++) {
1464 weight[l]=0;
1465 tab[l]=0;
1466 }
1467
1468 /* If the consensus matrix contains some invariant bases, the subroutine
1469 calculates the number of invariant bases that are found in the windowed
1470 sequence (ninv) */
1471
1472 if (*ktot)
1473 {
1474 for (k=1; k <= (*ktot); k++)
1475 {
1476
1477 /* (original code commented out)
1478
1479 temp[0]= *(ptr+table_inv[k][1]);
1480 j = myindex(base,temp); */
1481
1482 switch (*(ptr+table_inv[k][1])) {
1483 case 'a': j=0; break;
1484 case 'c': j=1; break;
1485 case 'g': j=2; break;
1486 case 't': j=3; break;
1487 default: j=-1;
1488 }
1489
1490 /* trap for non-ATGC chars, assume match for ambiguous bases (j= -1)*/
1491
1492
1493 #ifdef NO_AMBIG
1494 if (j == table_inv[k][2])
1495 #else
1496 if ((j == table_inv[k][2]) || (j == -1))
1497 #endif
1498 (*ninv)++;
1499
1500 }
1501 }
1502
1503 /* If the number of invariant bases found in the windowed sequence is < to
1504 the threshold allowed, the windowed sequence is discarded as a potential
1505 signal. */
1506
1507 if ((*ninv) < (*ktot)-threshold_inv)
1508 return(0);
1509
1510 /* If the number of invariant bases is >= to the threshold, the table
1511 'weight' is constructed */
1512
1513 if((*ninv) >= (*ktot)-threshold_inv)
1514 {
1515 match1=1;
1516 while(match1 && (i<*lsig))
1517 {
1518
1519 /* (original code commented out)
1520
1521 temp[0]= *(ptr+i);
1522 tab[i]= myindex(base,temp); */
1523
1524 switch (*(ptr+i)) {
1525 case 'a': tab[i]=0; break;
1526 case 'c': tab[i]=1; break;
1527 case 'g': tab[i]=2; break;
1528 case 't': tab[i]=3; break;
1529 default: tab[i]=-1;
1530 }
1531
1532
1533 /* trap for non-ATGC chars, assume match for ambig bases (tab[i]= -1) */
1534
1535 if (tab[i] == -1)
1536 #ifdef NO_AMBIG
1537 weight[i] = 0;
1538 #else
1539 weight[i] = 1;
1540 #endif
1541 else
1542 weight[i]=table_cons[i][tab[i]];
1543
1544 if((ktot == 0) && (weight[i] == 0))
1545 {
1546 match1=0;
1547 return(0);
1548 }
1549 else
1550 {
1551 i++;
1552 }
1553 }
1554 }
1555 return(1);
1556 }
1557
1558 /* Subroutine that calculates the similarity score on the potential signal
1559 previously retained by the subroutine readsignal. This subroutine
1560 returns 1 it the computed score is greater or equal to the defined
1561 threshold and 0 otherwise. It returns also the value of the computed
1562 score (score) */
1563
1564 int scoring(float *weight, /* table containing the frequencies of the observed base
1565 at each position of the potential signal */
1566 int lsig, /* length of the signal */
1567 float max, /* sum of the maximum frequencies found in */
1568 /* the consensus matrix */
1569 int ktot, /* number of invariant bases found in the */
1570 /* consensus matrix */
1571 float *score, /* value of the computed score on the */
1572 /* potential signal */
1573 float ThresholdValue, /* defined threshold for the */
1574 /* similarity score */
1575 int ninv /* number of invariant bases found */
1576 /* in the potential signal */
1577 )
1578 {
1579
1580 float tot;
1581 int i;
1582
1583 /* Computation of the value of the score on the potential signal */
1584
1585 tot=0;
1586 for(i=0; i< lsig; i++)
1587 tot += weight[i];
1588
1589 tot -= ninv;
1590 max -= ktot;
1591 *score = tot / max;
1592
1593 /* Comparison of the computed score with the defined threshold value */
1594
1595 if (*score >= ThresholdValue)
1596 return(1);
1597 else {
1598 return(0);
1599 }
1600 }
1601
1602 /* Subroutine looking for base-pairings between two parts of the sequence.
1603 It returns the number of base-pairings found (ncomp) */
1604
1605 /* rewritten to improve efficiency & eliminate use of myindex() calls */
1606
1607 void
1608 basepairing(char *ptr, /* pointer to the sequence */
1609 int npair, /* number of base-pairings forming a given arm */
1610 int lpair, /* number of nucleotides found between the first position of the
1611 first part of the sequence involved in the stem and the last
1612 position of the second part of the
1613 sequence involved in the stem */
1614 int *ncomp) /* number of base-pairings observed between the two parts of
1615 the sequence tested */
1616
1617 {
1618 int n; /* loop counter */
1619
1620 *ncomp=0;
1621
1622 #ifdef NO_AMBIG
1623 for(n=0; n<npair; n++)
1624 {
1625 switch (*(ptr+n)) {
1626 case 'a': if (*(ptr+lpair-n) == 't') (*ncomp)++; break;
1627 case 'c': if (*(ptr+lpair-n) == 'g') (*ncomp)++; break;
1628 case 'g': case 'r': if ((*(ptr+lpair-n) == 'c') || (*(ptr+lpair-n) == 't'))
1629 (*ncomp)++; break;
1630 case 't': case 'y': if ((*(ptr+lpair-n) == 'a') || (*(ptr+lpair-n) == 'g'))
1631 (*ncomp)++; break;
1632 }
1633 }
1634 #else
1635 for(n=0; n<npair; n++)
1636 {
1637 if ((*(ptr+lpair-n) == 'n') && (*(ptr+n) != 'n')) {
1638 (*ncomp)++; }
1639 else {
1640 switch (*(ptr+n)) {
1641 case 'a': if (*(ptr+lpair-n) == 't') (*ncomp)++; break;
1642 case 'c': if (*(ptr+lpair-n) == 'g') (*ncomp)++; break;
1643 case 'g': case 'r': if ((*(ptr+lpair-n) == 'c') || (*(ptr+lpair-n) == 't'))
1644 (*ncomp)++; break;
1645 case 't': case 'y': if ((*(ptr+lpair-n) == 'a') || (*(ptr+lpair-n) == 'g'))
1646 (*ncomp)++; break;
1647 case 'n': if (*(ptr+lpair-n) != 'n') (*ncomp)++; break;
1648 }
1649 }
1650 }
1651 #endif
1652
1653 }
1654
1655 /* Subroutine looking for base-pairings between two parts of the sequence.
1656 It returns the number of base-pairings found (ncomp) */
1657
1658 /* replaced by new basepairing() function above */
1659
1660 void
1661 old_basepairing(char *ptr, int npair, int lpair, int *ncomp)
1662
1663 {
1664
1665 char base[5]; /* character string containing the 4 bases A, C, G and T */
1666 char temp1[2], temp2[2]; /* temporary tables */
1667 int n,j1,j2;
1668
1669 /* Table 'pairing' gives the base-pairing scheme. If two nucleotides can
1670 form a base-pairing, the value in the table is 1 otherwise it is 0. */
1671
1672 static int pairing[4][4]= {
1673 {0,0,0,1},
1674 {0,0,1,0},
1675 {0,1,0,1},
1676 {1,0,1,0},
1677 };
1678
1679 base[0]='a', base[1]='c', base[2]='g', base[3]='t', base[4]='\0';
1680 temp1[0]='\0';
1681 temp1[1]='\0';
1682 temp2[0]='\0';
1683 temp2[1]='\0';
1684
1685 *ncomp=0;
1686 for(n=0; n<npair; n++)
1687 {
1688
1689 temp1[0]= *(ptr+n); /* temp1[0] contains the base found at a given
1690 position in the first part of the stem */
1691
1692 temp2[0]= *(ptr+lpair-n); /* temp2[0] contains the base found at the
1693 position of the second part of the stem that
1694 should form the base-pairing with temp1[0] */
1695 j1= myindex(base,temp1); /* Code the base found in temp1[0]. j1 can take the
1696 value 0 (for A), 1 (for C), 2 (for G) and 3 (for T) */
1697
1698 j2= myindex(base,temp2); /* same than j1 for the base in temp2[0] */
1699
1700 /* Test if the bases in temp1[0] and temp2[0] can form a base-parings */
1701
1702 /* Assume non-ATGC base can bp with anything */
1703
1704 #ifdef NO_AMBIG
1705 if (pairing[j1][j2] == 1)
1706 *ncomp= (*ncomp)+1;
1707 #else
1708 if (j1 == -1 || j2 == -1)
1709 *ncomp= (*ncomp)+1;
1710 else if (pairing[j1][j2] == 1)
1711 *ncomp= (*ncomp)+1;
1712 #endif
1713 }
1714 }
1715
1716
1717
1718 /* Subroutine that complements the sequence. It returns the complementary
1719 sequence to the main */
1720
1721 /* Modifed to dynamically allocate space for reverse complement & then */
1722 /* release memory occupied by original sequence */
1723 /* Sets *sequence pointer to new reverse complement sequence */
1724
1725 void
1726 compstrand(char **sequence, /* pointer to the sequence string */
1727 long int seqlen /* sequence length */
1728 )
1729 {
1730
1731 char *ptr, *revseq;
1732 long int pos;
1733
1734
1735 revseq= (char *) calloc (seqlen+1,sizeof(char));
1736
1737 if (revseq == NULL)
1738 {
1739 fprintf(stderr,"tRNAscan1.4: Can't malloc for complement sequence\n");
1740 exit(-1);
1741 }
1742
1743
1744 /* Complementation of the sequence */
1745
1746 for (pos=0, ptr= &((*sequence)[seqlen-1]); pos<seqlen; pos++, ptr--)
1747 {
1748 switch (*ptr)
1749 {
1750 case 'a':
1751 revseq[pos]='t';
1752 break;
1753 case 'c':
1754 revseq[pos]='g';
1755 break;
1756 case 'g':
1757 revseq[pos]='c';
1758 break;
1759 case 't':
1760 revseq[pos]='a';
1761 break;
1762 default:
1763 revseq[pos]='n';
1764 break;
1765 }
1766 }
1767
1768 revseq[seqlen] = '\0';
1769
1770 free(*sequence); /* release mem from original seq */
1771 *sequence = revseq;
1772
1773 }
1774
1775
1776 /* Subroutine that codes the anticodon signal sequence by a number
1777 comprised between 1 and 65. It returns this number. */
1778
1779 /* Modified to eliminate call to myindex() */
1780
1781 void
1782 codage(char *anticodon, /* anticodon signal sequence */
1783 int length1, /* length of the anticodon signal, lenght1=3 */
1784 int *num /* number associated to the anticodon signal sequence */
1785 )
1786
1787 {
1788 int j=0,i,iba,match=0;
1789 iba=1;
1790 *num=1;
1791
1792 /* Codage of the anticodon signal sequence. The codage is done by
1793 alphabetical order, i. e., AAA is coded by 1, AAC is coded by 2
1794 and so on until TTT that is coded by 64. If a base is not determined
1795 in the anticodon signal sequence, the anticodon is coded by 65. */
1796
1797 for (i=length1; i>=1;i--)
1798 {
1799
1800 switch (anticodon[i-1]) {
1801 case 'a': j=0; break;
1802 case 'c': j=1; break;
1803 case 'g': j=2; break;
1804 case 't': j=3; break;
1805 default: j=-1;
1806 }
1807
1808 if (j == -1)
1809 {
1810 match=1;
1811 i=1;
1812 }
1813 *num=(*num)+j*iba;
1814 iba=4*iba;
1815 }
1816
1817 if (match)
1818 *num=65;
1819
1820 }
1821
1822
1823 /* Subroutine that determines the tRNA gene family */
1824
1825 void
1826 corresaa(int num, /* Number coding the anticodon signal sequence */
1827 char *type_trna /* tRNA gene family */
1828 )
1829
1830 {
1831
1832 /*The table 'amino_acid' gives the correspondence between the number
1833 (num) coding the anticodon signal and the amino acid that is added
1834 to the protein by the tRNA. For example, if the anticodon is AAA then
1835 num=1 and the table will associate the amino acid Phe to that anticodon.
1836 The tRNA gene in that case will be a Phe tRNA gene. */
1837
1838 static char *amino_acid[]= {"Phe","Val","Leu","Ile","Cys","Trp","Arg","Ser",
1839 "Ser","Ala","Pro","Thr","Tyr","Asp","His","Asn",
1840 "Leu","Val","Leu","Met","Trp","Gly","Arg","Arg",
1841 "Ser","Ala","Pro","Thr","Sup","Glu","Gln","Lys",
1842 "Phe","Val","Leu","Ile","Cys","Gly","Arg","Ser",
1843 "Ser","Ala","Pro","Thr","Tyr","Asp","His","Asn",
1844 "Leu","Val","Leu","Ile","Sup","Gly","Arg","Arg",
1845 "Ser","Ala","Pro","Thr","Sup","Glu","Gln","Lys",
1846 "Ind"};
1847
1848 strncpy(type_trna,amino_acid[num-1],3);
1849 }
1850
1851 /* Subroutine that prints the results of the search */
1852
1853 void
1854 printresult(FILE *fpo, /* output file pointer */
1855 FILE *fpverb, /* character string for the name of the sequence */
1856 char *name,
1857 long int pos1, /* first position of the D signal */
1858 long int pos, /* first position of the T-Psi-C signal */
1859 int lpair, /* number of nucleotides between the */
1860 /* first position of the D arm and the last one */
1861 int lpair1, /* number of nucleotides between the first position of the
1862 aminoacyl arm and the last one */
1863 int lpair2, /* number of nucleotides between the first position of the
1864 anticodon arm and the last one */
1865 int nloop, /* nloop=0, scanning of the direct strand; nloop=1 scanning of
1866 the complementary strand */
1867 int *ntrna, /* number of tRNA genes predicted in the sequence */
1868 char *chaine2, /* character string for the predicted tRNA gene sequence */
1869 char *sequence, /* character string containing the sequence tested */
1870 long int length, /* length of the sequence */
1871 int *match, /* match=1 if at least one tRNA gene has been found on the
1872 direct strand and 0 otherwise */
1873 int ncomp, /* number of base-pairings in the anticodon arm of the
1874 predicted tRNA gene */
1875 char *type_trna, /* character string for the tRNA gene family */
1876 char *anticodon, /* character string for the anticodon signal sequence */
1877 long int sqoffset /* offset nucleotide numbering by this much (set with -i param) */
1878 )
1879
1880 {
1881 long int pos2; /* first position of the predicted tRNA gene when it
1882 is found on the complementary strand */
1883 long int posstart; /* first position of the intron */
1884 long int posend; /* last position of the intron */
1885
1886
1887 /* If lpair2 > 16, treatment of the tRNA gene with one intron */
1888
1889 /* Results for the tRNA genes predicted on the direct strand */
1890
1891 #ifdef VERBOSE
1892 fprintf(fpverb,"*** tRNA found\n\n");
1893 #endif
1894
1895
1896 if ((nloop) == 0)
1897 {
1898 if((*ntrna) == 1)
1899 fprintf(fpo,"sequence name= %s\n", name);
1900
1901 fprintf(fpo,"start position= %ld end position= %ld\n",pos1-7+sqoffset,pos1-6+lpair1+sqoffset);
1902 fprintf(fpo,"potential tRNA sequence= %s\n",chaine2);
1903 fprintf(fpo,"D signal= %ld %ld TpsyC signal= %ld %ld\n", pos1+sqoffset,pos1+7+sqoffset, pos+sqoffset,
1904 pos+14+sqoffset);
1905 fprintf(fpo,"amino-acyl stem= %ld-%ld;%ld-%ld\n",pos1-7+sqoffset,pos1-1+sqoffset,pos1-13+lpair1+sqoffset,
1906 pos1-7+lpair1+sqoffset);
1907 fprintf(fpo,"D stem= %ld-%ld;%ld-%ld\n",pos1+2+sqoffset,pos1+4+sqoffset,pos1+lpair+sqoffset,
1908 pos1+lpair+2+sqoffset);
1909
1910 if(lpair2 > 16)
1911 {
1912 fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",pos1+lpair+4+sqoffset,pos1+lpair+8+sqoffset,
1913 pos1+lpair+lpair2+sqoffset,pos1+lpair+lpair2+4+sqoffset);
1914 }
1915 else
1916 {
1917 fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",pos1+lpair+4+sqoffset,pos1+lpair+8+sqoffset,
1918 pos1+lpair+16+sqoffset,pos1+lpair+20+sqoffset);
1919 }
1920
1921 fprintf(fpo,"TpsyC stem= %ld-%ld;%ld-%ld\n",pos+1+sqoffset,pos+5+sqoffset,pos+13+sqoffset,pos+17+sqoffset);
1922 if (strcmp(type_trna,"Ind") != 0)
1923 {
1924 fprintf(fpo,"tRNA predict as a tRNA- %s : anticodon %s\n", type_trna,
1925 anticodon);
1926 }
1927 else
1928 {
1929 fprintf(fpo,"anticodon includes unknown bases\n");
1930 }
1931
1932 if (lpair2 > 16)
1933 {
1934 posstart=pos1+lpair+15;
1935 posend= pos1+lpair+lpair2-2;
1936 fprintf(fpo,"potential intron between positions %ld %ld\n",posstart+sqoffset,
1937 posend+sqoffset);
1938 }
1939 fprintf(fpo,"number of base pairing in the anticodon stem= %d\n",ncomp);
1940 fprintf(fpo,"\n");
1941 }
1942 else
1943 {
1944
1945 /* Results on the complementary strand */
1946
1947 /* If no tRNA gene is predicted on the direct strand, then for the first
1948 tRNA gene predicted on the complementary strand the two following lines
1949 are printed in the output file */
1950
1951 if( (!(*match)) && ((*ntrna) == 1))
1952 {
1953 fprintf(fpo,"sequence name= %s\n", name);
1954 fprintf(fpo,"complementary strand\n");
1955 }
1956
1957 /* If tRNA genes are predicted on the direct strand, then for the first
1958 tRNA gene predicted on the complemetary strand the following line is
1959 printed in the output file */
1960
1961 else if ((*ntrna) == 1)
1962 {
1963 fprintf(fpo,"complementary strand\n");
1964 }
1965
1966 pos2= length-pos1+8;
1967 fprintf(fpo,"start position= %ld end position= %ld\n",pos2+sqoffset,pos2-lpair1-1+sqoffset);
1968 fprintf(fpo,"potential tRNA sequence= %s\n",chaine2);
1969
1970 fprintf(fpo,"D signal= %ld %ld TpsyC signal= %ld %ld\n",length-pos1+1+sqoffset,
1971 length-pos1-6+sqoffset,length-pos+1+sqoffset,length-pos-13+sqoffset);
1972 fprintf(fpo,"amino-acyl stem= %ld-%ld;%ld-%ld\n",pos2+sqoffset,pos2-6+sqoffset, pos2-lpair1+6+sqoffset,
1973 pos2-lpair1+sqoffset);
1974 fprintf(fpo,"D stem= %ld-%ld;%ld-%ld\n",length-pos1-1+sqoffset,length-pos1-3+sqoffset,
1975 length-pos1-lpair+1+sqoffset,length-pos1-lpair-1+sqoffset);
1976
1977 if (lpair2 > 16)
1978 {
1979 posstart=pos1+lpair+15;
1980 posend=pos1+lpair+lpair2-2;
1981 fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",length-pos1-lpair-3+sqoffset,
1982 length-pos1-lpair-7+sqoffset,length-posend-1+sqoffset,length-posend-5+sqoffset);
1983 }
1984 else
1985 {
1986 fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",length-pos1-lpair-3+sqoffset,
1987 length-pos1-lpair-7+sqoffset,length-pos1-lpair-lpair2+1+sqoffset,
1988 length-pos1-lpair-lpair2-3+sqoffset);
1989 }
1990
1991 fprintf(fpo,"TpsyC stem= %ld-%ld;%ld-%ld\n",length-pos+sqoffset,length-pos-4+sqoffset,
1992 length-pos-12+sqoffset,length-pos-16+sqoffset);
1993
1994 if (strcmp(type_trna,"Ind") != 0)
1995 {
1996 fprintf(fpo,"tRNA predict as a tRNA- %s : anticodon %s\n", type_trna,
1997 anticodon);
1998 }
1999 else
2000 {
2001 fprintf(fpo,"anticodon includes unknown bases\n");
2002 }
2003
2004 if (lpair2 > 16)
2005 {
2006 posstart=pos1+lpair+15;
2007 posend=pos1+lpair+lpair2-2;
2008 fprintf(fpo,"potential intron between positions %ld %ld\n",
2009 length-posstart+1+sqoffset ,length-posend+1+sqoffset);
2010 }
2011
2012 fprintf(fpo,"number of base pairing in the anticodon stem=%d\n",ncomp);
2013 fprintf(fpo,"\n");
2014 }
2015 }
2016
2017
2018 void
2019 set_search_params (Param_set_type *ps,
2020 int params)
2021 {
2022
2023 if (params == 1) {
2024 ps->tpc_sig_thresh = ST_TPC_SIG_THRESH;
2025 ps->d_sig_thresh = ST_D_SIG_THRESH;
2026 ps->sg_cutoff = ST_SG_CUTOFF;
2027 ps->tpc_inv = ST_TPC_INV;
2028 ps->tpc_incsg = ST_TPC_INCSG;
2029 ps->tpc_keep = ST_TPC_KEEP;
2030 ps->d_inv = ST_D_INV;
2031 ps->look_for_acloop_sg = ST_LOOK_FOR_ACLOOP_SG;
2032 ps->acloop_min = ST_ACLOOP_MIN;
2033 ps->aa_incsg = ST_AA_INCSG;
2034 ps->aa_keep = ST_AA_KEEP;
2035 }
2036 else if (params == 2) {
2037 ps->tpc_sig_thresh = RX_TPC_SIG_THRESH;
2038 ps->d_sig_thresh = RX_D_SIG_THRESH;
2039 ps->sg_cutoff = RX_SG_CUTOFF;
2040 ps->tpc_inv = RX_TPC_INV;
2041 ps->tpc_incsg = RX_TPC_INCSG;
2042 ps->tpc_keep = RX_TPC_KEEP;
2043 ps->d_inv = RX_D_INV;
2044 ps->look_for_acloop_sg = RX_LOOK_FOR_ACLOOP_SG;
2045 ps->acloop_min = RX_ACLOOP_MIN;
2046 ps->aa_incsg = RX_AA_INCSG;
2047 ps->aa_keep = RX_AA_KEEP;
2048 }
2049 else if (params == 3) {
2050 ps->tpc_sig_thresh = ALT_TPC_SIG_THRESH;
2051 ps->d_sig_thresh = ALT_D_SIG_THRESH;
2052 ps->sg_cutoff = ALT_SG_CUTOFF;
2053 ps->tpc_inv = ALT_TPC_INV;
2054 ps->tpc_incsg = ALT_TPC_INCSG;
2055 ps->tpc_keep = ALT_TPC_KEEP;
2056 ps->d_inv = ALT_D_INV;
2057 ps->look_for_acloop_sg = ALT_LOOK_FOR_ACLOOP_SG;
2058 ps->acloop_min = ALT_ACLOOP_MIN;
2059 ps->aa_incsg = ALT_AA_INCSG;
2060 ps->aa_keep = ALT_AA_KEEP;
2061 }
2062 else {
2063 fprintf (stderr,"tRNAscan1.4: FATAL: Unable to select search parameter set.\n");
2064 exit(1);
2065 }
2066
2067 }
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080