1 /*
2  * Copyright, 1991, The Regents of the University of California.
3  * This software was produced by the Los Alamos National Laboratory,
4  * which is operated by the University of California for the United
5  * States Department of Energy under contract W-7405-ENG-36.  The
6  * U. S. Government is licensed to use, reproduce, and distribute
7  * this software.  Permission is granted to the public to copy and
8  * use this software without charge, provided that this Notice and
9  * any statement of authorship are reproduced on all copies.
10  * Neither the Government nor the University makes any warranty,
11  * express or implied, or assumes any liability or responsibility
12  * for the use of this software.
13 */
14 
15 #define PROGRAM  "trnascan"
16 #define RELEASE "1.4 (Feb 96)"
17 #define DERIV_VERSION  "1.3 (Oct 91)"
18 #define BRIEF    "'Identification of tRNA genes in genomic DNA'"
19 #define CITATION "Fichant and Burks, J. Mol. Biol. (1991) 220:659-671."
20 #define MODIF "(modified & optimized for use in tRNAscan-SE package by T. Lowe  2/96)"
21 
22 /* Modified by T. Lowe  11/95  */
23 /* Changes:  1) Search parameters named in #define constants
24              2) Print statements added to help trace progress
25 	         of search - VERBOSE constant must be defined
26 		Trace statements sent to "tscan.verb.out"
27 	     3) Bug that caused program to crash on any non-ACGT
28 	        sequence characters fixed
29 	     4) fgetseq() modified to correctly read in fasta sequences
30              5) compstrand() modified to increase efficiency &
31 	        getseqsize() added to allow input sequences of any
32                 length (memory allowing)
33 	     6) Numerous calls to strlen(sequence) eliminated for
34 	        efficiency
35 	     7) Calls to myindex() function eliminated - replaced
36 	        by in-line switch statements for efficiency
37 	     8) basepairings() function rewritten for efficiency
38 	     9) program ANSI-fied to allow compilation with gcc
39             10) Fixed bug: indexing out of 'ntab' array bounds
40 	        causing unpredictable side-effects
41 
42 
43     **  Modifications result in over 200-fold speed increase **
44 
45     -T. Lowe  9/2000
46      added "-i" option to allow alternate start of sequence nuc numbering
47 
48 */
49 
50 /* #define NO_AMBIG       -use this option to eliminate conservative
51  *                         calling of 'N's as base pairing matches
52  *                         in tRNAs (this gives more false positives)
53  */
54 
55 static char banner[] = "trnascan: scan a sequence for tRNAs";
56 
57 char usage[] = "\n\
58 Usage: trnascan [-options] <seqfile>\n\
59 where supported options are:\n\
60 -s             : use original tRNAscan 1.3 parameters (default)\n\
61 -r             : use relaxed search parameters (used with tRNAscan-SE)\n\
62 -a             : use alternate (user-set) search parameters\n\
63 -c             : suppress credits\n\
64 -o <outfile>   : write results to <outfile>\n\
65 -i <integer>   : start sequence numbering at <integer> (def=1)\n\
66 -h             : print (this) short help message\n\n";
67 
68 #include <string.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <ctype.h>
72 
73 #ifdef MEMDEBUG
74 #include "dbmalloc.h"
75 #endif
76 
77 #define TRUE 1
78 #define FALSE 0
79 
80 #ifndef TSCANDIR
81 #define TSCANDIR "/usr/local/lib/trnascan"
82 #endif
83 
84 
85 #define MAXLINE 1000    /* max input seq line length */
86 #define BUF_SIZE 100   /* extra room added onto allocated sequence */
87 		       /* over that determined by getseqsize()  */
88 
89 #define MAX(x,y) (((x) > (y)) ? (x) : (y))
90 
91 /* #define VERBOSE   */
92 
93                                   /* Original version 1.3 default parameters */
94 #define ST_SG_CUTOFF 5            /* general score (SG) cutoff */
95 #define ST_TPC_SIG_THRESH 0.40    /* TPC signal sequence matrix score */
96 			       /*   cutoff */
97 #define ST_D_SIG_THRESH 0.40      /* D signal sequence matrix score cutoff */
98 #define ST_TPC_INV 2        /* Number of TPC matrix invariant bases */
99 			       /*   allowed NOT to be invariant */
100 #define ST_TPC_INCSG 5         /* Number of base pairs required in TPC */
101 			       /*   stem to increment general score */
102 #define ST_TPC_KEEP 4          /* Number of base pairs required in TPC */
103 			       /*   stem to keep trna as a candidate */
104 #define ST_D_INV 1          /* Number of D matrix invariant bases */
105 			       /*   allowed NOT to be invariant */
106 #define ST_LOOK_FOR_ACLOOP_SG 4   /* Minimum SG required to begin looking */
107 			       /*   for anticodon loop */
108 #define ST_ACLOOP_MIN 4            /* Minimum base pairs required in */
109 			       /*   anticodon loop */
110 #define ST_AA_INCSG 7          /* Number of base pairs in Amino acyl */
111 			       /*   stem needed to increment SG */
112 #define ST_AA_KEEP 6           /* Number of base pairs in Amino acyl */
113 			       /*   stem needed to keep tRNA candidate */
114 
115                         /* "Relaxed" parameters - used by default with */
116 			/* tRNAscan-SE program.  Makes tRNAscan into */
117 			/* rough pre-filter for Covariance tRNA prediction */
118 			/* program by S. Eddy  */
119 #define RX_SG_CUTOFF 5
120 #define RX_TPC_SIG_THRESH 0.40
121 #define RX_D_SIG_THRESH 0.30
122 #define RX_TPC_INV 2
123 #define RX_TPC_INCSG 4
124 #define RX_TPC_KEEP 2
125 #define RX_D_INV 2
126 #define RX_LOOK_FOR_ACLOOP_SG 3
127 #define RX_ACLOOP_MIN 3
128 #define RX_AA_INCSG 5
129 #define RX_AA_KEEP 4
130 
131                          /* "Alternate" params - for experimenting */
132 			 /*    with other param values */
133 #define ALT_SG_CUTOFF 4
134 #define ALT_TPC_SIG_THRESH 0.40
135 #define ALT_D_SIG_THRESH 0.30
136 #define ALT_TPC_INV 2
137 #define ALT_TPC_INCSG 4
138 #define ALT_TPC_KEEP 2
139 #define ALT_D_INV 2
140 #define ALT_LOOK_FOR_ACLOOP_SG 3
141 #define ALT_ACLOOP_MIN 3
142 #define ALT_AA_INCSG 6
143 #define ALT_AA_KEEP 4
144 
145 #define MIN_VAR_LOOP 28     /* Minimum variable loop size, assumes min */
146 			    /*   intron length = 8bp */
147 #define MAX_INTRON_LEN 60   /* Maximum allowable intron length */
148 #define MIN_SEQ_LEN 70      /* Minimum length of sequence that will be */
149 			    /*  for tRNAs */
150 
151 #define STRICT_PARAMS 1;
152 #define RELAXED_PARAMS 2;
153 #define ALT_PARAMS 3;
154 
155 
156 typedef struct pset {
157   float tpc_sig_thresh, d_sig_thresh;
158   int sg_cutoff,
159   tpc_inv, tpc_incsg, tpc_keep,
160   d_inv,
161   look_for_acloop_sg, acloop_min,
162   aa_incsg, aa_keep;
163 } Param_set_type;
164 
165 Param_set_type ps;
166 
167 
168 void
169 set_search_params (Param_set_type *ps,
170 		   int params);
171 
172 /* Subroutine that accomplishes the end of the test for the presence of a
173    tRNA gene */
174 
175 void
176 following_search(long int pos,    /* first position of the found T-Psi-C signal */
177 		 long int pos1,   /* first position of the found D signal */
178 		 char *ptr1,      /* pointer to the first position of the D signal */
179 		 char *ptr3,      /* ptr3=ptr1+2  pointer to first position of the D arm  */
180 		 int lpair,
181 		 int nloop,       /*nloop=0 test of the sequence, */
182 				  /*nloop=1 test of the complementary sequence */
183 		 char *sequence,
184 		 long int seqlen,
185 		 FILE *fpo,       /*pointer to the output file */
186 		 FILE *fpverb,
187 		 char *name,
188 		 int score,       /* Value of the general score SG */
189 		 int match2,     /* integer testing the presence of D */
190 				 /* arm with 3 base-pairings */
191 		 int *ntrna,
192 		 int *npred,
193 		 int *match,
194 		 long int sqoffset   /* offset nucleotide numbering by this much (set with -i param) */
195 		 );
196 
197 
198 /* Subroutine to read the consensus matrix */
199 
200 void
201 lectval(FILE *fp,                  /* pointer to the consensus matrix file */
202 	float (*table_cons)[4],    /* table containing the frequency of each base
203 				      at each position of the signal */
204 	int (*table_inv)[2],  /* table containing the position and the nature of
205 				 the invariant bases found in the signal. Code for
206 				 the bases: A=0, C=1, G=2 and T=3. */
207 	int *lsig,            /*lsig=length of the signal */
208 	int *ktot,            /* ktot= number of invariant bases */
209 	float *maxtot         /* maxtot= sum of the maximum frequencies */
210 	);
211 
212 /* Subroutine reading the sequence */
213 /* Modified to correctly read FASTA sequence files */
214 
215 int fgetseq(char *name,           /* string w/name of the sequence */
216 	    char *sequence,       /* character string containing the sequence */
217 	    long int *seqlen,     /* length of sequence */
218 	    FILE *fpi);            /* input file pointer */
219 
220 
221 /* Subroutine reading & returning the sequence length */
222 
223 int getseqsize(FILE *fpi);       /* input file pointer */
224 
225 
226 /* Subroutine looking for the presence of a given signal, returns 1 if a
227    signal is found and 0 otherwise. It also return the table 'weight'
228    containing the frequencies of the oberved bases in the windowed
229    sequence and the number 'ninv' of invariant bases found in the windowed
230    sequence*/
231 
232 int readsignal(char *ptr,      /* pointer to the sequence */
233 	       int (*table_inv)[2],  /* table containing the position and nature of the
234 					invariant bases found in the consensus matrix */
235 	       int *lsig,   /* lsig= length of the signal */
236 	       int *ktot,   /* ktot= number of invariant bases in the consensus matrix */
237 	       float *weight,       /* table containing the frequencies of the observed
238 				       base at each position of the
239 				       windowed sequence tested */
240 	       float (*table_cons)[4],  /* table corresponding to the consensus matrix */
241 	       int *ninv,               /* ninv= number of invariant */
242 					/* bases in the windowed  sequence */
243 	       int threshold_inv);       /* Number of invariant bases */
244 					/* allowed not to b e invariant */
245 
246 
247 /* Subroutine that calculates the similarity score on the potential signal
248    previously retained by the subroutine readsignal. This subroutine
249    returns 1 it the computed score is greater or equal to the defined
250    threshold and 0 otherwise. It returns also the value of the computed
251    score (score) */
252 
253 int scoring(float *weight, /* table containing the frequencies of the observed base
254 			      at each position of the potential signal */
255 	    int lsig,      /* length of the signal */
256 	    float max,     /* sum of the maximum frequencies found in */
257 			   /* the consensus matrix */
258 	    int ktot,      /* number of invariant bases found in the consensus matrix */
259 	    float *score,  /* value of the computed score on the potential signal */
260 	    float ThresholdValue,   /* defined threshold for the similarity score */
261 	    int ninv   /* number of invariant bases found in the potential signal */
262 	    );
263 
264 /* Subroutine looking for base-pairings between two parts of the sequence.
265    It returns the number of base-pairings found (ncomp) */
266 
267 void
268 basepairing(char *ptr,   /* pointer to the sequence */
269 	    int npair,   /* number of base-pairings forming a given arm */
270 	    int lpair,   /* number of nucleotides found between the first position of the
271 			    first part of the sequence involved in the stem and the last
272 			    position of the second part of the
273 			    sequence involved in the stem */
274 	    int *ncomp   /* number of base-pairings observed between the two parts of
275 			    the sequence tested */
276 	    );
277 
278 
279 /* Subroutine that complements the sequence. It returns the complementary
280    sequence to the main */
281 
282 void
283 compstrand(char **sequence,   /* pointer to the sequence string */
284 	   long int seqlen    /* sequence length   */
285 	   );
286 
287 
288 /* Subroutine that codes the anticodon signal sequence by a number
289    comprised between 1 and 65. It returns this number. */
290 
291 void
292 codage(char *anticodon,    /* anticodon signal sequence */
293        int length1,        /* length of the anticodon signal, lenght1=3 */
294        int *num            /* number associated to the anticodon signal sequence */
295        );
296 
297 
298 /* Subroutine that determines the tRNA gene family */
299 
300 void
301 corresaa(int num,           /* Number coding the anticodon signal sequence */
302 	 char *type_trna    /* tRNA gene family */
303 	 );
304 
305 
306 /* Subroutine that prints the results of the search */
307 
308 void
309 printresult(FILE *fpo,     /* output file pointer */
310 	    FILE *fpverb,   /* character string for the name of the sequence */
311 	    char *name,
312 	    long int pos1,    /* first position of the D signal */
313 	    long int pos,     /* first position of the T-Psi-C signal */
314 	    int lpair,        /* number of nucleotides between the */
315 			      /* first position  of the D arm and the last one */
316 	    int lpair1,     /* number of nucleotides between the first position of the
317 			       aminoacyl arm and the last one */
318 	    int lpair2,     /* number of nucleotides between the first position of the
319 			       anticodon arm and the last one */
320 	    int nloop,      /* nloop=0, scanning of the direct strand; nloop=1 scanning of
321 			       the complementary strand */
322 	    int *ntrna,     /* number of tRNA genes predicted in the sequence */
323 	    char *chaine2,  /* character string for the predicted tRNA gene sequence */
324 	    char *sequence,    /* character string containing the sequence tested */
325 	    long int length,   /* length of the sequence */
326 	    int *match,      /* match=1 if at least one tRNA gene has been found on the
327 				direct strand and 0 otherwise */
328 	    int ncomp,       /* number of base-pairings in the anticodon arm of the
329 				predicted tRNA gene */
330 	    char *type_trna,     /* character string for the tRNA gene family */
331 	    char *anticodon,      /* character string for the anticodon signal sequence */
332 	    long int sqoffset   /* offset nucleotide numbering by this much (set with -i param) */
333 	    );
334 
main(int argc,char ** argv)335 main(int argc, char **argv)
336 {
337   /* pointers to the different files fpi=input file, fpo=output file,
338    fpcons1= T-Psi-C matrix file, fpcons2= D matrix file */
339 
340   extern Param_set_type ps;   /* search parameters */
341 
342   FILE *fpi,*fpo,*fpcons1,*fpcons2, *fpverb;
343 
344   /* lsig1= length of the T-Psi-C signal, lsig2= length of the D signal
345      ktot1= number of invariant bases in the T-Psi-C signal
346      ktot2= number of invariant bases in the D signal */
347 
348   int lsig1=0, lsig2=0, ktot1=0, ktot2=0;
349 
350   long int pos,pos1, begin; /* start position of the T-Psi-C signal (pos)
351 			       and D signal (pos1) and of the search for the
352 			       D signal (begin) */
353   int npair=0, lpair=0; /* variables used to test the presence of a stem,
354 			   see definition further in the program */
355   float table_cons1[30][4]; /* table containing the frequency of each base
356 			       at each position of the T-Psi-C signal */
357   float table_cons2[30][4]; /* table containing the frequency of each base
358 			       at each position of the D signal */
359   float maxtot1=0, maxtot2=0; /* sum of the maximum frequencies found in
360 				 the T-Psi-C matrix (maxtot1) and in the D matrix
361 				 (maxtot2) */
362   float weight1[30]; /* table containing the frequency of the observed base
363 			at each position of the windowed sequence tested
364 			(T-Psi-C matrix) */
365   float weight2[30]; /* table containing the frequency of the observed base
366 			at each position of the windowed sequence tested
367 			(D matrix) */
368   int table_inv1[30][2]; /* table containing the position and the nature of
369 			    the invariant bases found in the T-Psi-C matrix */
370   int table_inv2[30][2]; /* table containing the position and the nature of
371 			    the invariant bases found in the D matrix */
372   int ntab[5][2]; /* table containing for each potential D arm found in the
373 		     windowed sequence, the number of base-pairings and the
374 		     number of nucleotides that separates the first position
375 		     of the D arm and the last one */
376   char *sequence; /* character string containing the sequence */
377   long int seqlen; /* keeps the length of the current sequence */
378   long int sqoffset=0;  /* start numbering nucleotides by this offset (def=0) */
379   char  *ptr,*ptr1,*ptr3, *ptrstart; /* pointers to the sequence, their
380 					definitions are given at the place
381 					they are used in the program */
382   char name[80]; /* character string containing the name of the sequence */
383   char tscan_dir[120];  /* holds name of directory of consensus files */
384 
385   int ntrna; /* number of tRNA genes predicted in the sequence and its
386 		complement */
387   int npred=0; /* Total number of tRNA genes predicted in the sequences of
388 		  the input file */
389   int nseq=0; /* Number of sequences tested (number of sequences of the
390 		 input file) */
391   long int lseq=0; /* Number of nucleotides tested */
392   int score; /* variable corresponding to the general score SG */
393   int ninv=0; /* number of invariant bases oberved in the windowed sequence
394 		 for each signal */
395   int ncomp=0; /* number of base-pairings observed in the sequence tested
396 		  for the presence of one of the four arms */
397   int threshold_inv; /* number of invariant bases that are allowed not to
398 			be invariant */
399   int match; /* if at least one tRNA gene is predicted of the direct strand,
400 		match=1, if no tRNA gene is found on the direct strand, match=0 */
401   int first_score=0,match2,h, nloop;
402 
403   /* ThresholdValue1= threshold value of the similarity score used to retain
404      a T-Psi-C signal
405      ThresholdValue2= threshold value of the similarity score used to retain
406      a D signal
407      Changing the threshold value of the similarity score, MODIFY '0.4' in
408      the following line. */
409 
410   float ThresholdValue1, ThresholdValue2;
411   float score1=0, score2=0;
412   int i,j;
413 
414   int params;
415   int suppress_credits;      /* flag for display of credits */
416   char *seqfile,   /* name of input sequence file  */
417     *outfile,   /* name of output file (if not sent to stdout) */
418     *verbfile;  /* name of file for verbose output */
419 
420   int          optc;
421   extern char *optarg;          /* for getopt() */
422   extern int   optind;		/* for getopt() */
423 
424 /***********************************************
425  * Parse command line
426  ***********************************************/
427 
428   suppress_credits = FALSE;
429   params        = STRICT_PARAMS;
430   outfile          = NULL;
431   verbfile         = NULL;
432 
433 while ((optc = getopt(argc, argv, "csraho:v:i:")) != -1)
434   switch (optc) {
435 
436   case 'c': suppress_credits = TRUE; break;
437   case 's': params = STRICT_PARAMS;  break;
438   case 'r': params = RELAXED_PARAMS;  break;
439   case 'a': params = ALT_PARAMS;  break;
440   case 'o': outfile   = optarg;  break;
441   case 'v': verbfile  = optarg;  break;
442   case 'i': sqoffset = atoi(optarg)-1; break;
443 
444   case 'h':
445     printf("%s\n  version %s\n%s\n", banner, RELEASE, usage);
446     exit(0);
447   default:
448     fprintf(stderr,"unrecognized option %c\n", optc);
449     exit(1);
450   }
451 
452 if (argc - optind != 1) {
453   fprintf(stderr,"%s\n", usage);
454   exit(1);
455 }
456 
457 seqfile = argv[argc-1];
458 
459 
460 /* open the sequence file */
461 
462 if ((fpi=fopen(seqfile,"r")) == NULL)
463   {
464   fprintf(stderr,"tRNAscan1.4: FATAL:  Cannot open the input sequence file %s\n",seqfile);
465   exit(1);
466   }
467 
468 /* open the output file */
469 
470 if (outfile == NULL) {
471   fpo = stdout;
472 }
473 else if ((fpo=fopen(outfile,"w")) == NULL) {
474   fprintf(stderr,"tRNAscan1.4: FATAL: Cannot open the output file %s\n",outfile);
475   exit(1);
476 }
477 
478 /* open Verbose output file */
479 if (verbfile != NULL)
480   if ((fpverb=fopen(verbfile,"w")) == NULL) {
481     fprintf(stderr,"tRNAscan1.4: Cannot open verbose output file %s\n",verbfile);
482     exit(1);
483 }
484 
485 /* open the consensus matrix file for the T-Psi-C signal */
486 
487 strcpy(tscan_dir,TSCANDIR);
488 
489 if (((fpcons1=fopen("TPCsignal","r")) == NULL) &&
490     ((fpcons1=fopen(strcat(tscan_dir,"/TPCsignal"),"r")) == NULL))
491   {
492   fprintf(stderr,"tRNAscan1.4: main cannot open TPCsignal consensus file\n");
493   exit(1);
494   }
495 
496 /* open the consensus matrix file for the D signal */
497 
498 strcpy(tscan_dir,TSCANDIR);
499 
500 if (((fpcons2=fopen("Dsignal","r")) == NULL) &&
501     ((fpcons2=fopen(strcat(tscan_dir,"/Dsignal"),"r")) == NULL))
502   {
503   fprintf(stderr,"tRNAscan1.4: main cannot open Dsignal consensus file\n");
504   exit(1);
505   }
506 
507 
508 /* Set search parameters */
509 
510 set_search_params(&ps,params);
511 
512 ThresholdValue1 = ps.tpc_sig_thresh;
513 ThresholdValue2 = ps.d_sig_thresh;
514 
515 /* Credits   */
516 
517 if (!suppress_credits) {
518   printf("\n %s %s -- derived from version %s\n", PROGRAM, RELEASE,
519 	 DERIV_VERSION);
520   printf("\n Please cite: %s\n %s\n", BRIEF,CITATION);
521   printf(" %s\n\n",MODIF);
522 }
523 
524 /* reading the two consensus file */
525 
526 lectval(fpcons1,table_cons1,table_inv1,&lsig1,&ktot1,&maxtot1);
527 lectval(fpcons2,table_cons2,table_inv2,&lsig2,&ktot2,&maxtot2);
528 
529 
530 /* main loop for reading & analyzing one sequence at a time */
531 
532 while (!feof(fpi)) {
533 
534 /* find out sequence size before allocating memory & reading in */
535 
536   seqlen = getseqsize(fpi);
537   sequence= (char *) calloc (seqlen+BUF_SIZE,sizeof(char));
538   if (sequence == NULL ) {
539     fprintf(stderr,"tRNAscan1.4: Can't malloc for sequence\n");
540     exit(-1);
541   }
542 
543 /* reading the name of the sequence and the sequence from the input file */
544 
545   if (fgetseq(name,sequence,&seqlen,fpi) == 0)
546     break;
547 
548   nseq++;
549   lseq += seqlen;
550 
551   match=0;
552   for (nloop=0; nloop <=1; nloop++)
553     {
554 /* if the sequence is shorter than 70 bp long it is skipped */
555 
556     if (seqlen < MIN_SEQ_LEN)
557     break;
558 
559     ntrna=0;
560 
561 /*  search of tRNA genes starts
562     pos= first position of the T-Psi-C signal
563     ptr= pointer to the first position of the T-Psi-C signal */
564 
565     for (pos=44, ptr=(sequence+43); pos<seqlen-23;pos++, ptr++)
566       {
567       score=0;
568 
569 #ifdef VERBOSE
570 	  fprintf(fpverb,"--- Moving TPC window (pos=%d)\n",pos);
571 #endif
572 
573 
574 /*change in the number of invariant bases that must be present in the
575   T-Psi-C signal, MODIFY the value of threshold_inv here */
576 
577       threshold_inv= ps.tpc_inv;
578 
579 /*search for the T-Psi-C signal */
580 
581       if(readsignal(ptr,table_inv1,&lsig1,&ktot1,weight1,table_cons1,&ninv,
582         threshold_inv))
583         {
584 
585 /*the general score SG (score) is incremented by 1, if the number of
586   invariant bases (ninv) in the found T-Psi-C signal >= 3 (ktot1=4)
587   change in the threshold of the T-Psi-C signal for the increment of SG,
588   MODIFY the following line */
589 
590         if (ninv >= ktot1-1)  {
591 	  score++;
592 #ifdef VERBOSE
593 	  fprintf(fpverb,"+ TPC invariant bp: %d.  SG++\n",ninv); }
594 	else  {
595 	  fprintf(fpverb,"X TPC invariant bp: %d.  NO SG++\n",ninv);
596 #endif
597 	}
598 
599 /* Computation of the score on the found T-Psi-C signal, signal retains if
600    the computed score is >= ThresholdValue1 */
601 
602         if(scoring(weight1,lsig1,maxtot1,ktot1,&score1,ThresholdValue1,
603 		   ninv))
604           {
605 #ifdef VERBOSE
606 	   fprintf(fpverb,"+ TPC signal over thresh: %f\n",score1);
607 #endif
608 
609 
610 /* npair= number of base-pairings in the T-Psi-C stem
611    lpair= number of nucleotides between the first position of the T-Psi-C
612           arm and the last position of the T-Psi-C arm */
613 
614           npair = 5;
615           lpair = 16;
616 
617 /* Test for the presence of the T-Psi-C stem */
618 
619           basepairing(ptr+1,npair,lpair,&ncomp);
620 
621 /* If the number of base-pairings (ncomp) equal 5, SG (score) incremented
622    by 1. Change in the threshold of base-pairings in the T-Psi-C arm for
623    the increment of SG, MODIFY the value '5' in the following line */
624 
625           if (ncomp >= ps.tpc_incsg) {
626             score++;
627 #ifdef VERBOSE
628  	    fprintf(fpverb,"+ TPC parings (%d) add to SG=%d\n",ncomp,score); }
629 	  else  {
630  	    fprintf(fpverb,"X TPC parings (%d) NO add to SG=%d\n",ncomp,score);
631 #endif
632 	  }
633 
634 /* Change in the threshold to retain a stem as a potential T-Psi-C arm,
635    MODIFY the value '4' in the following line */
636 
637 #ifdef VERBOSE
638           if (ncomp < ps.tpc_keep)  {
639 	    fprintf(fpverb,"X TPC parings retain loop: %d\n",ncomp); }
640 	  else {
641 	    fprintf(fpverb,"+ TPC parings retain loop: %d\n",ncomp);
642 #else
643           if (ncomp >= ps.tpc_keep)  {
644 #endif
645 
646 
647 /* For the same found T-Psi-C region, different potential D region can be
648    found because of the possible different lengthes of the D loop. As the
649    search for the tRNA gene is applied for each potential D arm, the value
650    of SG computed on the T-Psi-C region is stored in 'first_score' */
651 
652             first_score=score;
653 
654 /*Change in the number of invariant bases to be present in the D signal,
655   MODIFY the value of threshold_inv here */
656 
657             threshold_inv= ps.d_inv;
658 
659 /* Search for the presence of a D region between -120 and -37 nucleotides
660    upstream from the found T-Psi-C region. 37 nucleotides is the observed
661    minimum length assuming no insertion in the D and variable loop. 120
662    nucleotides allows for an intron of up to 60 nucleotides.
663    Change in the length of the intron, MODIFY the value '120' in the
664    definition of begin and ptrstart.
665    ptr1= pointer to the first position of the D signal
666    pos1= first position of the D signal
667    begin= starting position for the search of the D signal
668    ptrsart= pointer to the starting position for the search of the D signal */
669 
670 /* If the sequence does not have 127 nucleotides upstream of the T-Psi-C
671    signal, the search for the D signal starts at position 8 */
672 
673             if (pos <=127)
674               {
675                begin = 8;
676                ptrstart = sequence+7;
677               }
678             else
679               {
680                begin = pos-(MAX_INTRON_LEN+60);
681                ptrstart = ptr-(MAX_INTRON_LEN+60);
682                }
683 
684             for(pos1=begin,ptr1=ptrstart; pos1<=(pos-37);pos1++,
685                 ptr1++)
686               {
687 
688 /* Search for the D signal */
689 
690 #ifdef VERBOSE
691               if(!readsignal(ptr1,table_inv2,&lsig2,&ktot2,weight2,
692                  table_cons2,&ninv,threshold_inv))
693                 {
694 		  fprintf(fpverb,"X D invariant bp: %d\n",ninv); }
695 	      else
696 		{
697 		  fprintf(fpverb,"+ D invariant bp: %d\n",ninv);
698 #else
699               if(readsignal(ptr1,table_inv2,&lsig2,&ktot2,weight2,
700                  table_cons2,&ninv,threshold_inv))
701                 {
702 #endif
703 
704 
705 /* Number of invariant bases found in the windowed sequence equal to the
706    number of invariant bases of the matrix (ktot2=3), then SG (score) is
707    incremented by 1
708    Change in the increment of SG for the D signal, MODIFY the following
709    line */
710 
711 		  if (ninv >= ktot2) {
712 		    score++;
713 #ifdef VERBOSE
714 		    fprintf(fpverb,"+ D invariant (%d) inc SG\n",ninv); }
715 		  else {
716 		    fprintf(fpverb,"X D invariant (%d) NO add to SG\n",ninv);
717 #endif
718 		  }
719 
720 /* Computation of the score on the found D signal, signal retains if the
721    computed score is >= TresholdValue2 */
722 
723 #ifdef VERBOSE
724 		if(!scoring(weight2,lsig2,maxtot2,ktot2,&score2,
725                    ThresholdValue2,ninv))
726                   {
727 		    fprintf(fpverb,"X D signal threshold: %f\n",score2); }
728 		else  {
729 		  fprintf(fpverb,"+ D signal threshold: %f\n",score2);
730 #else
731 		if(scoring(weight2,lsig2,maxtot2,ktot2,&score2,
732                    ThresholdValue2,ninv))
733                   {
734 #endif
735 
736 
737 /* ptr3= pointer pointing on the first position of the D arm
738    npair= number of base-pairings in the D stem
739    lpair= number of nucleotides between the first position of the D arm
740    and the last one. As the D loop presents some variation in length,
741    lpair can take different values:
742    lpair=14 smallest length of the D loop
743    lpair=18 greatest length of the D loop */
744 
745                   ptr3=ptr1+2;
746                   npair = 3;
747                   for (i=0; i < 5; i++)
748                      for (j=0; j < 2; j++)
749                         ntab[i][j]=0;
750 
751                   for (lpair=14,h=0; lpair <=18; lpair++,h++)
752                     {
753 
754 /* Search for the presence of the D arm */
755 
756 		      basepairing(ptr3,npair,lpair,&ncomp);
757 
758 /* For each potential stem found, the number of base-pairings (ncomp) and
759    lpair are stored in ntab */
760 
761 		      ntab[h][0]=ncomp;
762 		      ntab[h][1]=lpair;
763                     }
764 
765                   match2=0;
766                   for (h=0; h < 5;h++)
767                     {
768 
769 /* If stems with 3 base-pairings are found for the D arm, the following
770    steps of the algorithm are applied on each of these potential stems.
771    The 2 base-pairings stems are discarded */
772 
773                     if (ntab[h][0] == 3)
774                       {
775                       lpair=ntab[h][1];
776                       match2=1;
777                       following_search(pos,pos1,ptr1,ptr3,lpair,nloop,
778                       sequence,seqlen,fpo,fpverb,name,score,match2,&ntrna,&npred,&match,sqoffset);
779                       }
780                     }
781 
782 /* If no stems with 3 base-pairings have been found, then the stems with 2
783    base-pairings are used in the following steps */
784 
785                   if (!match2)
786                     {
787                     for (h=0; h < 5;h++)
788                       {
789                       if (ntab[h][0] == 2)
790                         {
791                         lpair=ntab[h][1];
792                         following_search(pos,pos1,ptr1,ptr3,lpair,nloop,
793                         sequence,seqlen,fpo,fpverb,name,score,match2,&ntrna,&npred,&match,sqoffset);
794                         }
795                       }
796                     }
797                   }
798                 score=first_score;
799                 }
800               }
801             }
802           }
803         }
804       }
805 
806 /* The sequence is complemented and the algorithm is applied on the
807    complementary strand */
808 
809     compstrand(&sequence,seqlen);
810 
811 #ifdef VERBOSE
812 	   if (nloop == 0)
813 	     fprintf(fpverb,"\n== Trying COMPLEMENTARY strand\n");
814 #endif
815 
816     }
817 
818   free(sequence);  /* free up mem to ready for next sequence */
819 
820   }
821 fprintf(fpo,"number of sequences= %d\n", nseq);
822 fprintf(fpo,"number of bases tested (one strand)=%ld\n", lseq);
823 lseq = 2* lseq;
824 fprintf(fpo,"number of bases tested (both strands)= %ld\n", lseq);
825 fprintf(fpo,"number of predicted tRNA=%d\n", npred);
826 exit(0);
827 }
828 
829 
830 /* Subroutine that accomplishes the end of the test for the presence of a
831    tRNA gene */
832 
833 void
834 following_search(long int pos,    /* first position of the found T-Psi-C signal */
835 		 long int pos1,   /* first position of the found D signal */
836 		 char *ptr1,      /* pointer to the first position of the D signal */
837 		 char *ptr3,      /* ptr3=ptr1+2  pointer to first position of the D arm  */
838 		 int lpair,
839 		 int nloop,       /*nloop=0 test of the sequence, */
840 				  /*nloop=1 test of the complementary sequence */
841 		 char *sequence,
842 		 long int seqlen,
843 		 FILE *fpo,       /*pointer to the output file */
844 		 FILE *fpverb,
845 		 char *name,
846 		 int score,       /* Value of the general score SG */
847 		 int match2,     /* integer testing the presence of D */
848 				 /* arm with 3 base-pairings */
849 		 int *ntrna,
850 		 int *npred,
851 		 int *match,
852 		 long int sqoffset   /* offset nucleotide numbering by this much (set with -i param) */
853 		 )
854 
855 {
856 extern Param_set_type ps;
857 
858 char chaine2[300]; /* character string containing the predicted tRNA gene
859                    sequence */
860 char anticodon[4]; /* character string containing the anticodon signal
861                    sequence */
862 char type_trna[4]; /* character string containing the tRNA gene family */
863 char *ptr2,*ptr4,*ptr5; /*pointers to the sequence, see their definition
864                         below*/
865 int score2, score1; /* variables for the general score SG */
866 int npair1=0; /* number of base-pairings in the aminoacyl arm */
867 int lpair1=0; /* number of nucleotides between the first position of
868               the aminoacyl arm and the last one */
869 int lpair2=0; /* number of nucleotides between the first position of
870               the anticodon arm and the last one */
871 int npair2=0; /* number of base-pairings in the anticodon arm */
872 int pos6=0; /* number of nucleotides found between the first position of
873             the anticodon arm and the first position of the T-Psi-C
874             signal. */
875 int pos4=0; /* number of nucleotides present in the variable loop.*/
876 int length1=3; /* length of the anticodon signal */
877 int num=0; /* variable that codes the anticodon signal sequence
878            (comprised between 1 and 65). */
879 int match1; /* match1=1 if a tRNA without intron has been predicted
880             and 0 otherwise*/
881 int ncomp=0; /* number of base-pairings observed in the sequence tested
882              for the presence of a given arm */
883 int i;
884 
885 #ifdef VERBOSE
886   fprintf(fpverb,"IN following search...\n");
887 #endif
888 
889 score1=score;
890 
891 /* If match2=1, the found D arm present 3 base-pairings, the general score
892    SG (score1) is incremented by 1
893    Change in the threshold of base-pairings in the D arm for the increment
894    of SG, MODIFY the following line */
895 
896 
897 if (match2) {
898   score1++;
899 #ifdef VERBOSE
900   fprintf(fpverb,"+ D arm found 3 bp\n"); }
901 else {
902   fprintf(fpverb,"X D arm found less than 3 bp\n");
903 #endif
904 }
905 npair1=7; /* Number of base-pairings in the aminoacyl arm */
906 lpair1=pos-pos1+8+23; /* Number of nucleotides between the first
907                       position of the aminoacyl arm and the last one */
908 ptr2=(ptr1-7); /* pointer to the first position of the amino acyl arm */
909 
910 /* Test for the presence of the aminoacyl arm */
911 
912 basepairing(ptr2,npair1,lpair1,&ncomp);
913 
914 /* If the number of base-pairings (ncomp) equal 7, the general score SG
915    (score1) is incremented by 1 .
916    Change in the threshold of base-pairings in the aminoacyl arm for the
917    increment of SG, MODIFY the value '7' in the following line */
918 
919 if (ncomp >= ps.aa_incsg) {
920   score1++;
921 #ifdef VERBOSE
922   fprintf(fpverb,"+ AA arm found %d base pairings\n",ncomp); }
923 else {
924   fprintf(fpverb,"X AA arm found %d base pairings\n",ncomp);
925 #endif
926 }
927 
928 /* Change in the threshold to retain a stem as a potential aminoacyl arm
929    MODIFY the value '6' in the following line */
930 
931 if (ncomp >= ps.aa_keep)
932   {
933   i = 0;
934   while (i< 300)
935     {
936     chaine2[i]='\0';
937     i++;
938   }
939 
940 /* If the general score SG is >= 4, the algorithm looks for the presence of
941    an anticodon stem, otherwise the algorithm is initiated again on the
942    following windowed sequence */
943 
944   if (score1 >= ps.look_for_acloop_sg)
945     {
946 #ifdef VERBOSE
947       fprintf(fpverb,"Looking for anticodon stem\n");
948 #endif
949 
950       ptr4=ptr3+lpair+2; /* pointer to the first position of the anticodon arm */
951     match1=0;
952     npair2=5; /* Number of base-pairings in the anticodon arm */
953     lpair2=16; /* Number of nucleotides between the first position of the
954                anticodon arm and the last position */
955 
956 /* Test for the presence of an anticodon arm */
957 
958     basepairing(ptr4,npair2,lpair2,&ncomp);
959 
960 /* If 4 or 5 base-pairings are observed, the windowed sequence is retained
961    as a potential anticodon arm */
962 
963     if (ncomp >= ps.acloop_min)
964       {
965       if (nloop == 0)
966         *match=1;
967       i=0;
968       while (i< 4)
969         {
970         anticodon[i]='\0';
971         type_trna[i]='\0';
972         i++;
973         }
974 
975 /* Test of the presence of the residue T at the position preceding the
976    anticodon signal. If the residue T is present, the general score is
977    incremented by 1.*/
978 
979       if ((*(ptr4+6)) == 't')  {
980         score1++;
981 #ifdef VERBOSE
982 	fprintf(fpverb,"+ Invariant T found in anticodon. SG++\n"); }
983       else  {
984 	fprintf(fpverb,"X Invariant T NOT found in anticodon. No SG inc\n");
985 #endif
986       }
987 
988 /* If SG >= 5, the windowed sequence is retained as a potential tRNA gene.
989    The tRNA gene predicted at that level is without intron */
990 
991 #ifdef VERBOSE
992       if (score1 < ps.sg_cutoff)  {
993 	fprintf(fpverb,"X Under SG threshold: %d\n",score1); }
994       else  {
995 	fprintf(fpverb,"+ Over SG threshold: %d\n",score1);
996 #else
997       if (score1 >= ps.sg_cutoff)  {
998 #endif
999 
1000 
1001 /* Identification of the tRNA gene family */
1002 
1003         strncpy(anticodon,ptr4+7,3);
1004         codage(anticodon,length1,&num);
1005         corresaa(num,type_trna);
1006         strncpy(chaine2,(ptr1-7),lpair1+2);
1007 
1008         (*ntrna)++;
1009         (*npred)++;
1010         match1=1; /* match1=1 indicates that a tRNA gene without
1011                   intron has been predicted */
1012 
1013 /* The results are printed in the output file */
1014 
1015         printresult(fpo,fpverb,name,pos1,pos,lpair,lpair1,lpair2,nloop,ntrna,
1016         chaine2,sequence,seqlen,match,ncomp,type_trna,anticodon,sqoffset);
1017       }
1018     }
1019 
1020 /* If no anticodon arm has been found for a value of lpair2=16, i. e. an
1021    anticodon loop of 7 nucleotides (without intron), the algorithm searchs
1022    for the presence of an anticodon arm assuming the presence of an intron
1023    in the anticodon loop */
1024 
1025 /* pos6 corresponds to the number of nucleotides found between the first
1026    position of the anticodon arm and the first position of the T-Psi-C
1027    signal. */
1028 
1029 #ifdef VERBOSE
1030     fprintf(fpverb,"+ Looking for anticodon stem WITH intron...\n");
1031 #endif
1032 
1033     pos6=pos-(pos1+lpair+3);
1034 
1035 /* The search for the presence of an anticodon stem is initiated only if an
1036    intron of at least 8 bases long is expected to be present. The value 28
1037    for pos6 corresponds to the number of nucleotides found for the smallest
1038    length of the variable loop and for an intron length of 8 nucleotides.
1039    Change in the smallest length of the intron, MODIFY the value of pos6
1040    in the following line and also the start value of lpair2 (lpair2=24)
1041    in the loop (lpair2 is the number of nucleotides found between the first
1042    position of the anticodon arm and the last position of the anticodon arm.
1043    lpair2=16 when no intron in the anticodon loop). */
1044 
1045     if ((!match1) && (pos6 >=MIN_VAR_LOOP))
1046       {
1047       for (lpair2=MIN_VAR_LOOP-4;lpair2<pos6;lpair2++)
1048         {
1049         score2=score1;
1050 
1051 /* Test for the presence of the anticodon arm for the different possible
1052    lengthes of the intron.
1053    ptr4 = pointer to the first base involved in the first part of the
1054           anticodon arm */
1055 
1056         basepairing(ptr4,npair2,lpair2,&ncomp);
1057 
1058 /* pos4 corresponds to the number of nucleotides present in the variable
1059    loop. The smallest size of the variable loop in the tRNA sequences
1060    is 3. Thus to retain the stem as a potential anticodon arm, pos4 has
1061    to be >= 3. */
1062 
1063         pos4=pos-pos1-lpair-lpair2-4;
1064 
1065 /* ptr5 = pointer to the position preceding the intron. */
1066 
1067         ptr5=ptr4+10;
1068 
1069 /* The anticodon arm is retained if 4 or 5 base-pairings are formed and if
1070    pos4 >= 3 */
1071 
1072         if ((ncomp >=4) && (pos4 >= 3))
1073           {
1074 
1075 /* Test for the presence of the residues A or G at the position preceding
1076    the intron */
1077 
1078           if(((*ptr5) == 'a') || ((*ptr5) == 'g'))
1079             {
1080             if (nloop == 0)
1081               (*match)=1;
1082             i=0;
1083             while (i<4)
1084               {
1085               anticodon[i]='\0';
1086               type_trna[i]='\0';
1087               i++;
1088               }
1089 
1090 /* Test for the presence of the residue T at the position preceding the
1091    anticodon signal. If the residue T is present, the general score is
1092    incremented by 1.*/
1093 
1094             if ((*(ptr4+6)) == 't') {
1095               score2++;
1096 #ifdef VERBOSE
1097 	      fprintf(fpverb,"+ Found invariant T. SG= %d\n",score2); }
1098       else  {
1099 	      fprintf(fpverb,"X Did NOT find invariant T. SG= %d\n",score2);
1100 #endif
1101 	    }
1102 
1103 /* If SG >= 5, the windowed sequence is retained as a potential tRNA gene.
1104    The tRNA gene predicted at that level has one intron in
1105    the anticodon loop. */
1106 
1107             if (score2 >= ps.sg_cutoff)
1108               {
1109 
1110 /* Identification of the tRNA gene family */
1111 
1112               strncpy(anticodon,ptr4+7,3);
1113               codage(anticodon,length1,&num);
1114               corresaa(num, type_trna);
1115               strncpy(chaine2,(ptr1-7),lpair1+2);
1116 
1117               (*ntrna)++;
1118               (*npred)++;
1119 
1120 /* The results are printed in the output file */
1121 
1122               printresult(fpo,fpverb,name,pos1,pos,lpair,lpair1,lpair2,nloop,
1123               ntrna,chaine2,sequence,seqlen,match,ncomp,type_trna,anticodon,sqoffset);
1124 	    }
1125 	  }
1126 	}
1127       }
1128     }
1129     }
1130     }
1131 }
1132 
1133 
1134 
1135 /* Subroutine to read the consensus matrix */
1136 
1137 void
1138 lectval(FILE *fp,                  /* pointer to the consensus matrix file */
1139 	float (*table_cons)[4],    /* table containing the frequency of each base
1140 				      at each position of the signal */
1141 	int (*table_inv)[2],  /* table containing the position and the nature of
1142 				 the invariant bases found in the signal. Code for
1143 				 the bases: A=0, C=1, G=2 and T=3. */
1144 	int *lsig,            /*lsig=length of the signal */
1145 	int *ktot,            /* ktot= number of invariant bases */
1146 	float *maxtot         /* maxtot= sum of the maximum frequencies */
1147 	)
1148 
1149 {
1150 int i=0,j, k=0,l,m;
1151 float  max=0;
1152 int ret = 0;
1153 
1154 for (l=0; l< 30; l++)
1155   for (m=0; m<4; m++)
1156     table_cons[l][m]=0;
1157 
1158 for (l=0; l< 30; l++)
1159   for (m=0; m < 2; m++)
1160     table_inv[l][m]=0;
1161 
1162 while(feof(fp) == 0)
1163   {
1164   for(j=0;j<4;j++)
1165     {
1166     ret = fscanf(fp,"%f",&table_cons[i][j]);
1167     }
1168   i++;
1169   }
1170 
1171 *lsig=i-1;
1172 for (i=0; i<*lsig; i++)
1173   {
1174   for (j=0; j<4; j++)
1175     {
1176     if (table_cons[i][j] == 1.0)
1177       {
1178       k++;
1179       table_inv[k][1]=i;
1180       table_inv[k][2]=j;
1181       }
1182     }
1183   }
1184 
1185 for (i=0; i<*lsig; i++)
1186   {
1187   max=table_cons[i][0];
1188   for (j=1; j<4; j++)
1189     max= MAX(max,table_cons[i][j]);
1190   *maxtot += max;
1191   }
1192 
1193 *ktot=k;
1194 }
1195 
1196 /* Subroutine reading the sequence */
1197 /* Modified to correctly read FASTA sequence files */
1198 
1199 int fgetseq(char *name,           /* string w/name of the sequence */
1200 	    char *sequence,       /* character string containing the sequence */
1201 	    long int *seqlen,     /* length of sequence */
1202 	    FILE *fpi)            /* input file pointer */
1203 
1204 {
1205 char line[MAXLINE]; /* character string used to read a line */
1206 char *ptr; /* pointer to the sequence */
1207 long int i,j,c;
1208 char *ptrRet;
1209 
1210 line[0]='\0';
1211 *sequence='\0';
1212 *seqlen = 0;
1213 
1214 /* Test the first character to choose between the two formats available */
1215 
1216 if (fgets(line, MAXLINE, fpi) == NULL) {
1217   return 0;
1218 }
1219 
1220 else if (line[0] == ';') {
1221 
1222   /* File in the non-GenBank format */
1223 
1224   if (line[1] != ' ')
1225     {
1226       for (i=0, ptr= &(line[1]); *ptr != ' ' && *ptr !='\n';i++)
1227         name[i]= *ptr++;
1228       name[i] = '\0';
1229       while ((c = getc(fpi)) == ';')
1230         ptrRet = fgets(line, MAXLINE, fpi);
1231       ungetc(c, fpi);
1232       ptr = sequence;
1233 
1234       while ((c= getc(fpi)) != ';' && c != EOF)
1235 	if (isalpha(c)) {
1236 	  *ptr = tolower(c);
1237 	  ptr++;
1238 	  (*seqlen)++;
1239 	}
1240 
1241       if (c != EOF)
1242 	ungetc(c, fpi);
1243       *ptr= '\0';
1244     }
1245   else if (line[1] == ' ')
1246     {
1247       /* Intelligenetics format */
1248 
1249       while ((c = getc(fpi)) == ';')
1250         ptrRet = fgets(line,MAXLINE, fpi);
1251       ungetc(c, fpi);
1252       ptrRet = fgets(line, MAXLINE, fpi);
1253       for (i=0, ptr= &(line[0]); *ptr != ' ' && *ptr !='\n';i++)
1254         name[i]= *ptr++;
1255       name[i] = '\0';
1256 
1257       ptr = sequence;
1258 
1259       while ((c= getc(fpi)) != ';' && c != EOF)
1260 	if (isalpha(c)){
1261 	  *ptr = tolower(c);
1262 	  ptr++;
1263 	  (*seqlen)++;
1264 	}
1265 
1266       if (c != EOF)
1267 	ungetc(c, fpi);
1268       *ptr= '\0';
1269     }
1270   }
1271 else if (line[0] == '>')
1272   {
1273     /* Fasta Format */
1274 
1275     for (i=1; line[i] == ' '; i++)
1276       ;
1277     for (j = 0, ptr = &line[i]; *ptr != ' '; ptr++, j++)
1278       name[j] = *ptr;
1279     name[j] = '\0';
1280 
1281     ptr = sequence;
1282     while ((c= getc(fpi)) != '>' && c != EOF)
1283       if (isalpha(c)) {
1284 	*ptr = tolower(c);
1285 	*seqlen += 1;
1286 	ptr++;
1287       }
1288 
1289     if (c != EOF)
1290       ungetc(c, fpi);
1291     *ptr= '\0';
1292   }
1293 else
1294   {
1295     /* File in GenBank format */
1296 
1297     while (strncmp(line, "LOCUS", 5) != 0)
1298       if (fgets(line, MAXLINE, fpi) == NULL)
1299 	exit(1);
1300 
1301     for (i = 0, ptr = &line[12]; *ptr != ' '; ptr++, i++)
1302       name[i] = *ptr;
1303     name[i] = '\0';
1304 
1305     while (strncmp(line, "ORIGIN", 6) != 0)
1306       if (fgets(line, MAXLINE, fpi) == NULL)
1307 	exit(1);
1308 
1309     ptr = sequence;
1310     *seqlen=0;
1311     ptrRet = fgets(line, MAXLINE, fpi);
1312     while (strncmp(line, "//", 2) != 0) {
1313       for (i = 0; line[i] != '\n'; i++)
1314 	if (isalpha(line[i])) {
1315 	  *ptr++ = tolower(line[i]);
1316 	  (*seqlen)++;
1317 	}
1318       ptrRet = fgets(line, MAXLINE, fpi);
1319     }
1320     *ptr = '\0';
1321 
1322   }
1323 return (1);
1324 }
1325 
1326 /* Subroutine reading & returning the sequence length */
1327 
1328 int getseqsize(FILE *fpi       /* input file pointer */
1329 	       )
1330 
1331 {
1332 
1333 char line[MAXLINE]; /* character string used to read a line */
1334 long int i,c, seqlen, fpi_save_pos;
1335 char* ptrRet;
1336 
1337 line[0]='\0';
1338 seqlen = 0;
1339 
1340 fpi_save_pos = ftell(fpi);  /* save current position in file */
1341 
1342 /* Test the first character to choose between the two formats available */
1343 
1344 if (fgets(line, MAXLINE, fpi) == NULL) {
1345   return 0;
1346 }
1347 
1348 else if (line[0] == ';') {
1349 
1350   /* File in the non-GenBank format */
1351 
1352   if (line[1] != ' ')
1353     {
1354       while ((c = getc(fpi)) == ';')
1355 	ptrRet = fgets(line, MAXLINE, fpi);
1356       ungetc(c, fpi);
1357 
1358       while ((c= getc(fpi)) != ';' && c != EOF)
1359 	if (isalpha(c)) {
1360 	  seqlen++;
1361 	}
1362 
1363     }
1364   else if (line[1] == ' ')
1365     {
1366       /* Intelligenetics format */
1367 
1368       while ((c = getc(fpi)) == ';')
1369 	ptrRet = fgets(line,MAXLINE, fpi);
1370       ungetc(c, fpi);
1371       ptrRet = fgets(line, MAXLINE, fpi);
1372 
1373       while ((c= getc(fpi)) != ';' && c != EOF)
1374 	if (isalpha(c)){
1375 	  seqlen++;
1376 	}
1377     }
1378 }
1379 else if (line[0] == '>')
1380   {
1381     /* Fasta Format */
1382 
1383     while ((c= getc(fpi)) != '>' && c != EOF)
1384       if (isalpha(c))
1385 	seqlen++;
1386   }
1387 else
1388   {
1389     /* File in GenBank format */
1390 
1391     while (strncmp(line, "LOCUS", 5) != 0)
1392       if (fgets(line, MAXLINE, fpi) == NULL)
1393 	exit(1);
1394 
1395     while (strncmp(line, "ORIGIN", 6) != 0)
1396       if (fgets(line, MAXLINE, fpi) == NULL)
1397 	exit(1);
1398 
1399     ptrRet = fgets(line, MAXLINE, fpi);
1400     while (strncmp(line, "//", 2) != 0) {
1401       for (i = 0; line[i] != '\n'; i++)
1402 	if (isalpha(line[i])) {
1403 	  seqlen++;
1404 	}
1405       ptrRet = fgets(line, MAXLINE, fpi);
1406     }
1407 
1408   }
1409 
1410 fseek(fpi,fpi_save_pos,0);  /* reposition file pointer */
1411 return seqlen;
1412 }
1413 
1414 
1415 
1416 /* Subroutine that returns the position in the string s where the string t
1417    begins or -1 if s does not contain t */
1418 
1419 /* Calls to this function eliminated for efficiency  T. Lowe  11/95  */
1420 
1421 myindex (char *s, char *t)
1422 {
1423 int i, j, k;
1424 for (i=0; s[i] != '\0'; i++) {
1425   for (j=i, k=0; t[k] != '\0' && s[j] == t[k]; j++, k++)
1426     ;
1427   if (t[k] == '\0')
1428   return(i);
1429 }
1430  return(-1);
1431 }
1432 
1433 
1434 /* Subroutine looking for the presence of a given signal, returns 1 if a
1435    signal is found and 0 otherwise. It also return the table 'weight'
1436    containing the frequencies of the oberved bases in the windowed
1437    sequence and the number 'ninv' of invariant bases found in the windowed
1438    sequence*/
1439 
1440 int readsignal(char *ptr,      /* pointer to the sequence */
1441 	       int (*table_inv)[2],  /* table containing the position and nature of the
1442 					invariant bases found in the consensus matrix */
1443 	       int *lsig,   /* lsig= length of the signal */
1444 	       int *ktot,   /* ktot= number of invariant bases in the consensus matrix */
1445 	       float *weight,       /* table containing the frequencies of the observed
1446 				       base at each position of the
1447 				       windowed sequence tested */
1448 	       float (*table_cons)[4],  /* table corresponding to the consensus matrix */
1449 	       int *ninv,               /* ninv= number of invariant */
1450 					/* bases in the windowed  sequence */
1451 	       int threshold_inv)       /* Number of invariant bases */
1452 					/* allowed not to b e invariant */
1453 
1454 
1455 {
1456   extern Param_set_type ps;
1457 
1458   int k = 1, i=0, j,match1;
1459   int l;
1460   int tab[30];
1461 
1462   (*ninv)=0;
1463   for (l=0; l< 30; l++) {
1464     weight[l]=0;
1465     tab[l]=0;
1466   }
1467 
1468 /* If the consensus matrix contains some invariant bases, the subroutine
1469    calculates the number of invariant bases that are found in the windowed
1470    sequence (ninv) */
1471 
1472   if (*ktot)
1473     {
1474       for (k=1; k <= (*ktot); k++)
1475 	{
1476 
1477 /*   (original code commented out)
1478 
1479      temp[0]= *(ptr+table_inv[k][1]);
1480       j = myindex(base,temp);              */
1481 
1482 	  switch (*(ptr+table_inv[k][1])) {
1483 	  case 'a': j=0; break;
1484 	  case 'c': j=1; break;
1485 	  case 'g': j=2; break;
1486 	  case 't': j=3; break;
1487 	  default: j=-1;
1488 	  }
1489 
1490 /* trap for non-ATGC chars, assume match for ambiguous bases (j= -1)*/
1491 
1492 
1493 #ifdef NO_AMBIG
1494 	if (j == table_inv[k][2])
1495 #else
1496 	if ((j == table_inv[k][2]) || (j == -1))
1497 #endif
1498 	    (*ninv)++;
1499 
1500 	}
1501     }
1502 
1503 /* If the number of invariant bases found in the windowed sequence is < to
1504    the threshold allowed, the windowed sequence is discarded as a potential
1505    signal. */
1506 
1507   if ((*ninv) < (*ktot)-threshold_inv)
1508     return(0);
1509 
1510 /* If the number of invariant bases is >= to the threshold, the table
1511   'weight' is constructed */
1512 
1513   if((*ninv) >= (*ktot)-threshold_inv)
1514     {
1515       match1=1;
1516       while(match1 && (i<*lsig))
1517 	{
1518 
1519 /*  (original code commented out)
1520 
1521     temp[0]= *(ptr+i);
1522     tab[i]= myindex(base,temp);  */
1523 
1524 	  switch (*(ptr+i)) {
1525 	  case 'a': tab[i]=0; break;
1526 	  case 'c': tab[i]=1; break;
1527 	  case 'g': tab[i]=2; break;
1528 	  case 't': tab[i]=3; break;
1529 	  default:  tab[i]=-1;
1530 	  }
1531 
1532 
1533 /* trap for non-ATGC chars, assume match for ambig bases (tab[i]= -1) */
1534 
1535 	  if (tab[i] == -1)
1536 #ifdef NO_AMBIG
1537 	    weight[i] = 0;
1538 #else
1539 	    weight[i] = 1;
1540 #endif
1541 	  else
1542 	    weight[i]=table_cons[i][tab[i]];
1543 
1544 	  if((ktot == 0) && (weight[i] == 0))
1545 	    {
1546 	      match1=0;
1547 	      return(0);
1548 	    }
1549 	  else
1550 	    {
1551 	      i++;
1552 	    }
1553 	}
1554     }
1555   return(1);
1556 }
1557 
1558 /* Subroutine that calculates the similarity score on the potential signal
1559    previously retained by the subroutine readsignal. This subroutine
1560    returns 1 it the computed score is greater or equal to the defined
1561    threshold and 0 otherwise. It returns also the value of the computed
1562    score (score) */
1563 
1564 int scoring(float *weight, /* table containing the frequencies of the observed base
1565 			      at each position of the potential signal */
1566 	    int lsig,      /* length of the signal */
1567 	    float max,     /* sum of the maximum frequencies found in */
1568 			   /* the consensus matrix */
1569 	    int ktot,      /* number of invariant bases found in the */
1570 			   /* consensus matrix */
1571 	    float *score,  /* value of the computed score on the */
1572 			   /* potential signal */
1573 	    float ThresholdValue,   /* defined threshold for the */
1574 			 	    /* similarity score */
1575 	    int ninv       /* number of invariant bases found */
1576 		           /* in the potential signal */
1577 	    )
1578 {
1579 
1580 float tot;
1581 int i;
1582 
1583 /* Computation of the value of the score on the potential signal */
1584 
1585 tot=0;
1586 for(i=0; i< lsig; i++)
1587   tot += weight[i];
1588 
1589 tot -= ninv;
1590 max -= ktot;
1591 *score = tot / max;
1592 
1593 /* Comparison of the computed score with the defined threshold value */
1594 
1595 if (*score >= ThresholdValue)
1596   return(1);
1597 else {
1598   return(0);
1599 }
1600 }
1601 
1602 /* Subroutine looking for base-pairings between two parts of the sequence.
1603    It returns the number of base-pairings found (ncomp) */
1604 
1605 /* rewritten to improve efficiency & eliminate use of myindex() calls */
1606 
1607 void
1608 basepairing(char *ptr,   /* pointer to the sequence */
1609 	    int npair,   /* number of base-pairings forming a given arm */
1610 	    int lpair,   /* number of nucleotides found between the first position of the
1611 			    first part of the sequence involved in the stem and the last
1612 			    position of the second part of the
1613 			    sequence involved in the stem */
1614 	    int *ncomp)   /* number of base-pairings observed between the two parts of
1615 			    the sequence tested */
1616 
1617 {
1618   int n;   /* loop counter */
1619 
1620   *ncomp=0;
1621 
1622 #ifdef NO_AMBIG
1623   for(n=0; n<npair; n++)
1624     {
1625       switch (*(ptr+n)) {
1626 	case 'a': if (*(ptr+lpair-n) == 't') (*ncomp)++;  break;
1627 	case 'c': if (*(ptr+lpair-n) == 'g') (*ncomp)++;  break;
1628 	case 'g': case 'r': if ((*(ptr+lpair-n) == 'c') || (*(ptr+lpair-n) == 't'))
1629 	  (*ncomp)++;  break;
1630 	case 't': case 'y': if ((*(ptr+lpair-n) == 'a') || (*(ptr+lpair-n) == 'g'))
1631 	  (*ncomp)++;  break;
1632          }
1633     }
1634 #else
1635   for(n=0; n<npair; n++)
1636     {
1637       if ((*(ptr+lpair-n) == 'n') && (*(ptr+n) != 'n')) {
1638 	(*ncomp)++;   }
1639       else  {
1640 	switch (*(ptr+n)) {
1641 	case 'a': if (*(ptr+lpair-n) == 't') (*ncomp)++;  break;
1642 	case 'c': if (*(ptr+lpair-n) == 'g') (*ncomp)++;  break;
1643 	case 'g': case 'r': if ((*(ptr+lpair-n) == 'c') || (*(ptr+lpair-n) == 't'))
1644 	  (*ncomp)++;  break;
1645 	case 't': case 'y': if ((*(ptr+lpair-n) == 'a') || (*(ptr+lpair-n) == 'g'))
1646 	  (*ncomp)++;  break;
1647 	case 'n': if (*(ptr+lpair-n) != 'n') (*ncomp)++;  break;
1648 	}
1649       }
1650     }
1651 #endif
1652 
1653 }
1654 
1655 /* Subroutine looking for base-pairings between two parts of the sequence.
1656    It returns the number of base-pairings found (ncomp) */
1657 
1658 /* replaced by new basepairing() function above  */
1659 
1660 void
1661 old_basepairing(char *ptr, int npair, int lpair, int *ncomp)
1662 
1663 {
1664 
1665 char base[5]; /* character string containing the 4 bases A, C, G and T */
1666 char temp1[2], temp2[2]; /* temporary tables */
1667 int n,j1,j2;
1668 
1669 /* Table 'pairing' gives the base-pairing scheme. If two nucleotides can
1670    form a base-pairing, the value in the table is 1 otherwise it is 0. */
1671 
1672 static int pairing[4][4]= {
1673                           {0,0,0,1},
1674                           {0,0,1,0},
1675                           {0,1,0,1},
1676                           {1,0,1,0},
1677                           };
1678 
1679 base[0]='a', base[1]='c', base[2]='g', base[3]='t', base[4]='\0';
1680 temp1[0]='\0';
1681 temp1[1]='\0';
1682 temp2[0]='\0';
1683 temp2[1]='\0';
1684 
1685 *ncomp=0;
1686 for(n=0; n<npair; n++)
1687   {
1688 
1689     temp1[0]= *(ptr+n);  /* temp1[0] contains the base found at a given
1690 			    position in the first part of the stem */
1691 
1692     temp2[0]= *(ptr+lpair-n); /* temp2[0] contains the base found at the
1693 				 position of the second part of the stem that
1694 				 should form the base-pairing with temp1[0] */
1695     j1= myindex(base,temp1); /* Code the base found in temp1[0]. j1 can take the
1696 				value 0 (for A), 1 (for C), 2 (for G) and 3 (for T) */
1697 
1698     j2= myindex(base,temp2); /* same than j1 for the base in temp2[0] */
1699 
1700     /* Test if the bases in temp1[0] and temp2[0] can form a base-parings */
1701 
1702 /* Assume non-ATGC base can bp with anything  */
1703 
1704 #ifdef NO_AMBIG
1705     if (pairing[j1][j2] == 1)
1706       *ncomp= (*ncomp)+1;
1707 #else
1708     if (j1 == -1 || j2 == -1)
1709       *ncomp= (*ncomp)+1;
1710     else if (pairing[j1][j2] == 1)
1711       *ncomp= (*ncomp)+1;
1712 #endif
1713   }
1714 }
1715 
1716 
1717 
1718 /* Subroutine that complements the sequence. It returns the complementary
1719    sequence to the main */
1720 
1721 /* Modifed to dynamically allocate space for reverse complement & then */
1722 /* release memory occupied by original sequence */
1723 /* Sets *sequence pointer to new reverse complement sequence */
1724 
1725 void
1726 compstrand(char **sequence,   /* pointer to the sequence string */
1727 	   long int seqlen    /* sequence length   */
1728 	   )
1729 {
1730 
1731 char *ptr, *revseq;
1732 long int pos;
1733 
1734 
1735 revseq= (char *) calloc (seqlen+1,sizeof(char));
1736 
1737 if (revseq == NULL)
1738   {
1739   fprintf(stderr,"tRNAscan1.4: Can't malloc for complement sequence\n");
1740   exit(-1);
1741   }
1742 
1743 
1744 /* Complementation of the sequence */
1745 
1746 for (pos=0, ptr= &((*sequence)[seqlen-1]); pos<seqlen; pos++, ptr--)
1747   {
1748     switch (*ptr)
1749       {
1750       case 'a':
1751 	revseq[pos]='t';
1752 	break;
1753       case 'c':
1754 	revseq[pos]='g';
1755 	break;
1756       case 'g':
1757 	revseq[pos]='c';
1758 	break;
1759       case 't':
1760 	revseq[pos]='a';
1761 	break;
1762       default:
1763 	revseq[pos]='n';
1764 	break;
1765       }
1766   }
1767 
1768 revseq[seqlen] = '\0';
1769 
1770 free(*sequence);           /* release mem from original seq */
1771 *sequence = revseq;
1772 
1773 }
1774 
1775 
1776 /* Subroutine that codes the anticodon signal sequence by a number
1777    comprised between 1 and 65. It returns this number. */
1778 
1779 /* Modified to eliminate call to myindex()  */
1780 
1781 void
1782 codage(char *anticodon,    /* anticodon signal sequence */
1783        int length1,        /* length of the anticodon signal, lenght1=3 */
1784        int *num            /* number associated to the anticodon signal sequence */
1785        )
1786 
1787 {
1788 int j=0,i,iba,match=0;
1789 iba=1;
1790 *num=1;
1791 
1792 /* Codage of the anticodon signal sequence. The codage is done by
1793    alphabetical order, i. e., AAA is coded by 1, AAC is coded by 2
1794    and so on until TTT that is coded by 64. If a base is not determined
1795    in the anticodon signal sequence, the anticodon is coded by 65. */
1796 
1797 for (i=length1; i>=1;i--)
1798   {
1799 
1800     switch (anticodon[i-1]) {
1801     case 'a': j=0; break;
1802     case 'c': j=1; break;
1803     case 'g': j=2; break;
1804     case 't': j=3; break;
1805     default: j=-1;
1806     }
1807 
1808     if (j == -1)
1809       {
1810 	match=1;
1811 	i=1;
1812       }
1813     *num=(*num)+j*iba;
1814     iba=4*iba;
1815   }
1816 
1817 if (match)
1818   *num=65;
1819 
1820 }
1821 
1822 
1823 /* Subroutine that determines the tRNA gene family */
1824 
1825 void
1826 corresaa(int num,           /* Number coding the anticodon signal sequence */
1827 	 char *type_trna    /* tRNA gene family */
1828 	 )
1829 
1830 {
1831 
1832 /*The table 'amino_acid' gives the correspondence between the number
1833   (num) coding the anticodon signal and the amino acid that is added
1834   to the protein by the tRNA. For example, if the anticodon is AAA then
1835   num=1 and the table will associate the amino acid Phe to that anticodon.
1836   The tRNA gene in that case will be a Phe tRNA gene. */
1837 
1838 static char *amino_acid[]= {"Phe","Val","Leu","Ile","Cys","Trp","Arg","Ser",
1839                            "Ser","Ala","Pro","Thr","Tyr","Asp","His","Asn",
1840                            "Leu","Val","Leu","Met","Trp","Gly","Arg","Arg",
1841                            "Ser","Ala","Pro","Thr","Sup","Glu","Gln","Lys",
1842                            "Phe","Val","Leu","Ile","Cys","Gly","Arg","Ser",
1843                            "Ser","Ala","Pro","Thr","Tyr","Asp","His","Asn",
1844                            "Leu","Val","Leu","Ile","Sup","Gly","Arg","Arg",
1845                            "Ser","Ala","Pro","Thr","Sup","Glu","Gln","Lys",
1846                            "Ind"};
1847 
1848 strncpy(type_trna,amino_acid[num-1],3);
1849 }
1850 
1851 /* Subroutine that prints the results of the search */
1852 
1853 void
1854 printresult(FILE *fpo,     /* output file pointer */
1855 	    FILE *fpverb,   /* character string for the name of the sequence */
1856 	    char *name,
1857 	    long int pos1,    /* first position of the D signal */
1858 	    long int pos,     /* first position of the T-Psi-C signal */
1859 	    int lpair,        /* number of nucleotides between the */
1860 			      /* first position  of the D arm and the last one */
1861 	    int lpair1,     /* number of nucleotides between the first position of the
1862 			       aminoacyl arm and the last one */
1863 	    int lpair2,     /* number of nucleotides between the first position of the
1864 			       anticodon arm and the last one */
1865 	    int nloop,      /* nloop=0, scanning of the direct strand; nloop=1 scanning of
1866 			       the complementary strand */
1867 	    int *ntrna,     /* number of tRNA genes predicted in the sequence */
1868 	    char *chaine2,  /* character string for the predicted tRNA gene sequence */
1869 	    char *sequence,    /* character string containing the sequence tested */
1870 	    long int length,   /* length of the sequence */
1871 	    int *match,      /* match=1 if at least one tRNA gene has been found on the
1872 				direct strand and 0 otherwise */
1873 	    int ncomp,       /* number of base-pairings in the anticodon arm of the
1874 				predicted tRNA gene */
1875 	    char *type_trna,     /* character string for the tRNA gene family */
1876 	    char *anticodon,      /* character string for the anticodon signal sequence */
1877 	    long int sqoffset   /* offset nucleotide numbering by this much (set with -i param) */
1878 	    )
1879 
1880 {
1881 long int pos2; /* first position of the predicted tRNA gene when it
1882                is found on the complementary strand */
1883 long int posstart; /* first position of the intron */
1884 long int posend; /* last position of the intron */
1885 
1886 
1887 /* If lpair2 > 16, treatment of the tRNA gene with one intron */
1888 
1889 /* Results for the tRNA genes predicted on the direct strand */
1890 
1891 #ifdef VERBOSE
1892 fprintf(fpverb,"*** tRNA found\n\n");
1893 #endif
1894 
1895 
1896 if ((nloop) == 0)
1897   {
1898   if((*ntrna) == 1)
1899     fprintf(fpo,"sequence name= %s\n", name);
1900 
1901   fprintf(fpo,"start position= %ld end position= %ld\n",pos1-7+sqoffset,pos1-6+lpair1+sqoffset);
1902   fprintf(fpo,"potential tRNA sequence= %s\n",chaine2);
1903   fprintf(fpo,"D signal= %ld %ld TpsyC signal= %ld %ld\n", pos1+sqoffset,pos1+7+sqoffset, pos+sqoffset,
1904   pos+14+sqoffset);
1905   fprintf(fpo,"amino-acyl stem= %ld-%ld;%ld-%ld\n",pos1-7+sqoffset,pos1-1+sqoffset,pos1-13+lpair1+sqoffset,
1906   pos1-7+lpair1+sqoffset);
1907   fprintf(fpo,"D stem= %ld-%ld;%ld-%ld\n",pos1+2+sqoffset,pos1+4+sqoffset,pos1+lpair+sqoffset,
1908 	  pos1+lpair+2+sqoffset);
1909 
1910   if(lpair2 > 16)
1911     {
1912     fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",pos1+lpair+4+sqoffset,pos1+lpair+8+sqoffset,
1913     pos1+lpair+lpair2+sqoffset,pos1+lpair+lpair2+4+sqoffset);
1914     }
1915   else
1916     {
1917     fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",pos1+lpair+4+sqoffset,pos1+lpair+8+sqoffset,
1918     pos1+lpair+16+sqoffset,pos1+lpair+20+sqoffset);
1919     }
1920 
1921   fprintf(fpo,"TpsyC stem= %ld-%ld;%ld-%ld\n",pos+1+sqoffset,pos+5+sqoffset,pos+13+sqoffset,pos+17+sqoffset);
1922   if (strcmp(type_trna,"Ind") != 0)
1923     {
1924     fprintf(fpo,"tRNA predict as a tRNA- %s : anticodon %s\n", type_trna,
1925     anticodon);
1926     }
1927   else
1928     {
1929     fprintf(fpo,"anticodon includes unknown bases\n");
1930     }
1931 
1932   if (lpair2 > 16)
1933    {
1934     posstart=pos1+lpair+15;
1935     posend= pos1+lpair+lpair2-2;
1936     fprintf(fpo,"potential intron between positions %ld %ld\n",posstart+sqoffset,
1937     posend+sqoffset);
1938     }
1939   fprintf(fpo,"number of base pairing in the anticodon stem= %d\n",ncomp);
1940   fprintf(fpo,"\n");
1941   }
1942 else
1943   {
1944 
1945 /* Results on the complementary strand */
1946 
1947 /* If no tRNA gene is predicted on the direct strand, then for the first
1948    tRNA gene predicted on the complementary strand the two following lines
1949    are printed in the output file */
1950 
1951   if( (!(*match)) && ((*ntrna) == 1))
1952     {
1953     fprintf(fpo,"sequence name= %s\n", name);
1954     fprintf(fpo,"complementary strand\n");
1955     }
1956 
1957 /* If tRNA genes are predicted on the direct strand, then for the first
1958    tRNA gene predicted on the complemetary strand the following line is
1959    printed in the output file */
1960 
1961   else if ((*ntrna) == 1)
1962     {
1963     fprintf(fpo,"complementary strand\n");
1964     }
1965 
1966   pos2= length-pos1+8;
1967   fprintf(fpo,"start position= %ld end position= %ld\n",pos2+sqoffset,pos2-lpair1-1+sqoffset);
1968   fprintf(fpo,"potential tRNA sequence= %s\n",chaine2);
1969 
1970   fprintf(fpo,"D signal= %ld %ld TpsyC signal= %ld %ld\n",length-pos1+1+sqoffset,
1971   length-pos1-6+sqoffset,length-pos+1+sqoffset,length-pos-13+sqoffset);
1972   fprintf(fpo,"amino-acyl stem= %ld-%ld;%ld-%ld\n",pos2+sqoffset,pos2-6+sqoffset, pos2-lpair1+6+sqoffset,
1973   pos2-lpair1+sqoffset);
1974   fprintf(fpo,"D stem= %ld-%ld;%ld-%ld\n",length-pos1-1+sqoffset,length-pos1-3+sqoffset,
1975   length-pos1-lpair+1+sqoffset,length-pos1-lpair-1+sqoffset);
1976 
1977   if (lpair2 > 16)
1978     {
1979     posstart=pos1+lpair+15;
1980     posend=pos1+lpair+lpair2-2;
1981     fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",length-pos1-lpair-3+sqoffset,
1982     length-pos1-lpair-7+sqoffset,length-posend-1+sqoffset,length-posend-5+sqoffset);
1983     }
1984   else
1985     {
1986     fprintf(fpo,"anticodon stem= %ld-%ld;%ld-%ld\n",length-pos1-lpair-3+sqoffset,
1987     length-pos1-lpair-7+sqoffset,length-pos1-lpair-lpair2+1+sqoffset,
1988     length-pos1-lpair-lpair2-3+sqoffset);
1989     }
1990 
1991   fprintf(fpo,"TpsyC stem= %ld-%ld;%ld-%ld\n",length-pos+sqoffset,length-pos-4+sqoffset,
1992   length-pos-12+sqoffset,length-pos-16+sqoffset);
1993 
1994   if (strcmp(type_trna,"Ind") != 0)
1995     {
1996     fprintf(fpo,"tRNA predict as a tRNA- %s : anticodon %s\n", type_trna,
1997     anticodon);
1998     }
1999   else
2000     {
2001     fprintf(fpo,"anticodon includes unknown bases\n");
2002     }
2003 
2004   if (lpair2 > 16)
2005     {
2006     posstart=pos1+lpair+15;
2007     posend=pos1+lpair+lpair2-2;
2008     fprintf(fpo,"potential intron between positions %ld %ld\n",
2009     length-posstart+1+sqoffset ,length-posend+1+sqoffset);
2010     }
2011 
2012   fprintf(fpo,"number of base pairing in the anticodon stem=%d\n",ncomp);
2013   fprintf(fpo,"\n");
2014   }
2015 }
2016 
2017 
2018 void
2019 set_search_params (Param_set_type *ps,
2020 		   int params)
2021   {
2022 
2023     if (params == 1) {
2024       ps->tpc_sig_thresh = ST_TPC_SIG_THRESH;
2025       ps->d_sig_thresh = ST_D_SIG_THRESH;
2026       ps->sg_cutoff = ST_SG_CUTOFF;
2027       ps->tpc_inv = ST_TPC_INV;
2028       ps->tpc_incsg = ST_TPC_INCSG;
2029       ps->tpc_keep = ST_TPC_KEEP;
2030       ps->d_inv = ST_D_INV;
2031       ps->look_for_acloop_sg = ST_LOOK_FOR_ACLOOP_SG;
2032       ps->acloop_min = ST_ACLOOP_MIN;
2033       ps->aa_incsg = ST_AA_INCSG;
2034       ps->aa_keep = ST_AA_KEEP;
2035     }
2036     else if (params == 2) {
2037       ps->tpc_sig_thresh = RX_TPC_SIG_THRESH;
2038       ps->d_sig_thresh = RX_D_SIG_THRESH;
2039       ps->sg_cutoff = RX_SG_CUTOFF;
2040       ps->tpc_inv = RX_TPC_INV;
2041       ps->tpc_incsg = RX_TPC_INCSG;
2042       ps->tpc_keep = RX_TPC_KEEP;
2043       ps->d_inv = RX_D_INV;
2044       ps->look_for_acloop_sg = RX_LOOK_FOR_ACLOOP_SG;
2045       ps->acloop_min = RX_ACLOOP_MIN;
2046       ps->aa_incsg = RX_AA_INCSG;
2047       ps->aa_keep = RX_AA_KEEP;
2048     }
2049     else if (params == 3) {
2050       ps->tpc_sig_thresh = ALT_TPC_SIG_THRESH;
2051       ps->d_sig_thresh = ALT_D_SIG_THRESH;
2052       ps->sg_cutoff = ALT_SG_CUTOFF;
2053       ps->tpc_inv = ALT_TPC_INV;
2054       ps->tpc_incsg = ALT_TPC_INCSG;
2055       ps->tpc_keep = ALT_TPC_KEEP;
2056       ps->d_inv = ALT_D_INV;
2057       ps->look_for_acloop_sg = ALT_LOOK_FOR_ACLOOP_SG;
2058       ps->acloop_min = ALT_ACLOOP_MIN;
2059       ps->aa_incsg = ALT_AA_INCSG;
2060       ps->aa_keep = ALT_AA_KEEP;
2061     }
2062     else {
2063       fprintf (stderr,"tRNAscan1.4: FATAL: Unable to select search parameter set.\n");
2064       exit(1);
2065     }
2066 
2067   }
2068 
2069 
2070 
2071 
2072 
2073 
2074 
2075 
2076 
2077 
2078 
2079 
2080