1 /* score_main.c
2  * Fri Feb 18 10:31:48 1994
3  *
4  * main() for scoring test sequences with a model.
5  *   Also, can print out alignments of model to sequence so
6  *   that the pairwise assignments can be seen.
7  */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <math.h>
14 
15 #ifdef NEED_GETOPTH
16 #include <getopt.h>
17 #endif
18 
19 #include "structs.h"
20 #include "funcs.h"
21 #include "squid.h"
22 #include "version.h"
23 
24 #ifdef MEMDEBUG
25 #include "dbmalloc.h"
26 #endif
27 
28 #define OPTIONS "ag:ms"
29 
30 static char usage[]  = "\
31 Usage: coves [-options] <CM file> <seqfile>\n\
32 where options are:\n\
33     -a          : show all pairs, not just Watson-Crick\n\
34     -g <gcfrac> : set expected background GC composition (default 0.5)\n\
35     -m          : mountain representation of structural alignment\n\
36     -s          : secondary structure string representation of \n\
37                   structural alignment\n";
38 
39 static char banner[] = "\
40 coves - scoring and structure prediction of RNA sequences\n\
41         using a covariance model";
42 
43 int
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46   char  *seq;                   /* a sequence to score            */
47   SQINFO sqinfo;                /* info about seq                 */
48   char  *seqfile;               /* sequence file                  */
49   int    fmt;			/* format of sequence file        */
50   SQFILE *dbfp;                 /* open sequence file for reading */
51   char  *cmfile;                /* file containing covariance model */
52   struct cm_s *cm;              /* model                          */
53   double score;                 /* score of alignment             */
54   struct trace_s *tr;           /* traceback of alignment         */
55   struct align_s *ali;		/* alignment of seq to model      */
56   char  *aseq;                  /* "aligned" sequence string      */
57   char  *khstruct;              /* secondary structure string     */
58   char   buffer[61];		/* output buffer for structures   */
59   int    len;			/* length of aseq, khstruct       */
60   int    apos;			/* position in aseq, khstruct     */
61   double rfreq[ALPHASIZE];      /* expected background symbol frequencies */
62   struct istate_s *icm;		/* integer log odds model         */
63   int    statenum;		/* # of states in icm             */
64 
65   int     do_khstructure;	/* TRUE if we print a structure string  */
66   int     do_mountain;		/* TRUE if we show a mountain structure */
67   int     watsoncrick;          /* TRUE if only canonical pairs are indicated */
68   double  gcfrac;		/* expected background GC fraction */
69 
70 #ifdef MEMDEBUG
71   unsigned long histid1, histid2, orig_size, curr_size;
72 #endif
73 
74   int          optc;
75   extern char *optarg;          /* for getopt() */
76   extern int   optind;		/* for getopt() */
77 
78   /***********************************************
79    * Parse command line
80    ***********************************************/
81 
82   do_khstructure = FALSE;
83   do_mountain    = FALSE;
84   watsoncrick    = TRUE;
85   gcfrac         = 0.5;
86 
87   while ((optc = getopt(argc, argv, OPTIONS)) != -1)
88     switch (optc) {
89 
90     case 'a': watsoncrick    = FALSE;                 break;
91     case 'g': gcfrac         = (double) atof(optarg); break;
92     case 'm': do_mountain    = TRUE;                  break;
93     case 's': do_khstructure = TRUE;                  break;
94 
95     case 'h':
96       printf("%s\n  version %s (%s)\n%s\n", banner, RELEASE, RELEASEDATE, usage);
97       exit(0);
98 
99     default: Die("unrecognized option %c\n", optc);
100     }
101 
102   if (argc - optind != 2)
103     Die("%s\n", usage);
104 
105   cmfile  = argv[argc-2];
106   seqfile = argv[argc-1];
107 
108   if (! SeqfileFormat(seqfile, &fmt, NULL))
109     Die("Failed to determine format of sequence database %s", seqfile);
110 
111   if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
112     Die("Failed to open sequence file %s for reading", seqfile);
113 
114   if (! ReadCM(cmfile, &cm))
115     Die("Failed to read model from file %s", cmfile);
116 
117   rfreq[1] = rfreq[2] = gcfrac / 2.;
118   rfreq[0] = rfreq[3] = (1. - gcfrac) / 2.;
119 
120   if (! RearrangeCM(cm, rfreq, &icm, &statenum))
121     Die("failed to make integer log odds model");
122 
123   /***********************************************
124    * Print banner
125    ***********************************************/
126 
127   puts(banner);
128   printf("     version %s, %s\n\n", RELEASE, RELEASEDATE);
129 
130   printf("---------------------------------------------------\n");
131   printf("Database to search/score:      %s\n", seqfile);
132   printf("Model:                         %s\n", cmfile);
133   printf("GC%% of background model:       %.0f%%\n", (gcfrac*100.));
134   printf("---------------------------------------------------\n");
135   puts("");
136 
137   /***********************************************
138    * Score each sequence
139    ***********************************************/
140 
141 #ifdef MEMDEBUG
142   orig_size = malloc_size(&histid1);
143 #endif
144 
145   while (ReadSeq(dbfp, fmt, &seq, &sqinfo))
146     {
147       char *prepseq;
148       prepseq = Strdup(seq);
149       PrepareSequence(prepseq);
150 
151       if (! ViterbiAlign(icm, statenum, prepseq, &score, &tr))
152 	Die("ViterbiAlign() failed on sequence %s", sqinfo.name);
153       free(prepseq);
154 
155       printf("%6.2f bits : %s\n", score, sqinfo.name);
156 
157       if (do_khstructure || do_mountain)
158 	{
159 	  if (! Trace2ali(seq, tr, watsoncrick, &ali)) Die("Trace2ali failed");
160 
161 	  if (do_khstructure)
162 	    {
163 	      if (! Align2kh(ali, &aseq, &khstruct)) Die("Align2kh failed\n");
164 
165 	      /* Print out the sequence and structure
166 	       */
167 	      len = strlen(aseq);
168 	      buffer[60] = '\0';
169 	      for (apos = 1; apos <= len; apos += 60)
170 		{
171 		  strncpy(buffer, aseq + apos - 1, 60);
172 		  printf("    %10s %s\n", sqinfo.name, buffer);
173 		  strncpy(buffer, khstruct + apos - 1, 60);
174 		  printf("    %10s %s\n", sqinfo.name, buffer);
175 		  puts("");
176 		}
177 	      free(aseq);
178 	      free(khstruct);
179 	    }
180 
181 	  if (do_mountain)
182 	    {
183 	      PrintAliLandscape(stdout, cm, ali);
184 	      puts("");
185 	    }
186 
187 	  Free_align(ali);
188 	}
189 
190       FreeSequence(seq, &sqinfo);
191 
192 #ifdef MEMDEBUG
193       curr_size = malloc_size(&histid2);
194       if (curr_size != orig_size)
195 	{
196 	  Warn("malloc-debug: current size %ul, starting size %ul\n", curr_size, orig_size);
197 	  malloc_list(2,histid1, histid2);
198 	}
199 #endif
200 
201     }
202 
203   SeqfileClose(dbfp);
204   FreeCM(cm);
205   free(icm);
206   return 0;
207 }
208 
209 
210