1 /* align_main.c
2  * SRE, Wed Jun 30 09:56:15 1993
3  * 2.0 Thu Sep 30 14:23:57 1993
4  *
5  * main() for covea
6  * Multiple sequence alignment to a covariance HMM model.
7  */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <math.h>
14 
15 #ifdef NEED_GETOPTH
16 #include <getopt.h>
17 #endif
18 
19 #include "structs.h"
20 #include "funcs.h"
21 #include "squid.h"
22 #include "version.h"
23 
24 #ifdef MEMDEBUG
25 #include "dbmalloc.h"
26 #endif
27 
28 #define OPTIONS "aho:s:S"
29 
30 static char usage[]  = "\
31 Usage: covea [-options] <cm file> <seqfile>\n\
32 where supported options are:\n\
33      -a             : annotate all base pairs, not just canonical ones\n\
34      -h             : print short help and version info\n\
35      -o <outfile>   : write alignment to <outfile> in SELEX format\n\
36      -s <scorefile> : save individual alignment scores to <scorefile>\n\
37 \n\
38 Experimental options:\n\
39      -S             : use small-memory variant of alignment algorithm\n";
40 
41 static char banner[] = "covea: multiple sequence alignment to a covariance model";
42 
43 int
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46   char **rseqs;			/* raw sequences to align       */
47   char **aseqs;                 /* multiple sequence alignment  */
48   SQINFO *sqinfo;		/* array of info structures     */
49   int    nseq;			/* number of seqs               */
50   char  *seqfile;               /* sequence file                */
51   int    format;		/* format of sequence file      */
52   char  *cmfile;                /* cvhmm save file to read      */
53   struct cm_s *cm;              /* model                        */
54   struct trace_s **tr;          /* array of tracebacks for seqs */
55   int    idx;			/* counter for sequences        */
56   double score;                 /* score of indiv. alignment    */
57   double tot_score;		/* sum of scores                */
58   AINFO  ainfo;                 /* optional alignment info (sec structure) */
59   struct istate_s *icm;         /* model, integer log odds form */
60   int    statenum;		/* # of states in icm           */
61   double rfreq[ALPHASIZE];	/* expected background symbol frequencies */
62 
63   char  *outfile;		/* file to write alignment to       */
64   char  *scorefile;		/* file to save scores to           */
65   FILE  *ofp;			/* opened outfile                   */
66   FILE  *sfp;			/* opened scorefile                 */
67   int    do_smallmemory;	/* use small-memory viterbi variant */
68   int    watsoncrick;           /* annotate only canonical pairs    */
69 
70   int          optc;
71   extern char *optarg;          /* for getopt() */
72   extern int   optind;		/* for getopt() */
73 
74 #ifdef MEMDEBUG
75   unsigned long histid1, histid2, orig_size, current_size;
76 #endif
77 
78   /***********************************************
79    * Parse command line
80    ***********************************************/
81 
82   outfile        = NULL;
83   scorefile      = NULL;
84   do_smallmemory = FALSE;
85   watsoncrick    = TRUE;
86 
87   while ((optc = getopt(argc, argv, OPTIONS)) != -1)
88     switch (optc) {
89 
90     case 'a': watsoncrick = FALSE; break;
91     case 'o': outfile   = optarg;  break;
92     case 's': scorefile = optarg;  break;
93 
94     case 'S': do_smallmemory = TRUE; break;
95 
96     case 'h':
97       printf("%s\n  version %s (%s)\n%s\n", banner, RELEASE, RELEASEDATE, usage);
98       exit(0);
99     default:
100       Die("unrecognized option %c\n", optc);
101     }
102 
103   if (argc - optind != 2)
104     Die("%s\n", usage);
105 
106   cmfile  = argv[argc-2];
107   seqfile = argv[argc-1];
108 
109 #ifdef MEMDEBUG
110   orig_size = malloc_size(&histid1);
111 #endif
112 
113 
114   /***********************************************
115    * Get sequence data and model; open output ptrs
116    ***********************************************/
117 
118   if (! SeqfileFormat(seqfile, &format, NULL))
119     Die("Failed to determine format of file %s\n", seqfile);
120 
121   if (! ReadMultipleRseqs(seqfile, format, &rseqs, &sqinfo, &nseq))
122     Die("Failed to read sequences from file %s", seqfile);
123 
124   if (! ReadCM(cmfile, &cm))
125     Die("Failed to read model from file %s", cmfile);
126 
127   rfreq[0] = rfreq[1] = rfreq[2] = rfreq[3] = 0.25;
128   if (! RearrangeCM(cm, rfreq, &icm, &statenum))
129     Die("Failed to convert CM to integer log odds");
130 
131   if (outfile != NULL)
132     if ((ofp = fopen(outfile, "w")) == NULL)
133       Die("Open failed for alignment output file %s", outfile);
134 
135   if (scorefile != NULL)
136     if ((sfp = fopen(scorefile, "w")) == NULL)
137       Die("Open failed for score output file %s", scorefile);
138 
139   /***********************************************
140    * Print banner
141    ***********************************************/
142 
143   puts(banner);
144   printf("     release %s, %s\n\n", RELEASE, RELEASEDATE);
145   printf("---------------------------------------------------\n");
146   printf("Sequence data:          %s (%d sequences)\n", seqfile, nseq);
147   printf("Covariance model:       %s (%d nodes)\n", cmfile, cm->nodes);
148   if (outfile != NULL)
149     printf("Alignment saved to:     %s\n", outfile);
150   if (scorefile != NULL)
151     printf("Indiv. scores saved to: %s\n", scorefile);
152   printf("---------------------------------------------------\n");
153   puts("");
154 
155   /***********************************************
156    * Do the alignment
157    ***********************************************/
158 
159   if ((tr = (struct trace_s **) malloc (nseq * sizeof(struct trace_s *))) == NULL)
160     Die("Memory failure, line %d of %s", __LINE__, __FILE__);
161 
162   tot_score = 0.0;
163   for (idx = 0; idx < nseq; idx++)
164     {
165       char *prepseq;
166       prepseq = Strdup(rseqs[idx]);
167       PrepareSequence(prepseq);
168 
169       if (do_smallmemory)
170 	{
171 	  if (! SmallViterbiAlign(icm, statenum, prepseq, &score, &tr[idx]))
172 	    Die("SmallViterbiAlign() failed on sequence %d", idx);
173 	}
174       else if (! ViterbiAlign(icm, statenum, prepseq, &score, &tr[idx]))
175 	Die("ViterbiAlign() failed on sequence %d", idx);
176 
177       tot_score += score;
178       if (scorefile != NULL)
179 	fprintf(sfp, "%-8.3f   : %s\n", score, sqinfo[idx].name);
180 
181       free(prepseq);
182     }
183 
184   if (do_smallmemory)
185     {
186       printf("aborting... no traceback/alignment code yet for small memory variant\n");
187       Free2DArray(rseqs,    nseq);
188       FreeCM(cm);
189       exit(0);
190     }
191 
192   if (! Traces2Alignment(rseqs, sqinfo, tr, nseq, cm, watsoncrick, &aseqs, &ainfo))
193     Die("Traces2Alignment() failed");
194 
195   /***********************************************
196    * Print the alignment
197    ***********************************************/
198 
199   if (outfile != NULL)
200     {
201       if (! WriteSELEX(ofp, aseqs, nseq, &ainfo, 60))
202 	Die("Write failed: can't save alignment to %s", outfile);
203       fclose(ofp);
204       printf("Alignment written to %s\n", outfile);
205     }
206   else
207     {
208       if (! WriteSELEX(stdout, aseqs, nseq, &ainfo, 60))
209 	Die("Write failed: can't print alignment");
210     }
211 
212   if (scorefile != NULL) fclose(sfp);
213 
214   printf("Overall alignment score: %.2f\n", tot_score / (double) nseq);
215 
216   /***********************************************
217    * Garbage collect and exit
218    ***********************************************/
219 
220   for (idx = 0; idx < nseq; idx++)
221     {
222       FreeTrace(tr[idx], NULL);
223       FreeSequence(rseqs[idx],&(sqinfo[idx]));
224     }
225   free(tr);
226   free(sqinfo);
227   FreeAlignment(aseqs, nseq, &ainfo);
228   FreeCM(cm);
229   free(icm);
230 
231 #ifdef MEMDEBUG
232   current_size = malloc_size(&histid2);
233 
234   if (current_size != orig_size)
235     malloc_list(2, histid1, histid2);
236   else
237     fprintf(stderr, "No memory leaks, sir.\n");
238 
239 #endif
240 
241 
242   return 0;
243 }
244