1 /* align_main.c
2 * SRE, Wed Jun 30 09:56:15 1993
3 * 2.0 Thu Sep 30 14:23:57 1993
4 *
5 * main() for covea
6 * Multiple sequence alignment to a covariance HMM model.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <math.h>
14
15 #ifdef NEED_GETOPTH
16 #include <getopt.h>
17 #endif
18
19 #include "structs.h"
20 #include "funcs.h"
21 #include "squid.h"
22 #include "version.h"
23
24 #ifdef MEMDEBUG
25 #include "dbmalloc.h"
26 #endif
27
28 #define OPTIONS "aho:s:S"
29
30 static char usage[] = "\
31 Usage: covea [-options] <cm file> <seqfile>\n\
32 where supported options are:\n\
33 -a : annotate all base pairs, not just canonical ones\n\
34 -h : print short help and version info\n\
35 -o <outfile> : write alignment to <outfile> in SELEX format\n\
36 -s <scorefile> : save individual alignment scores to <scorefile>\n\
37 \n\
38 Experimental options:\n\
39 -S : use small-memory variant of alignment algorithm\n";
40
41 static char banner[] = "covea: multiple sequence alignment to a covariance model";
42
43 int
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46 char **rseqs; /* raw sequences to align */
47 char **aseqs; /* multiple sequence alignment */
48 SQINFO *sqinfo; /* array of info structures */
49 int nseq; /* number of seqs */
50 char *seqfile; /* sequence file */
51 int format; /* format of sequence file */
52 char *cmfile; /* cvhmm save file to read */
53 struct cm_s *cm; /* model */
54 struct trace_s **tr; /* array of tracebacks for seqs */
55 int idx; /* counter for sequences */
56 double score; /* score of indiv. alignment */
57 double tot_score; /* sum of scores */
58 AINFO ainfo; /* optional alignment info (sec structure) */
59 struct istate_s *icm; /* model, integer log odds form */
60 int statenum; /* # of states in icm */
61 double rfreq[ALPHASIZE]; /* expected background symbol frequencies */
62
63 char *outfile; /* file to write alignment to */
64 char *scorefile; /* file to save scores to */
65 FILE *ofp; /* opened outfile */
66 FILE *sfp; /* opened scorefile */
67 int do_smallmemory; /* use small-memory viterbi variant */
68 int watsoncrick; /* annotate only canonical pairs */
69
70 int optc;
71 extern char *optarg; /* for getopt() */
72 extern int optind; /* for getopt() */
73
74 #ifdef MEMDEBUG
75 unsigned long histid1, histid2, orig_size, current_size;
76 #endif
77
78 /***********************************************
79 * Parse command line
80 ***********************************************/
81
82 outfile = NULL;
83 scorefile = NULL;
84 do_smallmemory = FALSE;
85 watsoncrick = TRUE;
86
87 while ((optc = getopt(argc, argv, OPTIONS)) != -1)
88 switch (optc) {
89
90 case 'a': watsoncrick = FALSE; break;
91 case 'o': outfile = optarg; break;
92 case 's': scorefile = optarg; break;
93
94 case 'S': do_smallmemory = TRUE; break;
95
96 case 'h':
97 printf("%s\n version %s (%s)\n%s\n", banner, RELEASE, RELEASEDATE, usage);
98 exit(0);
99 default:
100 Die("unrecognized option %c\n", optc);
101 }
102
103 if (argc - optind != 2)
104 Die("%s\n", usage);
105
106 cmfile = argv[argc-2];
107 seqfile = argv[argc-1];
108
109 #ifdef MEMDEBUG
110 orig_size = malloc_size(&histid1);
111 #endif
112
113
114 /***********************************************
115 * Get sequence data and model; open output ptrs
116 ***********************************************/
117
118 if (! SeqfileFormat(seqfile, &format, NULL))
119 Die("Failed to determine format of file %s\n", seqfile);
120
121 if (! ReadMultipleRseqs(seqfile, format, &rseqs, &sqinfo, &nseq))
122 Die("Failed to read sequences from file %s", seqfile);
123
124 if (! ReadCM(cmfile, &cm))
125 Die("Failed to read model from file %s", cmfile);
126
127 rfreq[0] = rfreq[1] = rfreq[2] = rfreq[3] = 0.25;
128 if (! RearrangeCM(cm, rfreq, &icm, &statenum))
129 Die("Failed to convert CM to integer log odds");
130
131 if (outfile != NULL)
132 if ((ofp = fopen(outfile, "w")) == NULL)
133 Die("Open failed for alignment output file %s", outfile);
134
135 if (scorefile != NULL)
136 if ((sfp = fopen(scorefile, "w")) == NULL)
137 Die("Open failed for score output file %s", scorefile);
138
139 /***********************************************
140 * Print banner
141 ***********************************************/
142
143 puts(banner);
144 printf(" release %s, %s\n\n", RELEASE, RELEASEDATE);
145 printf("---------------------------------------------------\n");
146 printf("Sequence data: %s (%d sequences)\n", seqfile, nseq);
147 printf("Covariance model: %s (%d nodes)\n", cmfile, cm->nodes);
148 if (outfile != NULL)
149 printf("Alignment saved to: %s\n", outfile);
150 if (scorefile != NULL)
151 printf("Indiv. scores saved to: %s\n", scorefile);
152 printf("---------------------------------------------------\n");
153 puts("");
154
155 /***********************************************
156 * Do the alignment
157 ***********************************************/
158
159 if ((tr = (struct trace_s **) malloc (nseq * sizeof(struct trace_s *))) == NULL)
160 Die("Memory failure, line %d of %s", __LINE__, __FILE__);
161
162 tot_score = 0.0;
163 for (idx = 0; idx < nseq; idx++)
164 {
165 char *prepseq;
166 prepseq = Strdup(rseqs[idx]);
167 PrepareSequence(prepseq);
168
169 if (do_smallmemory)
170 {
171 if (! SmallViterbiAlign(icm, statenum, prepseq, &score, &tr[idx]))
172 Die("SmallViterbiAlign() failed on sequence %d", idx);
173 }
174 else if (! ViterbiAlign(icm, statenum, prepseq, &score, &tr[idx]))
175 Die("ViterbiAlign() failed on sequence %d", idx);
176
177 tot_score += score;
178 if (scorefile != NULL)
179 fprintf(sfp, "%-8.3f : %s\n", score, sqinfo[idx].name);
180
181 free(prepseq);
182 }
183
184 if (do_smallmemory)
185 {
186 printf("aborting... no traceback/alignment code yet for small memory variant\n");
187 Free2DArray(rseqs, nseq);
188 FreeCM(cm);
189 exit(0);
190 }
191
192 if (! Traces2Alignment(rseqs, sqinfo, tr, nseq, cm, watsoncrick, &aseqs, &ainfo))
193 Die("Traces2Alignment() failed");
194
195 /***********************************************
196 * Print the alignment
197 ***********************************************/
198
199 if (outfile != NULL)
200 {
201 if (! WriteSELEX(ofp, aseqs, nseq, &ainfo, 60))
202 Die("Write failed: can't save alignment to %s", outfile);
203 fclose(ofp);
204 printf("Alignment written to %s\n", outfile);
205 }
206 else
207 {
208 if (! WriteSELEX(stdout, aseqs, nseq, &ainfo, 60))
209 Die("Write failed: can't print alignment");
210 }
211
212 if (scorefile != NULL) fclose(sfp);
213
214 printf("Overall alignment score: %.2f\n", tot_score / (double) nseq);
215
216 /***********************************************
217 * Garbage collect and exit
218 ***********************************************/
219
220 for (idx = 0; idx < nseq; idx++)
221 {
222 FreeTrace(tr[idx], NULL);
223 FreeSequence(rseqs[idx],&(sqinfo[idx]));
224 }
225 free(tr);
226 free(sqinfo);
227 FreeAlignment(aseqs, nseq, &ainfo);
228 FreeCM(cm);
229 free(icm);
230
231 #ifdef MEMDEBUG
232 current_size = malloc_size(&histid2);
233
234 if (current_size != orig_size)
235 malloc_list(2, histid1, histid2);
236 else
237 fprintf(stderr, "No memory leaks, sir.\n");
238
239 #endif
240
241
242 return 0;
243 }
244