1 /************************************************************
2 * HMMER - Biological sequence analysis with profile-HMMs
3 * Copyright (C) 1992-1998 Washington University School of Medicine
4 *
5 * This source code is distributed under the terms of the
6 * GNU General Public License. See the files COPYING and
7 * GNULICENSE for details.
8 *
9 ************************************************************/
10
11 /* hmmalign.c
12 * SRE, Thu Dec 18 16:05:29 1997 [St. Louis]
13 *
14 * main() for aligning a set of sequences to an HMM.
15 * RCS $Id: hmmalign.c,v 1.1.1.1 2001/06/18 13:59:49 birney Exp $
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20
21 #include "structs.h" /* data structures, macros, #define's */
22 #include "config.h" /* compile-time configuration constants */
23 #include "funcs.h" /* function declarations */
24 #include "globals.h" /* alphabet global variables */
25 #include "squid.h" /* general sequence analysis library */
26
27 #ifdef MEMDEBUG
28 #include "dbmalloc.h"
29 #endif
30
31 static char banner[] = "hmmalign - align sequences to an HMM profile";
32
33 static char usage[] = "\
34 Usage: hmmalign [-options] <hmm file> <sequence file>\n\
35 Available options are:\n\
36 -h : help; print brief help on version and usage\n\
37 -m : only print symbols aligned to match states\n\
38 -o <f> : save alignment in file <f> in SELEX format\n\
39 -q : quiet - suppress verbose banner\n\
40 ";
41
42 static char experts[] = "\
43 \n";
44
45 static struct opt_s OPTIONS[] = {
46 { "-h", TRUE, sqdARG_NONE },
47 { "-m", TRUE, sqdARG_NONE } ,
48 { "-o", TRUE, sqdARG_STRING },
49 { "-q", TRUE, sqdARG_NONE },
50 };
51 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
52
53 int
main(int argc,char ** argv)54 main(int argc, char **argv)
55 {
56 char *hmmfile; /* file to read HMMs from */
57 HMMFILE *hmmfp; /* opened hmmfile for reading */
58 char *seqfile; /* file to read target sequence from */
59 int format; /* format of seqfile */
60 char **rseq; /* raw, unaligned sequences */
61 SQINFO *sqinfo; /* info associated with sequences */
62 char **dsq; /* digitized raw sequences */
63 int nseq; /* number of sequences */
64 char **aseq; /* aligned sequences */
65 AINFO ainfo; /* alignment information */
66 float *wgt; /* per-sequence weights */
67 int i;
68 struct plan7_s *hmm; /* HMM to align to */
69 struct p7trace_s **tr; /* traces for aligned sequences */
70
71 char *optname; /* name of option found by Getopt() */
72 char *optarg; /* argument found by Getopt() */
73 int optind; /* index in argv[] */
74 int be_quiet; /* TRUE to suppress verbose banner */
75 int matchonly; /* TRUE to show only match state syms */
76 char *outfile; /* optional alignment output file */
77 FILE *ofp; /* handle on alignment output file */
78
79
80 #ifdef MEMDEBUG
81 unsigned long histid1, histid2, orig_size, current_size;
82 orig_size = malloc_inuse(&histid1);
83 fprintf(stderr, "[... memory debugging is ON ...]\n");
84 #endif
85
86 /***********************************************
87 * Parse command line
88 ***********************************************/
89
90 matchonly = FALSE;
91 outfile = NULL;
92 be_quiet = FALSE;
93
94 while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
95 &optind, &optname, &optarg)) {
96 if (strcmp(optname, "-m") == 0) matchonly= TRUE;
97 else if (strcmp(optname, "-o") == 0) outfile = optarg;
98 else if (strcmp(optname, "-q") == 0) be_quiet = TRUE;
99 else if (strcmp(optname, "-h") == 0)
100 {
101 Banner(stdout, banner);
102 puts(usage);
103 puts(experts);
104 exit(0);
105 }
106 }
107 if (argc - optind != 2)
108 Die("Incorrect number of arguments.\n%s\n", usage);
109
110 hmmfile = argv[optind++];
111 seqfile = argv[optind++];
112
113 /***********************************************
114 * Open HMM file (might be in HMMERDB or current directory).
115 * Read a single HMM from it.
116 ***********************************************/
117
118 if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
119 Die("Failed to open HMM file %s\n%s", hmmfile, usage);
120 if (!HMMFileRead(hmmfp, &hmm))
121 Die("Failed to read any HMMs from %s\n", hmmfile);
122 HMMFileClose(hmmfp);
123 if (hmm == NULL)
124 Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);
125 P7Logoddsify(hmm, TRUE);
126
127 /***********************************************
128 * Open sequence file in current directory.
129 * Read all seqs from it.
130 ***********************************************/
131
132 if (! SeqfileFormat(seqfile, &format, NULL))
133 switch (squid_errno) {
134 case SQERR_NOFILE:
135 Die("Sequence file %s could not be opened for reading", seqfile); /*FALLTHRU*/
136 case SQERR_FORMAT:
137 default:
138 Die("Failed to determine format of sequence file %s", seqfile);
139 }
140 if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq))
141 Die("Failed to read any sequences from file %s", seqfile);
142
143 /***********************************************
144 * Show the banner
145 ***********************************************/
146
147 if (! be_quiet)
148 {
149 Banner(stdout, banner);
150 printf( "HMM file: %s\n", hmmfile);
151 printf( "Sequence file: %s\n", seqfile);
152 printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");
153 }
154
155 /***********************************************
156 * Do the work
157 ***********************************************/
158
159 /* Allocations and initializations.
160 */
161 dsq = MallocOrDie(sizeof(char *) * nseq);
162 tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
163 wgt = MallocOrDie(sizeof(float) * nseq);
164 FSet(wgt, nseq, 1.0);
165
166 /* Align each sequence to the model, collect traces
167 */
168 for (i = 0; i < nseq; i++)
169 {
170 dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len);
171
172 if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT)
173 (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
174 else
175 (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i]));
176 }
177
178 /* Turn traces into a multiple alignment
179 */
180 P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly,
181 &aseq, &ainfo);
182
183 /***********************************************
184 * Output the alignment
185 ***********************************************/
186
187 if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL)
188 {
189 WriteSELEX(ofp, aseq, &ainfo, 50);
190 printf("Alignment saved in file %s\n", outfile);
191 fclose(ofp);
192 }
193 else
194 WriteSELEX(stdout, aseq, &ainfo, 50);
195
196 /***********************************************
197 * Cleanup and exit
198 ***********************************************/
199
200 for (i = 0; i < nseq; i++)
201 {
202 P7FreeTrace(tr[i]);
203 FreeSequence(rseq[i], &(sqinfo[i]));
204 free(dsq[i]);
205 }
206 FreeAlignment(aseq, &ainfo);
207 FreePlan7(hmm);
208 free(sqinfo);
209 free(dsq);
210 free(wgt);
211 free(tr);
212
213 SqdClean();
214
215 #ifdef MEMDEBUG
216 current_size = malloc_inuse(&histid2);
217 if (current_size != orig_size) malloc_list(2, histid1, histid2);
218 else fprintf(stderr, "[No memory leaks.]\n");
219 #endif
220 return 0;
221 }
222