1 /************************************************************
2  * HMMER - Biological sequence analysis with profile-HMMs
3  * Copyright (C) 1992-1998, Washington University School of Medicine
4  *
5  *   This source code is distributed under the terms of the
6  *   GNU General Public License. See the files COPYING and
7  *   GNULICENSE for details.
8  *
9  ************************************************************/
10 
11 /* hmmemit.c
12  * SRE, Sun Mar  8 14:11:24 1998 [St. Louis]
13  *
14  * main() for generating sequences from an HMM
15  * RCS $Id: hmmemit.c,v 1.1.1.1 2001/06/18 13:59:49 birney Exp $
16  */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <time.h>
21 
22 #include "structs.h"		/* data structures, macros, #define's   */
23 #include "config.h"		/* compile-time configuration constants */
24 #include "funcs.h"		/* function declarations                */
25 #include "globals.h"		/* alphabet global variables            */
26 #include "squid.h"		/* general sequence analysis library    */
27 
28 #ifdef MEMDEBUG
29 #include "dbmalloc.h"
30 #endif
31 
32 static char banner[] = "hmmemit - generate sequences from a profile HMM";
33 
34 static char usage[]  = "\
35 Usage: hmmemit [-options] <hmm file>\n\
36 Available options are:\n\
37    -a     : write generated sequences as an alignment, not FASTA\n\
38    -h     : help; print brief help on version and usage\n\
39    -n <n> : emit <n> sequences (default 10)\n\
40    -o <f> : save sequences in file <f>\n\
41    -q     : quiet - suppress verbose banner\n\
42 ";
43 
44 static char experts[] = "\
45    --seed <n> : set random number seed to <n>\n\
46 ";
47 
48 static struct opt_s OPTIONS[] = {
49   { "-a",        TRUE,  sqdARG_NONE },
50   { "-h",        TRUE,  sqdARG_NONE },
51   { "-n",        TRUE,  sqdARG_INT},
52   { "-o",        TRUE,  sqdARG_STRING},
53   { "-q",        TRUE,  sqdARG_NONE},
54   { "--seed",    FALSE, sqdARG_INT},
55 };
56 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
57 
58 int
main(int argc,char ** argv)59 main(int argc, char **argv)
60 {
61   char            *hmmfile;	/* file to read HMMs from                  */
62   FILE            *fp;          /* output file handle                      */
63   HMMFILE         *hmmfp;       /* opened hmmfile for reading              */
64   struct plan7_s  *hmm;         /* HMM to generate from                    */
65   int              L;		/* length of a sequence                    */
66   int              i;		/* counter over sequences                  */
67 
68   char            *ofile;       /* output sequence file                    */
69   int              nseq;	/* number of seqs to sample                */
70   int              seed;	/* random number generator seed            */
71   int              be_quiet;	/* TRUE to silence header/footer           */
72   int              do_alignment;/* TRUE to output in aligned format        */
73 
74   char *optname;                /* name of option found by Getopt()         */
75   char *optarg;                 /* argument found by Getopt()               */
76   int   optind;                 /* index in argv[]                          */
77 
78 #ifdef MEMDEBUG
79   unsigned long histid1, histid2, orig_size, current_size;
80   orig_size = malloc_inuse(&histid1);
81   fprintf(stderr, "[... memory debugging is ON ...]\n");
82 #endif
83 
84   /***********************************************
85    * Parse command line
86    ***********************************************/
87 
88   nseq         = 10;
89   seed         = time ((time_t *) NULL);
90   be_quiet     = FALSE;
91   do_alignment = FALSE;
92   ofile        = NULL;
93 
94   while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
95                 &optind, &optname, &optarg))  {
96     if      (strcmp(optname, "-a")     == 0) do_alignment = TRUE;
97     else if (strcmp(optname, "-n")     == 0) nseq         = atoi(optarg);
98     else if (strcmp(optname, "-o")     == 0) ofile        = optarg;
99     else if (strcmp(optname, "-q")     == 0) be_quiet     = TRUE;
100     else if (strcmp(optname, "--seed") == 0) seed         = atoi(optarg);
101     else if (strcmp(optname, "-h") == 0)
102       {
103 	Banner(stdout, banner);
104 	puts(usage);
105 	puts(experts);
106 	exit(0);
107       }
108   }
109   if (argc - optind != 1)
110     Die("Incorrect number of arguments.\n%s\n", usage);
111 
112   hmmfile = argv[optind++];
113 
114   sre_srandom(seed);
115 
116   /***********************************************
117    * Open HMM file (might be in HMMERDB or current directory).
118    * Read a single HMM from it.
119    ***********************************************/
120 
121   if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
122     Die("Failed to open HMM file %s\n%s", hmmfile, usage);
123   if (!HMMFileRead(hmmfp, &hmm))
124     Die("Failed to read any HMMs from %s\n", hmmfile);
125   HMMFileClose(hmmfp);
126   if (hmm == NULL)
127     Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);
128 
129   /* Configure the HMM to shut off N,J,C emission: so we
130    * do a simple single pass through the model.
131    */
132   Plan7NakedConfig(hmm);
133   Plan7Renormalize(hmm);
134 
135   /***********************************************
136    * Open the output file, or stdout
137    ***********************************************/
138 
139    if (ofile == NULL) fp = stdout;
140    else {
141      if ((fp = fopen(ofile, "w")) == NULL)
142        Die("Failed to open output file %s for writing", ofile);
143    }
144 
145   /***********************************************
146    * Show the options banner
147    ***********************************************/
148 
149   if (! be_quiet)
150     {
151       Banner(stdout, banner);
152       printf("HMM file:             %s\n", hmmfile);
153       printf("Random seed:          %d\n", seed);
154       printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");
155     }
156 
157   /***********************************************
158    * Do the work.
159    * If we're generating an alignment, we have to collect
160    * all our traces, then output. If we're generating unaligned
161    * sequences, we can emit one at a time.
162    ***********************************************/
163 
164   if (do_alignment)
165     {
166       struct p7trace_s **tr;        /* traces for aligned sequences            */
167       char           **dsq;         /* digitized sequences                     */
168       SQINFO          *sqinfo;      /* info about sequences (name/desc)        */
169       char           **aseq;        /* sequence alignment                      */
170       AINFO            ainfo;	    /* optional alignment info                 */
171       float           *wgt;
172 
173       dsq    = MallocOrDie(sizeof(char *)             * nseq);
174       tr     = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
175       sqinfo = MallocOrDie(sizeof(SQINFO)             * nseq);
176       wgt    = MallocOrDie(sizeof(float)              * nseq);
177       FSet(wgt, nseq, 1.0);
178 
179       for (i = 0; i < nseq; i++)
180 	{
181 	  EmitSequence(hmm, &(dsq[i]), &L, &(tr[i]));
182 	  sprintf(sqinfo[i].name, "seq%d", i+1);
183 	  sqinfo[i].len   = L;
184 	  sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN;
185 	}
186 
187       P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE,
188 			 &aseq, &ainfo);
189 
190 				/* Output the alignment */
191       WriteSELEX(fp, aseq, &ainfo, 50);
192       if (ofile != NULL && !be_quiet) printf("Alignment saved in file %s\n", ofile);
193 
194       /* Free memory
195        */
196       for (i = 0; i < nseq; i++)
197 	{
198 	  P7FreeTrace(tr[i]);
199 	  free(dsq[i]);
200 	}
201       FreeAlignment(aseq, &ainfo);
202       free(sqinfo);
203       free(dsq);
204       free(wgt);
205       free(tr);
206     }
207   else				/* unaligned sequence output */
208     {
209       struct p7trace_s *tr;         /* generated trace                        */
210       char             *dsq;        /* digitized sequence                     */
211       char             *seq;        /* alphabetic sequence                    */
212       SQINFO            sqinfo;     /* info about sequence (name/len)         */
213 
214       for (i = 0; i < nseq; i++)
215 	{
216 	  EmitSequence(hmm, &dsq, &L, &tr);
217 	  sprintf(sqinfo.name, "seq%d", i+1);
218 	  sqinfo.len   = L;
219 	  sqinfo.flags = SQINFO_NAME | SQINFO_LEN;
220 
221 	  seq = DedigitizeSequence(dsq, L);
222 
223 	  WriteSeq(fp, kPearson, seq, &sqinfo);
224 
225 	  P7FreeTrace(tr);
226 	  free(dsq);
227 	  free(seq);
228 	}
229     }
230 
231   if (ofile != NULL) fclose(fp);
232   FreePlan7(hmm);
233   SqdClean();
234 
235 #ifdef MEMDEBUG
236   current_size = malloc_inuse(&histid2);
237   if (current_size != orig_size) malloc_list(2, histid1, histid2);
238   else fprintf(stderr, "[No memory leaks.]\n");
239 #endif
240   return 0;
241 }
242 
243