1 /* hmmstat: display summary statistics for an HMM database.
2  */
3 #include "p7_config.h"
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <math.h>
9 
10 #include "easel.h"
11 #include "esl_getopts.h"
12 
13 #include "hmmer.h"
14 
15 static ESL_OPTIONS options[] = {
16   /* name           type       default   env  range    toggles    reqs       incomp  help   docgroup*/
17   { "-h",        eslARG_NONE,    FALSE,  NULL, NULL,    NULL,  NULL,           NULL, "show brief help on version and usage",            0 },
18   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
19 };
20 
21 static char usage[]  = "[-options] <hmmfile>";
22 static char banner[] = "display summary statistics for a profile file";
23 
24 
25 int
main(int argc,char ** argv)26 main(int argc, char **argv)
27 {
28   ESL_GETOPTS     *go	   = NULL;
29   ESL_ALPHABET    *abc     = NULL;
30   char            *hmmfile = NULL;
31   P7_HMMFILE      *hfp     = NULL;
32   P7_HMM          *hmm     = NULL;
33   P7_BG           *bg      = NULL;
34   int              nhmm;
35   double           x;
36   float            KL;
37   char             errbuf[eslERRBUFSIZE];
38   int              status;
39 
40   /* Process command line
41    */
42   go = esl_getopts_Create(options);
43   if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK ||
44       esl_opt_VerifyConfig(go)               != eslOK)
45     {
46       printf("Failed to parse command line: %s\n", go->errbuf);
47       esl_usage(stdout, argv[0], usage);
48       printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
49       exit(1);
50     }
51   if (esl_opt_GetBoolean(go, "-h") == TRUE)
52     {
53       p7_banner(stdout, argv[0], banner);
54       esl_usage(stdout, argv[0], usage);
55       puts("\nOptions:");
56       esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/
57       exit(0);
58     }
59   if (esl_opt_ArgNumber(go) != 1)
60     {
61       puts("Incorrect number of command line arguments.");
62       esl_usage(stdout, argv[0], usage);
63       printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
64       exit(1);
65     }
66   if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL)
67     {
68       puts("Failed to read <hmmfile> argument from command line.");
69       esl_usage(stdout, argv[0], usage);
70       printf("\nTo see more help on available options, do %s -h\n\n", argv[0]);
71       exit(1);
72     }
73 
74   p7_banner(stdout, go->argv[0], banner);
75 
76 
77 
78   /* Initializations: open the HMM file
79    */
80   status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
81   if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
82   else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
83   else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, hmmfile, errbuf);
84 
85 
86   /* Output header
87    */
88   printf("#\n");
89   printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "idx",  "name",                 "accession",    "nseq",     "eff_nseq", "M",      "relent", "info",   "p relE", "compKL");
90   printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s\n", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------");
91 
92 
93   /* Main body: read HMMs one at a time, print one line of stats per profile
94    */
95   nhmm = 0;
96   while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
97     {
98       if      (status == eslEOD)       esl_fatal("read failed, HMM file %s may be truncated?", hmmfile);
99       else if (status == eslEFORMAT)   esl_fatal("bad file format in HMM file %s",             hmmfile);
100       else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets",   hmmfile);
101       else if (status != eslOK)        esl_fatal("Unexpected error in reading HMMs from %s",   hmmfile);
102       nhmm++;
103 
104       if (bg == NULL) bg = p7_bg_Create(abc);
105 
106       p7_MeanPositionRelativeEntropy(hmm, bg, &x);
107       p7_hmm_CompositionKLD(hmm, bg, &KL, NULL);
108 
109       printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f\n",
110 	     nhmm,
111 	     hmm->name,
112 	     hmm->acc == NULL ? "-" : hmm->acc,
113 	     hmm->nseq,
114 	     hmm->eff_nseq,
115 	     hmm->M,
116 	     p7_MeanMatchRelativeEntropy(hmm, bg),
117 	     p7_MeanMatchInfo(hmm, bg),
118 	     x,
119 	     KL);
120 
121       p7_hmm_Destroy(hmm);
122     }
123 
124   p7_bg_Destroy(bg);
125   esl_alphabet_Destroy(abc);
126   p7_hmmfile_Close(hfp);
127   esl_getopts_Destroy(go);
128   exit(0);
129 }
130