1###############################################################################
2#
3# hmmerModelParser.py - parse a HMMER model file
4#
5###############################################################################
6#                                                                             #
7#    This program is free software: you can redistribute it and/or modify     #
8#    it under the terms of the GNU General Public License as published by     #
9#    the Free Software Foundation, either version 3 of the License, or        #
10#    (at your option) any later version.                                      #
11#                                                                             #
12#    This program is distributed in the hope that it will be useful,          #
13#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
14#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
15#    GNU General Public License for more details.                             #
16#                                                                             #
17#    You should have received a copy of the GNU General Public License        #
18#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
19#                                                                             #
20###############################################################################
21
22
23class HmmModelError(Exception):
24    pass
25
26
27class HmmModel(object):
28    """Store HMM parameters."""
29    def __init__(self, keys):
30        setattr(self, 'ga', None)
31        setattr(self, 'tc', None)
32        setattr(self, 'nc', None)
33
34        if 'acc' not in keys:
35            setattr(self, 'acc', keys['name'])
36
37        for key, value in keys.items():
38            setattr(self, key, value)
39
40
41class HmmModelParser(object):
42    """Parse HMM file."""
43    def __init__(self, hmmFile):
44        self.hmmFile = open(hmmFile)
45
46    def models(self):
47        """Parse all models from HMM file."""
48        models = {}
49        for model in self.simpleParse():
50            models[model.acc] = model
51
52        return models
53
54    def simpleParse(self):
55        """Parse simplified description of single model from HMM file."""
56        headerKeys = dict()
57        for line in self.hmmFile:
58            if line.startswith("HMMER"):
59                headerKeys['format'] = line.rstrip()
60            elif line.startswith("HMM"):
61                # beginning of the hmm; iterate through till end of model
62                for line in self.hmmFile:
63                    if line.startswith("//"):
64                        yield HmmModel(headerKeys)
65                        break
66            else:
67                # header sections
68                fields = line.rstrip().split(None, 1)
69                if len(fields) != 2:
70                    raise HmmModelError
71                else:
72                    # transform some data based on some of the header tags
73                    if fields[0] == 'ACC' or fields[0] == 'NAME':
74                        headerKeys[fields[0].lower()] = fields[1]
75                    elif fields[0] == "LENG":
76                        headerKeys[fields[0].lower()] = int(fields[1])
77                    elif fields[0] == "GA" or fields[0] == "TC" or fields[0] == "NC":
78                        params = fields[1].split()
79                        if len(params) != 2:
80                            raise HmmModelError
81                        headerKeys[fields[0].lower()] = (float(params[0].replace(';', '')), float(params[1].replace(';', '')))
82                    else:
83                        pass
84
85    def parse(self):
86        """Parse full description of single model from HMM file."""
87        fields = []
88        headerKeys = dict()
89        for line in self.hmmFile:
90            if line.startswith("HMMER"):
91                headerKeys['format'] = line.rstrip()
92            elif line.startswith("HMM"):
93                # beginning of the hmm; iterate through till end of model
94                for line in self.hmmFile:
95                    if line.startswith("//"):
96                        yield HmmModel(headerKeys)
97                        break
98            else:
99                # header sections
100                fields = line.rstrip().split(None, 1)
101                if len(fields) != 2:
102                    raise HmmModelError
103                else:
104                    # transform some data based on some of the header tags
105                    if fields[0] == "LENG" or fields[0] == "NSEQ" or fields[0] == "CKSUM":
106                        headerKeys[fields[0].lower()] = int(fields[1])
107                    elif fields[0] == "RF" or fields[0] == "CS" or fields[0] == "MAP":
108                        if fields[1].lower() == "no":
109                            headerKeys[fields[0].lower()] = False
110                        else:
111                            headerKeys[fields[0].lower()] = True
112                    elif fields[0] == "EFFN":
113                        headerKeys[fields[0].lower()] = float(fields[1])
114                    elif fields[0] == "GA" or fields[0] == "TC" or fields[0] == "NC":
115                        params = fields[1].split()
116                        if len(params) != 2:
117                            raise HmmModelError
118                        headerKeys[fields[0].lower()] = (float(params[0].replace(';', '')), float(params[1].replace(';', '')))
119                    elif fields[0] == "STATS":
120                        params = fields[1].split()
121                        if params[0] != "LOCAL":
122                            raise HmmModelError
123                        if params[1] == "MSV" or params[1] == "VITERBI" or params[1] == "FORWARD":
124                            headerKeys[(fields[0] + "_" + params[0] + "_" + params[1]).lower()] = (float(params[2]), float(params[3]))
125                        else:
126                            print("'" + params[1] + "'")
127                            raise HmmModelError
128                    else:
129                        headerKeys[fields[0].lower()] = fields[1]
130