1###############################################################################
2#
3# hmmer.py - runs HMMER and provides functions for parsing output
4#
5###############################################################################
6#                                                                             #
7#    This program is free software: you can redistribute it and/or modify     #
8#    it under the terms of the GNU General Public License as published by     #
9#    the Free Software Foundation, either version 3 of the License, or        #
10#    (at your option) any later version.                                      #
11#                                                                             #
12#    This program is distributed in the hope that it will be useful,          #
13#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
14#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
15#    GNU General Public License for more details.                             #
16#                                                                             #
17#    You should have received a copy of the GNU General Public License        #
18#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
19#                                                                             #
20###############################################################################
21
22import os
23import sys
24import logging
25import subprocess
26from re import split as re_split
27
28
29class FormatError(BaseException):
30    pass
31
32
33class HMMERError(BaseException):
34    pass
35
36
37class HMMMERModeError(BaseException):
38    pass
39
40
41class HMMERRunner():
42    """Wrapper for running HMMER3."""
43    def __init__(self, mode="dom"):
44        self.logger = logging.getLogger('timestamp')
45
46        # make sure HMMER is installed
47        self.checkForHMMER()
48
49        # set the mode
50        if mode == "dom":
51            self.mode = 'domtblout'
52        elif mode == "tbl":
53            self.mode = 'tblout'
54        elif mode == 'align':
55            self.mode = 'align'
56        elif mode == 'fetch':
57            self.mode = 'fetch'
58        else:
59            raise HMMMERModeError("Mode %s not understood" % mode)
60
61    def search(self, db, query, tableOut, hmmerOut, cmdlineOptions='', bKeepOutput=True):
62        """Run hmmsearch"""
63        # make the output dir and files
64        if self.mode != 'domtblout' and self.mode != 'tblout':
65            raise HMMMERModeError("Mode %s not compatible with search" % self.mode)
66
67        if not bKeepOutput:
68            hmmerOut = '/dev/null'
69
70        cmd = ('hmmsearch --%s %s %s %s %s > %s' % (self.mode, tableOut, cmdlineOptions, db, query, hmmerOut))
71        os.system(cmd)
72
73    def align(self, db, query, outputFile, writeMode='>', outputFormat='PSIBLAST', trim=True):
74        """Run hmmalign"""
75        if self.mode != 'align':
76            raise HMMMERModeError("Mode %s not compatible with align" % self.mode)
77
78        # build up the option string
79        opts = ''
80        if trim:
81            opts += ' --trim '
82
83        # run hmmer
84        cmd = 'hmmalign --allcol %s --outformat %s %s %s %s %s' % (opts, outputFormat, db, query, writeMode, outputFile)
85        rtn = os.system(cmd)
86        if rtn == 256:
87            # assume user has a newer version of HMMER (>= 3.1b1) and the allcol parameter is no longer valid
88            cmd = cmd.replace('--allcol', '')
89            os.system(cmd)
90
91    def fetch(self, db, key, fetchFileName, bKeyFile=False):
92        """Run hmmfetch"""
93        if self.mode != 'fetch':
94            raise HMMMERModeError("Mode %s not compatible with fetch" % self.mode)
95
96        keyFileOpt = ''
97        if bKeyFile:
98            keyFileOpt = '-f'
99
100        os.system('hmmfetch ' + keyFileOpt + ' %s %s > %s' % (db, key, fetchFileName))
101
102    def press(self, hmmModelFile):
103        """Press a HMM file."""
104        os.system('hmmpress %s > /dev/null' % hmmModelFile)
105
106    def index(self, hmmModelFile):
107        """Index a HMM file."""
108        if self.mode != 'fetch':
109            raise HMMMERModeError("Mode %s not compatible with fetch" % self.mode)
110
111        os.system('hmmfetch --index %s > /dev/null' % hmmModelFile)
112
113    def checkForHMMER(self):
114        """Check to see if HMMER is on the system path."""
115        try:
116            subprocess.call(['hmmsearch', '-h'], stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT)
117        except:
118            self.logger.error("  [Error] Make sure HMMER executables (e.g., hmmsearch, hmmfetch) are on your system path.")
119            sys.exit(1)
120
121
122class HMMERParser():
123    """Parses tabular output."""
124    def __init__(self, fileHandle, mode='dom'):
125        self.handle = fileHandle
126        if mode == 'dom':
127            self.mode = 'domtblout'
128        elif mode == 'tbl':
129            self.mode = 'tblout'
130        else:
131            raise HMMERError("Mode %s not understood, please use 'dom' or 'tbl'" % mode)
132
133    def next(self):
134        """Process each hit in the file."""
135        while 1:
136            if self.mode == 'domtblout':
137                hit = self.readHitsDOM()
138            elif self.mode == 'tblout':
139                hit = self.readHitsTBL()
140            else:
141                raise HMMERError("Mode %s not understood" % self.mode)
142
143            if hit == {}:
144                return None
145            else:
146                return hit
147
148    def readHitsTBL(self):
149        """Process single hit in tblout format."""
150        """
151We expect line to look like:
152NODE_110054_length_1926_cov_24.692627_41_3 -          Ribosomal_S9         PF00380.14   5.9e-48  158.7   0.0   6.7e-48  158.5   0.0   1.0   1   0   0   1   1   1   1 # 1370 # 1756 # 1 # ID=41_3;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None
153        """
154        while (1):
155            line = self.handle.readline().rstrip()
156            try:
157                if line[0] != '#' and len(line) != 0:
158                    dMatch = re_split(r'\s+', line.rstrip())
159                    if len(dMatch) < 19:
160                        raise FormatError("Error processing line:\n%s" % (line))
161                    refined_match = dMatch[0:18] + [" ".join([str(i) for i in dMatch[18:]])]
162                    return HmmerHitTBL(refined_match)
163            except IndexError:
164                return {}
165
166    def readHitsDOM(self):
167        """Process single hit in domtblout format."""
168        """
169We expect the line to look like:
170NODE_925902_length_6780_cov_18.428171_754_2 -            399 PGK                  PF00162.14   384  2.2e-164  543.7   0.1   1   1  1.3e-167  2.5e-164  543.5   0.1     1   384     9   386     9   386 1.00 # 1767 # 2963 # -1 # ID=754_2;partial=00;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp
171        """
172        while (1):
173            line = self.handle.readline().rstrip()
174            try:
175                if line[0] != '#' and len(line) != 0:
176                    dMatch = re_split(r'\s+', line.rstrip())
177                    if len(dMatch) < 23:
178                        raise FormatError("Error processing line:\n%s" % (line))
179                    refined_match = dMatch[0:22] + [" ".join([str(i) for i in dMatch[22:]])]
180                    return HmmerHitDOM(refined_match)
181            except IndexError:
182                return {}
183
184
185class HmmerHitTBL():
186    """Encapsulate a HMMER hit given in tblout format."""
187    def __init__(self, values):
188        if len(values) == 19:
189            self.target_name = values[0]
190            self.target_accession = values[1]
191            self.query_name = values[2]
192
193            self.query_accession = values[3]
194            if self.query_accession == '-':
195                self.query_accession = self.query_name
196
197            self.full_e_value = float(values[4])
198            self.full_score = float(values[5])
199            self.full_bias = float(values[6])
200            self.best_e_value = float(values[7])
201            self.best_score = float(values[8])
202            self.best_bias = float(values[9])
203            self.exp = float(values[10])
204            self.reg = int(values[11])
205            self.clu = int(values[12])
206            self.ov = int(values[13])
207            self.env = int(values[14])
208            self.dom = int(values[15])
209            self.rep = int(values[16])
210            self.inc = int(values[17])
211            self.target_description = values[18]
212
213    def __str__(self):
214        return "\t".join([self.target_name,
215                          self.target_accession,
216                          self.query_name,
217                          self.query_accession,
218                          str(self.full_e_value),
219                          str(self.full_score),
220                          str(self.full_bias),
221                          str(self.best_e_value),
222                          str(self.best_score),
223                          str(self.best_bias),
224                          str(self.exp),
225                          str(self.reg),
226                          str(self.clu),
227                          str(self.ov),
228                          str(self.env),
229                          str(self.dom),
230                          str(self.rep),
231                          str(self.inc),
232                          self.target_description
233                          ]
234                         )
235
236
237class HmmerHitDOM():
238    """Encapsulate a HMMER hit given in domtblout format."""
239    def __init__(self, values):
240        if len(values) == 23:
241            self.target_name = values[0]
242            self.target_accession = values[1]
243            self.target_length = int(values[2])
244            self.query_name = values[3]
245
246            self.query_accession = values[4]
247            if self.query_accession == '-':
248                self.query_accession = self.query_name
249
250            self.query_length = int(values[5])
251            self.full_e_value = float(values[6])
252            self.full_score = float(values[7])
253            self.full_bias = float(values[8])
254            self.dom = int(values[9])
255            self.ndom = int(values[10])
256            self.c_evalue = float(values[11])
257            self.i_evalue = float(values[12])
258            self.dom_score = float(values[13])
259            self.dom_bias = float(values[14])
260            self.hmm_from = int(values[15])
261            self.hmm_to = int(values[16])
262            self.ali_from = int(values[17])
263            self.ali_to = int(values[18])
264            self.env_from = int(values[19])
265            self.env_to = int(values[20])
266            self.acc = float(values[21])
267            self.target_description = values[22]
268
269    def __str__(self):
270        return "\t".join([self.target_name,
271                          self.target_accession,
272                          str(self.target_length),
273                          self.query_name,
274                          self.query_accession,
275                          str(self.query_length),
276                          str(self.full_e_value),
277                          str(self.full_score),
278                          str(self.full_bias),
279                          str(self.dom),
280                          str(self.ndom),
281                          str(self.c_evalue),
282                          str(self.i_evalue),
283                          str(self.dom_score),
284                          str(self.dom_bias),
285                          str(self.hmm_from),
286                          str(self.hmm_to),
287                          str(self.ali_from),
288                          str(self.ali_to),
289                          str(self.env_from),
290                          str(self.env_to),
291                          str(self.acc),
292                          self.target_description]
293                         )
294