1############################################################################### 2# 3# hmmerModelParser.py - parse a HMMER model file 4# 5############################################################################### 6# # 7# This program is free software: you can redistribute it and/or modify # 8# it under the terms of the GNU General Public License as published by # 9# the Free Software Foundation, either version 3 of the License, or # 10# (at your option) any later version. # 11# # 12# This program is distributed in the hope that it will be useful, # 13# but WITHOUT ANY WARRANTY; without even the implied warranty of # 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15# GNU General Public License for more details. # 16# # 17# You should have received a copy of the GNU General Public License # 18# along with this program. If not, see <http://www.gnu.org/licenses/>. # 19# # 20############################################################################### 21 22 23class HmmModelError(Exception): 24 pass 25 26 27class HmmModel(object): 28 """Store HMM parameters.""" 29 def __init__(self, keys): 30 setattr(self, 'ga', None) 31 setattr(self, 'tc', None) 32 setattr(self, 'nc', None) 33 34 if 'acc' not in keys: 35 setattr(self, 'acc', keys['name']) 36 37 for key, value in keys.items(): 38 setattr(self, key, value) 39 40 41class HmmModelParser(object): 42 """Parse HMM file.""" 43 def __init__(self, hmmFile): 44 self.hmmFile = open(hmmFile) 45 46 def models(self): 47 """Parse all models from HMM file.""" 48 models = {} 49 for model in self.simpleParse(): 50 models[model.acc] = model 51 52 return models 53 54 def simpleParse(self): 55 """Parse simplified description of single model from HMM file.""" 56 headerKeys = dict() 57 for line in self.hmmFile: 58 if line.startswith("HMMER"): 59 headerKeys['format'] = line.rstrip() 60 elif line.startswith("HMM"): 61 # beginning of the hmm; iterate through till end of model 62 for line in self.hmmFile: 63 if line.startswith("//"): 64 yield HmmModel(headerKeys) 65 break 66 else: 67 # header sections 68 fields = line.rstrip().split(None, 1) 69 if len(fields) != 2: 70 raise HmmModelError 71 else: 72 # transform some data based on some of the header tags 73 if fields[0] == 'ACC' or fields[0] == 'NAME': 74 headerKeys[fields[0].lower()] = fields[1] 75 elif fields[0] == "LENG": 76 headerKeys[fields[0].lower()] = int(fields[1]) 77 elif fields[0] == "GA" or fields[0] == "TC" or fields[0] == "NC": 78 params = fields[1].split() 79 if len(params) != 2: 80 raise HmmModelError 81 headerKeys[fields[0].lower()] = (float(params[0].replace(';', '')), float(params[1].replace(';', ''))) 82 else: 83 pass 84 85 def parse(self): 86 """Parse full description of single model from HMM file.""" 87 fields = [] 88 headerKeys = dict() 89 for line in self.hmmFile: 90 if line.startswith("HMMER"): 91 headerKeys['format'] = line.rstrip() 92 elif line.startswith("HMM"): 93 # beginning of the hmm; iterate through till end of model 94 for line in self.hmmFile: 95 if line.startswith("//"): 96 yield HmmModel(headerKeys) 97 break 98 else: 99 # header sections 100 fields = line.rstrip().split(None, 1) 101 if len(fields) != 2: 102 raise HmmModelError 103 else: 104 # transform some data based on some of the header tags 105 if fields[0] == "LENG" or fields[0] == "NSEQ" or fields[0] == "CKSUM": 106 headerKeys[fields[0].lower()] = int(fields[1]) 107 elif fields[0] == "RF" or fields[0] == "CS" or fields[0] == "MAP": 108 if fields[1].lower() == "no": 109 headerKeys[fields[0].lower()] = False 110 else: 111 headerKeys[fields[0].lower()] = True 112 elif fields[0] == "EFFN": 113 headerKeys[fields[0].lower()] = float(fields[1]) 114 elif fields[0] == "GA" or fields[0] == "TC" or fields[0] == "NC": 115 params = fields[1].split() 116 if len(params) != 2: 117 raise HmmModelError 118 headerKeys[fields[0].lower()] = (float(params[0].replace(';', '')), float(params[1].replace(';', ''))) 119 elif fields[0] == "STATS": 120 params = fields[1].split() 121 if params[0] != "LOCAL": 122 raise HmmModelError 123 if params[1] == "MSV" or params[1] == "VITERBI" or params[1] == "FORWARD": 124 headerKeys[(fields[0] + "_" + params[0] + "_" + params[1]).lower()] = (float(params[2]), float(params[3])) 125 else: 126 print("'" + params[1] + "'") 127 raise HmmModelError 128 else: 129 headerKeys[fields[0].lower()] = fields[1] 130