1# Copyright 2018 by Ariel Aptekmann. 2# All rights reserved. 3# 4# This file is part of the Biopython distribution and governed by your 5# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". 6# Please see the LICENSE file that should have been included as part of this 7# package. 8"""Module for the support of MEME minimal motif format.""" 9 10from Bio import motifs 11 12 13def read(handle): 14 """Parse the text output of the MEME program into a meme.Record object. 15 16 Examples 17 -------- 18 >>> from Bio.motifs import minimal 19 >>> with open("motifs/meme.out") as f: 20 ... record = minimal.read(f) 21 ... 22 >>> for motif in record: 23 ... print(motif.name, motif.evalue) 24 ... 25 1 1.1e-22 26 27 You can access individual motifs in the record by their index or find a motif 28 by its name: 29 30 >>> from Bio import motifs 31 >>> with open("motifs/minimal_test.meme") as f: 32 ... record = motifs.parse(f, 'minimal') 33 ... 34 >>> motif = record[0] 35 >>> print(motif.name) 36 KRP 37 >>> motif = record['IFXA'] 38 >>> print(motif.name) 39 IFXA 40 41 This function wont retrieve instances, as there are none in minimal meme format. 42 43 """ 44 motif_number = 0 45 record = Record() 46 _read_version(record, handle) 47 _read_alphabet(record, handle) 48 _read_background(record, handle) 49 50 while True: 51 for line in handle: 52 if line.startswith("MOTIF"): 53 break 54 else: 55 return record 56 name = line.split()[1] 57 motif_number += 1 58 length, num_occurrences, evalue = _read_motif_statistics(handle) 59 counts = _read_lpm(handle, num_occurrences) 60 # {'A': 0.25, 'C': 0.25, 'T': 0.25, 'G': 0.25} 61 motif = motifs.Motif(alphabet=record.alphabet, counts=counts) 62 motif.background = record.background 63 motif.length = length 64 motif.num_occurrences = num_occurrences 65 motif.evalue = evalue 66 motif.name = name 67 record.append(motif) 68 assert len(record) == motif_number 69 return record 70 71 72class Record(list): 73 """Class for holding the results of a minimal MEME run.""" 74 75 def __init__(self): 76 """Initialize record class values.""" 77 self.version = "" 78 self.datafile = "" 79 self.command = "" 80 self.alphabet = None 81 self.background = {} 82 self.sequences = [] 83 84 def __getitem__(self, key): 85 """Return the motif of index key.""" 86 if isinstance(key, str): 87 for motif in self: 88 if motif.name == key: 89 return motif 90 else: 91 return list.__getitem__(self, key) 92 93 94# Everything below is private 95 96 97def _read_background(record, handle): 98 """Read background letter frequencies (PRIVATE).""" 99 for line in handle: 100 if line.startswith("Background letter frequencies"): 101 break 102 else: 103 raise ValueError( 104 "Improper input file. File should contain a line starting background frequencies." 105 ) 106 try: 107 line = next(handle) 108 except StopIteration: 109 raise ValueError( 110 "Unexpected end of stream: Expected to find line starting background frequencies." 111 ) 112 line = line.strip() 113 ls = line.split() 114 A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7]) 115 record.background = {"A": A, "C": C, "G": G, "T": T} 116 117 118def _read_version(record, handle): 119 """Read MEME version (PRIVATE).""" 120 for line in handle: 121 if line.startswith("MEME version"): 122 break 123 else: 124 raise ValueError( 125 "Improper input file. File should contain a line starting MEME version." 126 ) 127 line = line.strip() 128 ls = line.split() 129 record.version = ls[2] 130 131 132def _read_alphabet(record, handle): 133 """Read alphabet (PRIVATE).""" 134 for line in handle: 135 if line.startswith("ALPHABET"): 136 break 137 else: 138 raise ValueError( 139 "Unexpected end of stream: Expected to find line starting with 'ALPHABET'" 140 ) 141 if not line.startswith("ALPHABET= "): 142 raise ValueError("Line does not start with 'ALPHABET':\n%s" % line) 143 line = line.strip().replace("ALPHABET= ", "") 144 if line == "ACGT": 145 al = "ACGT" 146 else: 147 al = "ACDEFGHIKLMNPQRSTVWY" 148 record.alphabet = al 149 150 151def _read_lpm(handle, num_occurrences): 152 """Read letter probability matrix (PRIVATE).""" 153 counts = [[], [], [], []] 154 for line in handle: 155 freqs = line.split() 156 if len(freqs) != 4: 157 break 158 counts[0].append(round(float(freqs[0]) * num_occurrences)) 159 counts[1].append(round(float(freqs[1]) * num_occurrences)) 160 counts[2].append(round(float(freqs[2]) * num_occurrences)) 161 counts[3].append(round(float(freqs[3]) * num_occurrences)) 162 c = {} 163 c["A"] = counts[0] 164 c["C"] = counts[1] 165 c["G"] = counts[2] 166 c["T"] = counts[3] 167 return c 168 169 170def _read_motif_statistics(handle): 171 """Read motif statistics (PRIVATE).""" 172 # minimal : 173 # letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009 174 for line in handle: 175 if line.startswith("letter-probability matrix:"): 176 break 177 num_occurrences = int(line.split("nsites=")[1].split()[0]) 178 length = int(line.split("w=")[1].split()[0]) 179 evalue = float(line.split("E=")[1].split()[0]) 180 return length, num_occurrences, evalue 181 182 183def _read_motif_name(handle): 184 """Read motif name (PRIVATE).""" 185 for line in handle: 186 if "sorted by position p-value" in line: 187 break 188 else: 189 raise ValueError("Unexpected end of stream: Failed to find motif name") 190 line = line.strip() 191 words = line.split() 192 name = " ".join(words[0:2]) 193 return name 194