1# Copyright 2018 by Ariel Aptekmann.
2# All rights reserved.
3#
4# This file is part of the Biopython distribution and governed by your
5# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
6# Please see the LICENSE file that should have been included as part of this
7# package.
8"""Module for the support of MEME minimal motif format."""
9
10from Bio import motifs
11
12
13def read(handle):
14    """Parse the text output of the MEME program into a meme.Record object.
15
16    Examples
17    --------
18    >>> from Bio.motifs import minimal
19    >>> with open("motifs/meme.out") as f:
20    ...     record = minimal.read(f)
21    ...
22    >>> for motif in record:
23    ...     print(motif.name, motif.evalue)
24    ...
25    1 1.1e-22
26
27    You can access individual motifs in the record by their index or find a motif
28    by its name:
29
30    >>> from Bio import motifs
31    >>> with open("motifs/minimal_test.meme") as f:
32    ...     record = motifs.parse(f, 'minimal')
33    ...
34    >>> motif = record[0]
35    >>> print(motif.name)
36    KRP
37    >>> motif = record['IFXA']
38    >>> print(motif.name)
39    IFXA
40
41    This function wont retrieve instances, as there are none in minimal meme format.
42
43    """
44    motif_number = 0
45    record = Record()
46    _read_version(record, handle)
47    _read_alphabet(record, handle)
48    _read_background(record, handle)
49
50    while True:
51        for line in handle:
52            if line.startswith("MOTIF"):
53                break
54        else:
55            return record
56        name = line.split()[1]
57        motif_number += 1
58        length, num_occurrences, evalue = _read_motif_statistics(handle)
59        counts = _read_lpm(handle, num_occurrences)
60        # {'A': 0.25, 'C': 0.25, 'T': 0.25, 'G': 0.25}
61        motif = motifs.Motif(alphabet=record.alphabet, counts=counts)
62        motif.background = record.background
63        motif.length = length
64        motif.num_occurrences = num_occurrences
65        motif.evalue = evalue
66        motif.name = name
67        record.append(motif)
68        assert len(record) == motif_number
69    return record
70
71
72class Record(list):
73    """Class for holding the results of a minimal MEME run."""
74
75    def __init__(self):
76        """Initialize record class values."""
77        self.version = ""
78        self.datafile = ""
79        self.command = ""
80        self.alphabet = None
81        self.background = {}
82        self.sequences = []
83
84    def __getitem__(self, key):
85        """Return the motif of index key."""
86        if isinstance(key, str):
87            for motif in self:
88                if motif.name == key:
89                    return motif
90        else:
91            return list.__getitem__(self, key)
92
93
94# Everything below is private
95
96
97def _read_background(record, handle):
98    """Read background letter frequencies (PRIVATE)."""
99    for line in handle:
100        if line.startswith("Background letter frequencies"):
101            break
102    else:
103        raise ValueError(
104            "Improper input file. File should contain a line starting background frequencies."
105        )
106    try:
107        line = next(handle)
108    except StopIteration:
109        raise ValueError(
110            "Unexpected end of stream: Expected to find line starting background frequencies."
111        )
112    line = line.strip()
113    ls = line.split()
114    A, C, G, T = float(ls[1]), float(ls[3]), float(ls[5]), float(ls[7])
115    record.background = {"A": A, "C": C, "G": G, "T": T}
116
117
118def _read_version(record, handle):
119    """Read MEME version (PRIVATE)."""
120    for line in handle:
121        if line.startswith("MEME version"):
122            break
123    else:
124        raise ValueError(
125            "Improper input file. File should contain a line starting MEME version."
126        )
127    line = line.strip()
128    ls = line.split()
129    record.version = ls[2]
130
131
132def _read_alphabet(record, handle):
133    """Read alphabet (PRIVATE)."""
134    for line in handle:
135        if line.startswith("ALPHABET"):
136            break
137    else:
138        raise ValueError(
139            "Unexpected end of stream: Expected to find line starting with 'ALPHABET'"
140        )
141    if not line.startswith("ALPHABET= "):
142        raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
143    line = line.strip().replace("ALPHABET= ", "")
144    if line == "ACGT":
145        al = "ACGT"
146    else:
147        al = "ACDEFGHIKLMNPQRSTVWY"
148    record.alphabet = al
149
150
151def _read_lpm(handle, num_occurrences):
152    """Read letter probability matrix (PRIVATE)."""
153    counts = [[], [], [], []]
154    for line in handle:
155        freqs = line.split()
156        if len(freqs) != 4:
157            break
158        counts[0].append(round(float(freqs[0]) * num_occurrences))
159        counts[1].append(round(float(freqs[1]) * num_occurrences))
160        counts[2].append(round(float(freqs[2]) * num_occurrences))
161        counts[3].append(round(float(freqs[3]) * num_occurrences))
162    c = {}
163    c["A"] = counts[0]
164    c["C"] = counts[1]
165    c["G"] = counts[2]
166    c["T"] = counts[3]
167    return c
168
169
170def _read_motif_statistics(handle):
171    """Read motif statistics (PRIVATE)."""
172    # minimal :
173    #      letter-probability matrix: alength= 4 w= 19 nsites= 17 E= 4.1e-009
174    for line in handle:
175        if line.startswith("letter-probability matrix:"):
176            break
177    num_occurrences = int(line.split("nsites=")[1].split()[0])
178    length = int(line.split("w=")[1].split()[0])
179    evalue = float(line.split("E=")[1].split()[0])
180    return length, num_occurrences, evalue
181
182
183def _read_motif_name(handle):
184    """Read motif name (PRIVATE)."""
185    for line in handle:
186        if "sorted by position p-value" in line:
187            break
188    else:
189        raise ValueError("Unexpected end of stream: Failed to find motif name")
190    line = line.strip()
191    words = line.split()
192    name = " ".join(words[0:2])
193    return name
194