1# Copyright 2008 Michiel de Hoon
2#
3# This file is part of the Biopython distribution and governed by your
4# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
5# Please see the LICENSE file that should have been included as part of this
6# package.
7
8"""Tests for Medline module."""
9
10import unittest
11
12from Bio import Medline
13
14
class TestMedline(unittest.TestCase):
    """Exercise Medline.read and Medline.parse on stored PubMed result files.

    Each test opens a file below ``Medline/`` (a path relative to the current
    working directory) and compares selected fields of the parsed record(s)
    with the values spelled out in the test.
    """

    def _check_fields(self, record, expected):
        """Assert ``record[key] == value`` for every pair in ``expected``.

        The pairs are checked in the order in which they are listed; the
        first mismatch fails the calling test.
        """
        for key, value in expected.items():
            self.assertEqual(record[key], value)

    def test_read(self):
        """Parse pubmed_result1.txt with Medline.read and verify its fields."""
        expected = {
            "PMID": "12230038",
            "OWN": "NLM",
            "STAT": "MEDLINE",
            "DA": "20020916",
            "DCOM": "20030606",
            "LR": "20041117",
            "PUBM": "Print",
            "IS": "1467-5463 (Print)",
            "VI": "3",
            "IP": "3",
            "DP": "2002 Sep",
            "TI": "The Bio* toolkits--a brief overview.",
            "PG": "296-302",
            "AB": "Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer.",
            "AD": ["tacg Informatics, Irvine, CA 92612, USA. hjm@tacgi.com"],
            "FAU": ["Mangalam, Harry"],
            "AU": ["Mangalam H"],
            "LA": ["eng"],
            "PT": ["Journal Article"],
            "PL": "England",
            "TA": "Brief Bioinform",
            "JT": "Briefings in bioinformatics",
            "JID": "100912837",
            "SB": "IM",
            "MH": [
                "*Computational Biology",
                "Computer Systems",
                "Humans",
                "Internet",
                "*Programming Languages",
                "*Software",
                "User-Computer Interface",
            ],
            "EDAT": "2002/09/17 10:00",
            "MHDA": "2003/06/07 05:00",
            "PST": "ppublish",
            "SO": "Brief Bioinform. 2002 Sep;3(3):296-302.",
        }
        with open("Medline/pubmed_result1.txt") as stream:
            record = Medline.read(stream)
        self._check_fields(record, expected)

    def test_parse(self):
        """Iterate pubmed_result2.txt with Medline.parse and verify each record.

        Four records are pulled from the iterator one at a time and checked in
        order; a fifth ``next`` call must raise StopIteration.
        """
        expected_records = [
            {
                "PMID": "16403221",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20060220",
                "DCOM": "20060314",
                "PUBM": "Electronic",
                "IS": "1471-2105 (Electronic)",
                "VI": "7",
                "DP": "2006",
                "TI": "A high level interface to SCOP and ASTRAL implemented in python.",
                "PG": "10",
                "AB": "BACKGROUND: Benchmarking algorithms in structural bioinformatics often involves the construction of datasets of proteins with given sequence and structural properties. The SCOP database is a manually curated structural classification which groups together proteins on the basis of structural similarity. The ASTRAL compendium provides non redundant subsets of SCOP domains on the basis of sequence similarity such that no two domains in a given subset share more than a defined degree of sequence similarity. Taken together these two resources provide a 'ground truth' for assessing structural bioinformatics algorithms. We present a small and easy to use API written in python to enable construction of datasets from these resources. RESULTS: We have designed a set of python modules to provide an abstraction of the SCOP and ASTRAL databases. The modules are designed to work as part of the Biopython distribution. Python users can now manipulate and use the SCOP hierarchy from within python programs, and use ASTRAL to return sequences of domains in SCOP, as well as clustered representations of SCOP from ASTRAL. CONCLUSION: The modules make the analysis and generation of datasets for use in structural genomics easier and more principled.",
                "AD": [
                    "Bioinformatics, Institute of Cell and Molecular Science, School of Medicine and Dentistry, Queen Mary, University of London, London EC1 6BQ, UK. j.a.casbon@qmul.ac.uk"
                ],
                "FAU": ["Casbon, James A", "Crooks, Gavin E", "Saqi, Mansoor A S"],
                "AU": ["Casbon JA", "Crooks GE", "Saqi MA"],
                "LA": ["eng"],
                "PT": ["Evaluation Studies", "Journal Article"],
                "DEP": "20060110",
                "PL": "England",
                "TA": "BMC Bioinformatics",
                "JT": "BMC bioinformatics",
                "JID": "100965194",
                "SB": "IM",
                "MH": [
                    "*Database Management Systems",
                    "*Databases, Protein",
                    "Information Storage and Retrieval/*methods",
                    "Programming Languages",
                    "Sequence Alignment/*methods",
                    "Sequence Analysis, Protein/*methods",
                    "Sequence Homology, Amino Acid",
                    "*Software",
                    "*User-Computer Interface",
                ],
                "PMC": "PMC1373603",
                "EDAT": "2006/01/13 09:00",
                "MHDA": "2006/03/15 09:00",
                "PHST": [
                    "2005/06/17 [received]",
                    "2006/01/10 [accepted]",
                    "2006/01/10 [aheadofprint]",
                ],
                "AID": ["1471-2105-7-10 [pii]", "10.1186/1471-2105-7-10 [doi]"],
                "PST": "epublish",
                "SO": "BMC Bioinformatics. 2006 Jan 10;7:10.",
            },
            {
                "PMID": "16377612",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20060223",
                "DCOM": "20060418",
                "LR": "20061115",
                "PUBM": "Print-Electronic",
                "IS": "1367-4803 (Print)",
                "VI": "22",
                "IP": "5",
                "DP": "2006 Mar 1",
                "TI": "GenomeDiagram: a python package for the visualization of large-scale genomic data.",
                "PG": "616-7",
                "AB": "SUMMARY: We present GenomeDiagram, a flexible, open-source Python module for the visualization of large-scale genomic, comparative genomic and other data with reference to a single chromosome or other biological sequence. GenomeDiagram may be used to generate publication-quality vector graphics, rastered images and in-line streamed graphics for webpages. The package integrates with datatypes from the BioPython project, and is available for Windows, Linux and Mac OS X systems. AVAILABILITY: GenomeDiagram is freely available as source code (under GNU Public License) at http://bioinf.scri.ac.uk/lp/programs.html, and requires Python 2.3 or higher, and recent versions of the ReportLab and BioPython packages. SUPPLEMENTARY INFORMATION: A user manual, example code and images are available at http://bioinf.scri.ac.uk/lp/programs.html.",
                "AD": [
                    "Plant Pathogen Programme, Scottish Crop Research Institute, Invergowrie, Dundee DD2 5DA, Scotland, UK. lpritc@scri.ac.uk"
                ],
                "FAU": [
                    "Pritchard, Leighton",
                    "White, Jennifer A",
                    "Birch, Paul R J",
                    "Toth, Ian K",
                ],
                "AU": ["Pritchard L", "White JA", "Birch PR", "Toth IK"],
                "LA": ["eng"],
                "PT": ["Journal Article", "Research Support, Non-U.S. Gov't"],
                "DEP": "20051223",
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "SB": "IM",
                "MH": [
                    "Chromosome Mapping/*methods",
                    "*Computer Graphics",
                    "*Database Management Systems",
                    "*Databases, Genetic",
                    "Information Storage and Retrieval/methods",
                    "*Programming Languages",
                    "*Software",
                    "*User-Computer Interface",
                ],
                "EDAT": "2005/12/27 09:00",
                "MHDA": "2006/04/19 09:00",
                "PHST": ["2005/12/23 [aheadofprint]"],
                "AID": ["btk021 [pii]", "10.1093/bioinformatics/btk021 [doi]"],
                "PST": "ppublish",
                "SO": "Bioinformatics. 2006 Mar 1;22(5):616-7. Epub 2005 Dec 23.",
            },
            {
                "PMID": "14871861",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20040611",
                "DCOM": "20050104",
                "LR": "20061115",
                "PUBM": "Print-Electronic",
                "IS": "1367-4803 (Print)",
                "VI": "20",
                "IP": "9",
                "DP": "2004 Jun 12",
                "TI": "Open source clustering software.",
                "PG": "1453-4",
                "AB": "SUMMARY: We have implemented k-means clustering, hierarchical clustering and self-organizing maps in a single multipurpose open-source library of C routines, callable from other C and C++ programs. Using this library, we have created an improved version of Michael Eisen's well-known Cluster program for Windows, Mac OS X and Linux/Unix. In addition, we generated a Python and a Perl interface to the C Clustering Library, thereby combining the flexibility of a scripting language with the speed of C. AVAILABILITY: The C Clustering Library and the corresponding Python C extension module Pycluster were released under the Python License, while the Perl module Algorithm::Cluster was released under the Artistic License. The GUI code Cluster 3.0 for Windows, Macintosh and Linux/Unix, as well as the corresponding command-line program, were released under the same license as the original Cluster code. The complete source code is available at http://bonsai.ims.u-tokyo.ac.jp/mdehoon/software/cluster. Alternatively, Algorithm::Cluster can be downloaded from CPAN, while Pycluster is also available as part of the Biopython distribution.",
                "AD": [
                    "Human Genome Center, Institute of Medical Science, University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo, 108-8639 Japan. mdehoon@ims.u-tokyo.ac.jp"
                ],
                "FAU": ["de Hoon, M J L", "Imoto, S", "Nolan, J", "Miyano, S"],
                "AU": ["de Hoon MJ", "Imoto S", "Nolan J", "Miyano S"],
                "LA": ["eng"],
                "PT": [
                    "Comparative Study",
                    "Evaluation Studies",
                    "Journal Article",
                    "Validation Studies",
                ],
                "DEP": "20040210",
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "SB": "IM",
                "MH": [
                    "*Algorithms",
                    "*Cluster Analysis",
                    "Gene Expression Profiling/*methods",
                    "Pattern Recognition, Automated/methods",
                    "*Programming Languages",
                    "Sequence Alignment/*methods",
                    "Sequence Analysis, DNA/*methods",
                    "*Software",
                ],
                "EDAT": "2004/02/12 05:00",
                "MHDA": "2005/01/05 09:00",
                "PHST": ["2004/02/10 [aheadofprint]"],
                "AID": ["10.1093/bioinformatics/bth078 [doi]", "bth078 [pii]"],
                "PST": "ppublish",
                "SO": "Bioinformatics. 2004 Jun 12;20(9):1453-4. Epub 2004 Feb 10.",
            },
            {
                "PMID": "14630660",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20031121",
                "DCOM": "20040722",
                "LR": "20061115",
                "PUBM": "Print",
                "IS": "1367-4803 (Print)",
                "VI": "19",
                "IP": "17",
                "DP": "2003 Nov 22",
                "TI": "PDB file parser and structure class implemented in Python.",
                "PG": "2308-10",
                "AB": "The biopython project provides a set of bioinformatics tools implemented in Python. Recently, biopython was extended with a set of modules that deal with macromolecular structure. Biopython now contains a parser for PDB files that makes the atomic information available in an easy-to-use but powerful data structure. The parser and data structure deal with features that are often left out or handled inadequately by other packages, e.g. atom and residue disorder (if point mutants are present in the crystal), anisotropic B factors, multiple models and insertion codes. In addition, the parser performs some sanity checking to detect obvious errors. AVAILABILITY: The Biopython distribution (including source code and documentation) is freely available (under the Biopython license) from http://www.biopython.org",
                "AD": [
                    "Department of Cellular and Molecular Interactions, Vlaams Interuniversitair Instituut voor Biotechnologie and Computational Modeling Lab, Department of Computer Science, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium. thamelry@vub.ac.be"
                ],
                "FAU": ["Hamelryck, Thomas", "Manderick, Bernard"],
                "AU": ["Hamelryck T", "Manderick B"],
                "LA": ["eng"],
                "PT": [
                    "Comparative Study",
                    "Evaluation Studies",
                    "Journal Article",
                    "Research Support, Non-U.S. Gov't",
                    "Validation Studies",
                ],
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "RN": ["0 (Macromolecular Substances)"],
                "SB": "IM",
                "MH": [
                    "Computer Simulation",
                    "Database Management Systems/*standards",
                    "*Databases, Protein",
                    "Information Storage and Retrieval/*methods/*standards",
                    "Macromolecular Substances",
                    "*Models, Molecular",
                    "*Programming Languages",
                    "Protein Conformation",
                    "*Software",
                ],
                "EDAT": "2003/11/25 05:00",
                "MHDA": "2004/07/23 05:00",
                "PST": "ppublish",
                "SO": "Bioinformatics. 2003 Nov 22;19(17):2308-10.",
            },
        ]
        # The records are pulled from the iterator while the file is still
        # open, so all checks stay inside the ``with`` block.
        with open("Medline/pubmed_result2.txt") as stream:
            parsed = Medline.parse(stream)
            for expected in expected_records:
                self._check_fields(next(parsed), expected)
            # Nothing may follow the fourth record.
            with self.assertRaises(StopIteration):
                next(parsed)

    def test_multiline_mesh(self):
        """Parse pubmed_result3.txt and verify its PMID and MH (MeSH) entries."""
        expected = {
            "PMID": "23039619",
            "MH": [
                "Blood Circulation",
                "High-Intensity Focused Ultrasound Ablation/adverse effects/instrumentation/*methods",
                "Humans",
                "Models, Biological",
                "Sonication",
                "Temperature",
                "Time Factors",
                "Transducers",
            ],
        }
        with open("Medline/pubmed_result3.txt") as stream:
            record = Medline.read(stream)
        self._check_fields(record, expected)
364
365
if __name__ == "__main__":
    # When executed as a script, run the tests in this module with a text
    # runner at verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
369