# Copyright 2008 Michiel de Hoon
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Tests for Medline module."""

import unittest

from Bio import Medline


class TestMedline(unittest.TestCase):
    """Compare parsed Medline records with the values stored in the fixtures."""

    def _check_fields(self, record, expected):
        """Assert that ``record[tag]`` equals the expected value for each tag.

        Tags are compared in the order in which ``expected`` lists them; the
        first mismatch (or missing tag) stops the test.
        """
        for tag, value in expected.items():
            self.assertEqual(record[tag], value)

    # NOTE: the test methods deliberately carry comments instead of
    # docstrings, because unittest prints a test's docstring in place of its
    # name when running verbosely.

    def test_read(self):
        # Single-record file read through Medline.read.
        expected = {
            "PMID": "12230038",
            "OWN": "NLM",
            "STAT": "MEDLINE",
            "DA": "20020916",
            "DCOM": "20030606",
            "LR": "20041117",
            "PUBM": "Print",
            "IS": "1467-5463 (Print)",
            "VI": "3",
            "IP": "3",
            "DP": "2002 Sep",
            "TI": "The Bio* toolkits--a brief overview.",
            "PG": "296-302",
            "AB": "Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer.",
            "AD": ["tacg Informatics, Irvine, CA 92612, USA. hjm@tacgi.com"],
            "FAU": ["Mangalam, Harry"],
            "AU": ["Mangalam H"],
            "LA": ["eng"],
            "PT": ["Journal Article"],
            "PL": "England",
            "TA": "Brief Bioinform",
            "JT": "Briefings in bioinformatics",
            "JID": "100912837",
            "SB": "IM",
            "MH": [
                "*Computational Biology",
                "Computer Systems",
                "Humans",
                "Internet",
                "*Programming Languages",
                "*Software",
                "User-Computer Interface",
            ],
            "EDAT": "2002/09/17 10:00",
            "MHDA": "2003/06/07 05:00",
            "PST": "ppublish",
            "SO": "Brief Bioinform. 2002 Sep;3(3):296-302.",
        }
        with open("Medline/pubmed_result1.txt") as handle:
            record = Medline.read(handle)
            self._check_fields(record, expected)

    def test_parse(self):
        # Multi-record file iterated through Medline.parse; the expected
        # records are listed in the order in which the parser must yield them.
        expected_records = [
            {
                "PMID": "16403221",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20060220",
                "DCOM": "20060314",
                "PUBM": "Electronic",
                "IS": "1471-2105 (Electronic)",
                "VI": "7",
                "DP": "2006",
                "TI": "A high level interface to SCOP and ASTRAL implemented in python.",
                "PG": "10",
                "AB": "BACKGROUND: Benchmarking algorithms in structural bioinformatics often involves the construction of datasets of proteins with given sequence and structural properties. The SCOP database is a manually curated structural classification which groups together proteins on the basis of structural similarity. The ASTRAL compendium provides non redundant subsets of SCOP domains on the basis of sequence similarity such that no two domains in a given subset share more than a defined degree of sequence similarity. Taken together these two resources provide a 'ground truth' for assessing structural bioinformatics algorithms. We present a small and easy to use API written in python to enable construction of datasets from these resources. RESULTS: We have designed a set of python modules to provide an abstraction of the SCOP and ASTRAL databases. The modules are designed to work as part of the Biopython distribution. Python users can now manipulate and use the SCOP hierarchy from within python programs, and use ASTRAL to return sequences of domains in SCOP, as well as clustered representations of SCOP from ASTRAL. CONCLUSION: The modules make the analysis and generation of datasets for use in structural genomics easier and more principled.",
                "AD": [
                    "Bioinformatics, Institute of Cell and Molecular Science, School of Medicine and Dentistry, Queen Mary, University of London, London EC1 6BQ, UK. j.a.casbon@qmul.ac.uk"
                ],
                "FAU": ["Casbon, James A", "Crooks, Gavin E", "Saqi, Mansoor A S"],
                "AU": ["Casbon JA", "Crooks GE", "Saqi MA"],
                "LA": ["eng"],
                "PT": ["Evaluation Studies", "Journal Article"],
                "DEP": "20060110",
                "PL": "England",
                "TA": "BMC Bioinformatics",
                "JT": "BMC bioinformatics",
                "JID": "100965194",
                "SB": "IM",
                "MH": [
                    "*Database Management Systems",
                    "*Databases, Protein",
                    "Information Storage and Retrieval/*methods",
                    "Programming Languages",
                    "Sequence Alignment/*methods",
                    "Sequence Analysis, Protein/*methods",
                    "Sequence Homology, Amino Acid",
                    "*Software",
                    "*User-Computer Interface",
                ],
                "PMC": "PMC1373603",
                "EDAT": "2006/01/13 09:00",
                "MHDA": "2006/03/15 09:00",
                "PHST": [
                    "2005/06/17 [received]",
                    "2006/01/10 [accepted]",
                    "2006/01/10 [aheadofprint]",
                ],
                "AID": ["1471-2105-7-10 [pii]", "10.1186/1471-2105-7-10 [doi]"],
                "PST": "epublish",
                "SO": "BMC Bioinformatics. 2006 Jan 10;7:10.",
            },
            {
                "PMID": "16377612",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20060223",
                "DCOM": "20060418",
                "LR": "20061115",
                "PUBM": "Print-Electronic",
                "IS": "1367-4803 (Print)",
                "VI": "22",
                "IP": "5",
                "DP": "2006 Mar 1",
                "TI": "GenomeDiagram: a python package for the visualization of large-scale genomic data.",
                "PG": "616-7",
                "AB": "SUMMARY: We present GenomeDiagram, a flexible, open-source Python module for the visualization of large-scale genomic, comparative genomic and other data with reference to a single chromosome or other biological sequence. GenomeDiagram may be used to generate publication-quality vector graphics, rastered images and in-line streamed graphics for webpages. The package integrates with datatypes from the BioPython project, and is available for Windows, Linux and Mac OS X systems. AVAILABILITY: GenomeDiagram is freely available as source code (under GNU Public License) at http://bioinf.scri.ac.uk/lp/programs.html, and requires Python 2.3 or higher, and recent versions of the ReportLab and BioPython packages. SUPPLEMENTARY INFORMATION: A user manual, example code and images are available at http://bioinf.scri.ac.uk/lp/programs.html.",
                "AD": [
                    "Plant Pathogen Programme, Scottish Crop Research Institute, Invergowrie, Dundee DD2 5DA, Scotland, UK. lpritc@scri.ac.uk"
                ],
                "FAU": [
                    "Pritchard, Leighton",
                    "White, Jennifer A",
                    "Birch, Paul R J",
                    "Toth, Ian K",
                ],
                "AU": ["Pritchard L", "White JA", "Birch PR", "Toth IK"],
                "LA": ["eng"],
                "PT": ["Journal Article", "Research Support, Non-U.S. Gov't"],
                "DEP": "20051223",
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "SB": "IM",
                "MH": [
                    "Chromosome Mapping/*methods",
                    "*Computer Graphics",
                    "*Database Management Systems",
                    "*Databases, Genetic",
                    "Information Storage and Retrieval/methods",
                    "*Programming Languages",
                    "*Software",
                    "*User-Computer Interface",
                ],
                "EDAT": "2005/12/27 09:00",
                "MHDA": "2006/04/19 09:00",
                "PHST": ["2005/12/23 [aheadofprint]"],
                "AID": ["btk021 [pii]", "10.1093/bioinformatics/btk021 [doi]"],
                "PST": "ppublish",
                "SO": "Bioinformatics. 2006 Mar 1;22(5):616-7. Epub 2005 Dec 23.",
            },
            {
                "PMID": "14871861",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20040611",
                "DCOM": "20050104",
                "LR": "20061115",
                "PUBM": "Print-Electronic",
                "IS": "1367-4803 (Print)",
                "VI": "20",
                "IP": "9",
                "DP": "2004 Jun 12",
                "TI": "Open source clustering software.",
                "PG": "1453-4",
                "AB": "SUMMARY: We have implemented k-means clustering, hierarchical clustering and self-organizing maps in a single multipurpose open-source library of C routines, callable from other C and C++ programs. Using this library, we have created an improved version of Michael Eisen's well-known Cluster program for Windows, Mac OS X and Linux/Unix. In addition, we generated a Python and a Perl interface to the C Clustering Library, thereby combining the flexibility of a scripting language with the speed of C. AVAILABILITY: The C Clustering Library and the corresponding Python C extension module Pycluster were released under the Python License, while the Perl module Algorithm::Cluster was released under the Artistic License. The GUI code Cluster 3.0 for Windows, Macintosh and Linux/Unix, as well as the corresponding command-line program, were released under the same license as the original Cluster code. The complete source code is available at http://bonsai.ims.u-tokyo.ac.jp/mdehoon/software/cluster. Alternatively, Algorithm::Cluster can be downloaded from CPAN, while Pycluster is also available as part of the Biopython distribution.",
                "AD": [
                    "Human Genome Center, Institute of Medical Science, University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo, 108-8639 Japan. mdehoon@ims.u-tokyo.ac.jp"
                ],
                "FAU": ["de Hoon, M J L", "Imoto, S", "Nolan, J", "Miyano, S"],
                "AU": ["de Hoon MJ", "Imoto S", "Nolan J", "Miyano S"],
                "LA": ["eng"],
                "PT": [
                    "Comparative Study",
                    "Evaluation Studies",
                    "Journal Article",
                    "Validation Studies",
                ],
                "DEP": "20040210",
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "SB": "IM",
                "MH": [
                    "*Algorithms",
                    "*Cluster Analysis",
                    "Gene Expression Profiling/*methods",
                    "Pattern Recognition, Automated/methods",
                    "*Programming Languages",
                    "Sequence Alignment/*methods",
                    "Sequence Analysis, DNA/*methods",
                    "*Software",
                ],
                "EDAT": "2004/02/12 05:00",
                "MHDA": "2005/01/05 09:00",
                "PHST": ["2004/02/10 [aheadofprint]"],
                "AID": ["10.1093/bioinformatics/bth078 [doi]", "bth078 [pii]"],
                "PST": "ppublish",
                "SO": "Bioinformatics. 2004 Jun 12;20(9):1453-4. Epub 2004 Feb 10.",
            },
            {
                "PMID": "14630660",
                "OWN": "NLM",
                "STAT": "MEDLINE",
                "DA": "20031121",
                "DCOM": "20040722",
                "LR": "20061115",
                "PUBM": "Print",
                "IS": "1367-4803 (Print)",
                "VI": "19",
                "IP": "17",
                "DP": "2003 Nov 22",
                "TI": "PDB file parser and structure class implemented in Python.",
                "PG": "2308-10",
                "AB": "The biopython project provides a set of bioinformatics tools implemented in Python. Recently, biopython was extended with a set of modules that deal with macromolecular structure. Biopython now contains a parser for PDB files that makes the atomic information available in an easy-to-use but powerful data structure. The parser and data structure deal with features that are often left out or handled inadequately by other packages, e.g. atom and residue disorder (if point mutants are present in the crystal), anisotropic B factors, multiple models and insertion codes. In addition, the parser performs some sanity checking to detect obvious errors. AVAILABILITY: The Biopython distribution (including source code and documentation) is freely available (under the Biopython license) from http://www.biopython.org",
                "AD": [
                    "Department of Cellular and Molecular Interactions, Vlaams Interuniversitair Instituut voor Biotechnologie and Computational Modeling Lab, Department of Computer Science, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium. thamelry@vub.ac.be"
                ],
                "FAU": ["Hamelryck, Thomas", "Manderick, Bernard"],
                "AU": ["Hamelryck T", "Manderick B"],
                "LA": ["eng"],
                "PT": [
                    "Comparative Study",
                    "Evaluation Studies",
                    "Journal Article",
                    "Research Support, Non-U.S. Gov't",
                    "Validation Studies",
                ],
                "PL": "England",
                "TA": "Bioinformatics",
                "JT": "Bioinformatics (Oxford, England)",
                "JID": "9808944",
                "RN": ["0 (Macromolecular Substances)"],
                "SB": "IM",
                "MH": [
                    "Computer Simulation",
                    "Database Management Systems/*standards",
                    "*Databases, Protein",
                    "Information Storage and Retrieval/*methods/*standards",
                    "Macromolecular Substances",
                    "*Models, Molecular",
                    "*Programming Languages",
                    "Protein Conformation",
                    "*Software",
                ],
                "EDAT": "2003/11/25 05:00",
                "MHDA": "2004/07/23 05:00",
                "PST": "ppublish",
                "SO": "Bioinformatics. 2003 Nov 22;19(17):2308-10.",
            },
        ]
        # The iterator returned by Medline.parse is consumed while the file
        # is still open, so everything below stays inside the with block.
        with open("Medline/pubmed_result2.txt") as handle:
            records = Medline.parse(handle)
            for expected in expected_records:
                self._check_fields(next(records), expected)
            # After the listed records the iterator must be exhausted.
            with self.assertRaises(StopIteration):
                next(records)

    def test_multiline_mesh(self):
        # Only the PMID and the MH list are checked for this fixture; the
        # second MH entry is the long heading the test is named after.
        expected = {
            "PMID": "23039619",
            "MH": [
                "Blood Circulation",
                "High-Intensity Focused Ultrasound Ablation/adverse effects/instrumentation/*methods",
                "Humans",
                "Models, Biological",
                "Sonication",
                "Temperature",
                "Time Factors",
                "Transducers",
            ],
        }
        with open("Medline/pubmed_result3.txt") as handle:
            record = Medline.read(handle)
            self._check_fields(record, expected)


if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))