1 /********************************************************************** 2 Copyright (C) 2019 by NextMove Software 3 4 This file is part of the Open Babel project. 5 For more information, see <http://openbabel.org/> 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation version 2 of the License. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 ***********************************************************************/ 16 17 #include <openbabel/babelconfig.h> 18 #include <openbabel/obmolecformat.h> 19 20 #include <openbabel/base.h> 21 #include <openbabel/mol.h> 22 23 bool NMReadWLN(const char *ptr, OpenBabel::OBMol* mol); 24 25 using namespace std; 26 namespace OpenBabel 27 { 28 29 class WLNFormat : public OBMoleculeFormat 30 { 31 public: 32 //Register this format type ID WLNFormat()33 WLNFormat() 34 { 35 OBConversion::RegisterFormat("wln", this); 36 } 37 Description()38 virtual const char* Description() //required 39 { 40 return 41 "Wiswesser Line Notation\n" 42 "A chemical line notation developed by Wiswesser\n\n" 43 44 "WLN was invented in 1949, by William J. Wiswesser, as one of the first attempts\n" 45 "to codify chemical structure as a line notation, enabling collation on punched\n" 46 "cards using automatic tabulating machines and early electronic computers. WLN\n" 47 "was a forerunner to the SMILES notation used in modern cheminformatics systems,\n" 48 "which attempted to simplify the complex rules used in WLN encoding (at the\n" 49 "expense of brevity) to come up with an algorithmic system more suitable for\n" 50 "implementation on computers, where historically WLN was typically encoded\n" 51 "by hand by trained registrars.\n\n" 52 53 "WLN encoding makes use of uppercase letters, digits, spaces and punctuation:\n\n" 54 55 "- E Bromine atom\n" 56 "- F Fluorine atom\n" 57 "- G Chlorine atom\n" 58 "- H Hydrogen atom\n" 59 "- I Iodine atom\n" 60 "- Q Hydroxyl group, -OH\n" 61 "- R Benzene ring\n" 62 "- S Sulfur atom\n" 63 "- U Double bond\n" 64 "- UU Triple bond\n" 65 "- V Carbonyl, -C(=O)-\n" 66 "- C Unbranched carbon multiply bonded to non-carbon atom\n" 67 "- K Nitrogen atom bonded to more than three other atoms\n" 68 "- L First symbol of a carbocyclic ring notation\n" 69 "- M Imino or imido -NH-group\n" 70 "- N Nitrogen atom, hydrogen free, bonded to fewer than 4 atoms\n" 71 "- O Oxygen atom, hydrogen-free\n" 72 "- T First symbol of a heterocyclic ring notation\n" 73 "- W Non-linear dioxo group, as in -NO2 or -SO2-\n" 74 "- X Carbon attached to four atoms other than hydrogen\n" 75 "- Y Carbon attached to three atoms other then hydrogen\n" 76 "- Z Amino and amido NH2 group\n" 77 "- <digit> Digits '1' to '9' denote unbranched alkyl chains\n" 78 "- & Sidechain terminator or, after a space, a component separator\n\n" 79 80 "For a more complete description of the grammar see Smith's book [1], which more\n" 81 "accurately reflects the WLN commonly encountered than Wiswesser's book [2].\n" 82 "Additional WLN dialects include inorganic salts, and methyl contractions.\n\n" 83 84 "Here are some examples of WLN strings along with a corresponding SMILES string:\n\n" 85 86 "- WN3 [O-][N+](=O)CCC\n" 87 "- G1UU1G ClC#CCl\n" 88 "- VH3 O=CCCC\n" 89 "- NCCN N#CC#N\n" 90 "- ZYZUM NC(=N)N\n" 91 "- QY CC(C)O\n" 92 "- OV1 &-NA- CC(=O)[O-].[Na+]\n" 93 "- RM1R c1ccccc1NCc2ccccc2\n" 94 "- T56 BMJ B D - DT6N CNJ BMR BO1 DN1 & 2N1 & 1 EMV1U1 (osimertinib)\n" 95 " Cn1cc(c2c1cccc2)c3ccnc(n3)Nc4cc(c(cc4OC)N(C)CCN(C)C)NC(=O)C=C\n\n" 96 97 "This reader was contributed by Roger Sayle (NextMove Software). The text of\n" 98 "this description was taken from his Bio-IT World poster [3]. Note that not\n" 99 "all of WLN is currently supported; however, about 76% of the WLN strings\n" 100 "found in PubChem can be interpreted.\n\n" 101 102 "1. Elbert G. Smith, \"The Wiswesser Line-Formula Chemical Notation\",\n" 103 " McGraw-Hill Book Company publishers, 1968.\n" 104 "2. William J. Wiswesser, \"A Line-Formula Chemical Notation\", Thomas Crowell\n" 105 " Company publishers, 1954.\n" 106 "3. Roger Sayle, Noel O'Boyle, Greg Landrum, Roman Affentranger. \"Open\n" 107 " sourcing a Wiswesser Line Notation (WLN) parser to facilitate electronic\n" 108 " lab notebook (ELN) record transfer using the Pistoia Alliance's UDM\n" 109 " (Unified Data Model) standard.\" BioIT World. Apr 2019.\n" 110 " https://www.nextmovesoftware.com/posters/Sayle_WisswesserLineNotation_BioIT_201904.pdf\n" 111 ; 112 }; 113 Flags()114 virtual unsigned int Flags() 115 { 116 return NOTWRITABLE; 117 } 118 119 //*** This section identical for most OBMol conversions *** 120 //////////////////////////////////////////////////// 121 /// The "API" interface functions 122 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); 123 }; 124 //*** 125 126 //Make an instance of the format class 127 WLNFormat theWLNFormat; 128 129 ///////////////////////////////////////////////////////////////// ReadMolecule(OBBase * pOb,OBConversion * pConv)130 bool WLNFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) 131 { 132 OBMol* pmol = pOb->CastAndClear<OBMol>(); 133 if (pmol == nullptr) 134 return false; 135 136 //Define some references so we can use the old parameter names 137 istream &ifs = *pConv->GetInStream(); 138 const char* title = pConv->GetTitle(); 139 char buffer[BUFF_SIZE]; 140 141 if (!ifs.getline(buffer,BUFF_SIZE)) 142 return false; 143 144 NMReadWLN(buffer, pmol); 145 146 return true; 147 } 148 149 //////////////////////////////////////////////////////////////// 150 151 } //namespace OpenBabel 152