1 /**********************************************************************
2 Copyright (C) 2019 by NextMove Software
3 
4 This file is part of the Open Babel project.
5 For more information, see <http://openbabel.org/>
6 
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation version 2 of the License.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 ***********************************************************************/
16 
17 #include <openbabel/babelconfig.h>
18 #include <openbabel/obmolecformat.h>
19 
20 #include <openbabel/base.h>
21 #include <openbabel/mol.h>
22 
23 bool NMReadWLN(const char *ptr, OpenBabel::OBMol* mol);
24 
25 using namespace std;
26 namespace OpenBabel
27 {
28 
29   class WLNFormat : public OBMoleculeFormat
30   {
31   public:
32     //Register this format type ID
WLNFormat()33     WLNFormat()
34     {
35       OBConversion::RegisterFormat("wln", this);
36     }
37 
Description()38     virtual const char* Description() //required
39     {
40       return
41         "Wiswesser Line Notation\n"
42 	"A chemical line notation developed by Wiswesser\n\n"
43 
44         "WLN was invented in 1949, by William J. Wiswesser, as one of the first attempts\n"
45         "to codify chemical structure as a line notation, enabling collation on punched\n"
46         "cards using automatic tabulating machines and early electronic computers. WLN\n"
47         "was a forerunner to the SMILES notation used in modern cheminformatics systems,\n"
48         "which attempted to simplify the complex rules used in WLN encoding (at the\n"
49         "expense of brevity) to come up with an algorithmic system more suitable for\n"
50         "implementation on computers, where historically WLN was typically encoded\n"
51         "by hand by trained registrars.\n\n"
52 
53         "WLN encoding makes use of uppercase letters, digits, spaces and punctuation:\n\n"
54 
55         "- E       Bromine atom\n"
56         "- F       Fluorine atom\n"
57         "- G       Chlorine atom\n"
58         "- H       Hydrogen atom\n"
59         "- I       Iodine atom\n"
60         "- Q       Hydroxyl group, -OH\n"
61         "- R       Benzene ring\n"
62         "- S       Sulfur atom\n"
63         "- U       Double bond\n"
64         "- UU      Triple bond\n"
65         "- V       Carbonyl, -C(=O)-\n"
66         "- C       Unbranched carbon multiply bonded to non-carbon atom\n"
67         "- K       Nitrogen atom bonded to more than three other atoms\n"
68         "- L       First symbol of a carbocyclic ring notation\n"
69         "- M       Imino or imido -NH-group\n"
70         "- N       Nitrogen atom, hydrogen free, bonded to fewer than 4 atoms\n"
71         "- O       Oxygen atom, hydrogen-free\n"
72         "- T       First symbol of a heterocyclic ring notation\n"
73         "- W       Non-linear dioxo group, as in -NO2 or -SO2-\n"
74         "- X       Carbon attached to four atoms other than hydrogen\n"
75         "- Y       Carbon attached to three atoms other then hydrogen\n"
76         "- Z       Amino and amido NH2 group\n"
77         "- <digit> Digits '1' to '9' denote unbranched alkyl chains\n"
78         "- &       Sidechain terminator or, after a space, a component separator\n\n"
79 
80         "For a more complete description of the grammar see Smith's book [1], which more\n"
81         "accurately reflects the WLN commonly encountered than Wiswesser's book [2].\n"
82         "Additional WLN dialects include inorganic salts, and methyl contractions.\n\n"
83 
84         "Here are some examples of WLN strings along with a corresponding SMILES string:\n\n"
85 
86         "- WN3        [O-][N+](=O)CCC\n"
87         "- G1UU1G     ClC#CCl\n"
88         "- VH3        O=CCCC\n"
89         "- NCCN       N#CC#N\n"
90         "- ZYZUM      NC(=N)N\n"
91         "- QY         CC(C)O\n"
92         "- OV1 &-NA-  CC(=O)[O-].[Na+]\n"
93         "- RM1R       c1ccccc1NCc2ccccc2\n"
94         "- T56 BMJ B D - DT6N CNJ BMR BO1 DN1 & 2N1 & 1 EMV1U1   (osimertinib)\n"
95         "  Cn1cc(c2c1cccc2)c3ccnc(n3)Nc4cc(c(cc4OC)N(C)CCN(C)C)NC(=O)C=C\n\n"
96 
97         "This reader was contributed by Roger Sayle (NextMove Software). The text of\n"
98         "this description was taken from his Bio-IT World poster [3]. Note that not\n"
99         "all of WLN is currently supported; however, about 76% of the WLN strings\n"
100         "found in PubChem can be interpreted.\n\n"
101 
102         "1. Elbert G. Smith, \"The Wiswesser Line-Formula Chemical Notation\",\n"
103         "   McGraw-Hill Book Company publishers, 1968.\n"
104         "2. William J. Wiswesser, \"A Line-Formula Chemical Notation\", Thomas Crowell\n"
105         "   Company publishers, 1954.\n"
106         "3. Roger Sayle, Noel O'Boyle, Greg Landrum, Roman Affentranger. \"Open\n"
107         "   sourcing a Wiswesser Line Notation (WLN) parser to facilitate electronic\n"
108         "   lab notebook (ELN) record transfer using the Pistoia Alliance's UDM\n"
109         "   (Unified Data Model) standard.\" BioIT World. Apr 2019.\n"
110         "   https://www.nextmovesoftware.com/posters/Sayle_WisswesserLineNotation_BioIT_201904.pdf\n"
111         ;
112     };
113 
Flags()114     virtual unsigned int Flags()
115     {
116       return NOTWRITABLE;
117     }
118 
119     //*** This section identical for most OBMol conversions ***
120     ////////////////////////////////////////////////////
121     /// The "API" interface functions
122     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
123   };
124   //***
125 
126   //Make an instance of the format class
127   WLNFormat theWLNFormat;
128 
129   /////////////////////////////////////////////////////////////////
ReadMolecule(OBBase * pOb,OBConversion * pConv)130   bool WLNFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
131   {
132     OBMol* pmol = pOb->CastAndClear<OBMol>();
133     if (pmol == nullptr)
134       return false;
135 
136     //Define some references so we can use the old parameter names
137     istream &ifs = *pConv->GetInStream();
138     const char* title = pConv->GetTitle();
139     char buffer[BUFF_SIZE];
140 
141     if (!ifs.getline(buffer,BUFF_SIZE))
142       return false;
143 
144     NMReadWLN(buffer, pmol);
145 
146     return true;
147   }
148 
149   ////////////////////////////////////////////////////////////////
150 
151 } //namespace OpenBabel
152