1 /*
2  * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
3  *                    John May <jwmay@users.sf.net>
4  *
5  * Contact: cdk-devel@lists.sourceforge.net
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or (at
10  * your option) any later version. All we ask is that proper credit is given
11  * for our work, which includes - but is not limited to - adding the above
12  * copyright notice to the beginning of your source code files, and to any
13  * copyright notice that you may distribute with programs based on this work.
14  *
15  * This program is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18  * License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
23  */
24 
25 package org.openscience.cdk.aromaticity;
26 
27 import com.google.common.collect.Maps;
28 import org.openscience.cdk.interfaces.IAtom;
29 import org.openscience.cdk.interfaces.IAtomContainer;
30 import org.openscience.cdk.interfaces.IBond;
31 import org.openscience.cdk.interfaces.IPseudoAtom;
32 import org.openscience.cdk.ringsearch.RingSearch;
33 
34 import java.util.Arrays;
35 import java.util.Map;
36 
37 import static com.google.common.base.Preconditions.checkNotNull;
38 
39 /**
40  * Electron donation model closely mirroring the Daylight model for use in
41  * generating SMILES. The model was interpreted from various resources and as
42  * such may not match exactly. If you find an inconsistency please add a request
43  * for enhancement to the patch tracker. One known limitation is that this model
44  * does not currently consider unknown/pseudo atoms '*'.
45  *
46  * The model makes a couple of assumptions which it will not correct for.
47  * Checked assumptions cause the model to throw a runtime exception. <ul>
48  * <li>there should be no valence errors (unchecked)</li> <li>every atom has a
49  * set implicit hydrogen count (checked)</li> <li>every bond has defined order,
50  * single, double etc (checked)</li> <li>atomic number of non-pseudo atoms is
51  * set (checked)</li> </ul>
52  *
53  * The aromaticity model in SMILES was designed to simplify canonicalisation and
54  * express symmetry in a molecule. The contributed electrons can be summarised
55  * as follows (refer to code for exact specification): <ul> <li>carbon,
56  * nitrogen, oxygen, phosphorus, sulphur, arsenic and selenium are allow to be
57  * aromatic</li> <li>atoms should be Sp2 hybridised - not actually computed</li>
58  * <li>atoms adjacent to a single cyclic pi bond contribute 1 electron</li>
59  * <li>neutral or negatively charged atoms with a lone pair contribute 2
60  * electrons</li> <li>exocyclic pi bonds are allowed but if the exocyclic atom
61  * is more electronegative it consumes an electron. As an example ketone groups
62  * contribute '0' electrons.</li></ul>
63  *
64  * @author John May
65  * @cdk.module standard
66  * @cdk.githash
67  */
68 final class DaylightModel extends ElectronDonation {
69 
70     private static final int CARBON     = 6;
71     private static final int NITROGEN   = 7;
72     private static final int OXYGEN     = 8;
73     private static final int PHOSPHORUS = 15;
74     private static final int SULPHUR    = 16;
75     private static final int ARSENIC    = 33;
76     private static final int SELENIUM   = 34;
77 
78     /**{@inheritDoc} */
79     @Override
contribution(IAtomContainer container, RingSearch ringSearch)80     int[] contribution(IAtomContainer container, RingSearch ringSearch) {
81 
82         int n = container.getAtomCount();
83 
84         // we compute values we need for all atoms and then make the decisions
85         // - this avoids costly operations such as looking up connected
86         // bonds on each atom at the cost of memory
87         int[] degree = new int[n];
88         int[] bondOrderSum = new int[n];
89         int[] nCyclicPiBonds = new int[n];
90         int[] exocyclicPiBond = new int[n];
91         int[] electrons = new int[n];
92 
93         Arrays.fill(exocyclicPiBond, -1);
94 
95         // index atoms and set the degree to the number of implicit hydrogens
96         Map<IAtom, Integer> atomIndex = Maps.newHashMapWithExpectedSize(n);
97         for (int i = 0; i < n; i++) {
98             IAtom a = container.getAtom(i);
99             atomIndex.put(a, i);
100             degree[i] = checkNotNull(a.getImplicitHydrogenCount(),
101                     "Aromaticity model requires implicit hydrogen count is set.");
102         }
103 
104         // for each bond we increase the degree count and check for cyclic and
105         // exocyclic pi bonds. if there is a cyclic pi bond the atom is marked.
106         // if there is an exocyclic pi bond we store the adjacent atom for
107         // lookup later.
108         for (IBond bond : container.bonds()) {
109             int u = atomIndex.get(bond.getBegin());
110             int v = atomIndex.get(bond.getEnd());
111             degree[u]++;
112             degree[v]++;
113 
114             IBond.Order order = checkNotNull(bond.getOrder(), "Aromaticity model requires that bond orders must be set");
115 
116             switch (order) {
117                 case UNSET:
118                     throw new IllegalArgumentException("Aromaticity model requires that bond orders must be set");
119                 case DOUBLE:
120                     if (ringSearch.cyclic(u, v)) {
121                         nCyclicPiBonds[u]++;
122                         nCyclicPiBonds[v]++;
123                     } else {
124                         exocyclicPiBond[u] = v;
125                         exocyclicPiBond[v] = u;
126                     }
127                     // note - fall through
128                 case SINGLE:
129                 case TRIPLE:
130                 case QUADRUPLE:
131                     bondOrderSum[u] += order.numeric();
132                     bondOrderSum[v] += order.numeric();
133             }
134         }
135 
136         // now make a decision on how many electrons each atom contributes
137         for (int i = 0; i < n; i++) {
138 
139             int element = element(container.getAtom(i));
140             int charge = charge(container.getAtom(i));
141 
142             // abnormal valence, usually indicated a radical. these cause problems
143             // with kekulisations
144             int bondedValence = bondOrderSum[i] + container.getAtom(i).getImplicitHydrogenCount();
145             if (!normal(element, charge, bondedValence)) {
146                 electrons[i] = -1;
147             }
148 
149             // non-aromatic element, acyclic atoms, atoms with more than three
150             // neighbors and atoms with more than 1 cyclic pi bond are not
151             // considered
152             else if (!aromaticElement(element) || !ringSearch.cyclic(i) || degree[i] > 3 || nCyclicPiBonds[i] > 1) {
153                 electrons[i] = -1;
154             }
155 
156             // exocyclic bond contributes 0 or 1 electrons depending on
157             // preset electronegativity - check the exocyclicContribution method
158             else if (exocyclicPiBond[i] >= 0) {
159                 electrons[i] = exocyclicContribution(element, element(container.getAtom(exocyclicPiBond[i])), charge,
160                         nCyclicPiBonds[i]);
161             }
162 
163             // any atom (except arsenic) with one cyclic pi bond contributes a
164             // single electron
165             else if (nCyclicPiBonds[i] == 1) {
166                 electrons[i] = element == ARSENIC ? -1 : 1;
167             }
168 
169             // a anion with a lone pair contributes 2 electrons - simplification
170             // here is we count the number free valence electrons but also
171             // check if the bonded valence is okay (i.e. not a radical)
172             else if (charge <= 0 && charge > -3) {
173                 if (valence(element, charge) - bondOrderSum[i] >= 2)
174                     electrons[i] = 2;
175                 else
176                     electrons[i] = -1;
177             }
178 
179             else {
180                 // cation with no double bonds - single exception?
181                 if (element == CARBON && charge > 0)
182                     electrons[i] = 0;
183                 else
184                     electrons[i] = -1;
185             }
186         }
187 
188         return electrons;
189     }
190 
191     /**
192      * Defines the number of electrons contributed when a pi bond is exocyclic
193      * (spouting). When an atom is connected to an more electronegative atom
194      * then the electrons are 'pulled' from the ring. The preset conditions are
195      * as follows:
196      *
197      * <ul> <li>A cyclic carbon with an exocyclic pi bond to anything but carbon
198      * contributes 0 electrons. If the exocyclic atom is also a carbon then 1
199      * electron is contributed.</li> <li>A cyclic 4 valent nitrogen or
200      * phosphorus cation with an exocyclic pi bond will always contribute 1
201      * electron. A 5 valent neutral nitrogen or phosphorus with an exocyclic
202      * bond to an oxygen contributes 1 electron. </li> <li>A neutral sulphur
203      * connected to an oxygen contributes 2 electrons</li><li>If none of the
204      * previous conditions are met the atom is not considered as being able to
205      * participate in an aromatic system and -1 is returned.</li> </ul>
206      *
207      * @param element      the element of the cyclic atom
208      * @param otherElement the element of the exocyclic atom which is connected
209      *                     to the cyclic atom by a pi bond
210      * @param charge       the charge of the cyclic atom
211      * @param nCyclic      the number of cyclic pi bonds adjacent to cyclic
212      *                     atom
213      * @return number of contributed electrons
214      */
exocyclicContribution(int element, int otherElement, int charge, int nCyclic)215     private static int exocyclicContribution(int element, int otherElement, int charge, int nCyclic) {
216         switch (element) {
217             case CARBON:
218                 return otherElement != CARBON ? 0 : 1;
219             case NITROGEN:
220             case PHOSPHORUS:
221                 if (charge == 1)
222                     return 1;
223                 else if (charge == 0 && otherElement == OXYGEN && nCyclic == 1) return 1;
224                 return -1;
225             case SULPHUR:
226                 // quirky but try - O=C1C=CS(=O)C=C1
227                 return charge == 0 && otherElement == OXYGEN ? 2 : -1;
228         }
229         return -1;
230     }
231 
232     /**
233      * Is the element specified by the atomic number, allowed to be aromatic by
234      * the daylight specification. Allowed elements are C, N, O, P, S, As, Se
235      * and *. This model allows all except for the unknown ('*') element.
236      *
237      * @param element atomic number of element
238      * @return the element can be aromatic
239      */
aromaticElement(int element)240     private static boolean aromaticElement(int element) {
241         switch (element) {
242             case CARBON:
243             case NITROGEN:
244             case OXYGEN:
245             case PHOSPHORUS:
246             case SULPHUR:
247             case ARSENIC:
248             case SELENIUM:
249                 return true;
250         }
251         return false;
252     }
253 
254     /**
255      * The element has normal valence for the specified charge.
256      *
257      * @param element atomic number
258      * @param charge  formal charge
259      * @param valence bonded electrons
260      * @return acceptable for this model
261      */
normal(int element, int charge, int valence)262     private static boolean normal(int element, int charge, int valence) {
263         switch (element) {
264             case CARBON:
265                 if (charge == -1 || charge == +1) return valence == 3;
266                 return charge == 0 && valence == 4;
267             case NITROGEN:
268             case PHOSPHORUS:
269             case ARSENIC:
270                 if (charge == -1) return valence == 2;
271                 if (charge == +1) return valence == 4;
272                 return charge == 0 && (valence == 3 || (valence == 5 && element == NITROGEN));
273             case OXYGEN:
274                 if (charge == +1) return valence == 3;
275                 return charge == 0 && valence == 2;
276             case SULPHUR:
277             case SELENIUM:
278                 if (charge == +1) return valence == 3;
279                 return charge == 0 && (valence == 2 || valence == 4 || valence == 6);
280         }
281         return false;
282     }
283 
284     /**
285      * Lookup of the number of valence electrons for the element at a given
286      * charge.
287      *
288      * @param element the atomic number of an element
289      * @param charge  the formal charge on the atom
290      * @return the valence
291      * @throws UnsupportedOperationException encountered an element which the
292      *                                       valence was not encoded for
293      */
valence(int element, int charge)294     private int valence(int element, int charge) {
295         return valence(element - charge);
296     }
297 
298     /**
299      * Lookup of the number of valence electrons for elements near those which
300      * this model considers aromatic. As only the {@link #aromaticElement(int)}
301      * are checked we need only consider elements within a charge range.
302      *
303      * @param element the atomic number of an element
304      * @return the valence
305      * @throws UnsupportedOperationException encountered an element which the
306      *                                       valence was not encoded for
307      */
valence(int element)308     private int valence(int element) {
309         switch (element) {
310             case 5: // boron
311             case 13: // aluminium
312             case 31: // gallium
313                 return 3;
314             case CARBON:
315             case 14: // silicon
316             case 32: // germanium
317                 return 4;
318             case NITROGEN:
319             case PHOSPHORUS:
320             case ARSENIC:
321                 return 5;
322             case OXYGEN:
323             case SULPHUR:
324             case SELENIUM:
325                 return 6;
326             case 9: // fluorine
327             case 17: // chlorine
328             case 35: // bromine
329                 return 7;
330         }
331         throw new UnsupportedOperationException("Valence not yet handled for element with atomic number " + element);
332     }
333 
334     /**
335      * Get the atomic number as an non-null integer value. Although pseudo atoms
336      * are not considered by this model the pseudo atoms are intercepted to have
337      * non-null atomic number (defaults to 0).
338      *
339      * @param atom atom to get the element from
340      * @return the formal charge
341      */
element(IAtom atom)342     private int element(IAtom atom) {
343         Integer element = atom.getAtomicNumber();
344         if (element != null) return element;
345         if (atom instanceof IPseudoAtom) return 0;
346         throw new IllegalArgumentException("Aromaiticty model requires atomic numbers to be set");
347     }
348 
349     /**
350      * Get the formal charge as an integer value - null defaults to 0.
351      *
352      * @param atom the atom to get the charge of
353      * @return the formal charge
354      */
charge(IAtom atom)355     private int charge(IAtom atom) {
356         return atom.getFormalCharge() != null ? atom.getFormalCharge() : 0;
357     }
358 }
359