1 /* 2 * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI) 3 * John May <jwmay@users.sf.net> 4 * 5 * Contact: cdk-devel@lists.sourceforge.net 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU Lesser General Public License as published by 9 * the Free Software Foundation; either version 2.1 of the License, or (at 10 * your option) any later version. All we ask is that proper credit is given 11 * for our work, which includes - but is not limited to - adding the above 12 * copyright notice to the beginning of your source code files, and to any 13 * copyright notice that you may distribute with programs based on this work. 14 * 15 * This program is distributed in the hope that it will be useful, but WITHOUT 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 18 * License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U 23 */ 24 25 package org.openscience.cdk.aromaticity; 26 27 import com.google.common.collect.Maps; 28 import org.openscience.cdk.interfaces.IAtom; 29 import org.openscience.cdk.interfaces.IAtomContainer; 30 import org.openscience.cdk.interfaces.IBond; 31 import org.openscience.cdk.interfaces.IPseudoAtom; 32 import org.openscience.cdk.ringsearch.RingSearch; 33 34 import java.util.Arrays; 35 import java.util.Map; 36 37 import static com.google.common.base.Preconditions.checkNotNull; 38 39 /** 40 * Electron donation model closely mirroring the Daylight model for use in 41 * generating SMILES. The model was interpreted from various resources and as 42 * such may not match exactly. If you find an inconsistency please add a request 43 * for enhancement to the patch tracker. One known limitation is that this model 44 * does not currently consider unknown/pseudo atoms '*'. 45 * 46 * The model makes a couple of assumptions which it will not correct for. 47 * Checked assumptions cause the model to throw a runtime exception. <ul> 48 * <li>there should be no valence errors (unchecked)</li> <li>every atom has a 49 * set implicit hydrogen count (checked)</li> <li>every bond has defined order, 50 * single, double etc (checked)</li> <li>atomic number of non-pseudo atoms is 51 * set (checked)</li> </ul> 52 * 53 * The aromaticity model in SMILES was designed to simplify canonicalisation and 54 * express symmetry in a molecule. The contributed electrons can be summarised 55 * as follows (refer to code for exact specification): <ul> <li>carbon, 56 * nitrogen, oxygen, phosphorus, sulphur, arsenic and selenium are allow to be 57 * aromatic</li> <li>atoms should be Sp2 hybridised - not actually computed</li> 58 * <li>atoms adjacent to a single cyclic pi bond contribute 1 electron</li> 59 * <li>neutral or negatively charged atoms with a lone pair contribute 2 60 * electrons</li> <li>exocyclic pi bonds are allowed but if the exocyclic atom 61 * is more electronegative it consumes an electron. As an example ketone groups 62 * contribute '0' electrons.</li></ul> 63 * 64 * @author John May 65 * @cdk.module standard 66 * @cdk.githash 67 */ 68 final class DaylightModel extends ElectronDonation { 69 70 private static final int CARBON = 6; 71 private static final int NITROGEN = 7; 72 private static final int OXYGEN = 8; 73 private static final int PHOSPHORUS = 15; 74 private static final int SULPHUR = 16; 75 private static final int ARSENIC = 33; 76 private static final int SELENIUM = 34; 77 78 /**{@inheritDoc} */ 79 @Override contribution(IAtomContainer container, RingSearch ringSearch)80 int[] contribution(IAtomContainer container, RingSearch ringSearch) { 81 82 int n = container.getAtomCount(); 83 84 // we compute values we need for all atoms and then make the decisions 85 // - this avoids costly operations such as looking up connected 86 // bonds on each atom at the cost of memory 87 int[] degree = new int[n]; 88 int[] bondOrderSum = new int[n]; 89 int[] nCyclicPiBonds = new int[n]; 90 int[] exocyclicPiBond = new int[n]; 91 int[] electrons = new int[n]; 92 93 Arrays.fill(exocyclicPiBond, -1); 94 95 // index atoms and set the degree to the number of implicit hydrogens 96 Map<IAtom, Integer> atomIndex = Maps.newHashMapWithExpectedSize(n); 97 for (int i = 0; i < n; i++) { 98 IAtom a = container.getAtom(i); 99 atomIndex.put(a, i); 100 degree[i] = checkNotNull(a.getImplicitHydrogenCount(), 101 "Aromaticity model requires implicit hydrogen count is set."); 102 } 103 104 // for each bond we increase the degree count and check for cyclic and 105 // exocyclic pi bonds. if there is a cyclic pi bond the atom is marked. 106 // if there is an exocyclic pi bond we store the adjacent atom for 107 // lookup later. 108 for (IBond bond : container.bonds()) { 109 int u = atomIndex.get(bond.getBegin()); 110 int v = atomIndex.get(bond.getEnd()); 111 degree[u]++; 112 degree[v]++; 113 114 IBond.Order order = checkNotNull(bond.getOrder(), "Aromaticity model requires that bond orders must be set"); 115 116 switch (order) { 117 case UNSET: 118 throw new IllegalArgumentException("Aromaticity model requires that bond orders must be set"); 119 case DOUBLE: 120 if (ringSearch.cyclic(u, v)) { 121 nCyclicPiBonds[u]++; 122 nCyclicPiBonds[v]++; 123 } else { 124 exocyclicPiBond[u] = v; 125 exocyclicPiBond[v] = u; 126 } 127 // note - fall through 128 case SINGLE: 129 case TRIPLE: 130 case QUADRUPLE: 131 bondOrderSum[u] += order.numeric(); 132 bondOrderSum[v] += order.numeric(); 133 } 134 } 135 136 // now make a decision on how many electrons each atom contributes 137 for (int i = 0; i < n; i++) { 138 139 int element = element(container.getAtom(i)); 140 int charge = charge(container.getAtom(i)); 141 142 // abnormal valence, usually indicated a radical. these cause problems 143 // with kekulisations 144 int bondedValence = bondOrderSum[i] + container.getAtom(i).getImplicitHydrogenCount(); 145 if (!normal(element, charge, bondedValence)) { 146 electrons[i] = -1; 147 } 148 149 // non-aromatic element, acyclic atoms, atoms with more than three 150 // neighbors and atoms with more than 1 cyclic pi bond are not 151 // considered 152 else if (!aromaticElement(element) || !ringSearch.cyclic(i) || degree[i] > 3 || nCyclicPiBonds[i] > 1) { 153 electrons[i] = -1; 154 } 155 156 // exocyclic bond contributes 0 or 1 electrons depending on 157 // preset electronegativity - check the exocyclicContribution method 158 else if (exocyclicPiBond[i] >= 0) { 159 electrons[i] = exocyclicContribution(element, element(container.getAtom(exocyclicPiBond[i])), charge, 160 nCyclicPiBonds[i]); 161 } 162 163 // any atom (except arsenic) with one cyclic pi bond contributes a 164 // single electron 165 else if (nCyclicPiBonds[i] == 1) { 166 electrons[i] = element == ARSENIC ? -1 : 1; 167 } 168 169 // a anion with a lone pair contributes 2 electrons - simplification 170 // here is we count the number free valence electrons but also 171 // check if the bonded valence is okay (i.e. not a radical) 172 else if (charge <= 0 && charge > -3) { 173 if (valence(element, charge) - bondOrderSum[i] >= 2) 174 electrons[i] = 2; 175 else 176 electrons[i] = -1; 177 } 178 179 else { 180 // cation with no double bonds - single exception? 181 if (element == CARBON && charge > 0) 182 electrons[i] = 0; 183 else 184 electrons[i] = -1; 185 } 186 } 187 188 return electrons; 189 } 190 191 /** 192 * Defines the number of electrons contributed when a pi bond is exocyclic 193 * (spouting). When an atom is connected to an more electronegative atom 194 * then the electrons are 'pulled' from the ring. The preset conditions are 195 * as follows: 196 * 197 * <ul> <li>A cyclic carbon with an exocyclic pi bond to anything but carbon 198 * contributes 0 electrons. If the exocyclic atom is also a carbon then 1 199 * electron is contributed.</li> <li>A cyclic 4 valent nitrogen or 200 * phosphorus cation with an exocyclic pi bond will always contribute 1 201 * electron. A 5 valent neutral nitrogen or phosphorus with an exocyclic 202 * bond to an oxygen contributes 1 electron. </li> <li>A neutral sulphur 203 * connected to an oxygen contributes 2 electrons</li><li>If none of the 204 * previous conditions are met the atom is not considered as being able to 205 * participate in an aromatic system and -1 is returned.</li> </ul> 206 * 207 * @param element the element of the cyclic atom 208 * @param otherElement the element of the exocyclic atom which is connected 209 * to the cyclic atom by a pi bond 210 * @param charge the charge of the cyclic atom 211 * @param nCyclic the number of cyclic pi bonds adjacent to cyclic 212 * atom 213 * @return number of contributed electrons 214 */ exocyclicContribution(int element, int otherElement, int charge, int nCyclic)215 private static int exocyclicContribution(int element, int otherElement, int charge, int nCyclic) { 216 switch (element) { 217 case CARBON: 218 return otherElement != CARBON ? 0 : 1; 219 case NITROGEN: 220 case PHOSPHORUS: 221 if (charge == 1) 222 return 1; 223 else if (charge == 0 && otherElement == OXYGEN && nCyclic == 1) return 1; 224 return -1; 225 case SULPHUR: 226 // quirky but try - O=C1C=CS(=O)C=C1 227 return charge == 0 && otherElement == OXYGEN ? 2 : -1; 228 } 229 return -1; 230 } 231 232 /** 233 * Is the element specified by the atomic number, allowed to be aromatic by 234 * the daylight specification. Allowed elements are C, N, O, P, S, As, Se 235 * and *. This model allows all except for the unknown ('*') element. 236 * 237 * @param element atomic number of element 238 * @return the element can be aromatic 239 */ aromaticElement(int element)240 private static boolean aromaticElement(int element) { 241 switch (element) { 242 case CARBON: 243 case NITROGEN: 244 case OXYGEN: 245 case PHOSPHORUS: 246 case SULPHUR: 247 case ARSENIC: 248 case SELENIUM: 249 return true; 250 } 251 return false; 252 } 253 254 /** 255 * The element has normal valence for the specified charge. 256 * 257 * @param element atomic number 258 * @param charge formal charge 259 * @param valence bonded electrons 260 * @return acceptable for this model 261 */ normal(int element, int charge, int valence)262 private static boolean normal(int element, int charge, int valence) { 263 switch (element) { 264 case CARBON: 265 if (charge == -1 || charge == +1) return valence == 3; 266 return charge == 0 && valence == 4; 267 case NITROGEN: 268 case PHOSPHORUS: 269 case ARSENIC: 270 if (charge == -1) return valence == 2; 271 if (charge == +1) return valence == 4; 272 return charge == 0 && (valence == 3 || (valence == 5 && element == NITROGEN)); 273 case OXYGEN: 274 if (charge == +1) return valence == 3; 275 return charge == 0 && valence == 2; 276 case SULPHUR: 277 case SELENIUM: 278 if (charge == +1) return valence == 3; 279 return charge == 0 && (valence == 2 || valence == 4 || valence == 6); 280 } 281 return false; 282 } 283 284 /** 285 * Lookup of the number of valence electrons for the element at a given 286 * charge. 287 * 288 * @param element the atomic number of an element 289 * @param charge the formal charge on the atom 290 * @return the valence 291 * @throws UnsupportedOperationException encountered an element which the 292 * valence was not encoded for 293 */ valence(int element, int charge)294 private int valence(int element, int charge) { 295 return valence(element - charge); 296 } 297 298 /** 299 * Lookup of the number of valence electrons for elements near those which 300 * this model considers aromatic. As only the {@link #aromaticElement(int)} 301 * are checked we need only consider elements within a charge range. 302 * 303 * @param element the atomic number of an element 304 * @return the valence 305 * @throws UnsupportedOperationException encountered an element which the 306 * valence was not encoded for 307 */ valence(int element)308 private int valence(int element) { 309 switch (element) { 310 case 5: // boron 311 case 13: // aluminium 312 case 31: // gallium 313 return 3; 314 case CARBON: 315 case 14: // silicon 316 case 32: // germanium 317 return 4; 318 case NITROGEN: 319 case PHOSPHORUS: 320 case ARSENIC: 321 return 5; 322 case OXYGEN: 323 case SULPHUR: 324 case SELENIUM: 325 return 6; 326 case 9: // fluorine 327 case 17: // chlorine 328 case 35: // bromine 329 return 7; 330 } 331 throw new UnsupportedOperationException("Valence not yet handled for element with atomic number " + element); 332 } 333 334 /** 335 * Get the atomic number as an non-null integer value. Although pseudo atoms 336 * are not considered by this model the pseudo atoms are intercepted to have 337 * non-null atomic number (defaults to 0). 338 * 339 * @param atom atom to get the element from 340 * @return the formal charge 341 */ element(IAtom atom)342 private int element(IAtom atom) { 343 Integer element = atom.getAtomicNumber(); 344 if (element != null) return element; 345 if (atom instanceof IPseudoAtom) return 0; 346 throw new IllegalArgumentException("Aromaiticty model requires atomic numbers to be set"); 347 } 348 349 /** 350 * Get the formal charge as an integer value - null defaults to 0. 351 * 352 * @param atom the atom to get the charge of 353 * @return the formal charge 354 */ charge(IAtom atom)355 private int charge(IAtom atom) { 356 return atom.getFormalCharge() != null ? atom.getFormalCharge() : 0; 357 } 358 } 359