1 /* Copyright (C) 2004-2007 The Chemistry Development Kit (CDK) project 2 * 3 * Contact: cdk-devel@lists.sourceforge.net 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public License 7 * as published by the Free Software Foundation; either version 2.1 8 * of the License, or (at your option) any later version. 9 * All we ask is that proper credit is given for our work, which includes 10 * - but is not limited to - adding the above copyright notice to the beginning 11 * of your source code files, and to any copyright notice that you may distribute 12 * with programs based on this work. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 22 * 23 */ 24 package org.openscience.cdk.tools; 25 26 import org.openscience.cdk.CDKConstants; 27 import org.openscience.cdk.config.AtomTypeFactory; 28 import org.openscience.cdk.exception.CDKException; 29 import org.openscience.cdk.interfaces.IAtom; 30 import org.openscience.cdk.interfaces.IAtomContainer; 31 import org.openscience.cdk.interfaces.IAtomType; 32 import org.openscience.cdk.interfaces.IBond; 33 import org.openscience.cdk.interfaces.IChemObjectBuilder; 34 import org.openscience.cdk.interfaces.IPseudoAtom; 35 import org.openscience.cdk.interfaces.IAtomType.Hybridization; 36 import org.openscience.cdk.tools.manipulator.BondManipulator; 37 38 /** 39 * Small customization of ValencyHybridChecker suggested by Todd Martin 40 * specially tuned for SMILES parsing. 41 * 42 * @author Egon Willighagen 43 * @cdk.created 2004-06-12 44 * @cdk.keyword atom, valency 45 * @cdk.module valencycheck 46 * @cdk.githash 47 */ 48 public class SmilesValencyChecker implements IValencyChecker, IDeduceBondOrderTool { 49 50 private String atomTypeList = null; 51 protected AtomTypeFactory structgenATF; 52 protected static ILoggingTool logger = LoggingToolFactory.createLoggingTool(SmilesValencyChecker.class); 53 SmilesValencyChecker()54 public SmilesValencyChecker() { 55 this("org/openscience/cdk/dict/data/cdk-atom-types.owl"); 56 } 57 SmilesValencyChecker(String atomTypeList)58 public SmilesValencyChecker(String atomTypeList) { 59 this.atomTypeList = atomTypeList; 60 logger.info("Using configuration file: ", atomTypeList); 61 } 62 63 /** 64 * Saturates a molecule by setting appropriate bond orders. 65 * 66 * @cdk.keyword bond order, calculation 67 * 68 * @cdk.created 2003-10-03 69 */ 70 @Override saturate(IAtomContainer atomContainer)71 public void saturate(IAtomContainer atomContainer) throws CDKException { 72 logger.info("Saturating atomContainer by adjusting bond orders..."); 73 boolean allSaturated = allSaturated(atomContainer); 74 if (!allSaturated) { 75 logger.info("Saturating bond orders is needed..."); 76 IBond[] bonds = new IBond[atomContainer.getBondCount()]; 77 for (int i = 0; i < bonds.length; i++) 78 bonds[i] = atomContainer.getBond(i); 79 boolean succeeded = saturate(bonds, atomContainer); 80 if (!succeeded) { 81 throw new CDKException("Could not saturate this atomContainer!"); 82 } 83 } 84 } 85 86 /** 87 * Saturates a set of Bonds in an AtomContainer. 88 */ saturate(IBond[] bonds, IAtomContainer atomContainer)89 public boolean saturate(IBond[] bonds, IAtomContainer atomContainer) throws CDKException { 90 logger.debug("Saturating bond set of size: ", bonds.length); 91 boolean bondsAreFullySaturated = false; 92 if (bonds.length > 0) { 93 IBond bond = bonds[0]; 94 95 // determine bonds left 96 int leftBondCount = bonds.length - 1; 97 IBond[] leftBonds = new IBond[leftBondCount]; 98 System.arraycopy(bonds, 1, leftBonds, 0, leftBondCount); 99 100 // examine this bond 101 logger.debug("Examining this bond: ", bond); 102 if (isSaturated(bond, atomContainer)) { 103 logger.debug("OK, bond is saturated, now try to saturate remaining bonds (if needed)"); 104 bondsAreFullySaturated = saturate(leftBonds, atomContainer); 105 } else if (isUnsaturated(bond, atomContainer)) { 106 logger.debug("Ok, this bond is unsaturated, and can be saturated"); 107 // two options now: 108 // 1. saturate this one directly 109 // 2. saturate this one by saturating the rest 110 logger.debug("Option 1: Saturating this bond directly, then trying to saturate rest"); 111 // considering organic bonds, the max order is 3, so increase twice 112 boolean bondOrderIncreased = saturateByIncreasingBondOrder(bond, atomContainer); 113 bondsAreFullySaturated = bondOrderIncreased && saturate(bonds, atomContainer); 114 if (bondsAreFullySaturated) { 115 logger.debug("Option 1: worked"); 116 } else { 117 logger.debug("Option 1: failed. Trying option 2."); 118 logger.debug("Option 2: Saturing this bond by saturating the rest"); 119 // revert the increase (if succeeded), then saturate the rest 120 if (bondOrderIncreased) unsaturateByDecreasingBondOrder(bond); 121 bondsAreFullySaturated = saturate(leftBonds, atomContainer) && isSaturated(bond, atomContainer); 122 if (!bondsAreFullySaturated) logger.debug("Option 2: failed"); 123 } 124 } else { 125 logger.debug("Ok, this bond is unsaturated, but cannot be saturated"); 126 // try recursing and see if that fixes things 127 bondsAreFullySaturated = saturate(leftBonds, atomContainer) && isSaturated(bond, atomContainer); 128 } 129 } else { 130 bondsAreFullySaturated = true; // empty is saturated by default 131 } 132 return bondsAreFullySaturated; 133 } 134 unsaturateByDecreasingBondOrder(IBond bond)135 public boolean unsaturateByDecreasingBondOrder(IBond bond) { 136 if (bond.getOrder() != IBond.Order.SINGLE) { 137 bond.setOrder(BondManipulator.decreaseBondOrder(bond.getOrder())); 138 return true; 139 } else { 140 return false; 141 } 142 } 143 144 /** 145 * Returns whether a bond is unsaturated. A bond is unsaturated if 146 * <b>all</b> Atoms in the bond are unsaturated. 147 */ isUnsaturated(IBond bond, IAtomContainer atomContainer)148 public boolean isUnsaturated(IBond bond, IAtomContainer atomContainer) throws CDKException { 149 logger.debug("isBondUnsaturated?: ", bond); 150 IAtom[] atoms = BondManipulator.getAtomArray(bond); 151 boolean isUnsaturated = true; 152 for (int i = 0; i < atoms.length && isUnsaturated; i++) { 153 isUnsaturated = isUnsaturated && !isSaturated(atoms[i], atomContainer); 154 } 155 logger.debug("Bond is unsaturated?: ", isUnsaturated); 156 return isUnsaturated; 157 } 158 159 /** 160 * Tries to saturate a bond by increasing its bond orders by 1.0. 161 * 162 * @return true if the bond could be increased 163 */ saturateByIncreasingBondOrder(IBond bond, IAtomContainer atomContainer)164 public boolean saturateByIncreasingBondOrder(IBond bond, IAtomContainer atomContainer) throws CDKException { 165 IAtom[] atoms = BondManipulator.getAtomArray(bond); 166 IAtom atom = atoms[0]; 167 IAtom partner = atoms[1]; 168 logger.debug(" saturating bond: ", atom.getSymbol(), "-", partner.getSymbol()); 169 IAtomType[] atomTypes1 = getAtomTypeFactory(bond.getBuilder()).getAtomTypes(atom.getSymbol()); 170 IAtomType[] atomTypes2 = getAtomTypeFactory(bond.getBuilder()).getAtomTypes(partner.getSymbol()); 171 for (int atCounter1 = 0; atCounter1 < atomTypes1.length; atCounter1++) { 172 IAtomType aType1 = atomTypes1[atCounter1]; 173 logger.debug(" condidering atom type: ", aType1); 174 if (couldMatchAtomType(atomContainer, atom, aType1)) { 175 logger.debug(" trying atom type: ", aType1); 176 for (int atCounter2 = 0; atCounter2 < atomTypes2.length; atCounter2++) { 177 IAtomType aType2 = atomTypes2[atCounter2]; 178 logger.debug(" condidering partner type: ", aType1); 179 if (couldMatchAtomType(atomContainer, partner, atomTypes2[atCounter2])) { 180 logger.debug(" with atom type: ", aType2); 181 if (BondManipulator.isLowerOrder(bond.getOrder(), aType2.getMaxBondOrder()) 182 && BondManipulator.isLowerOrder(bond.getOrder(), aType1.getMaxBondOrder())) { 183 bond.setOrder(BondManipulator.increaseBondOrder(bond.getOrder())); 184 logger.debug("Bond order now ", bond.getOrder()); 185 return true; 186 } 187 } 188 } 189 } 190 } 191 return false; 192 } 193 194 /** 195 * Returns whether a bond is saturated. A bond is saturated if 196 * <b>both</b> Atoms in the bond are saturated. 197 */ isSaturated(IBond bond, IAtomContainer atomContainer)198 public boolean isSaturated(IBond bond, IAtomContainer atomContainer) throws CDKException { 199 logger.debug("isBondSaturated?: ", bond); 200 IAtom[] atoms = BondManipulator.getAtomArray(bond); 201 boolean isSaturated = true; 202 for (int i = 0; i < atoms.length; i++) { 203 logger.debug("isSaturated(Bond, AC): atom I=", i); 204 isSaturated = isSaturated && isSaturated(atoms[i], atomContainer); 205 } 206 logger.debug("isSaturated(Bond, AC): result=", isSaturated); 207 return isSaturated; 208 } 209 210 /** 211 * Determines of all atoms on the AtomContainer are saturated. 212 */ 213 @Override isSaturated(IAtomContainer container)214 public boolean isSaturated(IAtomContainer container) throws CDKException { 215 return allSaturated(container); 216 } 217 allSaturated(IAtomContainer ac)218 public boolean allSaturated(IAtomContainer ac) throws CDKException { 219 logger.debug("Are all atoms saturated?"); 220 for (int f = 0; f < ac.getAtomCount(); f++) { 221 if (!isSaturated(ac.getAtom(f), ac)) { 222 return false; 223 } 224 } 225 return true; 226 } 227 228 /** 229 * Determines if the atom can be of type AtomType. That is, it sees if this 230 * AtomType only differs in bond orders, or implicit hydrogen count. 231 */ couldMatchAtomType(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, IAtomType type)232 public boolean couldMatchAtomType(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, IAtomType type) { 233 logger.debug("couldMatchAtomType: ... matching atom ", atom, " vs ", type); 234 int hcount = atom.getImplicitHydrogenCount(); 235 int charge = atom.getFormalCharge(); 236 if (charge == type.getFormalCharge()) { 237 logger.debug("couldMatchAtomType: formal charge matches..."); 238 // if (atom.getHybridization() == type.getHybridization()) { 239 // logger.debug("couldMatchAtomType: hybridization is OK..."); 240 if (bondOrderSum + hcount <= type.getBondOrderSum()) { 241 logger.debug("couldMatchAtomType: bond order sum is OK..."); 242 if (!BondManipulator.isHigherOrder(maxBondOrder, type.getMaxBondOrder())) { 243 logger.debug("couldMatchAtomType: max bond order is OK... We have a match!"); 244 return true; 245 } 246 } else { 247 logger.debug("couldMatchAtomType: no match", "" + (bondOrderSum + hcount), " > ", 248 "" + type.getBondOrderSum()); 249 } 250 // } 251 } else { 252 logger.debug("couldMatchAtomType: formal charge does NOT match..."); 253 } 254 logger.debug("couldMatchAtomType: No Match"); 255 return false; 256 } 257 258 /** 259 * Calculates the number of hydrogens that can be added to the given atom to fullfil 260 * the atom's valency. It will return 0 for PseudoAtoms, and for atoms for which it 261 * does not have an entry in the configuration file. 262 */ calculateNumberOfImplicitHydrogens(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, int neighbourCount)263 public int calculateNumberOfImplicitHydrogens(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, 264 int neighbourCount) throws CDKException { 265 266 int missingHydrogens = 0; 267 if (atom instanceof IPseudoAtom) { 268 logger.debug("don't figure it out... it simply does not lack H's"); 269 return 0; 270 } 271 272 logger.debug("Calculating number of missing hydrogen atoms"); 273 // get default atom 274 IAtomType[] atomTypes = getAtomTypeFactory(atom.getBuilder()).getAtomTypes(atom.getSymbol()); 275 if (atomTypes.length == 0) { 276 logger.warn("Element not found in configuration file: ", atom); 277 return 0; 278 } 279 280 logger.debug("Found atomtypes: ", atomTypes.length); 281 for (int f = 0; f < atomTypes.length; f++) { 282 IAtomType type = atomTypes[f]; 283 if (couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type)) { 284 logger.debug("This type matches: ", type); 285 int formalNeighbourCount = type.getFormalNeighbourCount(); 286 if (type.getHybridization() == CDKConstants.UNSET) { 287 missingHydrogens = (int) (type.getBondOrderSum() - bondOrderSum); 288 } else if (type.getHybridization() == Hybridization.SP3) { 289 missingHydrogens = formalNeighbourCount - neighbourCount; 290 } else if (type.getHybridization() == Hybridization.SP2) { 291 missingHydrogens = formalNeighbourCount - neighbourCount; 292 } else if (type.getHybridization() == Hybridization.SP1) { 293 missingHydrogens = formalNeighbourCount - neighbourCount; 294 } else { 295 missingHydrogens = (int) (type.getBondOrderSum() - bondOrderSum); 296 } 297 break; 298 } 299 } 300 301 logger.debug("missing hydrogens: ", missingHydrogens); 302 return missingHydrogens; 303 } 304 305 /** 306 * Checks whether an Atom is saturated by comparing it with known AtomTypes. 307 * It returns true if the atom is an PseudoAtom and when the element is not in the list. 308 */ 309 @Override isSaturated(IAtom atom, IAtomContainer container)310 public boolean isSaturated(IAtom atom, IAtomContainer container) throws CDKException { 311 if (atom instanceof IPseudoAtom) { 312 logger.debug("don't figure it out... it simply does not lack H's"); 313 return true; 314 } 315 316 IAtomType[] atomTypes = getAtomTypeFactory(atom.getBuilder()).getAtomTypes(atom.getSymbol()); 317 if (atomTypes.length == 0) { 318 logger.warn("Missing entry in atom type list for ", atom.getSymbol()); 319 return true; 320 } 321 double bondOrderSum = container.getBondOrderSum(atom); 322 IBond.Order maxBondOrder = container.getMaximumBondOrder(atom); 323 int hcount = atom.getImplicitHydrogenCount(); 324 int charge = atom.getFormalCharge(); 325 326 logger.debug("Checking saturation of atom ", atom.getSymbol()); 327 logger.debug("bondOrderSum: ", bondOrderSum); 328 logger.debug("maxBondOrder: ", maxBondOrder); 329 logger.debug("hcount: ", hcount); 330 logger.debug("charge: ", charge); 331 332 boolean elementPlusChargeMatches = false; 333 for (int f = 0; f < atomTypes.length; f++) { 334 IAtomType type = atomTypes[f]; 335 if (couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type)) { 336 if (bondOrderSum + hcount == type.getBondOrderSum() 337 && !BondManipulator.isHigherOrder(maxBondOrder, type.getMaxBondOrder())) { 338 logger.debug("We have a match: ", type); 339 logger.debug("Atom is saturated: ", atom.getSymbol()); 340 return true; 341 } else { 342 // ok, the element and charge matche, but unfulfilled 343 elementPlusChargeMatches = true; 344 } 345 } // else: formal charges don't match 346 } 347 348 if (elementPlusChargeMatches) { 349 logger.debug("No, atom is not saturated."); 350 return false; 351 } 352 353 // ok, the found atom was not in the list 354 logger.error("Could not find atom type!"); 355 throw new CDKException("The atom with element " + atom.getSymbol() + " and charge " + charge + " is not found."); 356 } 357 calculateNumberOfImplicitHydrogens(IAtom atom, IAtomContainer container)358 public int calculateNumberOfImplicitHydrogens(IAtom atom, IAtomContainer container) throws CDKException { 359 return this.calculateNumberOfImplicitHydrogens(atom, container.getBondOrderSum(atom), 360 container.getMaximumBondOrder(atom), container.getConnectedBondsCount(atom)); 361 } 362 getAtomTypeFactory(IChemObjectBuilder builder)363 protected AtomTypeFactory getAtomTypeFactory(IChemObjectBuilder builder) throws CDKException { 364 if (structgenATF == null) { 365 try { 366 structgenATF = AtomTypeFactory.getInstance(atomTypeList, builder); 367 } catch (Exception exception) { 368 logger.debug(exception); 369 throw new CDKException("Could not instantiate AtomTypeFactory!", exception); 370 } 371 } 372 return structgenATF; 373 } 374 375 /** 376 * Determines if the atom can be of type AtomType. 377 */ couldMatchAtomType(IAtomContainer container, IAtom atom, IAtomType type)378 public boolean couldMatchAtomType(IAtomContainer container, IAtom atom, IAtomType type) { 379 double bondOrderSum = container.getBondOrderSum(atom); 380 IBond.Order maxBondOrder = container.getMaximumBondOrder(atom); 381 return couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type); 382 } 383 384 } 385