1 /* Copyright (C) 2004-2007  The Chemistry Development Kit (CDK) project
2  *
3  *  Contact: cdk-devel@lists.sourceforge.net
4  *
5  *  This program is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *  All we ask is that proper credit is given for our work, which includes
10  *  - but is not limited to - adding the above copyright notice to the beginning
11  *  of your source code files, and to any copyright notice that you may distribute
12  *  with programs based on this work.
13  *
14  *  This program is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *  GNU Lesser General Public License for more details.
18  *
19  *  You should have received a copy of the GNU Lesser General Public License
20  *  along with this program; if not, write to the Free Software
21  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
22  *
23  */
24 package org.openscience.cdk.tools;
25 
26 import org.openscience.cdk.CDKConstants;
27 import org.openscience.cdk.config.AtomTypeFactory;
28 import org.openscience.cdk.exception.CDKException;
29 import org.openscience.cdk.interfaces.IAtom;
30 import org.openscience.cdk.interfaces.IAtomContainer;
31 import org.openscience.cdk.interfaces.IAtomType;
32 import org.openscience.cdk.interfaces.IBond;
33 import org.openscience.cdk.interfaces.IChemObjectBuilder;
34 import org.openscience.cdk.interfaces.IPseudoAtom;
35 import org.openscience.cdk.interfaces.IAtomType.Hybridization;
36 import org.openscience.cdk.tools.manipulator.BondManipulator;
37 
38 /**
39  * Small customization of ValencyHybridChecker suggested by Todd Martin
40  * specially tuned for SMILES parsing.
41  *
42  * @author       Egon Willighagen
43  * @cdk.created  2004-06-12
44  * @cdk.keyword  atom, valency
45  * @cdk.module   valencycheck
46  * @cdk.githash
47  */
48 public class SmilesValencyChecker implements IValencyChecker, IDeduceBondOrderTool {
49 
50     private String                atomTypeList = null;
51     protected AtomTypeFactory     structgenATF;
52     protected static ILoggingTool logger       = LoggingToolFactory.createLoggingTool(SmilesValencyChecker.class);
53 
SmilesValencyChecker()54     public SmilesValencyChecker() {
55         this("org/openscience/cdk/dict/data/cdk-atom-types.owl");
56     }
57 
SmilesValencyChecker(String atomTypeList)58     public SmilesValencyChecker(String atomTypeList) {
59         this.atomTypeList = atomTypeList;
60         logger.info("Using configuration file: ", atomTypeList);
61     }
62 
63     /**
64      * Saturates a molecule by setting appropriate bond orders.
65      *
66      * @cdk.keyword            bond order, calculation
67      *
68      * @cdk.created 2003-10-03
69      */
70     @Override
saturate(IAtomContainer atomContainer)71     public void saturate(IAtomContainer atomContainer) throws CDKException {
72         logger.info("Saturating atomContainer by adjusting bond orders...");
73         boolean allSaturated = allSaturated(atomContainer);
74         if (!allSaturated) {
75             logger.info("Saturating bond orders is needed...");
76             IBond[] bonds = new IBond[atomContainer.getBondCount()];
77             for (int i = 0; i < bonds.length; i++)
78                 bonds[i] = atomContainer.getBond(i);
79             boolean succeeded = saturate(bonds, atomContainer);
80             if (!succeeded) {
81                 throw new CDKException("Could not saturate this atomContainer!");
82             }
83         }
84     }
85 
86     /**
87      * Saturates a set of Bonds in an AtomContainer.
88      */
saturate(IBond[] bonds, IAtomContainer atomContainer)89     public boolean saturate(IBond[] bonds, IAtomContainer atomContainer) throws CDKException {
90         logger.debug("Saturating bond set of size: ", bonds.length);
91         boolean bondsAreFullySaturated = false;
92         if (bonds.length > 0) {
93             IBond bond = bonds[0];
94 
95             // determine bonds left
96             int leftBondCount = bonds.length - 1;
97             IBond[] leftBonds = new IBond[leftBondCount];
98             System.arraycopy(bonds, 1, leftBonds, 0, leftBondCount);
99 
100             // examine this bond
101             logger.debug("Examining this bond: ", bond);
102             if (isSaturated(bond, atomContainer)) {
103                 logger.debug("OK, bond is saturated, now try to saturate remaining bonds (if needed)");
104                 bondsAreFullySaturated = saturate(leftBonds, atomContainer);
105             } else if (isUnsaturated(bond, atomContainer)) {
106                 logger.debug("Ok, this bond is unsaturated, and can be saturated");
107                 // two options now:
108                 // 1. saturate this one directly
109                 // 2. saturate this one by saturating the rest
110                 logger.debug("Option 1: Saturating this bond directly, then trying to saturate rest");
111                 // considering organic bonds, the max order is 3, so increase twice
112                 boolean bondOrderIncreased = saturateByIncreasingBondOrder(bond, atomContainer);
113                 bondsAreFullySaturated = bondOrderIncreased && saturate(bonds, atomContainer);
114                 if (bondsAreFullySaturated) {
115                     logger.debug("Option 1: worked");
116                 } else {
117                     logger.debug("Option 1: failed. Trying option 2.");
118                     logger.debug("Option 2: Saturing this bond by saturating the rest");
119                     // revert the increase (if succeeded), then saturate the rest
120                     if (bondOrderIncreased) unsaturateByDecreasingBondOrder(bond);
121                     bondsAreFullySaturated = saturate(leftBonds, atomContainer) && isSaturated(bond, atomContainer);
122                     if (!bondsAreFullySaturated) logger.debug("Option 2: failed");
123                 }
124             } else {
125                 logger.debug("Ok, this bond is unsaturated, but cannot be saturated");
126                 // try recursing and see if that fixes things
127                 bondsAreFullySaturated = saturate(leftBonds, atomContainer) && isSaturated(bond, atomContainer);
128             }
129         } else {
130             bondsAreFullySaturated = true; // empty is saturated by default
131         }
132         return bondsAreFullySaturated;
133     }
134 
unsaturateByDecreasingBondOrder(IBond bond)135     public boolean unsaturateByDecreasingBondOrder(IBond bond) {
136         if (bond.getOrder() != IBond.Order.SINGLE) {
137             bond.setOrder(BondManipulator.decreaseBondOrder(bond.getOrder()));
138             return true;
139         } else {
140             return false;
141         }
142     }
143 
144     /**
145      * Returns whether a bond is unsaturated. A bond is unsaturated if
146      * <b>all</b> Atoms in the bond are unsaturated.
147      */
isUnsaturated(IBond bond, IAtomContainer atomContainer)148     public boolean isUnsaturated(IBond bond, IAtomContainer atomContainer) throws CDKException {
149         logger.debug("isBondUnsaturated?: ", bond);
150         IAtom[] atoms = BondManipulator.getAtomArray(bond);
151         boolean isUnsaturated = true;
152         for (int i = 0; i < atoms.length && isUnsaturated; i++) {
153             isUnsaturated = isUnsaturated && !isSaturated(atoms[i], atomContainer);
154         }
155         logger.debug("Bond is unsaturated?: ", isUnsaturated);
156         return isUnsaturated;
157     }
158 
159     /**
160      * Tries to saturate a bond by increasing its bond orders by 1.0.
161      *
162      * @return true if the bond could be increased
163      */
saturateByIncreasingBondOrder(IBond bond, IAtomContainer atomContainer)164     public boolean saturateByIncreasingBondOrder(IBond bond, IAtomContainer atomContainer) throws CDKException {
165         IAtom[] atoms = BondManipulator.getAtomArray(bond);
166         IAtom atom = atoms[0];
167         IAtom partner = atoms[1];
168         logger.debug("  saturating bond: ", atom.getSymbol(), "-", partner.getSymbol());
169         IAtomType[] atomTypes1 = getAtomTypeFactory(bond.getBuilder()).getAtomTypes(atom.getSymbol());
170         IAtomType[] atomTypes2 = getAtomTypeFactory(bond.getBuilder()).getAtomTypes(partner.getSymbol());
171         for (int atCounter1 = 0; atCounter1 < atomTypes1.length; atCounter1++) {
172             IAtomType aType1 = atomTypes1[atCounter1];
173             logger.debug("  condidering atom type: ", aType1);
174             if (couldMatchAtomType(atomContainer, atom, aType1)) {
175                 logger.debug("  trying atom type: ", aType1);
176                 for (int atCounter2 = 0; atCounter2 < atomTypes2.length; atCounter2++) {
177                     IAtomType aType2 = atomTypes2[atCounter2];
178                     logger.debug("  condidering partner type: ", aType1);
179                     if (couldMatchAtomType(atomContainer, partner, atomTypes2[atCounter2])) {
180                         logger.debug("    with atom type: ", aType2);
181                         if (BondManipulator.isLowerOrder(bond.getOrder(), aType2.getMaxBondOrder())
182                                 && BondManipulator.isLowerOrder(bond.getOrder(), aType1.getMaxBondOrder())) {
183                             bond.setOrder(BondManipulator.increaseBondOrder(bond.getOrder()));
184                             logger.debug("Bond order now ", bond.getOrder());
185                             return true;
186                         }
187                     }
188                 }
189             }
190         }
191         return false;
192     }
193 
194     /**
195      * Returns whether a bond is saturated. A bond is saturated if
196      * <b>both</b> Atoms in the bond are saturated.
197      */
isSaturated(IBond bond, IAtomContainer atomContainer)198     public boolean isSaturated(IBond bond, IAtomContainer atomContainer) throws CDKException {
199         logger.debug("isBondSaturated?: ", bond);
200         IAtom[] atoms = BondManipulator.getAtomArray(bond);
201         boolean isSaturated = true;
202         for (int i = 0; i < atoms.length; i++) {
203             logger.debug("isSaturated(Bond, AC): atom I=", i);
204             isSaturated = isSaturated && isSaturated(atoms[i], atomContainer);
205         }
206         logger.debug("isSaturated(Bond, AC): result=", isSaturated);
207         return isSaturated;
208     }
209 
210     /**
211      * Determines of all atoms on the AtomContainer are saturated.
212      */
213     @Override
isSaturated(IAtomContainer container)214     public boolean isSaturated(IAtomContainer container) throws CDKException {
215         return allSaturated(container);
216     }
217 
allSaturated(IAtomContainer ac)218     public boolean allSaturated(IAtomContainer ac) throws CDKException {
219         logger.debug("Are all atoms saturated?");
220         for (int f = 0; f < ac.getAtomCount(); f++) {
221             if (!isSaturated(ac.getAtom(f), ac)) {
222                 return false;
223             }
224         }
225         return true;
226     }
227 
228     /**
229      * Determines if the atom can be of type AtomType. That is, it sees if this
230      * AtomType only differs in bond orders, or implicit hydrogen count.
231      */
couldMatchAtomType(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, IAtomType type)232     public boolean couldMatchAtomType(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, IAtomType type) {
233         logger.debug("couldMatchAtomType:   ... matching atom ", atom, " vs ", type);
234         int hcount = atom.getImplicitHydrogenCount();
235         int charge = atom.getFormalCharge();
236         if (charge == type.getFormalCharge()) {
237             logger.debug("couldMatchAtomType:     formal charge matches...");
238             //            if (atom.getHybridization() == type.getHybridization()) {
239             //                logger.debug("couldMatchAtomType:     hybridization is OK...");
240             if (bondOrderSum + hcount <= type.getBondOrderSum()) {
241                 logger.debug("couldMatchAtomType:     bond order sum is OK...");
242                 if (!BondManipulator.isHigherOrder(maxBondOrder, type.getMaxBondOrder())) {
243                     logger.debug("couldMatchAtomType:     max bond order is OK... We have a match!");
244                     return true;
245                 }
246             } else {
247                 logger.debug("couldMatchAtomType:      no match", "" + (bondOrderSum + hcount), " > ",
248                         "" + type.getBondOrderSum());
249             }
250             //            }
251         } else {
252             logger.debug("couldMatchAtomType:     formal charge does NOT match...");
253         }
254         logger.debug("couldMatchAtomType:    No Match");
255         return false;
256     }
257 
258     /**
259      * Calculates the number of hydrogens that can be added to the given atom to fullfil
260      * the atom's valency. It will return 0 for PseudoAtoms, and for atoms for which it
261      * does not have an entry in the configuration file.
262      */
calculateNumberOfImplicitHydrogens(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder, int neighbourCount)263     public int calculateNumberOfImplicitHydrogens(IAtom atom, double bondOrderSum, IBond.Order maxBondOrder,
264             int neighbourCount) throws CDKException {
265 
266         int missingHydrogens = 0;
267         if (atom instanceof IPseudoAtom) {
268             logger.debug("don't figure it out... it simply does not lack H's");
269             return 0;
270         }
271 
272         logger.debug("Calculating number of missing hydrogen atoms");
273         // get default atom
274         IAtomType[] atomTypes = getAtomTypeFactory(atom.getBuilder()).getAtomTypes(atom.getSymbol());
275         if (atomTypes.length == 0) {
276             logger.warn("Element not found in configuration file: ", atom);
277             return 0;
278         }
279 
280         logger.debug("Found atomtypes: ", atomTypes.length);
281         for (int f = 0; f < atomTypes.length; f++) {
282             IAtomType type = atomTypes[f];
283             if (couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type)) {
284                 logger.debug("This type matches: ", type);
285                 int formalNeighbourCount = type.getFormalNeighbourCount();
286                 if (type.getHybridization() == CDKConstants.UNSET) {
287                     missingHydrogens = (int) (type.getBondOrderSum() - bondOrderSum);
288                 } else if (type.getHybridization() == Hybridization.SP3) {
289                     missingHydrogens = formalNeighbourCount - neighbourCount;
290                 } else if (type.getHybridization() == Hybridization.SP2) {
291                     missingHydrogens = formalNeighbourCount - neighbourCount;
292                 } else if (type.getHybridization() == Hybridization.SP1) {
293                     missingHydrogens = formalNeighbourCount - neighbourCount;
294                 } else {
295                     missingHydrogens = (int) (type.getBondOrderSum() - bondOrderSum);
296                 }
297                 break;
298             }
299         }
300 
301         logger.debug("missing hydrogens: ", missingHydrogens);
302         return missingHydrogens;
303     }
304 
305     /**
306      * Checks whether an Atom is saturated by comparing it with known AtomTypes.
307      * It returns true if the atom is an PseudoAtom and when the element is not in the list.
308      */
309     @Override
isSaturated(IAtom atom, IAtomContainer container)310     public boolean isSaturated(IAtom atom, IAtomContainer container) throws CDKException {
311         if (atom instanceof IPseudoAtom) {
312             logger.debug("don't figure it out... it simply does not lack H's");
313             return true;
314         }
315 
316         IAtomType[] atomTypes = getAtomTypeFactory(atom.getBuilder()).getAtomTypes(atom.getSymbol());
317         if (atomTypes.length == 0) {
318             logger.warn("Missing entry in atom type list for ", atom.getSymbol());
319             return true;
320         }
321         double bondOrderSum = container.getBondOrderSum(atom);
322         IBond.Order maxBondOrder = container.getMaximumBondOrder(atom);
323         int hcount = atom.getImplicitHydrogenCount();
324         int charge = atom.getFormalCharge();
325 
326         logger.debug("Checking saturation of atom ", atom.getSymbol());
327         logger.debug("bondOrderSum: ", bondOrderSum);
328         logger.debug("maxBondOrder: ", maxBondOrder);
329         logger.debug("hcount: ", hcount);
330         logger.debug("charge: ", charge);
331 
332         boolean elementPlusChargeMatches = false;
333         for (int f = 0; f < atomTypes.length; f++) {
334             IAtomType type = atomTypes[f];
335             if (couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type)) {
336                 if (bondOrderSum + hcount == type.getBondOrderSum()
337                         && !BondManipulator.isHigherOrder(maxBondOrder, type.getMaxBondOrder())) {
338                     logger.debug("We have a match: ", type);
339                     logger.debug("Atom is saturated: ", atom.getSymbol());
340                     return true;
341                 } else {
342                     // ok, the element and charge matche, but unfulfilled
343                     elementPlusChargeMatches = true;
344                 }
345             } // else: formal charges don't match
346         }
347 
348         if (elementPlusChargeMatches) {
349             logger.debug("No, atom is not saturated.");
350             return false;
351         }
352 
353         // ok, the found atom was not in the list
354         logger.error("Could not find atom type!");
355         throw new CDKException("The atom with element " + atom.getSymbol() + " and charge " + charge + " is not found.");
356     }
357 
calculateNumberOfImplicitHydrogens(IAtom atom, IAtomContainer container)358     public int calculateNumberOfImplicitHydrogens(IAtom atom, IAtomContainer container) throws CDKException {
359         return this.calculateNumberOfImplicitHydrogens(atom, container.getBondOrderSum(atom),
360                 container.getMaximumBondOrder(atom), container.getConnectedBondsCount(atom));
361     }
362 
getAtomTypeFactory(IChemObjectBuilder builder)363     protected AtomTypeFactory getAtomTypeFactory(IChemObjectBuilder builder) throws CDKException {
364         if (structgenATF == null) {
365             try {
366                 structgenATF = AtomTypeFactory.getInstance(atomTypeList, builder);
367             } catch (Exception exception) {
368                 logger.debug(exception);
369                 throw new CDKException("Could not instantiate AtomTypeFactory!", exception);
370             }
371         }
372         return structgenATF;
373     }
374 
375     /**
376      * Determines if the atom can be of type AtomType.
377      */
couldMatchAtomType(IAtomContainer container, IAtom atom, IAtomType type)378     public boolean couldMatchAtomType(IAtomContainer container, IAtom atom, IAtomType type) {
379         double bondOrderSum = container.getBondOrderSum(atom);
380         IBond.Order maxBondOrder = container.getMaximumBondOrder(atom);
381         return couldMatchAtomType(atom, bondOrderSum, maxBondOrder, type);
382     }
383 
384 }
385