1 /*  Copyright (C) 2004-2007  Rajarshi Guha <rajarshi@users.sourceforge.net>
2  *
3  *  Contact: cdk-devel@lists.sourceforge.net
4  *
5  *  This program is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 package org.openscience.cdk.qsar.descriptors.molecular;
20 
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.HashSet;
25 import java.util.List;
26 
27 import org.openscience.cdk.config.Isotopes;
28 import org.openscience.cdk.config.IsotopeFactory;
29 import org.openscience.cdk.exception.CDKException;
30 import org.openscience.cdk.interfaces.IAtom;
31 import org.openscience.cdk.interfaces.IAtomContainer;
32 import org.openscience.cdk.interfaces.IBond;
33 import org.openscience.cdk.isomorphism.UniversalIsomorphismTester;
34 import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
35 import org.openscience.cdk.isomorphism.mcss.RMap;
36 import org.openscience.cdk.qsar.AtomValenceTool;
37 
38 /**
39  * Utility methods for chi index calculations.
40  *
41  * These methods are common to all the types of chi index calculations and can
42  * be used to evaluate path, path-cluster, cluster and chain chi indices.
43  *
44  * @author     Rajarshi Guha
45  * @cdk.module qsarmolecular
46  * @cdk.githash
47  */
48 class ChiIndexUtils {
49 
50     /**
51      * Gets the fragments from a target <code>AtomContainer</code> matching a set of query fragments.
52      *
53      * This method returns a list of lists. Each list contains the atoms of the target <code>AtomContainer</code>
54      * that arise in the mapping of bonds in the target molecule to the bonds in the query fragment.
55      * The query fragments should be constructed
56      * using the <code>createAnyAtomAnyBondContainer</code> method of the <code>QueryAtomContainerCreator</code>
57      * CDK class, since we are only interested in connectivity and not actual atom or bond type information.
58      *
59      * @param atomContainer The target <code>AtomContainer</code>
60      * @param queries       An array of query fragments
61      * @return A list of lists, each list being the atoms that match the query fragments
62      */
getFragments(IAtomContainer atomContainer, QueryAtomContainer[] queries)63     public static List<List<Integer>> getFragments(IAtomContainer atomContainer, QueryAtomContainer[] queries) {
64         UniversalIsomorphismTester universalIsomorphismTester = new UniversalIsomorphismTester();
65         List<List<Integer>> uniqueSubgraphs = new ArrayList<List<Integer>>();
66         for (QueryAtomContainer query : queries) {
67             List<List<RMap>> subgraphMaps = null;
68             try {
69                 // we get the list of bond mappings
70                 subgraphMaps = universalIsomorphismTester.getSubgraphMaps(atomContainer, query);
71             } catch (CDKException e) {
72                 e.printStackTrace();
73             }
74             if (subgraphMaps == null) continue;
75             if (subgraphMaps.size() == 0) continue;
76 
77             // get the atom paths in the unique set of bond maps
78             uniqueSubgraphs.addAll(getUniqueBondSubgraphs(subgraphMaps, atomContainer));
79         }
80 
81         // lets run a check on the length of each returned fragment and delete
82         // any that don't match the length of out query fragments. Note that since
83         // sometimes a fragment might be a ring, it will have number of atoms
84         // equal to the number of bonds, where as a fragment with no rings
85         // will have number of atoms equal to the number of bonds+1. So we need to check
86         // fragment size against all unique query sizes - I get lazy and don't check
87         // unique query sizes, but the size of each query
88         List<List<Integer>> retValue = new ArrayList<List<Integer>>();
89         for (List<Integer> fragment : uniqueSubgraphs) {
90             for (QueryAtomContainer query : queries) {
91                 if (fragment.size() == query.getAtomCount()) {
92                     retValue.add(fragment);
93                     break;
94                 }
95             }
96         }
97         return retValue;
98     }
99 
100     /**
101      * Evaluates the simple chi index for a set of fragments.
102      *
103      * @param atomContainer The target <code>AtomContainer</code>
104      * @param fragLists      A list of fragments
105      * @return The simple chi index
106      */
evalSimpleIndex(IAtomContainer atomContainer, List<List<Integer>> fragLists)107     public static double evalSimpleIndex(IAtomContainer atomContainer, List<List<Integer>> fragLists) {
108         double sum = 0;
109         for (List<Integer> fragList : fragLists) {
110             double prod = 1.0;
111             for (Integer atomSerial : fragList) {
112                 IAtom atom = atomContainer.getAtom(atomSerial);
113                 int nconnected = atomContainer.getConnectedBondsCount(atom);
114                 prod = prod * nconnected;
115             }
116             if (prod != 0) sum += 1.0 / Math.sqrt(prod);
117         }
118         return sum;
119     }
120 
121     /**
122      * Evaluates the valence corrected chi index for a set of fragments.
123      *
124      * This method takes into account the S and P atom types described in
125      * Kier & Hall (1986), page 20 for which empirical delta V values are used.
126      *
127      * @param atomContainer The target <code>AtomContainer</code>
128      * @param fragList      A list of fragments
129      * @return The valence corrected chi index
130      * @throws CDKException if the <code>IsotopeFactory</code> cannot be created
131      */
evalValenceIndex(IAtomContainer atomContainer, List<List<Integer>> fragList)132     public static double evalValenceIndex(IAtomContainer atomContainer, List<List<Integer>> fragList) throws CDKException {
133         try {
134             IsotopeFactory ifac = Isotopes.getInstance();
135             ifac.configureAtoms(atomContainer);
136         } catch (IOException e) {
137             throw new CDKException("IO problem occurred when using the CDK atom config\n" + e.getMessage(), e);
138         }
139         double sum = 0;
140         for (List<Integer> aFragList : fragList) {
141             List<Integer> frag = aFragList;
142             double prod = 1.0;
143             for (Object aFrag : frag) {
144                 int atomSerial = (Integer) aFrag;
145                 IAtom atom = atomContainer.getAtom(atomSerial);
146 
147                 String sym = atom.getSymbol();
148 
149                 if (sym.equals("S")) { // check for some special S environments
150                     double tmp = deltavSulphur(atom, atomContainer);
151                     if (tmp != -1) {
152                         prod = prod * tmp;
153                         continue;
154                     }
155                 }
156                 if (sym.equals("P")) { // check for some special P environments
157                     double tmp = deltavPhosphorous(atom, atomContainer);
158                     if (tmp != -1) {
159                         prod = prod * tmp;
160                         continue;
161                     }
162                 }
163 
164                 int z = atom.getAtomicNumber();
165 
166                 // TODO there should be a neater way to get the valence electron count
167                 int zv = getValenceElectronCount(atom);
168 
169                 int hsupp = atom.getImplicitHydrogenCount();
170                 double deltav = (double) (zv - hsupp) / (double) (z - zv - 1);
171 
172                 prod = prod * deltav;
173             }
174             if (prod != 0) sum += 1.0 / Math.sqrt(prod);
175         }
176         return sum;
177     }
178 
getValenceElectronCount(IAtom atom)179     private static int getValenceElectronCount(IAtom atom) {
180         int valency = AtomValenceTool.getValence(atom);
181         return valency - atom.getFormalCharge();
182     }
183 
184     /**
185      * Evaluates the empirical delt V for some S environments.
186      *
187      * The method checks to see whether a S atom is in a -S-S-,
188      * -SO-, -SO2- group and returns the empirical values noted
189      * in Kier & Hall (1986), page 20.
190      *
191      * @param atom          The S atom in question
192      * @param atomContainer The molecule containing the S
193      * @return The empirical delta V if it is present in one of the above
194      *         environments, -1 otherwise
195      */
deltavSulphur(IAtom atom, IAtomContainer atomContainer)196     protected static double deltavSulphur(IAtom atom, IAtomContainer atomContainer) {
197         if (!atom.getSymbol().equals("S")) return -1;
198 
199         // check whether it's a S in S-S
200         List<IAtom> connected = atomContainer.getConnectedAtomsList(atom);
201         for (IAtom connectedAtom : connected) {
202             if (connectedAtom.getSymbol().equals("S")
203                     && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) return .89;
204         }
205 
206         int count = 0;
207         for (IAtom connectedAtom : connected) {
208             if (connectedAtom.getSymbol().equals("O")
209                     && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) count++;
210         }
211         if (count == 1)
212             return 1.33; // check whether it's a S in -SO-
213         else if (count == 2) return 2.67; // check whether it's a S in -SO2-
214 
215         return -1;
216     }
217 
218     /**
219      * Checks whether the P atom is in a PO environment.
220      *
221      * This environment is noted in Kier & Hall (1986), page 20
222      *
223      * @param atom          The P atom in question
224      * @param atomContainer The molecule containing the P atom
225      * @return The empirical delta V if present in the above environment,
226      *         -1 otherwise
227      */
deltavPhosphorous(IAtom atom, IAtomContainer atomContainer)228     private static double deltavPhosphorous(IAtom atom, IAtomContainer atomContainer) {
229         if (!atom.getSymbol().equals("P")) return -1;
230 
231         List<IAtom> connected = atomContainer.getConnectedAtomsList(atom);
232         int conditions = 0;
233 
234         if (connected.size() == 4) conditions++;
235 
236         for (IAtom connectedAtom : connected) {
237             if (connectedAtom.getSymbol().equals("O")
238                     && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) conditions++;
239             if (atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) conditions++;
240         }
241         if (conditions == 5) return 2.22;
242         return -1;
243     }
244 
245     /**
246      * Converts a set of bond mappings to a unique set of atom paths.
247      *
248      * This method accepts a <code>List</code> of bond mappings. It first
249      * reduces the set to a unique set of bond maps and then for each bond map
250      * converts it to a series of atoms making up the bonds.
251      *
252      * @param subgraphs A <code>List</code> of bon mappings
253      * @param ac        The molecule we are examining
254      * @return A unique <code>List</code> of atom paths
255      */
getUniqueBondSubgraphs(List<List<RMap>> subgraphs, IAtomContainer ac)256     private static List<List<Integer>> getUniqueBondSubgraphs(List<List<RMap>> subgraphs, IAtomContainer ac) {
257         List<List<Integer>> bondList = new ArrayList<List<Integer>>();
258         for (List<RMap> subgraph : subgraphs) {
259             List<RMap> current = subgraph;
260             List<Integer> ids = new ArrayList<Integer>();
261             for (RMap aCurrent : current) {
262                 RMap rmap = (RMap) aCurrent;
263                 ids.add(rmap.getId1());
264             }
265             Collections.sort(ids);
266             bondList.add(ids);
267         }
268 
269         // get the unique set of bonds
270         HashSet<List<Integer>> hs = new HashSet<List<Integer>>(bondList);
271         bondList = new ArrayList<List<Integer>>(hs);
272 
273         List<List<Integer>> paths = new ArrayList<List<Integer>>();
274         for (List<Integer> aBondList1 : bondList) {
275             List<Integer> aBondList = aBondList1;
276             List<Integer> tmp = new ArrayList<Integer>();
277             for (Object anABondList : aBondList) {
278                 int bondNumber = (Integer) anABondList;
279                 for (IAtom atom : ac.getBond(bondNumber).atoms()) {
280                     Integer atomInt = ac.indexOf(atom);
281                     if (!tmp.contains(atomInt)) tmp.add(atomInt);
282                 }
283             }
284             paths.add(tmp);
285         }
286         return paths;
287     }
288 
289 }
290