1 /* Copyright (C) 2004-2007 Rajarshi Guha <rajarshi@users.sourceforge.net> 2 * 3 * Contact: cdk-devel@lists.sourceforge.net 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public License 7 * as published by the Free Software Foundation; either version 2.1 8 * of the License, or (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 */ 19 package org.openscience.cdk.qsar.descriptors.molecular; 20 21 import java.io.IOException; 22 import java.util.ArrayList; 23 import java.util.Collections; 24 import java.util.HashSet; 25 import java.util.List; 26 27 import org.openscience.cdk.config.Isotopes; 28 import org.openscience.cdk.config.IsotopeFactory; 29 import org.openscience.cdk.exception.CDKException; 30 import org.openscience.cdk.interfaces.IAtom; 31 import org.openscience.cdk.interfaces.IAtomContainer; 32 import org.openscience.cdk.interfaces.IBond; 33 import org.openscience.cdk.isomorphism.UniversalIsomorphismTester; 34 import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; 35 import org.openscience.cdk.isomorphism.mcss.RMap; 36 import org.openscience.cdk.qsar.AtomValenceTool; 37 38 /** 39 * Utility methods for chi index calculations. 40 * 41 * These methods are common to all the types of chi index calculations and can 42 * be used to evaluate path, path-cluster, cluster and chain chi indices. 43 * 44 * @author Rajarshi Guha 45 * @cdk.module qsarmolecular 46 * @cdk.githash 47 */ 48 class ChiIndexUtils { 49 50 /** 51 * Gets the fragments from a target <code>AtomContainer</code> matching a set of query fragments. 52 * 53 * This method returns a list of lists. Each list contains the atoms of the target <code>AtomContainer</code> 54 * that arise in the mapping of bonds in the target molecule to the bonds in the query fragment. 55 * The query fragments should be constructed 56 * using the <code>createAnyAtomAnyBondContainer</code> method of the <code>QueryAtomContainerCreator</code> 57 * CDK class, since we are only interested in connectivity and not actual atom or bond type information. 58 * 59 * @param atomContainer The target <code>AtomContainer</code> 60 * @param queries An array of query fragments 61 * @return A list of lists, each list being the atoms that match the query fragments 62 */ getFragments(IAtomContainer atomContainer, QueryAtomContainer[] queries)63 public static List<List<Integer>> getFragments(IAtomContainer atomContainer, QueryAtomContainer[] queries) { 64 UniversalIsomorphismTester universalIsomorphismTester = new UniversalIsomorphismTester(); 65 List<List<Integer>> uniqueSubgraphs = new ArrayList<List<Integer>>(); 66 for (QueryAtomContainer query : queries) { 67 List<List<RMap>> subgraphMaps = null; 68 try { 69 // we get the list of bond mappings 70 subgraphMaps = universalIsomorphismTester.getSubgraphMaps(atomContainer, query); 71 } catch (CDKException e) { 72 e.printStackTrace(); 73 } 74 if (subgraphMaps == null) continue; 75 if (subgraphMaps.size() == 0) continue; 76 77 // get the atom paths in the unique set of bond maps 78 uniqueSubgraphs.addAll(getUniqueBondSubgraphs(subgraphMaps, atomContainer)); 79 } 80 81 // lets run a check on the length of each returned fragment and delete 82 // any that don't match the length of out query fragments. Note that since 83 // sometimes a fragment might be a ring, it will have number of atoms 84 // equal to the number of bonds, where as a fragment with no rings 85 // will have number of atoms equal to the number of bonds+1. So we need to check 86 // fragment size against all unique query sizes - I get lazy and don't check 87 // unique query sizes, but the size of each query 88 List<List<Integer>> retValue = new ArrayList<List<Integer>>(); 89 for (List<Integer> fragment : uniqueSubgraphs) { 90 for (QueryAtomContainer query : queries) { 91 if (fragment.size() == query.getAtomCount()) { 92 retValue.add(fragment); 93 break; 94 } 95 } 96 } 97 return retValue; 98 } 99 100 /** 101 * Evaluates the simple chi index for a set of fragments. 102 * 103 * @param atomContainer The target <code>AtomContainer</code> 104 * @param fragLists A list of fragments 105 * @return The simple chi index 106 */ evalSimpleIndex(IAtomContainer atomContainer, List<List<Integer>> fragLists)107 public static double evalSimpleIndex(IAtomContainer atomContainer, List<List<Integer>> fragLists) { 108 double sum = 0; 109 for (List<Integer> fragList : fragLists) { 110 double prod = 1.0; 111 for (Integer atomSerial : fragList) { 112 IAtom atom = atomContainer.getAtom(atomSerial); 113 int nconnected = atomContainer.getConnectedBondsCount(atom); 114 prod = prod * nconnected; 115 } 116 if (prod != 0) sum += 1.0 / Math.sqrt(prod); 117 } 118 return sum; 119 } 120 121 /** 122 * Evaluates the valence corrected chi index for a set of fragments. 123 * 124 * This method takes into account the S and P atom types described in 125 * Kier & Hall (1986), page 20 for which empirical delta V values are used. 126 * 127 * @param atomContainer The target <code>AtomContainer</code> 128 * @param fragList A list of fragments 129 * @return The valence corrected chi index 130 * @throws CDKException if the <code>IsotopeFactory</code> cannot be created 131 */ evalValenceIndex(IAtomContainer atomContainer, List<List<Integer>> fragList)132 public static double evalValenceIndex(IAtomContainer atomContainer, List<List<Integer>> fragList) throws CDKException { 133 try { 134 IsotopeFactory ifac = Isotopes.getInstance(); 135 ifac.configureAtoms(atomContainer); 136 } catch (IOException e) { 137 throw new CDKException("IO problem occurred when using the CDK atom config\n" + e.getMessage(), e); 138 } 139 double sum = 0; 140 for (List<Integer> aFragList : fragList) { 141 List<Integer> frag = aFragList; 142 double prod = 1.0; 143 for (Object aFrag : frag) { 144 int atomSerial = (Integer) aFrag; 145 IAtom atom = atomContainer.getAtom(atomSerial); 146 147 String sym = atom.getSymbol(); 148 149 if (sym.equals("S")) { // check for some special S environments 150 double tmp = deltavSulphur(atom, atomContainer); 151 if (tmp != -1) { 152 prod = prod * tmp; 153 continue; 154 } 155 } 156 if (sym.equals("P")) { // check for some special P environments 157 double tmp = deltavPhosphorous(atom, atomContainer); 158 if (tmp != -1) { 159 prod = prod * tmp; 160 continue; 161 } 162 } 163 164 int z = atom.getAtomicNumber(); 165 166 // TODO there should be a neater way to get the valence electron count 167 int zv = getValenceElectronCount(atom); 168 169 int hsupp = atom.getImplicitHydrogenCount(); 170 double deltav = (double) (zv - hsupp) / (double) (z - zv - 1); 171 172 prod = prod * deltav; 173 } 174 if (prod != 0) sum += 1.0 / Math.sqrt(prod); 175 } 176 return sum; 177 } 178 getValenceElectronCount(IAtom atom)179 private static int getValenceElectronCount(IAtom atom) { 180 int valency = AtomValenceTool.getValence(atom); 181 return valency - atom.getFormalCharge(); 182 } 183 184 /** 185 * Evaluates the empirical delt V for some S environments. 186 * 187 * The method checks to see whether a S atom is in a -S-S-, 188 * -SO-, -SO2- group and returns the empirical values noted 189 * in Kier & Hall (1986), page 20. 190 * 191 * @param atom The S atom in question 192 * @param atomContainer The molecule containing the S 193 * @return The empirical delta V if it is present in one of the above 194 * environments, -1 otherwise 195 */ deltavSulphur(IAtom atom, IAtomContainer atomContainer)196 protected static double deltavSulphur(IAtom atom, IAtomContainer atomContainer) { 197 if (!atom.getSymbol().equals("S")) return -1; 198 199 // check whether it's a S in S-S 200 List<IAtom> connected = atomContainer.getConnectedAtomsList(atom); 201 for (IAtom connectedAtom : connected) { 202 if (connectedAtom.getSymbol().equals("S") 203 && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) return .89; 204 } 205 206 int count = 0; 207 for (IAtom connectedAtom : connected) { 208 if (connectedAtom.getSymbol().equals("O") 209 && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) count++; 210 } 211 if (count == 1) 212 return 1.33; // check whether it's a S in -SO- 213 else if (count == 2) return 2.67; // check whether it's a S in -SO2- 214 215 return -1; 216 } 217 218 /** 219 * Checks whether the P atom is in a PO environment. 220 * 221 * This environment is noted in Kier & Hall (1986), page 20 222 * 223 * @param atom The P atom in question 224 * @param atomContainer The molecule containing the P atom 225 * @return The empirical delta V if present in the above environment, 226 * -1 otherwise 227 */ deltavPhosphorous(IAtom atom, IAtomContainer atomContainer)228 private static double deltavPhosphorous(IAtom atom, IAtomContainer atomContainer) { 229 if (!atom.getSymbol().equals("P")) return -1; 230 231 List<IAtom> connected = atomContainer.getConnectedAtomsList(atom); 232 int conditions = 0; 233 234 if (connected.size() == 4) conditions++; 235 236 for (IAtom connectedAtom : connected) { 237 if (connectedAtom.getSymbol().equals("O") 238 && atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.DOUBLE) conditions++; 239 if (atomContainer.getBond(atom, connectedAtom).getOrder() == IBond.Order.SINGLE) conditions++; 240 } 241 if (conditions == 5) return 2.22; 242 return -1; 243 } 244 245 /** 246 * Converts a set of bond mappings to a unique set of atom paths. 247 * 248 * This method accepts a <code>List</code> of bond mappings. It first 249 * reduces the set to a unique set of bond maps and then for each bond map 250 * converts it to a series of atoms making up the bonds. 251 * 252 * @param subgraphs A <code>List</code> of bon mappings 253 * @param ac The molecule we are examining 254 * @return A unique <code>List</code> of atom paths 255 */ getUniqueBondSubgraphs(List<List<RMap>> subgraphs, IAtomContainer ac)256 private static List<List<Integer>> getUniqueBondSubgraphs(List<List<RMap>> subgraphs, IAtomContainer ac) { 257 List<List<Integer>> bondList = new ArrayList<List<Integer>>(); 258 for (List<RMap> subgraph : subgraphs) { 259 List<RMap> current = subgraph; 260 List<Integer> ids = new ArrayList<Integer>(); 261 for (RMap aCurrent : current) { 262 RMap rmap = (RMap) aCurrent; 263 ids.add(rmap.getId1()); 264 } 265 Collections.sort(ids); 266 bondList.add(ids); 267 } 268 269 // get the unique set of bonds 270 HashSet<List<Integer>> hs = new HashSet<List<Integer>>(bondList); 271 bondList = new ArrayList<List<Integer>>(hs); 272 273 List<List<Integer>> paths = new ArrayList<List<Integer>>(); 274 for (List<Integer> aBondList1 : bondList) { 275 List<Integer> aBondList = aBondList1; 276 List<Integer> tmp = new ArrayList<Integer>(); 277 for (Object anABondList : aBondList) { 278 int bondNumber = (Integer) anABondList; 279 for (IAtom atom : ac.getBond(bondNumber).atoms()) { 280 Integer atomInt = ac.indexOf(atom); 281 if (!tmp.contains(atomInt)) tmp.add(atomInt); 282 } 283 } 284 paths.add(tmp); 285 } 286 return paths; 287 } 288 289 } 290