/*
 * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
 *                    John May <jwmay@users.sf.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version. All we ask is that proper credit is given
 * for our work, which includes - but is not limited to - adding the above
 * copyright notice to the beginning of your source code files, and to any
 * copyright notice that you may distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
 */

package org.openscience.cdk.isomorphism.matchers.smarts;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.graph.Cycles;
import org.openscience.cdk.graph.GraphUtil;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.ringsearch.RingSearch;

import java.util.Arrays;
import java.util.Collections;
import java.util.Set;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;

/**
 * Computes and stores atom invariants in a single object. The atom invariants
 * are utilised as additional information for the {@link SMARTSAtom}s to match.
 * The values provide additional invariants which are not defined in the {@link
 * IAtom} API and avoids storing multiple properties in a type unsafe map
 * ({@link IAtom#setProperty(Object, Object)}). Depending on the SMARTS
 * implementation different values for the ring information may be set. The
 * choice of ring set affects {@link #ringNumber()} and {@link #ringSize()}.
 * Some implementations store all ring sizes whilst others (Daylight) store only
 * the smallest. The {@link #degree()} also depends on whether hydrogens are
 * suppressed or represented as explicit atoms. The {@link
 * #configureDaylightWithRingInfo(IAtomContainer)} and {@link
 * #configureDaylightWithoutRingInfo(IAtomContainer)} static utilities create
 * and set the invariants following the Daylight implementation. The invariants
 * are set on the {@link #KEY} property of each atom.
 *
 * @author John May
 * @cdk.module smarts
 */
@Deprecated
final class SMARTSAtomInvariants {

    /** Property key to index the class by. */
    static final String          KEY = "SMARTS.INVARIANTS";

    /** The molecule to which this atom belongs. */
    private final IAtomContainer target;

    /** Total number of bonds formed - also referred to as bond order sum. */
    private final int            valence;

    /** The number of rings this atom can be found in. */
    private final int            ringNumber;

    /** The size of rings an atom is found in. */
    private final Set<Integer>   ringSize;

    /** Total number of connected atoms including implicit hydrogens. */
    private final int            connectivity;

    /** Total number of connected ring bonds. */
    private final int            ringConnectivity;

    /** Total number of explicitly connected atoms. */
    private final int            degree;

    /** The total number of hydrogens on an atom. */
    private final int            totalHydrogenCount;

    /**
     * Internal constructor - simply takes all the values.
     *
     * @param target             the container the atom belongs to
     * @param valence            the valence value
     * @param ringNumber         number of rings an atom belongs to (variable)
     * @param ringSize           the size of the rings (variable)
     * @param ringConnectivity   the number of connected ring bonds (or atoms)
     * @param degree             the degree of an atom
     * @param connectivity       the number of connections (degree + implicit H
     *                           count)
     * @param totalHydrogenCount the total number of hydrogens
     */
    SMARTSAtomInvariants(IAtomContainer target, int valence, int ringNumber, Set<Integer> ringSize,
            int ringConnectivity, int degree, int connectivity, int totalHydrogenCount) {
        this.target = target;
        this.valence = valence;
        this.ringNumber = ringNumber;
        this.ringSize = ringSize;
        this.connectivity = connectivity;
        this.totalHydrogenCount = totalHydrogenCount;
        this.ringConnectivity = ringConnectivity;
        this.degree = degree;
    }

    /**
     * The container that was supplied when these invariants were created.
     *
     * @return the target container
     */
    IAtomContainer target() {
        return target;
    }

    /**
     * Access the valence of this atom. The valence is matched by the {@code
     * v<NUMBER>} SMARTS token. The valence is the total number of bonds formed
     * by this atom and <b>NOT</b> the number of valence electrons. As such
     * {@code [v3]} will match a 3 valent nitrogen and {@code [v5]} will match a
     * 5 valent nitrogen. The value is separate from {@link IAtom#getValency()}
     * so it can be cleaned up after matching and avoid confusion with what the
     * value should be.
     *
     * @return the valence of the atom.
     */
    int valence() {
        return valence;
    }

    /**
     * The number of rings this atom belongs to. The value is matched by the
     * {@code R<NUMBER>} token and depends on the ring set used. The Daylight
     * implementation uses the non-unique Smallest Set of Smallest Rings (SSSR)
     * which can lead to inconsistent matches.
     *
     * @return number of rings
     */
    int ringNumber() {
        return ringNumber;
    }

    /**
     * The sizes of rings this atom belongs to. The value is matched by the
     * {@code r<NUMBER>} token and depends on the ring set used. The Daylight
     * implementation uses this value to match the smallest ring to which this
     * atom is a member. It may be beneficial to match multiple ring sizes (not
     * yet defined by OpenSMARTS).
     *
     * @return ring sizes
     */
    Set<Integer> ringSize() {
        return ringSize;
    }

    /**
     * The number of connected ring bonds (or atoms). This value is matched by
     * the {@code x<NUMBER>} token. The Daylight implementation counts the
     * number of connected ring bonds but it may be beneficial to match the atom
     * ring connectivity (not yet defined by OpenSMARTS).
     *
     * @return ring connectivity
     */
    int ringConnectivity() {
        return ringConnectivity;
    }

    /**
     * The number of connected bonds including those to hydrogens. This value is
     * matched by the {@code X<NUMBER>} token. This value depends on whether the
     * hydrogens have been suppressed or are represented as explicit atoms.
     *
     * @return connectivity
     */
    int connectivity() {
        return connectivity;
    }

    /**
     * The degree of a vertex defined as the number of explicit connected bonds.
     * This value is matched by the {@code D<NUMBER>} token. This value depends
     * on whether the hydrogens have been suppressed or are represented as
     * explicit atoms.
     *
     * @return degree
     */
    int degree() {
        return degree;
    }

    /**
     * The total number of hydrogens attached to an atom. When configured by
     * this class the value is the implicit hydrogen count plus the number of
     * explicitly connected atoms with atomic number 1.
     *
     * @return total hydrogen count
     */
    int totalHydrogenCount() {
        return totalHydrogenCount;
    }

    /**
     * Computes {@link SMARTSAtomInvariants} and stores on the {@link #KEY} of
     * each {@link IAtom} in the {@code container}. The {@link
     * CDKConstants#ISINRING} is also set for each ring bond. This configuration
     * does not include ring information and values are left as unset.
     * Ring membership is still configured but not ring size.
     *
     * <blockquote><pre>
     *     IAtomContainer container = ...;
     *     SMARTSAtomInvariants.configureDaylightWithoutRingInfo(container);
     *     for (IAtom atom : container.atoms()) {
     *         SMARTSAtomInvariants inv = atom.getProperty(SMARTSAtomInvariants.KEY);
     *     }
     * </pre></blockquote>
     *
     * @param container the container to configure
     * @throws NullPointerException an implicit hydrogen count, bond order or
     *                              atomic number was not set
     */
    static void configureDaylightWithoutRingInfo(IAtomContainer container) {
        EdgeToBondMap map = EdgeToBondMap.withSpaceFor(container);
        int[][] graph = GraphUtil.toAdjList(container, map);
        configureDaylight(container, graph, map, false);
    }

    /**
     * Computes {@link SMARTSAtomInvariants} and stores on the {@link #KEY} of
     * each {@link IAtom} in the {@code container}. The {@link
     * CDKConstants#ISINRING} is also set for each ring bond. This configuration
     * includes the ring information as used by the Daylight implementation.
     * That is the Smallest Set of Smallest Rings (SSSR) is used and only the
     * smallest ring is stored for the {@link #ringSize()}.
     *
     * <blockquote><pre>
     *     IAtomContainer container = ...;
     *     SMARTSAtomInvariants.configureDaylightWithRingInfo(container);
     *     for (IAtom atom : container.atoms()) {
     *         SMARTSAtomInvariants inv = atom.getProperty(SMARTSAtomInvariants.KEY);
     *
     *     }
     * </pre></blockquote>
     *
     * @param container the container to configure
     * @throws NullPointerException an implicit hydrogen count, bond order or
     *                              atomic number was not set
     */
    static void configureDaylightWithRingInfo(IAtomContainer container) {
        EdgeToBondMap map = EdgeToBondMap.withSpaceFor(container);
        int[][] graph = GraphUtil.toAdjList(container, map);
        configureDaylight(container, graph, map, true);
    }

    /**
     * Computes invariants - see {@link #configureDaylightWithRingInfo(IAtomContainer)}
     * and {@link #configureDaylightWithoutRingInfo(IAtomContainer)}.
     *
     * @param container the container to configure
     * @param graph     the graph for quick traversal
     * @param bondMap   the bond map for quick bond lookup
     * @param ringInfo  logical condition as whether ring info should be
     *                  included
     * @throws NullPointerException an implicit hydrogen count, bond order or
     *                              atomic number was not set
     */
    private static void configureDaylight(IAtomContainer container, int[][] graph, EdgeToBondMap bondMap,
            boolean ringInfo) {

        int nAtoms = container.getAtomCount();

        int[] ringNumber = new int[nAtoms];
        int[] ringSize = new int[nAtoms];

        // nAtoms + 1 is a sentinel meaning "no ring seen for this atom"; it is
        // tested again below when the ring size set is built
        Arrays.fill(ringSize, nAtoms + 1);

        if (ringInfo) {
            // non-unique but used by daylight
            for (int[] cycle : Cycles.sssr(container).paths()) {
                // the ring size is taken as one less than the path length and
                // the walk starts at index 1, i.e. each path is handled as a
                // closed walk whose first vertex is repeated at the end
                int size = cycle.length - 1;
                for (int i = 1; i < cycle.length; i++) {
                    int v = cycle[i];
                    if (size < ringSize[v]) ringSize[v] = size;
                    ringNumber[v]++;
                    bondMap.get(cycle[i], cycle[i - 1]).setFlag(CDKConstants.ISINRING, true);
                }
            }
        } else {
            // ring membership is super cheap
            for (IBond bond : new RingSearch(container, graph).ringFragments().bonds()) {
                bond.setFlag(CDKConstants.ISINRING, true);
            }
        }

        for (int v = 0; v < nAtoms; v++) {

            IAtom atom = container.getAtom(v);

            int implHCount = checkNotNull(atom.getImplicitHydrogenCount(), "Implicit hydrogen count was not set.");

            // implicit hydrogens contribute to the hydrogen count and, as
            // single bonds, to the valence
            int totalHCount = implHCount;
            int valence = implHCount;
            int degree = 0;
            int ringConnections = 0;

            // traverse bonds
            for (int w : graph[v]) {
                IBond bond = bondMap.get(v, w);
                IBond.Order order = bond.getOrder();

                if (order == null || order == IBond.Order.UNSET)
                    throw new NullPointerException("Bond order was not set.");

                valence += order.numeric();

                degree++;

                // the flag is only ever set to true in this method (never
                // cleared), so a flag already present on the bond is counted
                if (bond.getFlag(CDKConstants.ISINRING)) {
                    ringConnections++;
                }

                // the atomic number is a boxed value; fail with a descriptive
                // message (as for the other unset values) instead of a bare
                // NullPointerException from auto-unboxing
                int neighbourElement = checkNotNull(container.getAtom(w).getAtomicNumber(),
                        "Atomic number was not set.");
                if (neighbourElement == 1) {
                    totalHCount++;
                }

            }

            SMARTSAtomInvariants inv = new SMARTSAtomInvariants(container, valence, ringNumber[v],
                    ringSize[v] <= nAtoms ? Collections.singleton(ringSize[v]) : Collections.<Integer> emptySet(),
                    ringConnections, degree, degree + implHCount, totalHCount);

            // if there was no properties a default size LinkedHashMap is created
            // automatically
            atom.setProperty(SMARTSAtomInvariants.KEY, inv);
        }
    }
}