1 /*
2  * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
3  *                    John May <jwmay@users.sf.net>
4  *
5  * Contact: cdk-devel@lists.sourceforge.net
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or (at
10  * your option) any later version. All we ask is that proper credit is given
11  * for our work, which includes - but is not limited to - adding the above
12  * copyright notice to the beginning of your source code files, and to any
13  * copyright notice that you may distribute with programs based on this work.
14  *
15  * This program is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18  * License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
23  */
24 
25 package org.openscience.cdk.isomorphism.matchers.smarts;
26 
27 import org.openscience.cdk.CDKConstants;
28 import org.openscience.cdk.graph.Cycles;
29 import org.openscience.cdk.graph.GraphUtil;
30 import org.openscience.cdk.interfaces.IAtom;
31 import org.openscience.cdk.interfaces.IAtomContainer;
32 import org.openscience.cdk.interfaces.IBond;
33 import org.openscience.cdk.ringsearch.RingSearch;
34 
35 import java.util.Arrays;
36 import java.util.Collections;
37 import java.util.Set;
38 
39 import static com.google.common.base.Preconditions.checkNotNull;
40 import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;
41 
42 /**
43  * Computes and stores atom invariants in a single object. The atom invariants
44  * are utilised as additional information for the {@link SMARTSAtom}s to match.
45  * The values provide additional invariants which are not defined in the {@link
46  * IAtom} API and avoids storing multiple properties in a type unsafe map
47  * ({@link IAtom#setProperty(Object, Object)}).  Depending on the SMARTS
48  * implementation different values for the ring information may be set. The
49  * choice of ring set affects {@link #ringNumber()} and {@link #ringSize()}.
50  * Some implementations store all ring sizes whilst others (Daylight) store only
51  * the smallest. The {@link #degree()} also depends on whether hydrogens are
52  * suppressed or represented as explicit atoms.  The {@link
53  * #configureDaylightWithRingInfo(IAtomContainer)} and {@link
54  * #configureDaylightWithoutRingInfo(IAtomContainer)} static utilities create
55  * and set the invariants following the Daylight implementation. The invariants
56  * are set on the {@link #KEY} property of each atom.
57  *
58  * @author John May
59  * @cdk.module smarts
60  */
61 @Deprecated
62 final class SMARTSAtomInvariants {
63 
64     /** Property key to index the class by. */
65     static String                KEY = "SMARTS.INVARIANTS";
66 
67     /** the molecule which this atom belongs. */
68     private final IAtomContainer target;
69 
70     /** Total number of bonds formed - also refereed to as bond order sum. */
71     private final int            valence;
72 
73     /** The number of rings this atom can be found in. */
74     private final int            ringNumber;
75 
76     /** The size of rings an atom is found in. */
77     private final Set<Integer>   ringSize;
78 
79     /** Total number of connected atoms including implicit hydrogens. */
80     private final int            connectivity;
81 
82     /** Total number of connected ring bonds. */
83     private final int            ringConnectivity;
84 
85     /** Total number of explicitly connected atoms. */
86     private final int            degree;
87 
88     /** The total number of hydrogens on an atom. */
89     private final int            totalHydrogenCount;
90 
91     /**
92      * Internal constructor - simple takes all the values.
93      *
94      * @param valence            the valence value
95      * @param ringNumber         number of rings an atom belongs to (variable)
96      * @param ringSize           the size of the rings (variable)
97      * @param ringConnectivity   the number of connected ring bonds (or atoms)
98      * @param degree             the degree of an atom
99      * @param connectivity       the number of connections (degree + implicit H
100      *                           count)
101      * @param totalHydrogenCount the total number of hydrogens
102      */
SMARTSAtomInvariants(IAtomContainer target, int valence, int ringNumber, Set<Integer> ringSize, int ringConnectivity, int degree, int connectivity, int totalHydrogenCount)103     SMARTSAtomInvariants(IAtomContainer target, int valence, int ringNumber, Set<Integer> ringSize,
104             int ringConnectivity, int degree, int connectivity, int totalHydrogenCount) {
105         this.target = target;
106         this.valence = valence;
107         this.ringNumber = ringNumber;
108         this.ringSize = ringSize;
109         this.connectivity = connectivity;
110         this.totalHydrogenCount = totalHydrogenCount;
111         this.ringConnectivity = ringConnectivity;
112         this.degree = degree;
113     }
114 
target()115     IAtomContainer target() {
116         return target;
117     }
118 
119     /**
120      * Access the valence of this atom. The valence is matched by the {@code
121      * v<NUMBER>} SMARTS token. The valence is the total number of bonds formed
122      * by this atom and <b>NOT</b> the number of valence electrons. As such
123      * {@code [v3]} will match a 3 valent nitrogen and {@code [v5]} will match a
124      * 5 valent nitrogen. The value is separate from {@link IAtom#getValency()}
125      * so it can be cleaned up after matching and avoid confusion with what the
126      * value should be.
127      *
128      * @return the valence of the atom.
129      */
valence()130     int valence() {
131         return valence;
132     }
133 
134     /**
135      * The number of rings this atom belong to. The value is matched by the
136      * {@code R<NUMBER>} token and depends on the ring set used. The Daylight
137      * implementation uses the non-unique Smallest Set of Smallest Rings (SSSR)
138      * which can lead to inconsistent matches.
139      *
140      * @return number or rings
141      */
ringNumber()142     int ringNumber() {
143         return ringNumber;
144     }
145 
146     /**
147      * The sizes of rings this atoms belongs to. The value is matched by the
148      * {@code r<NUMBER>} token and depends on the ring set used. The Daylight
149      * implementation uses this value to match the smallest ring to which this
150      * atom is a member. It may be beneficial to match multiple ring sizes (not
151      * yet defined by OpenSMARTS).
152      *
153      * @return ring sizes
154      */
ringSize()155     Set<Integer> ringSize() {
156         return ringSize;
157     }
158 
159     /**
160      * The number of connected ring bonds (or atoms). This value is matched by
161      * the {@code x<NUMBER>} token. The Daylight implementation counts the
162      * number of connected ring bonds but it may be beneficial to match the atom
163      * ring connectivity (not yet defined by OpenSMARTS).
164      *
165      * @return ring connectivity
166      */
ringConnectivity()167     int ringConnectivity() {
168         return ringConnectivity;
169     }
170 
171     /**
172      * The number of connected bonds including those to hydrogens. This value is
173      * matched by the {@code X<NUMBER>} token. This value depends on whether the
174      * hydrogens have been suppressed or are represented as explicit atoms.
175      *
176      * @return connectivity
177      */
connectivity()178     int connectivity() {
179         return connectivity;
180     }
181 
182     /**
183      * The degree of a vertex defined as the number of explicit connected bonds.
184      * This value is matched by the {@code D<NUMBER>} token. This value depends
185      * on whether the hydrogens have been suppressed or are represented as
186      * explicit atoms.
187      *
188      * @return connectivity
189      */
degree()190     int degree() {
191         return degree;
192     }
193 
194     /**
195      * The total number of hydrogens attached to an atom.
196      *
197      * @return
198      */
totalHydrogenCount()199     int totalHydrogenCount() {
200         return totalHydrogenCount;
201     }
202 
203     /**
204      * Computes {@link SMARTSAtomInvariants} and stores on the {@link #KEY} or
205      * each {@link IAtom} in the {@code container}. The {@link
206      * CDKConstants#ISINRING} is also set for each bond. This configuration does
207      * not include ring information and values are left as unset.
208      * Ring membership is still configured but not ring size.
209      *
210      * <blockquote><pre>
211      *     IAtomContainer container = ...;
212      *     SMARTSAtomInvariants.configureDaylightWithoutRingInfo(container);
213      *     for (IAtom atom : container.atoms()) {
214      *         SMARTSAtomInvariants inv = atom.getProperty(SMARTSAtomInvariants.KEY);
215      *     }
216      * </pre></blockquote>
217      *
218      * @param container the container to configure
219      */
configureDaylightWithoutRingInfo(IAtomContainer container)220     static void configureDaylightWithoutRingInfo(IAtomContainer container) {
221         EdgeToBondMap map = EdgeToBondMap.withSpaceFor(container);
222         int[][] graph = GraphUtil.toAdjList(container, map);
223         configureDaylight(container, graph, map, false);
224     }
225 
226     /**
227      * Computes {@link SMARTSAtomInvariants} and stores on the {@link #KEY} or
228      * each {@link IAtom} in the {@code container}. The {@link
229      * CDKConstants#ISINRING} is also set for each bond. This configuration
230      * includes the ring information as used by the Daylight implementation.
231      * That is the Smallest Set of Smallest Rings (SSSR) is used and only the
232      * smallest ring is stored for the {@link #ringSize()}.
233      *
234      * <blockquote><pre>
235      *     IAtomContainer container = ...;
236      *     SMARTSAtomInvariants.configureDaylightWithRingInfo(container);
237      *     for (IAtom atom : container.atoms()) {
238      *         SMARTSAtomInvariants inv = atom.getProperty(SMARTSAtomInvariants.KEY);
239      *
240      *     }
241      * </pre></blockquote>
242      *
243      * @param container the container to configure
244      */
configureDaylightWithRingInfo(IAtomContainer container)245     static void configureDaylightWithRingInfo(IAtomContainer container) {
246         EdgeToBondMap map = EdgeToBondMap.withSpaceFor(container);
247         int[][] graph = GraphUtil.toAdjList(container, map);
248         configureDaylight(container, graph, map, true);
249     }
250 
251     /**
252      * Computes invariants - see {@link #configureDaylightWithRingInfo(IAtomContainer)}
253      * and {@link #configureDaylightWithoutRingInfo(IAtomContainer)}.
254      *
255      * @param container the container to configure
256      * @param graph     the graph for quick traversal
257      * @param bondMap   the bond map for quick bond lookup
258      * @param ringInfo  logical condition as whether ring info should be
259      *                  included
260      */
configureDaylight(IAtomContainer container, int[][] graph, EdgeToBondMap bondMap, boolean ringInfo)261     private static void configureDaylight(IAtomContainer container, int[][] graph, EdgeToBondMap bondMap,
262             boolean ringInfo) {
263 
264         int nAtoms = container.getAtomCount();
265 
266         int[] ringNumber = new int[nAtoms];
267         int[] ringSize = new int[nAtoms];
268 
269         Arrays.fill(ringSize, nAtoms + 1);
270 
271         if (ringInfo) {
272             // non-unique but used by daylight
273             for (int[] cycle : Cycles.sssr(container).paths()) {
274                 int size = cycle.length - 1;
275                 for (int i = 1; i < cycle.length; i++) {
276                     int v = cycle[i];
277                     if (size < ringSize[v]) ringSize[v] = size;
278                     ringNumber[v]++;
279                     bondMap.get(cycle[i], cycle[i - 1]).setFlag(CDKConstants.ISINRING, true);
280                 }
281             }
282         } else {
283             // ring membership is super cheap
284             for (IBond bond : new RingSearch(container, graph).ringFragments().bonds()) {
285                 bond.setFlag(CDKConstants.ISINRING, true);
286             }
287         }
288 
289         for (int v = 0; v < nAtoms; v++) {
290 
291             IAtom atom = container.getAtom(v);
292 
293             int implHCount = checkNotNull(atom.getImplicitHydrogenCount(), "Implicit hydrogen count was not set.");
294 
295             int totalHCount = implHCount;
296             int valence = implHCount;
297             int degree = 0;
298             int ringConnections = 0;
299 
300             // traverse bonds
301             for (int w : graph[v]) {
302                 IBond bond = bondMap.get(v, w);
303                 IBond.Order order = bond.getOrder();
304 
305                 if (order == null || order == IBond.Order.UNSET)
306                     throw new NullPointerException("Bond order was not set.");
307 
308                 valence += order.numeric();
309 
310                 degree++;
311 
312                 if (bond.getFlag(CDKConstants.ISINRING)) {
313                     ringConnections++;
314                 }
315 
316                 if (container.getAtom(w).getAtomicNumber() == 1) {
317                     totalHCount++;
318                 }
319 
320             }
321 
322             SMARTSAtomInvariants inv = new SMARTSAtomInvariants(container, valence, ringNumber[v],
323                     ringSize[v] <= nAtoms ? Collections.singleton(ringSize[v]) : Collections.<Integer> emptySet(),
324                     ringConnections, degree, degree + implHCount, totalHCount);
325 
326             // if there was no properties a default size LinkedHashMap is created
327             // automatically
328             atom.setProperty(SMARTSAtomInvariants.KEY, inv);
329         }
330     }
331 }
332