1 /*
2  * Copyright (C) 2018 NextMove Software
3  *
4  * Contact: cdk-devel@lists.sourceforge.net
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version. All we ask is that proper credit is given
10  * for our work, which includes - but is not limited to - adding the above
11  * copyright notice to the beginning of your source code files, and to any
12  * copyright notice that you may distribute with programs based on this work.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17  * License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 package org.openscience.cdk.isomorphism;
25 
26 import org.openscience.cdk.interfaces.IAtom;
27 import org.openscience.cdk.interfaces.IAtomContainer;
28 import org.openscience.cdk.isomorphism.matchers.Expr;
29 import org.openscience.cdk.isomorphism.matchers.IQueryAtom;
30 import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer;
31 import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
32 
33 import java.util.Collections;
34 
35 import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*;
36 
37 /**
38  * The depth-first (DF) backtracking sub-structure matching algorithm so named
39  * because it matches the molecule in a depth-first manner (bond by bond). The
40  * algorithm is a simple but elegant backtracking search iterating over the
41  * bonds of a query. Like the popular VF2 the algorithm, it uses linear memory
42  * but unlike VF2 bonded atoms are selected from the neighbor lists of already
43  * mapped atoms.
44  * <br><br>
45  * In practice VF2 take O(N<sup>2</sup>) to match a linear chain against it's
46  * self whilst this algorithm is O(N).
47  * <br><br>
48  * Usage:
49  * <pre>{@code
50  * DfPattern ptrn = DfPattern.findSubstructure(query);
51  *
52  * // has match?
53  * if (ptrn.matches(mol)) {
54  *
55  * }
56  *
57  * // get lazy mapping iterator
58  * Mappings mappings = ptrn.matchAll(mol);
59  * for (int[] amap : mappings) {
60  *
61  * }
62  *
63  * // test if pattern matches at a given atom
64  * for (IAtom atom : mol.atoms()) {
65  *   if (ptrn.matchesRoot(atom)) {
66  *
67  *   }
68  * }
69  * }</pre>
70  * <b>References</b>
71  * <ul>
72  *     <li>{@cdk.cite Ray57}</li>
73  *     <li>{@cdk.cite Ullmann76}</li>
74  *     <li>{@cdk.cite Cordella04}</li>
75  *     <li>{@cdk.cite Jeliazkova18}</li>
76  * </ul>
77  *
78  * @author John Mayfield
79  * @see DfPattern
80  * @see Mappings
81  */
82 public class DfPattern extends Pattern {
83 
84     private final IQueryAtomContainer query;
85     private final DfState             state;
86 
DfPattern(IQueryAtomContainer query)87     private DfPattern(IQueryAtomContainer query) {
88         this.query = query;
89         determineFilters(query);
90         state = new DfState(query);
91     }
92 
checkCompatibleAPI(IAtom atom)93     private static void checkCompatibleAPI(IAtom atom) {
94         if (atom.getContainer() == null) {
95             throw new IllegalArgumentException(
96                     "This API can only be used with the option " +
97                     "CdkUseLegacyAtomContainer=false (default). The atoms in " +
98                     "the molecule provided do not know about their parent " +
99                     "molecule"
100             );
101         }
102     }
103 
104     /**
105      * {@inheritDoc}
106      */
107     @Override
match(IAtomContainer target)108     public int[] match(IAtomContainer target) {
109         return matchAll(target).first();
110     }
111 
112     /**
113      * {@inheritDoc}
114      */
115     @Override
matches(IAtomContainer target)116     public boolean matches(IAtomContainer target) {
117         return matchAll(target).atLeast(1);
118     }
119 
120     /**
121      * {@inheritDoc}
122      */
123     @Override
matchAll(IAtomContainer mol)124     public Mappings matchAll(IAtomContainer mol) {
125         if (mol.getAtomCount() < query.getAtomCount())
126             return new Mappings(query, mol, Collections.<int[]>emptySet());
127         if (mol.getAtomCount() > 0)
128             checkCompatibleAPI(mol.getAtom(0));
129         DfState local = new DfState(state);
130         local.setMol(mol);
131         return filter(new Mappings(query, mol, local), query, mol);
132     }
133 
134     /**
135      * Match the pattern at the provided root.
136      *
137      * @param root the root atom of the molecule
138      * @return mappings
139      * @see Mappings
140      */
matchRoot(IAtom root)141     Mappings matchRoot(IAtom root) {
142         checkCompatibleAPI(root);
143         IAtomContainer mol = root.getContainer();
144         if (query.getAtomCount() > 0 && ((IQueryAtom) query.getAtom(0)).matches(root)) {
145             DfState local = new DfState(state);
146             local.setRoot(root);
147             return filter(new Mappings(query, mol, local), query, mol);
148         } else {
149             return new Mappings(query, mol, Collections.<int[]>emptySet());
150         }
151     }
152 
153     /**
154      * Test whether the pattern matches at the provided atom.
155      *
156      * @param root the root atom of the molecule
157      * @return the pattern matches
158      */
matchesRoot(IAtom root)159     public boolean matchesRoot(IAtom root) {
160         return matchRoot(root).atLeast(1);
161     }
162 
163     /**
164      * Create a pattern which can be used to find molecules which contain the
165      * {@code query} structure. If a 'real' molecule is provided is is converted
166      * with {@link QueryAtomContainer#create(IAtomContainer, Expr.Type...)}
167      * matching elements, aromaticity status, and bond orders.
168      *
169      * @param query the substructure to find
170      * @return a pattern for finding the {@code query}
171      * @see QueryAtomContainer#create(IAtomContainer, Expr.Type...)
172      */
findSubstructure(IAtomContainer query)173     public static DfPattern findSubstructure(IAtomContainer query) {
174         if (query instanceof IQueryAtomContainer)
175             return new DfPattern((IQueryAtomContainer) query);
176         else
177             return new DfPattern(QueryAtomContainer.create(query,
178                                                            ALIPHATIC_ELEMENT,
179                                                            AROMATIC_ELEMENT,
180                                                            SINGLE_OR_AROMATIC,
181                                                            ALIPHATIC_ORDER,
182                                                            STEREOCHEMISTRY));
183     }
184 
185 }
186