1 /* 2 * Copyright (C) 2018 NextMove Software 3 * 4 * Contact: cdk-devel@lists.sourceforge.net 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation; either version 2.1 of the License, or (at 9 * your option) any later version. All we ask is that proper credit is given 10 * for our work, which includes - but is not limited to - adding the above 11 * copyright notice to the beginning of your source code files, and to any 12 * copyright notice that you may distribute with programs based on this work. 13 * 14 * This program is distributed in the hope that it will be useful, but WITHOUT 15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 * License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24 package org.openscience.cdk.isomorphism; 25 26 import org.openscience.cdk.interfaces.IAtom; 27 import org.openscience.cdk.interfaces.IAtomContainer; 28 import org.openscience.cdk.isomorphism.matchers.Expr; 29 import org.openscience.cdk.isomorphism.matchers.IQueryAtom; 30 import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer; 31 import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; 32 33 import java.util.Collections; 34 35 import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*; 36 37 /** 38 * The depth-first (DF) backtracking sub-structure matching algorithm so named 39 * because it matches the molecule in a depth-first manner (bond by bond). The 40 * algorithm is a simple but elegant backtracking search iterating over the 41 * bonds of a query. Like the popular VF2 the algorithm, it uses linear memory 42 * but unlike VF2 bonded atoms are selected from the neighbor lists of already 43 * mapped atoms. 44 * <br><br> 45 * In practice VF2 take O(N<sup>2</sup>) to match a linear chain against it's 46 * self whilst this algorithm is O(N). 47 * <br><br> 48 * Usage: 49 * <pre>{@code 50 * DfPattern ptrn = DfPattern.findSubstructure(query); 51 * 52 * // has match? 53 * if (ptrn.matches(mol)) { 54 * 55 * } 56 * 57 * // get lazy mapping iterator 58 * Mappings mappings = ptrn.matchAll(mol); 59 * for (int[] amap : mappings) { 60 * 61 * } 62 * 63 * // test if pattern matches at a given atom 64 * for (IAtom atom : mol.atoms()) { 65 * if (ptrn.matchesRoot(atom)) { 66 * 67 * } 68 * } 69 * }</pre> 70 * <b>References</b> 71 * <ul> 72 * <li>{@cdk.cite Ray57}</li> 73 * <li>{@cdk.cite Ullmann76}</li> 74 * <li>{@cdk.cite Cordella04}</li> 75 * <li>{@cdk.cite Jeliazkova18}</li> 76 * </ul> 77 * 78 * @author John Mayfield 79 * @see DfPattern 80 * @see Mappings 81 */ 82 public class DfPattern extends Pattern { 83 84 private final IQueryAtomContainer query; 85 private final DfState state; 86 DfPattern(IQueryAtomContainer query)87 private DfPattern(IQueryAtomContainer query) { 88 this.query = query; 89 determineFilters(query); 90 state = new DfState(query); 91 } 92 checkCompatibleAPI(IAtom atom)93 private static void checkCompatibleAPI(IAtom atom) { 94 if (atom.getContainer() == null) { 95 throw new IllegalArgumentException( 96 "This API can only be used with the option " + 97 "CdkUseLegacyAtomContainer=false (default). The atoms in " + 98 "the molecule provided do not know about their parent " + 99 "molecule" 100 ); 101 } 102 } 103 104 /** 105 * {@inheritDoc} 106 */ 107 @Override match(IAtomContainer target)108 public int[] match(IAtomContainer target) { 109 return matchAll(target).first(); 110 } 111 112 /** 113 * {@inheritDoc} 114 */ 115 @Override matches(IAtomContainer target)116 public boolean matches(IAtomContainer target) { 117 return matchAll(target).atLeast(1); 118 } 119 120 /** 121 * {@inheritDoc} 122 */ 123 @Override matchAll(IAtomContainer mol)124 public Mappings matchAll(IAtomContainer mol) { 125 if (mol.getAtomCount() < query.getAtomCount()) 126 return new Mappings(query, mol, Collections.<int[]>emptySet()); 127 if (mol.getAtomCount() > 0) 128 checkCompatibleAPI(mol.getAtom(0)); 129 DfState local = new DfState(state); 130 local.setMol(mol); 131 return filter(new Mappings(query, mol, local), query, mol); 132 } 133 134 /** 135 * Match the pattern at the provided root. 136 * 137 * @param root the root atom of the molecule 138 * @return mappings 139 * @see Mappings 140 */ matchRoot(IAtom root)141 Mappings matchRoot(IAtom root) { 142 checkCompatibleAPI(root); 143 IAtomContainer mol = root.getContainer(); 144 if (query.getAtomCount() > 0 && ((IQueryAtom) query.getAtom(0)).matches(root)) { 145 DfState local = new DfState(state); 146 local.setRoot(root); 147 return filter(new Mappings(query, mol, local), query, mol); 148 } else { 149 return new Mappings(query, mol, Collections.<int[]>emptySet()); 150 } 151 } 152 153 /** 154 * Test whether the pattern matches at the provided atom. 155 * 156 * @param root the root atom of the molecule 157 * @return the pattern matches 158 */ matchesRoot(IAtom root)159 public boolean matchesRoot(IAtom root) { 160 return matchRoot(root).atLeast(1); 161 } 162 163 /** 164 * Create a pattern which can be used to find molecules which contain the 165 * {@code query} structure. If a 'real' molecule is provided is is converted 166 * with {@link QueryAtomContainer#create(IAtomContainer, Expr.Type...)} 167 * matching elements, aromaticity status, and bond orders. 168 * 169 * @param query the substructure to find 170 * @return a pattern for finding the {@code query} 171 * @see QueryAtomContainer#create(IAtomContainer, Expr.Type...) 172 */ findSubstructure(IAtomContainer query)173 public static DfPattern findSubstructure(IAtomContainer query) { 174 if (query instanceof IQueryAtomContainer) 175 return new DfPattern((IQueryAtomContainer) query); 176 else 177 return new DfPattern(QueryAtomContainer.create(query, 178 ALIPHATIC_ELEMENT, 179 AROMATIC_ELEMENT, 180 SINGLE_OR_AROMATIC, 181 ALIPHATIC_ORDER, 182 STEREOCHEMISTRY)); 183 } 184 185 } 186