1 /*
2  * Copyright (c) 2018 NextMove Software
3  *
4  * Contact: cdk-devel@lists.sourceforge.net
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version. All we ask is that proper credit is given
10  * for our work, which includes - but is not limited to - adding the above
11  * copyright notice to the beginning of your source code files, and to any
12  * copyright notice that you may distribute with programs based on this work.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17  * License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 package org.openscience.cdk.isomorphism.matchers;
25 
26 import com.google.common.cache.CacheBuilder;
27 import com.google.common.cache.CacheLoader;
28 import com.google.common.cache.LoadingCache;
29 import com.google.common.cache.Weigher;
30 import org.openscience.cdk.CDKConstants;
31 import org.openscience.cdk.ReactionRole;
32 import org.openscience.cdk.config.Elements;
33 import org.openscience.cdk.graph.Cycles;
34 import org.openscience.cdk.interfaces.IAtom;
35 import org.openscience.cdk.interfaces.IAtomContainer;
36 import org.openscience.cdk.interfaces.IAtomType;
37 import org.openscience.cdk.interfaces.IBond;
38 import org.openscience.cdk.isomorphism.DfPattern;
39 import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*;
40 
41 import java.util.ArrayDeque;
42 import java.util.Arrays;
43 import java.util.Deque;
44 import java.util.Objects;
45 
46 /**
47  * A expression stores a predicate tree for checking properties of atoms
48  * and bonds.
49  * <pre>
50  * Expr expr = new Expr(ELEMENT, 6);
51  * if (expr.matches(atom)) {
52  *   // expression matches if atom is a carbon!
53  * }
54  * </pre>
55  * An expression is composed of an {@link Type}, an optional 'value', and
56  * optionally one or more 'sub-expressions'. Each expr can either be a leaf or
57  * an intermediate (logical) node. The simplest expression trees contain a
58  * single leaf node:
59  * <pre>
60  * new Expr(IS_AROMATIC); // matches any aromatic atom
61  * new Expr(ELEMENT, 6);  // matches any carbon atom (atomic num=6)
62  * new Expr(VALENCE, 4);  // matches an atom with valence 4
63  * new Expr(DEGREE, 1);   // matches a terminal atom, e.g. -OH, =O
64  * new Expr(IS_IN_RING);  // matches any atom marked as in a ring
65  * new Expr(IS_HETERO);   // matches anything other than carbon or nitrogen
66  * new Expr(TRUE);        // any atom
67  * </pre>
68  * Logical internal nodes combine one or two sub-expressions with conjunction
69  * (and), disjunction (or), and negation (not).
70  * <br>
71  * Consider the following expression tree, is matches fluorine, chlorine, or
72  * bromine.
73  * <pre>
74  *     OR
75  *    /  \
76  *   F   OR
77  *      /  \
78  *     Cl   Br
79  * </pre>
80  * We can construct this tree as follows:
81  * <pre>
82  * Expr expr = new Expr(ELEMENT, 9) // F
83  *                  .or(new Expr(ELEMENT, 17)) // Cl
84  *                  .or(new Expr(ELEMENT, 35))  // Br</pre>
85  * A more verbose construction could also be used:
86  * <pre>
87  * Expr leafF  = new Expr(ELEMENT, 9); // F
88  * Expr leafCl = new Expr(ELEMENT, 17); // Cl
89  * Expr leafBr = new Expr(ELEMENT, 35);  // Br
90  * Expr node4  = new Expr(OR, leaf2, leaf3);
91  * Expr node5  = new Expr(OR, leaf1, node4);
92  * </pre>
93  *
94  * Expressions can be used to match bonds. Note some expressions apply to either
95  * atoms or bonds.
96  * <pre>
97  * new Expr(TRUE);               // any bond
98  * new Expr(IS_IN_RING);         // any ring bond
99  * new Expr(ALIPHATIC_ORDER, 2); // double bond
100  * </pre>
101  * See the documentation for {@link Type}s for a detail explanation of
102  * each type.
103  */
104 public final class Expr {
105 
106     /** Sentinel value for indicating the stereochemistry configuration
107      *  is not yet known. Since stereochemistry depends on the ordering
108      *  of neighbors we can't check this until those neighbors are
109      *  matched. */
110     public static final int UNKNOWN_STEREO = -1;
111 
112     // the expression type
113     private Type type;
114     // used for primitive leaf types
115     private int  value;
116     // used for unary and binary types; not, and, or
117     private Expr left, right;
118     // user for recursive expression types
119     private IAtomContainer query;
120     private DfPattern      ptrn;
121 
122     /**
123      * Creates an atom expression that will always match ({@link Type#TRUE}).
124      */
Expr()125     public Expr() {
126         this(Type.TRUE);
127     }
128 
129     /**
130      * Creates an atom expression for the specified primitive.
131      */
Expr(Type op)132     public Expr(Type op) {
133         setPrimitive(op);
134     }
135 
136     /**
137      * Creates an atom expression for the specified primitive and 'value'.
138      */
Expr(Type op, int val)139     public Expr(Type op, int val) {
140         setPrimitive(op, val);
141     }
142 
143     /**
144      * Creates a logical atom expression for the specified.
145      */
Expr(Type op, Expr left, Expr right)146     public Expr(Type op, Expr left, Expr right) {
147         setLogical(op, left, right);
148     }
149 
150     /**
151      * Creates a recursive atom expression.
152      */
Expr(Type op, IAtomContainer mol)153     public Expr(Type op, IAtomContainer mol) {
154         setRecursive(op, mol);
155     }
156 
157     /**
158      * Copy-constructor, creates a shallow copy of the provided expression.
159      *
160      * @param expr the expre
161      */
Expr(final Expr expr)162     public Expr(final Expr expr) {
163         set(expr);
164     }
165 
eq(Integer a, int b)166     private static boolean eq(Integer a, int b) {
167         return a != null && a == b;
168     }
169 
unbox(Integer x)170     private static int unbox(Integer x) {
171         return x != null ? x : 0;
172     }
173 
isInRingSize(IAtom atom, IBond prev, IAtom beg, int size, int req)174     private static boolean isInRingSize(IAtom atom, IBond prev, IAtom beg,
175                                         int size, int req) {
176         atom.setFlag(CDKConstants.VISITED, true);
177         for (IBond bond : atom.bonds()) {
178             if (bond == prev)
179                 continue;
180             IAtom nbr = bond.getOther(atom);
181             if (nbr.equals(beg))
182                 return size == req;
183             else if (size < req &&
184                      !nbr.getFlag(CDKConstants.VISITED) &&
185                      isInRingSize(nbr, bond, beg, size + 1, req))
186                 return true;
187         }
188         atom.setFlag(CDKConstants.VISITED, false);
189         return false;
190     }
191 
isInRingSize(IAtom atom, int size)192     private static boolean isInRingSize(IAtom atom, int size) {
193         for (IAtom a : atom.getContainer().atoms())
194             a.setFlag(CDKConstants.VISITED, false);
195         return isInRingSize(atom, null, atom, 1, size);
196     }
197 
isInSmallRingSize(IAtom atom, int size)198     private static boolean isInSmallRingSize(IAtom atom, int size) {
199         IAtomContainer mol    = atom.getContainer();
200         int[]          distTo = new int[mol.getAtomCount()];
201         Arrays.fill(distTo, 1 + distTo.length);
202         distTo[atom.getIndex()] = 0;
203         Deque<IAtom> queue = new ArrayDeque<>();
204         queue.push(atom);
205         int smallest = 1 + distTo.length;
206         while (!queue.isEmpty()) {
207             IAtom a    = queue.poll();
208             int   dist = 1 + distTo[a.getIndex()];
209             for (IBond b : a.bonds()) {
210                 IAtom nbr = b.getOther(a);
211                 if (dist < distTo[nbr.getIndex()]) {
212                     distTo[nbr.getIndex()] = dist;
213                     queue.add(nbr);
214                 } else if (dist != 2 + distTo[nbr.getIndex()]) {
215                     int tmp = dist + distTo[nbr.getIndex()];
216                     if (tmp < smallest)
217                         smallest = tmp;
218                 }
219             }
220             if (2 * dist > 1 + size)
221                 break;
222         }
223         return smallest == size;
224     }
225 
226     /**
227      * Internal match methods - does not null check.
228      *
229      * @param type the type
230      * @param atom the atom
231      * @return the expression matches
232      */
matches(Type type, IAtom atom, int stereo)233     private boolean matches(Type type, IAtom atom, int stereo) {
234         switch (type) {
235             // predicates
236             case TRUE:
237                 return true;
238             case FALSE:
239                 return false;
240             case IS_AROMATIC:
241                 return atom.isAromatic();
242             case IS_ALIPHATIC:
243                 return !atom.isAromatic();
244             case IS_IN_RING:
245                 return atom.isInRing();
246             case IS_IN_CHAIN:
247                 return !atom.isInRing();
248             case IS_HETERO:
249                 return !eq(atom.getAtomicNumber(), 6) &&
250                        !eq(atom.getAtomicNumber(), 1);
251             case HAS_IMPLICIT_HYDROGEN:
252                 return atom.getImplicitHydrogenCount() != null &&
253                        atom.getImplicitHydrogenCount() > 0;
254             case HAS_ISOTOPE:
255                 return atom.getMassNumber() != null;
256             case HAS_UNSPEC_ISOTOPE:
257                 return atom.getMassNumber() == null;
258             case UNSATURATED:
259                 for (IBond bond : atom.bonds())
260                     if (bond.getOrder() == IBond.Order.DOUBLE)
261                         return true;
262                 return false;
263             // value primitives
264             case ELEMENT:
265                 return eq(atom.getAtomicNumber(), value);
266             case ALIPHATIC_ELEMENT:
267                 return !atom.isAromatic() &&
268                        eq(atom.getAtomicNumber(), value);
269             case AROMATIC_ELEMENT:
270                 return atom.isAromatic() &&
271                        eq(atom.getAtomicNumber(), value);
272             case IMPL_H_COUNT:
273                 return eq(atom.getImplicitHydrogenCount(), value);
274             case TOTAL_H_COUNT:
275                 if (atom.getImplicitHydrogenCount() != null
276                     && atom.getImplicitHydrogenCount() > value)
277                     return false;
278                 return getTotalHCount(atom) == value;
279             case DEGREE:
280                 return atom.getBondCount() == value;
281             case HEAVY_DEGREE: // XXX: CDK quirk
282                 return atom.getBondCount() - (getTotalHCount(atom) - atom.getImplicitHydrogenCount()) == value;
283             case TOTAL_DEGREE:
284                 int x = atom.getBondCount() + unbox(atom.getImplicitHydrogenCount());
285                 return x == value;
286             case VALENCE:
287                 int v = unbox(atom.getImplicitHydrogenCount());
288                 if (v > value)
289                     return false;
290                 for (IBond bond : atom.bonds()) {
291                     if (bond.getOrder() != null)
292                         v += bond.getOrder().numeric();
293                 }
294                 return v == value;
295             case ISOTOPE:
296                 return eq(atom.getMassNumber(), value);
297             case FORMAL_CHARGE:
298                 return eq(atom.getFormalCharge(), value);
299             case RING_BOND_COUNT:
300                 if (!atom.isInRing() || atom.getBondCount() < value)
301                     return false;
302                 int rbonds = 0;
303                 for (IBond bond : atom.bonds())
304                     rbonds += bond.isInRing() ? 1 : 0;
305                 return rbonds == value;
306             case RING_COUNT:
307                 return atom.isInRing() && getRingCount(atom) == value;
308             case RING_SMALLEST:
309                 return atom.isInRing() && isInSmallRingSize(atom, value);
310             case RING_SIZE:
311                 return atom.isInRing() && isInRingSize(atom, value);
312             case HETERO_SUBSTITUENT_COUNT:
313                 if (atom.getBondCount() < value)
314                     return false;
315                 int q = 0;
316                 for (IBond bond : atom.bonds())
317                     q += matches(Type.IS_HETERO, bond.getOther(atom), stereo) ? 1 : 0;
318                 return q == value;
319             case INSATURATION:
320                 int db = 0;
321                 for (IBond bond : atom.bonds())
322                     if (bond.getOrder() == IBond.Order.DOUBLE)
323                         db++;
324                 return db == value;
325             case HYBRIDISATION_NUMBER:
326                 IAtomType.Hybridization hyb = atom.getHybridization();
327                 if (hyb == null)
328                     return false;
329                 switch (value) {
330                     case 1:
331                         return hyb == IAtomType.Hybridization.SP1;
332                     case 2:
333                         return hyb == IAtomType.Hybridization.SP2;
334                     case 3:
335                         return hyb == IAtomType.Hybridization.SP3;
336                     case 4:
337                         return hyb == IAtomType.Hybridization.SP3D1;
338                     case 5:
339                         return hyb == IAtomType.Hybridization.SP3D2;
340                     case 6:
341                         return hyb == IAtomType.Hybridization.SP3D3;
342                     case 7:
343                         return hyb == IAtomType.Hybridization.SP3D4;
344                     case 8:
345                         return hyb == IAtomType.Hybridization.SP3D5;
346                     default:
347                         return false;
348                 }
349             case PERIODIC_GROUP:
350                 return atom.getAtomicNumber() != null &&
351                        Elements.ofNumber(atom.getAtomicNumber()).group() == value;
352             case STEREOCHEMISTRY:
353                 return stereo == UNKNOWN_STEREO || stereo == value;
354             case REACTION_ROLE:
355                 ReactionRole role = atom.getProperty(CDKConstants.REACTION_ROLE);
356                 return role != null && role.ordinal() == value;
357             case AND:
358                 return left.matches(left.type, atom, stereo) &&
359                        right.matches(right.type, atom, stereo);
360             case OR:
361                 return left.matches(left.type, atom, stereo) ||
362                        right.matches(right.type, atom, stereo);
363             case NOT:
364                 return !left.matches(left.type, atom, stereo) ||
365                        // XXX: ugly but needed, when matching stereo
366                        (stereo == UNKNOWN_STEREO &&
367                         (left.type == STEREOCHEMISTRY ||
368                          left.type == OR && left.left.type == STEREOCHEMISTRY));
369             case RECURSIVE:
370                 if (ptrn == null)
371                     ptrn = DfPattern.findSubstructure(query);
372                 return ptrn.matchesRoot(atom);
373             default:
374                 throw new IllegalArgumentException("Cannot match AtomExpr, type=" + type);
375         }
376     }
377 
matches(IBond bond, int stereo)378     public boolean matches(IBond bond, int stereo) {
379         switch (type) {
380             case TRUE:
381                 return true;
382             case FALSE:
383                 return false;
384             case ALIPHATIC_ORDER:
385                 return !bond.isAromatic() &&
386                        bond.getOrder() != null &&
387                        bond.getOrder().numeric() == value;
388             case ORDER:
389                 return bond.getOrder() != null &&
390                        bond.getOrder().numeric() == value;
391             case IS_AROMATIC:
392                 return bond.isAromatic();
393             case IS_ALIPHATIC:
394                 return !bond.isAromatic();
395             case IS_IN_RING:
396                 return bond.isInRing();
397             case IS_IN_CHAIN:
398                 return !bond.isInRing();
399             case SINGLE_OR_AROMATIC:
400                 return bond.isAromatic() ||
401                        IBond.Order.SINGLE.equals(bond.getOrder());
402             case DOUBLE_OR_AROMATIC:
403                 return bond.isAromatic() ||
404                        IBond.Order.DOUBLE.equals(bond.getOrder());
405             case SINGLE_OR_DOUBLE:
406                 return IBond.Order.SINGLE.equals(bond.getOrder()) ||
407                        IBond.Order.DOUBLE.equals(bond.getOrder());
408             case STEREOCHEMISTRY:
409                 return stereo == UNKNOWN_STEREO || value == stereo;
410             case AND:
411                 return left.matches(bond, stereo) && right.matches(bond, stereo);
412             case OR:
413                 return left.matches(bond, stereo) || right.matches(bond, stereo);
414             case NOT:
415                 return !left.matches(bond, stereo) ||
416                        // XXX: ugly but needed, when matching stereo
417                        (stereo == UNKNOWN_STEREO &&
418                         (left.type == STEREOCHEMISTRY ||
419                          left.type == OR && left.left.type == STEREOCHEMISTRY));
420             default:
421                 throw new IllegalArgumentException("Cannot match BondExpr, type=" + type);
422         }
423     }
424 
matches(IBond bond)425     public boolean matches(IBond bond) {
426         return matches(bond, UNKNOWN_STEREO);
427     }
428 
getTotalHCount(IAtom atom)429     private static int getTotalHCount(IAtom atom) {
430         int h = unbox(atom.getImplicitHydrogenCount());
431         for (IBond bond : atom.bonds())
432             if (eq(bond.getOther(atom).getAtomicNumber(), 1))
433                 h++;
434         return h;
435     }
436 
437     /**
438      * Test whether this expression matches an atom instance.
439      *
440      * @param atom an atom (nullable)
441      * @return the atom matches
442      */
matches(final IAtom atom)443     public boolean matches(final IAtom atom) {
444         return atom != null && matches(type, atom, UNKNOWN_STEREO);
445     }
446 
matches(final IAtom atom, final int stereo)447     public boolean matches(final IAtom atom, final int stereo) {
448         return atom != null && matches(type, atom, stereo);
449     }
450 
451     /**
452      * Utility, combine this expression with another, using conjunction.
453      * The expression will only match if both conditions are met.
454      *
455      * @param expr the other expression
456      * @return self for chaining
457      */
and(Expr expr)458     public Expr and(Expr expr) {
459         if (type == Type.TRUE) {
460             set(expr);
461         } else if (expr.type != Type.TRUE) {
462             if (type.isLogical() && !expr.type.isLogical()) {
463                 if (type == AND)
464                     right.and(expr);
465                 else if (type != NOT)
466                     setLogical(Type.AND, expr, new Expr(this));
467                 else
468                     setLogical(Type.AND, expr, new Expr(this));
469             } else {
470                 setLogical(Type.AND, new Expr(this), expr);
471             }
472         }
473         return this;
474     }
475 
476     /**
477      * Utility, combine this expression with another, using disjunction.
478      * The expression will match if either conditions is met.
479      * @param expr the other expression
480      * @return self for chaining
481      */
or(Expr expr)482     public Expr or(Expr expr) {
483         if (type == Type.TRUE ||
484             type == Type.FALSE ||
485             type == NONE) {
486             set(expr);
487         } else if (expr.type != Type.TRUE &&
488                    expr.type != Type.FALSE &&
489                    expr.type != Type.NONE) {
490             if (type.isLogical() && !expr.type.isLogical()) {
491                 if (type == OR)
492                     right.or(expr);
493                 else if (type != NOT)
494                     setLogical(Type.OR, expr, new Expr(this));
495                 else
496                     setLogical(Type.OR, new Expr(this), expr);
497             }
498             else
499                 setLogical(Type.OR, new Expr(this), expr);
500         }
501         return this;
502     }
503 
504     /**
505      * Negate the expression, the expression will not return true only if
506      * the condition is not met. Some expressions have explicit types
507      * that are more efficient to use, for example:
508      * <code>IS_IN_RING =&gt; NOT(IS_IN_RING) =&gt; IS_IN_CHAIN</code>. This
509      * negation method will use the more efficient type where possible.
510      * <pre>{@code
511      * Expr e = new Expr(ELEMENT, 8); // SMARTS: [#8]
512      * e.negate(); // SMARTS: [!#8]
513      * }</pre>
514      * @return self for chaining
515      */
negate()516     public Expr negate() {
517         switch (type) {
518             case TRUE:
519                 type = Type.FALSE;
520                 break;
521             case FALSE:
522                 type = Type.TRUE;
523                 break;
524             case HAS_ISOTOPE:
525                 type = Type.HAS_UNSPEC_ISOTOPE;
526                 break;
527             case HAS_UNSPEC_ISOTOPE:
528                 type = Type.HAS_ISOTOPE;
529                 break;
530             case IS_AROMATIC:
531                 type = Type.IS_ALIPHATIC;
532                 break;
533             case IS_ALIPHATIC:
534                 type = Type.IS_AROMATIC;
535                 break;
536             case IS_IN_RING:
537                 type = Type.IS_IN_CHAIN;
538                 break;
539             case IS_IN_CHAIN:
540                 type = Type.IS_IN_RING;
541                 break;
542             case NOT:
543                 set(this.left);
544                 break;
545             default:
546                 setLogical(Type.NOT, new Expr(this), null);
547                 break;
548         }
549         return this;
550     }
551 
552     /**
553      * Set the primitive value of this atom expression.
554      *
555      * @param type the type of expression
556      * @param val  the value to check
557      */
setPrimitive(Type type, int val)558     public void setPrimitive(Type type, int val) {
559         if (type.hasValue()) {
560             this.type = type;
561             this.value = val;
562             this.left = null;
563             this.right = null;
564             this.query = null;
565         } else {
566             throw new IllegalArgumentException("Value provided for non-value "
567                                                + "expression type!");
568         }
569     }
570 
571     /**
572      * Set the primitive value of this atom expression.
573      *
574      * @param type the type of expression
575      */
setPrimitive(Type type)576     public void setPrimitive(Type type) {
577         if (!type.hasValue() && !type.isLogical()) {
578             this.type = type;
579             this.value = -1;
580             this.left = null;
581             this.right = null;
582             this.query = null;
583         } else {
584             throw new IllegalArgumentException("Expression type requires a value!");
585         }
586     }
587 
588     /**
589      * Set the logical value of this atom expression.
590      *
591      * @param type  the type of expression
592      * @param left  the left sub-expression
593      * @param right the right sub-expression
594      */
setLogical(Type type, Expr left, Expr right)595     public void setLogical(Type type, Expr left, Expr right) {
596         switch (type) {
597             case AND:
598             case OR:
599                 this.type = type;
600                 this.value = 0;
601                 this.left = left;
602                 this.right = right;
603                 this.query = null;
604                 break;
605             case NOT:
606                 this.type = type;
607                 if (left != null && right == null)
608                     this.left = left;
609                 else if (left == null && right != null)
610                     this.left = right;
611                 else if (left != null)
612                     throw new IllegalArgumentException("Only one sub-expression"
613                                                        + " should be provided"
614                                                        + " for NOT expressions!");
615                 this.query = null;
616                 this.value = 0;
617                 break;
618             default:
619                 throw new IllegalArgumentException("Left/Right sub expressions "
620                                                    + "supplied for "
621                                                    + " non-logical operator!");
622         }
623     }
624 
625     /**
626      * Set the recursive value of this atom expression.
627      *
628      * @param type the type of expression
629      * @param mol  the recursive pattern
630      */
setRecursive(Type type, IAtomContainer mol)631     private void setRecursive(Type type, IAtomContainer mol) {
632         switch (type) {
633             case RECURSIVE:
634                 this.type = type;
635                 this.value = 0;
636                 this.left = null;
637                 this.right = null;
638                 this.query = mol;
639                 this.ptrn  = null;
640                 break;
641             default:
642                 throw new IllegalArgumentException();
643         }
644     }
645 
646     /**
647      * Set this expression to another (shallow copy).
648      *
649      * @param expr the other expression
650      */
set(Expr expr)651     public void set(Expr expr) {
652         this.type = expr.type;
653         this.value = expr.value;
654         this.left = expr.left;
655         this.right = expr.right;
656         this.query = expr.query;
657     }
658 
659     /**
660      * Access the type of the atom expression.
661      *
662      * @return the expression type
663      */
type()664     public Type type() {
665         return type;
666     }
667 
668     /**
669      * Access the value of this atom expression being
670      * tested.
671      *
672      * @return the expression value
673      */
value()674     public int value() {
675         return value;
676     }
677 
678     /**
679      * Access the left sub-expression of this atom expression being
680      * tested.
681      *
682      * @return the expression value
683      */
left()684     public Expr left() {
685         return left;
686     }
687 
688     /**
689      * Access the right sub-expression of this atom expression being
690      * tested.
691      *
692      * @return the expression value
693      */
right()694     public Expr right() {
695         return right;
696     }
697 
698     /**
699      * Access the sub-query, only applicable to recursive types.
700      * @return the sub-query
701      * @see Type#RECURSIVE
702      */
subquery()703     public IAtomContainer subquery() {
704         return query;
705     }
706 
707     /* Property Caches */
708     private static LoadingCache<IAtomContainer, int[]> cacheRCounts;
709 
getRingCounts(IAtomContainer mol)710     private static int[] getRingCounts(IAtomContainer mol) {
711         int[] rcounts = new int[mol.getAtomCount()];
712         for (int[] path : Cycles.mcb(mol).paths()) {
713             for (int i = 1; i < path.length; i++) {
714                 rcounts[path[i]]++;
715             }
716         }
717         return rcounts;
718     }
719 
getRingCount(IAtom atom)720     private static int getRingCount(IAtom atom) {
721         final IAtomContainer mol = atom.getContainer();
722         if (cacheRCounts == null) {
723             cacheRCounts = CacheBuilder.newBuilder()
724                                        .maximumWeight(1000) // 4KB
725                                        .weigher(new Weigher<IAtomContainer, int[]>() {
726                                            @Override
727                                            public int weigh(IAtomContainer key,
728                                                             int[] value) {
729                                                return value.length;
730                                            }
731                                        })
732                                        .build(new CacheLoader<IAtomContainer, int[]>() {
733                                            @Override
734                                            public int[] load(IAtomContainer key) throws Exception {
735                                                return getRingCounts(key);
736                                            }
737                                        });
738         }
739         return cacheRCounts.getUnchecked(mol)[atom.getIndex()];
740     }
741 
742     /**
743      * {@inheritDoc}
744      */
745     @Override
equals(Object o)746     public boolean equals(Object o) {
747         if (this == o) return true;
748         if (o == null || getClass() != o.getClass()) return false;
749         Expr atomExpr = (Expr) o;
750         return type == atomExpr.type &&
751                value == atomExpr.value &&
752                Objects.equals(left, atomExpr.left) && Objects.equals(right, atomExpr.right);
753     }
754 
755     /**
756      * {@inheritDoc}
757      */
758     @Override
hashCode()759     public int hashCode() {
760         return Objects.hash(type, value, left, right);
761     }
762 
763     /**
764      * {@inheritDoc}
765      */
766     @Override
toString()767     public String toString() {
768         StringBuilder sb = new StringBuilder();
769         sb.append(type);
770         if (type.isLogical()) {
771             switch (type) {
772                 case NOT:
773                     sb.append('(').append(left).append(')');
774                     break;
775                 case OR:
776                 case AND:
777                     sb.append('(').append(left).append(',').append(right).append(')');
778                     break;
779             }
780         } else if (type.hasValue()) {
781             sb.append('=').append(value);
782         } else if (type == Type.RECURSIVE) {
783             sb.append("(...)");
784         }
785         return sb.toString();
786     }
787 
788     /**
789      * Types of expression, for use in the {@link Expr} tree object.
790      */
791     public enum Type {
792         /** Always returns true. */
793         TRUE,
794         /** Always returns false. */
795         FALSE,
796         /** Return true if {@link IAtom#isAromatic} or {@link IBond#isAromatic} is
797          *  true. */
798         IS_AROMATIC,
799         /** Return true if {@link IAtom#isAromatic} or {@link IBond#isAromatic} is
800          *  flase. */
801         IS_ALIPHATIC,
802         /** Return true if {@link IAtom#isInRing()} or {@link IBond#isInRing} is
803          *  true. */
804         IS_IN_RING,
805         /** Return true if {@link IAtom#isInRing()} or {@link IBond#isInRing} is
806          *  false. */
807         IS_IN_CHAIN,
808         /** Return true if {@link IAtom#getAtomicNumber()} is neither 6 (carbon)
809          *  nor 1 (hydrogen). */
810         IS_HETERO,
811         /** Return true if {@link IAtom#getAtomicNumber()} is neither 6 (carbon)
812          *  nor 1 (hydrogen) and the atom is aliphatic. */
813         IS_ALIPHATIC_HETERO,
814         /** True if the hydrogen count ({@link IAtom#getImplicitHydrogenCount()})
815          *  is &gt; 0. */
816         HAS_IMPLICIT_HYDROGEN,
817         /** True if the atom mass ({@link IAtom#getMassNumber()}) is non-null. */
818         HAS_ISOTOPE,
819         /** True if the atom mass ({@link IAtom#getMassNumber()}) is null
820          *  (unspecified). */
821         HAS_UNSPEC_ISOTOPE,
822         /** True if the atom is adjacent to a hetero atom. */
823         HAS_HETERO_SUBSTITUENT,
824         /** True if the atom is adjacent to an aliphatic hetero atom. */
825         HAS_ALIPHATIC_HETERO_SUBSTITUENT,
826         /** True if the atom is unsaturated.
827          *  TODO: check if CACTVS if double bond to non-carbons are counted. */
828         UNSATURATED,
829         /** True if the bond order ({@link IBond#getOrder()}) is single or double. */
830         SINGLE_OR_DOUBLE,
831         /** True if the bond order ({@link IBond#getOrder()}) is single or the bond
832          *  is marked as aromatic ({@link IBond#isAromatic()}). */
833         SINGLE_OR_AROMATIC,
834         /** True if the bond order ({@link IBond#getOrder()}) is double or the bond
835          *  is marked as aromatic ({@link IBond#isAromatic()}). */
836         DOUBLE_OR_AROMATIC,
837 
838         /* Expressions that take values */
839 
840         /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()})
841          *  of an atom equals the specified 'value'. */
842         ELEMENT,
843         /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()})
844          *  of an atom equals the specified 'value' and {@link IAtom#isAromatic()}
845          *  is false. */
846         ALIPHATIC_ELEMENT,
847         /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()})
848          *  of an atom equals the specified 'value' and {@link IAtom#isAromatic()}
849          *  is true. */
850         AROMATIC_ELEMENT,
851         /** True if the hydrogen count ({@link IAtom#getImplicitHydrogenCount()})
852          *  of an atom equals the specified 'value'. */
853         IMPL_H_COUNT,
854         /** True if the total hydrogen count of an atom equals the specified
855          * 'value'. */
856         TOTAL_H_COUNT,
857         /** True if the degree ({@link IAtom#getBondCount()}) of an atom
858          *  equals the specified 'value'. */
859         DEGREE,
860         /** True if the total degree ({@link IAtom#getBondCount()} +
861          *  {@link IAtom#getImplicitHydrogenCount()}) of an atom equals the
862          *  specified 'value'. */
863         TOTAL_DEGREE,
864         /** True if the degree ({@link IAtom#getBondCount()}) - any hydrogen atoms
865          x*  equals the specified 'value'. */
866         HEAVY_DEGREE,
867         /** True if the valence of an atom equals the specified 'value'. */
868         VALENCE,
869         /** True if the mass ({@link IAtom#getMassNumber()}) of an atom equals the
870          *  specified 'value'. */
871         ISOTOPE,
872         /** True if the formal charge ({@link IAtom#getFormalCharge()}) of an atom
873          *  equals the specified 'value'. */
874         FORMAL_CHARGE,
875         /** True if the ring bond count of an atom equals the specified 'value'. */
876         RING_BOND_COUNT,
877         /** True if the number of rings this atom belongs to matches the specified
878          *  'value'. Here a ring means a member of the Minimum Cycle Basis (MCB)
879          *  (aka Smallest Set of Smallest Rings). Since the MCB is non-unique the
880          *  numbers often don't make sense of bicyclo systems. */
881         RING_COUNT,
882         /** True if the smallest ring this atom belongs to equals the specified
883          *  'value' */
884         RING_SMALLEST,
885         /** True if the this atom belongs to a ring equal to the specified
886          * 'value' */
887         RING_SIZE,
888         /** True if the this atom hybridisation ({@link IAtom#getHybridization()})
889          *  is equal to the specified 'value'. SP1=1, SP2=2, SP3=3, SP3D1=4,
890          *  SP3D2=5, SP3D3=6, SP3D4=7, SP3D5=8. */
891         HYBRIDISATION_NUMBER,
892         /** True if the number hetero atoms (see {@link #IS_HETERO}) this atom is
893          *  next to is equal to the specified value. */
894         HETERO_SUBSTITUENT_COUNT,
895         /** True if the number hetero atoms (see {@link #IS_ALIPHATIC_HETERO}) this atom is
896          *  next to is equal to the specified value. */
897         ALIPHATIC_HETERO_SUBSTITUENT_COUNT,
898         /** True if the periodic table group of this atom is equal to the specified
899          *  value. For example halogens are Group '17'.*/
900         PERIODIC_GROUP,
901         /** True if the number of double bonds equals the specified value.
902          *  TODO: check if CACTVS if double bond to non-carbons are counted. */
903         INSATURATION,
904         /** True if an atom has the specified reaction role. */
905         REACTION_ROLE,
906         /** True if an atom or bond has the specified stereochemistry value, see
907          *  ({@link org.openscience.cdk.interfaces.IStereoElement}) for a list of
908          *  values.*/
909         STEREOCHEMISTRY,
910         /** True if the bond order {@link IBond#getOrder()} equals the specified
911          *  value and the bond is not marked as aromatic
912          *  ({@link IAtom#isAromatic()}). */
913         ALIPHATIC_ORDER,
914         /** True if the bond order {@link IBond#getOrder()} equals the specified
915          *  value and the bond, aromaticity is not check. */
916         ORDER,
917 
918         /* Binary/unary internal nodes */
919 
920         /** True if both the subexpressions are true. */
921         AND,
922         /** True if both either subexpressions are true. */
923         OR,
924         /** True if the subexpression is not true. */
925         NOT,
926 
927         /** Recursive query. */
928         RECURSIVE,
929 
930         /** Undefined expression type. */
931         NONE;
932 
isLogical()933         boolean isLogical() {
934             switch (this) {
935                 case OR:
936                 case NOT:
937                 case AND:
938                     return true;
939                 default:
940                     return false;
941             }
942         }
943 
hasValue()944         boolean hasValue() {
945             switch (this) {
946                 case OR:
947                 case NOT:
948                 case AND:
949                 case TRUE:
950                 case FALSE:
951                 case NONE:
952                 case IS_AROMATIC:
953                 case IS_ALIPHATIC:
954                 case IS_IN_RING:
955                 case IS_IN_CHAIN:
956                 case IS_HETERO:
957                 case IS_ALIPHATIC_HETERO:
958                 case HAS_IMPLICIT_HYDROGEN:
959                 case HAS_ISOTOPE:
960                 case HAS_UNSPEC_ISOTOPE:
961                 case HAS_ALIPHATIC_HETERO_SUBSTITUENT:
962                 case HAS_HETERO_SUBSTITUENT:
963                 case UNSATURATED:
964                 case SINGLE_OR_AROMATIC:
965                 case SINGLE_OR_DOUBLE:
966                 case DOUBLE_OR_AROMATIC:
967                 case RECURSIVE:
968                     return false;
969                 default:
970                     return true;
971             }
972         }
973     }
974 }
975