1 /* 2 * Copyright (c) 2018 NextMove Software 3 * 4 * Contact: cdk-devel@lists.sourceforge.net 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation; either version 2.1 of the License, or (at 9 * your option) any later version. All we ask is that proper credit is given 10 * for our work, which includes - but is not limited to - adding the above 11 * copyright notice to the beginning of your source code files, and to any 12 * copyright notice that you may distribute with programs based on this work. 13 * 14 * This program is distributed in the hope that it will be useful, but WITHOUT 15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 * License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24 package org.openscience.cdk.isomorphism.matchers; 25 26 import com.google.common.cache.CacheBuilder; 27 import com.google.common.cache.CacheLoader; 28 import com.google.common.cache.LoadingCache; 29 import com.google.common.cache.Weigher; 30 import org.openscience.cdk.CDKConstants; 31 import org.openscience.cdk.ReactionRole; 32 import org.openscience.cdk.config.Elements; 33 import org.openscience.cdk.graph.Cycles; 34 import org.openscience.cdk.interfaces.IAtom; 35 import org.openscience.cdk.interfaces.IAtomContainer; 36 import org.openscience.cdk.interfaces.IAtomType; 37 import org.openscience.cdk.interfaces.IBond; 38 import org.openscience.cdk.isomorphism.DfPattern; 39 import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*; 40 41 import java.util.ArrayDeque; 42 import java.util.Arrays; 43 import java.util.Deque; 44 import java.util.Objects; 45 46 /** 47 * A expression stores a predicate tree for checking properties of atoms 48 * and bonds. 49 * <pre> 50 * Expr expr = new Expr(ELEMENT, 6); 51 * if (expr.matches(atom)) { 52 * // expression matches if atom is a carbon! 53 * } 54 * </pre> 55 * An expression is composed of an {@link Type}, an optional 'value', and 56 * optionally one or more 'sub-expressions'. Each expr can either be a leaf or 57 * an intermediate (logical) node. The simplest expression trees contain a 58 * single leaf node: 59 * <pre> 60 * new Expr(IS_AROMATIC); // matches any aromatic atom 61 * new Expr(ELEMENT, 6); // matches any carbon atom (atomic num=6) 62 * new Expr(VALENCE, 4); // matches an atom with valence 4 63 * new Expr(DEGREE, 1); // matches a terminal atom, e.g. -OH, =O 64 * new Expr(IS_IN_RING); // matches any atom marked as in a ring 65 * new Expr(IS_HETERO); // matches anything other than carbon or nitrogen 66 * new Expr(TRUE); // any atom 67 * </pre> 68 * Logical internal nodes combine one or two sub-expressions with conjunction 69 * (and), disjunction (or), and negation (not). 70 * <br> 71 * Consider the following expression tree, is matches fluorine, chlorine, or 72 * bromine. 73 * <pre> 74 * OR 75 * / \ 76 * F OR 77 * / \ 78 * Cl Br 79 * </pre> 80 * We can construct this tree as follows: 81 * <pre> 82 * Expr expr = new Expr(ELEMENT, 9) // F 83 * .or(new Expr(ELEMENT, 17)) // Cl 84 * .or(new Expr(ELEMENT, 35)) // Br</pre> 85 * A more verbose construction could also be used: 86 * <pre> 87 * Expr leafF = new Expr(ELEMENT, 9); // F 88 * Expr leafCl = new Expr(ELEMENT, 17); // Cl 89 * Expr leafBr = new Expr(ELEMENT, 35); // Br 90 * Expr node4 = new Expr(OR, leaf2, leaf3); 91 * Expr node5 = new Expr(OR, leaf1, node4); 92 * </pre> 93 * 94 * Expressions can be used to match bonds. Note some expressions apply to either 95 * atoms or bonds. 96 * <pre> 97 * new Expr(TRUE); // any bond 98 * new Expr(IS_IN_RING); // any ring bond 99 * new Expr(ALIPHATIC_ORDER, 2); // double bond 100 * </pre> 101 * See the documentation for {@link Type}s for a detail explanation of 102 * each type. 103 */ 104 public final class Expr { 105 106 /** Sentinel value for indicating the stereochemistry configuration 107 * is not yet known. Since stereochemistry depends on the ordering 108 * of neighbors we can't check this until those neighbors are 109 * matched. */ 110 public static final int UNKNOWN_STEREO = -1; 111 112 // the expression type 113 private Type type; 114 // used for primitive leaf types 115 private int value; 116 // used for unary and binary types; not, and, or 117 private Expr left, right; 118 // user for recursive expression types 119 private IAtomContainer query; 120 private DfPattern ptrn; 121 122 /** 123 * Creates an atom expression that will always match ({@link Type#TRUE}). 124 */ Expr()125 public Expr() { 126 this(Type.TRUE); 127 } 128 129 /** 130 * Creates an atom expression for the specified primitive. 131 */ Expr(Type op)132 public Expr(Type op) { 133 setPrimitive(op); 134 } 135 136 /** 137 * Creates an atom expression for the specified primitive and 'value'. 138 */ Expr(Type op, int val)139 public Expr(Type op, int val) { 140 setPrimitive(op, val); 141 } 142 143 /** 144 * Creates a logical atom expression for the specified. 145 */ Expr(Type op, Expr left, Expr right)146 public Expr(Type op, Expr left, Expr right) { 147 setLogical(op, left, right); 148 } 149 150 /** 151 * Creates a recursive atom expression. 152 */ Expr(Type op, IAtomContainer mol)153 public Expr(Type op, IAtomContainer mol) { 154 setRecursive(op, mol); 155 } 156 157 /** 158 * Copy-constructor, creates a shallow copy of the provided expression. 159 * 160 * @param expr the expre 161 */ Expr(final Expr expr)162 public Expr(final Expr expr) { 163 set(expr); 164 } 165 eq(Integer a, int b)166 private static boolean eq(Integer a, int b) { 167 return a != null && a == b; 168 } 169 unbox(Integer x)170 private static int unbox(Integer x) { 171 return x != null ? x : 0; 172 } 173 isInRingSize(IAtom atom, IBond prev, IAtom beg, int size, int req)174 private static boolean isInRingSize(IAtom atom, IBond prev, IAtom beg, 175 int size, int req) { 176 atom.setFlag(CDKConstants.VISITED, true); 177 for (IBond bond : atom.bonds()) { 178 if (bond == prev) 179 continue; 180 IAtom nbr = bond.getOther(atom); 181 if (nbr.equals(beg)) 182 return size == req; 183 else if (size < req && 184 !nbr.getFlag(CDKConstants.VISITED) && 185 isInRingSize(nbr, bond, beg, size + 1, req)) 186 return true; 187 } 188 atom.setFlag(CDKConstants.VISITED, false); 189 return false; 190 } 191 isInRingSize(IAtom atom, int size)192 private static boolean isInRingSize(IAtom atom, int size) { 193 for (IAtom a : atom.getContainer().atoms()) 194 a.setFlag(CDKConstants.VISITED, false); 195 return isInRingSize(atom, null, atom, 1, size); 196 } 197 isInSmallRingSize(IAtom atom, int size)198 private static boolean isInSmallRingSize(IAtom atom, int size) { 199 IAtomContainer mol = atom.getContainer(); 200 int[] distTo = new int[mol.getAtomCount()]; 201 Arrays.fill(distTo, 1 + distTo.length); 202 distTo[atom.getIndex()] = 0; 203 Deque<IAtom> queue = new ArrayDeque<>(); 204 queue.push(atom); 205 int smallest = 1 + distTo.length; 206 while (!queue.isEmpty()) { 207 IAtom a = queue.poll(); 208 int dist = 1 + distTo[a.getIndex()]; 209 for (IBond b : a.bonds()) { 210 IAtom nbr = b.getOther(a); 211 if (dist < distTo[nbr.getIndex()]) { 212 distTo[nbr.getIndex()] = dist; 213 queue.add(nbr); 214 } else if (dist != 2 + distTo[nbr.getIndex()]) { 215 int tmp = dist + distTo[nbr.getIndex()]; 216 if (tmp < smallest) 217 smallest = tmp; 218 } 219 } 220 if (2 * dist > 1 + size) 221 break; 222 } 223 return smallest == size; 224 } 225 226 /** 227 * Internal match methods - does not null check. 228 * 229 * @param type the type 230 * @param atom the atom 231 * @return the expression matches 232 */ matches(Type type, IAtom atom, int stereo)233 private boolean matches(Type type, IAtom atom, int stereo) { 234 switch (type) { 235 // predicates 236 case TRUE: 237 return true; 238 case FALSE: 239 return false; 240 case IS_AROMATIC: 241 return atom.isAromatic(); 242 case IS_ALIPHATIC: 243 return !atom.isAromatic(); 244 case IS_IN_RING: 245 return atom.isInRing(); 246 case IS_IN_CHAIN: 247 return !atom.isInRing(); 248 case IS_HETERO: 249 return !eq(atom.getAtomicNumber(), 6) && 250 !eq(atom.getAtomicNumber(), 1); 251 case HAS_IMPLICIT_HYDROGEN: 252 return atom.getImplicitHydrogenCount() != null && 253 atom.getImplicitHydrogenCount() > 0; 254 case HAS_ISOTOPE: 255 return atom.getMassNumber() != null; 256 case HAS_UNSPEC_ISOTOPE: 257 return atom.getMassNumber() == null; 258 case UNSATURATED: 259 for (IBond bond : atom.bonds()) 260 if (bond.getOrder() == IBond.Order.DOUBLE) 261 return true; 262 return false; 263 // value primitives 264 case ELEMENT: 265 return eq(atom.getAtomicNumber(), value); 266 case ALIPHATIC_ELEMENT: 267 return !atom.isAromatic() && 268 eq(atom.getAtomicNumber(), value); 269 case AROMATIC_ELEMENT: 270 return atom.isAromatic() && 271 eq(atom.getAtomicNumber(), value); 272 case IMPL_H_COUNT: 273 return eq(atom.getImplicitHydrogenCount(), value); 274 case TOTAL_H_COUNT: 275 if (atom.getImplicitHydrogenCount() != null 276 && atom.getImplicitHydrogenCount() > value) 277 return false; 278 return getTotalHCount(atom) == value; 279 case DEGREE: 280 return atom.getBondCount() == value; 281 case HEAVY_DEGREE: // XXX: CDK quirk 282 return atom.getBondCount() - (getTotalHCount(atom) - atom.getImplicitHydrogenCount()) == value; 283 case TOTAL_DEGREE: 284 int x = atom.getBondCount() + unbox(atom.getImplicitHydrogenCount()); 285 return x == value; 286 case VALENCE: 287 int v = unbox(atom.getImplicitHydrogenCount()); 288 if (v > value) 289 return false; 290 for (IBond bond : atom.bonds()) { 291 if (bond.getOrder() != null) 292 v += bond.getOrder().numeric(); 293 } 294 return v == value; 295 case ISOTOPE: 296 return eq(atom.getMassNumber(), value); 297 case FORMAL_CHARGE: 298 return eq(atom.getFormalCharge(), value); 299 case RING_BOND_COUNT: 300 if (!atom.isInRing() || atom.getBondCount() < value) 301 return false; 302 int rbonds = 0; 303 for (IBond bond : atom.bonds()) 304 rbonds += bond.isInRing() ? 1 : 0; 305 return rbonds == value; 306 case RING_COUNT: 307 return atom.isInRing() && getRingCount(atom) == value; 308 case RING_SMALLEST: 309 return atom.isInRing() && isInSmallRingSize(atom, value); 310 case RING_SIZE: 311 return atom.isInRing() && isInRingSize(atom, value); 312 case HETERO_SUBSTITUENT_COUNT: 313 if (atom.getBondCount() < value) 314 return false; 315 int q = 0; 316 for (IBond bond : atom.bonds()) 317 q += matches(Type.IS_HETERO, bond.getOther(atom), stereo) ? 1 : 0; 318 return q == value; 319 case INSATURATION: 320 int db = 0; 321 for (IBond bond : atom.bonds()) 322 if (bond.getOrder() == IBond.Order.DOUBLE) 323 db++; 324 return db == value; 325 case HYBRIDISATION_NUMBER: 326 IAtomType.Hybridization hyb = atom.getHybridization(); 327 if (hyb == null) 328 return false; 329 switch (value) { 330 case 1: 331 return hyb == IAtomType.Hybridization.SP1; 332 case 2: 333 return hyb == IAtomType.Hybridization.SP2; 334 case 3: 335 return hyb == IAtomType.Hybridization.SP3; 336 case 4: 337 return hyb == IAtomType.Hybridization.SP3D1; 338 case 5: 339 return hyb == IAtomType.Hybridization.SP3D2; 340 case 6: 341 return hyb == IAtomType.Hybridization.SP3D3; 342 case 7: 343 return hyb == IAtomType.Hybridization.SP3D4; 344 case 8: 345 return hyb == IAtomType.Hybridization.SP3D5; 346 default: 347 return false; 348 } 349 case PERIODIC_GROUP: 350 return atom.getAtomicNumber() != null && 351 Elements.ofNumber(atom.getAtomicNumber()).group() == value; 352 case STEREOCHEMISTRY: 353 return stereo == UNKNOWN_STEREO || stereo == value; 354 case REACTION_ROLE: 355 ReactionRole role = atom.getProperty(CDKConstants.REACTION_ROLE); 356 return role != null && role.ordinal() == value; 357 case AND: 358 return left.matches(left.type, atom, stereo) && 359 right.matches(right.type, atom, stereo); 360 case OR: 361 return left.matches(left.type, atom, stereo) || 362 right.matches(right.type, atom, stereo); 363 case NOT: 364 return !left.matches(left.type, atom, stereo) || 365 // XXX: ugly but needed, when matching stereo 366 (stereo == UNKNOWN_STEREO && 367 (left.type == STEREOCHEMISTRY || 368 left.type == OR && left.left.type == STEREOCHEMISTRY)); 369 case RECURSIVE: 370 if (ptrn == null) 371 ptrn = DfPattern.findSubstructure(query); 372 return ptrn.matchesRoot(atom); 373 default: 374 throw new IllegalArgumentException("Cannot match AtomExpr, type=" + type); 375 } 376 } 377 matches(IBond bond, int stereo)378 public boolean matches(IBond bond, int stereo) { 379 switch (type) { 380 case TRUE: 381 return true; 382 case FALSE: 383 return false; 384 case ALIPHATIC_ORDER: 385 return !bond.isAromatic() && 386 bond.getOrder() != null && 387 bond.getOrder().numeric() == value; 388 case ORDER: 389 return bond.getOrder() != null && 390 bond.getOrder().numeric() == value; 391 case IS_AROMATIC: 392 return bond.isAromatic(); 393 case IS_ALIPHATIC: 394 return !bond.isAromatic(); 395 case IS_IN_RING: 396 return bond.isInRing(); 397 case IS_IN_CHAIN: 398 return !bond.isInRing(); 399 case SINGLE_OR_AROMATIC: 400 return bond.isAromatic() || 401 IBond.Order.SINGLE.equals(bond.getOrder()); 402 case DOUBLE_OR_AROMATIC: 403 return bond.isAromatic() || 404 IBond.Order.DOUBLE.equals(bond.getOrder()); 405 case SINGLE_OR_DOUBLE: 406 return IBond.Order.SINGLE.equals(bond.getOrder()) || 407 IBond.Order.DOUBLE.equals(bond.getOrder()); 408 case STEREOCHEMISTRY: 409 return stereo == UNKNOWN_STEREO || value == stereo; 410 case AND: 411 return left.matches(bond, stereo) && right.matches(bond, stereo); 412 case OR: 413 return left.matches(bond, stereo) || right.matches(bond, stereo); 414 case NOT: 415 return !left.matches(bond, stereo) || 416 // XXX: ugly but needed, when matching stereo 417 (stereo == UNKNOWN_STEREO && 418 (left.type == STEREOCHEMISTRY || 419 left.type == OR && left.left.type == STEREOCHEMISTRY)); 420 default: 421 throw new IllegalArgumentException("Cannot match BondExpr, type=" + type); 422 } 423 } 424 matches(IBond bond)425 public boolean matches(IBond bond) { 426 return matches(bond, UNKNOWN_STEREO); 427 } 428 getTotalHCount(IAtom atom)429 private static int getTotalHCount(IAtom atom) { 430 int h = unbox(atom.getImplicitHydrogenCount()); 431 for (IBond bond : atom.bonds()) 432 if (eq(bond.getOther(atom).getAtomicNumber(), 1)) 433 h++; 434 return h; 435 } 436 437 /** 438 * Test whether this expression matches an atom instance. 439 * 440 * @param atom an atom (nullable) 441 * @return the atom matches 442 */ matches(final IAtom atom)443 public boolean matches(final IAtom atom) { 444 return atom != null && matches(type, atom, UNKNOWN_STEREO); 445 } 446 matches(final IAtom atom, final int stereo)447 public boolean matches(final IAtom atom, final int stereo) { 448 return atom != null && matches(type, atom, stereo); 449 } 450 451 /** 452 * Utility, combine this expression with another, using conjunction. 453 * The expression will only match if both conditions are met. 454 * 455 * @param expr the other expression 456 * @return self for chaining 457 */ and(Expr expr)458 public Expr and(Expr expr) { 459 if (type == Type.TRUE) { 460 set(expr); 461 } else if (expr.type != Type.TRUE) { 462 if (type.isLogical() && !expr.type.isLogical()) { 463 if (type == AND) 464 right.and(expr); 465 else if (type != NOT) 466 setLogical(Type.AND, expr, new Expr(this)); 467 else 468 setLogical(Type.AND, expr, new Expr(this)); 469 } else { 470 setLogical(Type.AND, new Expr(this), expr); 471 } 472 } 473 return this; 474 } 475 476 /** 477 * Utility, combine this expression with another, using disjunction. 478 * The expression will match if either conditions is met. 479 * @param expr the other expression 480 * @return self for chaining 481 */ or(Expr expr)482 public Expr or(Expr expr) { 483 if (type == Type.TRUE || 484 type == Type.FALSE || 485 type == NONE) { 486 set(expr); 487 } else if (expr.type != Type.TRUE && 488 expr.type != Type.FALSE && 489 expr.type != Type.NONE) { 490 if (type.isLogical() && !expr.type.isLogical()) { 491 if (type == OR) 492 right.or(expr); 493 else if (type != NOT) 494 setLogical(Type.OR, expr, new Expr(this)); 495 else 496 setLogical(Type.OR, new Expr(this), expr); 497 } 498 else 499 setLogical(Type.OR, new Expr(this), expr); 500 } 501 return this; 502 } 503 504 /** 505 * Negate the expression, the expression will not return true only if 506 * the condition is not met. Some expressions have explicit types 507 * that are more efficient to use, for example: 508 * <code>IS_IN_RING => NOT(IS_IN_RING) => IS_IN_CHAIN</code>. This 509 * negation method will use the more efficient type where possible. 510 * <pre>{@code 511 * Expr e = new Expr(ELEMENT, 8); // SMARTS: [#8] 512 * e.negate(); // SMARTS: [!#8] 513 * }</pre> 514 * @return self for chaining 515 */ negate()516 public Expr negate() { 517 switch (type) { 518 case TRUE: 519 type = Type.FALSE; 520 break; 521 case FALSE: 522 type = Type.TRUE; 523 break; 524 case HAS_ISOTOPE: 525 type = Type.HAS_UNSPEC_ISOTOPE; 526 break; 527 case HAS_UNSPEC_ISOTOPE: 528 type = Type.HAS_ISOTOPE; 529 break; 530 case IS_AROMATIC: 531 type = Type.IS_ALIPHATIC; 532 break; 533 case IS_ALIPHATIC: 534 type = Type.IS_AROMATIC; 535 break; 536 case IS_IN_RING: 537 type = Type.IS_IN_CHAIN; 538 break; 539 case IS_IN_CHAIN: 540 type = Type.IS_IN_RING; 541 break; 542 case NOT: 543 set(this.left); 544 break; 545 default: 546 setLogical(Type.NOT, new Expr(this), null); 547 break; 548 } 549 return this; 550 } 551 552 /** 553 * Set the primitive value of this atom expression. 554 * 555 * @param type the type of expression 556 * @param val the value to check 557 */ setPrimitive(Type type, int val)558 public void setPrimitive(Type type, int val) { 559 if (type.hasValue()) { 560 this.type = type; 561 this.value = val; 562 this.left = null; 563 this.right = null; 564 this.query = null; 565 } else { 566 throw new IllegalArgumentException("Value provided for non-value " 567 + "expression type!"); 568 } 569 } 570 571 /** 572 * Set the primitive value of this atom expression. 573 * 574 * @param type the type of expression 575 */ setPrimitive(Type type)576 public void setPrimitive(Type type) { 577 if (!type.hasValue() && !type.isLogical()) { 578 this.type = type; 579 this.value = -1; 580 this.left = null; 581 this.right = null; 582 this.query = null; 583 } else { 584 throw new IllegalArgumentException("Expression type requires a value!"); 585 } 586 } 587 588 /** 589 * Set the logical value of this atom expression. 590 * 591 * @param type the type of expression 592 * @param left the left sub-expression 593 * @param right the right sub-expression 594 */ setLogical(Type type, Expr left, Expr right)595 public void setLogical(Type type, Expr left, Expr right) { 596 switch (type) { 597 case AND: 598 case OR: 599 this.type = type; 600 this.value = 0; 601 this.left = left; 602 this.right = right; 603 this.query = null; 604 break; 605 case NOT: 606 this.type = type; 607 if (left != null && right == null) 608 this.left = left; 609 else if (left == null && right != null) 610 this.left = right; 611 else if (left != null) 612 throw new IllegalArgumentException("Only one sub-expression" 613 + " should be provided" 614 + " for NOT expressions!"); 615 this.query = null; 616 this.value = 0; 617 break; 618 default: 619 throw new IllegalArgumentException("Left/Right sub expressions " 620 + "supplied for " 621 + " non-logical operator!"); 622 } 623 } 624 625 /** 626 * Set the recursive value of this atom expression. 627 * 628 * @param type the type of expression 629 * @param mol the recursive pattern 630 */ setRecursive(Type type, IAtomContainer mol)631 private void setRecursive(Type type, IAtomContainer mol) { 632 switch (type) { 633 case RECURSIVE: 634 this.type = type; 635 this.value = 0; 636 this.left = null; 637 this.right = null; 638 this.query = mol; 639 this.ptrn = null; 640 break; 641 default: 642 throw new IllegalArgumentException(); 643 } 644 } 645 646 /** 647 * Set this expression to another (shallow copy). 648 * 649 * @param expr the other expression 650 */ set(Expr expr)651 public void set(Expr expr) { 652 this.type = expr.type; 653 this.value = expr.value; 654 this.left = expr.left; 655 this.right = expr.right; 656 this.query = expr.query; 657 } 658 659 /** 660 * Access the type of the atom expression. 661 * 662 * @return the expression type 663 */ type()664 public Type type() { 665 return type; 666 } 667 668 /** 669 * Access the value of this atom expression being 670 * tested. 671 * 672 * @return the expression value 673 */ value()674 public int value() { 675 return value; 676 } 677 678 /** 679 * Access the left sub-expression of this atom expression being 680 * tested. 681 * 682 * @return the expression value 683 */ left()684 public Expr left() { 685 return left; 686 } 687 688 /** 689 * Access the right sub-expression of this atom expression being 690 * tested. 691 * 692 * @return the expression value 693 */ right()694 public Expr right() { 695 return right; 696 } 697 698 /** 699 * Access the sub-query, only applicable to recursive types. 700 * @return the sub-query 701 * @see Type#RECURSIVE 702 */ subquery()703 public IAtomContainer subquery() { 704 return query; 705 } 706 707 /* Property Caches */ 708 private static LoadingCache<IAtomContainer, int[]> cacheRCounts; 709 getRingCounts(IAtomContainer mol)710 private static int[] getRingCounts(IAtomContainer mol) { 711 int[] rcounts = new int[mol.getAtomCount()]; 712 for (int[] path : Cycles.mcb(mol).paths()) { 713 for (int i = 1; i < path.length; i++) { 714 rcounts[path[i]]++; 715 } 716 } 717 return rcounts; 718 } 719 getRingCount(IAtom atom)720 private static int getRingCount(IAtom atom) { 721 final IAtomContainer mol = atom.getContainer(); 722 if (cacheRCounts == null) { 723 cacheRCounts = CacheBuilder.newBuilder() 724 .maximumWeight(1000) // 4KB 725 .weigher(new Weigher<IAtomContainer, int[]>() { 726 @Override 727 public int weigh(IAtomContainer key, 728 int[] value) { 729 return value.length; 730 } 731 }) 732 .build(new CacheLoader<IAtomContainer, int[]>() { 733 @Override 734 public int[] load(IAtomContainer key) throws Exception { 735 return getRingCounts(key); 736 } 737 }); 738 } 739 return cacheRCounts.getUnchecked(mol)[atom.getIndex()]; 740 } 741 742 /** 743 * {@inheritDoc} 744 */ 745 @Override equals(Object o)746 public boolean equals(Object o) { 747 if (this == o) return true; 748 if (o == null || getClass() != o.getClass()) return false; 749 Expr atomExpr = (Expr) o; 750 return type == atomExpr.type && 751 value == atomExpr.value && 752 Objects.equals(left, atomExpr.left) && Objects.equals(right, atomExpr.right); 753 } 754 755 /** 756 * {@inheritDoc} 757 */ 758 @Override hashCode()759 public int hashCode() { 760 return Objects.hash(type, value, left, right); 761 } 762 763 /** 764 * {@inheritDoc} 765 */ 766 @Override toString()767 public String toString() { 768 StringBuilder sb = new StringBuilder(); 769 sb.append(type); 770 if (type.isLogical()) { 771 switch (type) { 772 case NOT: 773 sb.append('(').append(left).append(')'); 774 break; 775 case OR: 776 case AND: 777 sb.append('(').append(left).append(',').append(right).append(')'); 778 break; 779 } 780 } else if (type.hasValue()) { 781 sb.append('=').append(value); 782 } else if (type == Type.RECURSIVE) { 783 sb.append("(...)"); 784 } 785 return sb.toString(); 786 } 787 788 /** 789 * Types of expression, for use in the {@link Expr} tree object. 790 */ 791 public enum Type { 792 /** Always returns true. */ 793 TRUE, 794 /** Always returns false. */ 795 FALSE, 796 /** Return true if {@link IAtom#isAromatic} or {@link IBond#isAromatic} is 797 * true. */ 798 IS_AROMATIC, 799 /** Return true if {@link IAtom#isAromatic} or {@link IBond#isAromatic} is 800 * flase. */ 801 IS_ALIPHATIC, 802 /** Return true if {@link IAtom#isInRing()} or {@link IBond#isInRing} is 803 * true. */ 804 IS_IN_RING, 805 /** Return true if {@link IAtom#isInRing()} or {@link IBond#isInRing} is 806 * false. */ 807 IS_IN_CHAIN, 808 /** Return true if {@link IAtom#getAtomicNumber()} is neither 6 (carbon) 809 * nor 1 (hydrogen). */ 810 IS_HETERO, 811 /** Return true if {@link IAtom#getAtomicNumber()} is neither 6 (carbon) 812 * nor 1 (hydrogen) and the atom is aliphatic. */ 813 IS_ALIPHATIC_HETERO, 814 /** True if the hydrogen count ({@link IAtom#getImplicitHydrogenCount()}) 815 * is > 0. */ 816 HAS_IMPLICIT_HYDROGEN, 817 /** True if the atom mass ({@link IAtom#getMassNumber()}) is non-null. */ 818 HAS_ISOTOPE, 819 /** True if the atom mass ({@link IAtom#getMassNumber()}) is null 820 * (unspecified). */ 821 HAS_UNSPEC_ISOTOPE, 822 /** True if the atom is adjacent to a hetero atom. */ 823 HAS_HETERO_SUBSTITUENT, 824 /** True if the atom is adjacent to an aliphatic hetero atom. */ 825 HAS_ALIPHATIC_HETERO_SUBSTITUENT, 826 /** True if the atom is unsaturated. 827 * TODO: check if CACTVS if double bond to non-carbons are counted. */ 828 UNSATURATED, 829 /** True if the bond order ({@link IBond#getOrder()}) is single or double. */ 830 SINGLE_OR_DOUBLE, 831 /** True if the bond order ({@link IBond#getOrder()}) is single or the bond 832 * is marked as aromatic ({@link IBond#isAromatic()}). */ 833 SINGLE_OR_AROMATIC, 834 /** True if the bond order ({@link IBond#getOrder()}) is double or the bond 835 * is marked as aromatic ({@link IBond#isAromatic()}). */ 836 DOUBLE_OR_AROMATIC, 837 838 /* Expressions that take values */ 839 840 /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()}) 841 * of an atom equals the specified 'value'. */ 842 ELEMENT, 843 /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()}) 844 * of an atom equals the specified 'value' and {@link IAtom#isAromatic()} 845 * is false. */ 846 ALIPHATIC_ELEMENT, 847 /** True if the atomic number ({@link IAtom#getAtomicNumber()} ()}) 848 * of an atom equals the specified 'value' and {@link IAtom#isAromatic()} 849 * is true. */ 850 AROMATIC_ELEMENT, 851 /** True if the hydrogen count ({@link IAtom#getImplicitHydrogenCount()}) 852 * of an atom equals the specified 'value'. */ 853 IMPL_H_COUNT, 854 /** True if the total hydrogen count of an atom equals the specified 855 * 'value'. */ 856 TOTAL_H_COUNT, 857 /** True if the degree ({@link IAtom#getBondCount()}) of an atom 858 * equals the specified 'value'. */ 859 DEGREE, 860 /** True if the total degree ({@link IAtom#getBondCount()} + 861 * {@link IAtom#getImplicitHydrogenCount()}) of an atom equals the 862 * specified 'value'. */ 863 TOTAL_DEGREE, 864 /** True if the degree ({@link IAtom#getBondCount()}) - any hydrogen atoms 865 x* equals the specified 'value'. */ 866 HEAVY_DEGREE, 867 /** True if the valence of an atom equals the specified 'value'. */ 868 VALENCE, 869 /** True if the mass ({@link IAtom#getMassNumber()}) of an atom equals the 870 * specified 'value'. */ 871 ISOTOPE, 872 /** True if the formal charge ({@link IAtom#getFormalCharge()}) of an atom 873 * equals the specified 'value'. */ 874 FORMAL_CHARGE, 875 /** True if the ring bond count of an atom equals the specified 'value'. */ 876 RING_BOND_COUNT, 877 /** True if the number of rings this atom belongs to matches the specified 878 * 'value'. Here a ring means a member of the Minimum Cycle Basis (MCB) 879 * (aka Smallest Set of Smallest Rings). Since the MCB is non-unique the 880 * numbers often don't make sense of bicyclo systems. */ 881 RING_COUNT, 882 /** True if the smallest ring this atom belongs to equals the specified 883 * 'value' */ 884 RING_SMALLEST, 885 /** True if the this atom belongs to a ring equal to the specified 886 * 'value' */ 887 RING_SIZE, 888 /** True if the this atom hybridisation ({@link IAtom#getHybridization()}) 889 * is equal to the specified 'value'. SP1=1, SP2=2, SP3=3, SP3D1=4, 890 * SP3D2=5, SP3D3=6, SP3D4=7, SP3D5=8. */ 891 HYBRIDISATION_NUMBER, 892 /** True if the number hetero atoms (see {@link #IS_HETERO}) this atom is 893 * next to is equal to the specified value. */ 894 HETERO_SUBSTITUENT_COUNT, 895 /** True if the number hetero atoms (see {@link #IS_ALIPHATIC_HETERO}) this atom is 896 * next to is equal to the specified value. */ 897 ALIPHATIC_HETERO_SUBSTITUENT_COUNT, 898 /** True if the periodic table group of this atom is equal to the specified 899 * value. For example halogens are Group '17'.*/ 900 PERIODIC_GROUP, 901 /** True if the number of double bonds equals the specified value. 902 * TODO: check if CACTVS if double bond to non-carbons are counted. */ 903 INSATURATION, 904 /** True if an atom has the specified reaction role. */ 905 REACTION_ROLE, 906 /** True if an atom or bond has the specified stereochemistry value, see 907 * ({@link org.openscience.cdk.interfaces.IStereoElement}) for a list of 908 * values.*/ 909 STEREOCHEMISTRY, 910 /** True if the bond order {@link IBond#getOrder()} equals the specified 911 * value and the bond is not marked as aromatic 912 * ({@link IAtom#isAromatic()}). */ 913 ALIPHATIC_ORDER, 914 /** True if the bond order {@link IBond#getOrder()} equals the specified 915 * value and the bond, aromaticity is not check. */ 916 ORDER, 917 918 /* Binary/unary internal nodes */ 919 920 /** True if both the subexpressions are true. */ 921 AND, 922 /** True if both either subexpressions are true. */ 923 OR, 924 /** True if the subexpression is not true. */ 925 NOT, 926 927 /** Recursive query. */ 928 RECURSIVE, 929 930 /** Undefined expression type. */ 931 NONE; 932 isLogical()933 boolean isLogical() { 934 switch (this) { 935 case OR: 936 case NOT: 937 case AND: 938 return true; 939 default: 940 return false; 941 } 942 } 943 hasValue()944 boolean hasValue() { 945 switch (this) { 946 case OR: 947 case NOT: 948 case AND: 949 case TRUE: 950 case FALSE: 951 case NONE: 952 case IS_AROMATIC: 953 case IS_ALIPHATIC: 954 case IS_IN_RING: 955 case IS_IN_CHAIN: 956 case IS_HETERO: 957 case IS_ALIPHATIC_HETERO: 958 case HAS_IMPLICIT_HYDROGEN: 959 case HAS_ISOTOPE: 960 case HAS_UNSPEC_ISOTOPE: 961 case HAS_ALIPHATIC_HETERO_SUBSTITUENT: 962 case HAS_HETERO_SUBSTITUENT: 963 case UNSATURATED: 964 case SINGLE_OR_AROMATIC: 965 case SINGLE_OR_DOUBLE: 966 case DOUBLE_OR_AROMATIC: 967 case RECURSIVE: 968 return false; 969 default: 970 return true; 971 } 972 } 973 } 974 } 975