1 /* 2 * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI) 3 * John May <jwmay@users.sf.net> 4 * 5 * Contact: cdk-devel@lists.sourceforge.net 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU Lesser General Public License as published by 9 * the Free Software Foundation; either version 2.1 of the License, or (at 10 * your option) any later version. All we ask is that proper credit is given 11 * for our work, which includes - but is not limited to - adding the above 12 * copyright notice to the beginning of your source code files, and to any 13 * copyright notice that you may distribute with programs based on this work. 14 * 15 * This program is distributed in the hope that it will be useful, but WITHOUT 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 18 * License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public License 21 * along with this program; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U 23 */ 24 25 package org.openscience.cdk.graph; 26 27 import org.openscience.cdk.exception.Intractable; 28 import org.openscience.cdk.interfaces.IAtom; 29 import org.openscience.cdk.interfaces.IAtomContainer; 30 import org.openscience.cdk.interfaces.IBond; 31 import org.openscience.cdk.interfaces.IChemObjectBuilder; 32 import org.openscience.cdk.interfaces.IRing; 33 import org.openscience.cdk.interfaces.IRingSet; 34 import org.openscience.cdk.ringsearch.RingSearch; 35 36 import java.util.ArrayList; 37 import java.util.Arrays; 38 import java.util.BitSet; 39 import java.util.List; 40 41 import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap; 42 43 /** 44 * A utility class for storing and computing the cycles of a chemical graph. 45 * Utilities are also provided for converting the cycles to {@link IRing}s. A 46 * brief description of each cycle set is given below - for a more comprehensive 47 * review please see - {@cdk.cite Berger04}. 48 * 49 * <ul> <li>{@link #all()} - all simple cycles in the graph, the number of 50 * cycles generated may be very large and may not be feasible for some 51 * molecules, such as, fullerene.</li> <li>{@link #mcb()} (aka. SSSR) - minimum 52 * cycle basis (MCB) of a graph, these cycles are linearly independent and can 53 * be used to generate all of cycles in the graph. It is important to note the 54 * MCB is not unique and a that there may be multiple equally valid MCBs. The 55 * smallest set of smallest rings (SSSR) is often used to refer to the MCB but 56 * originally SSSR was defined as a strictly fundamental cycle basis {@cdk.cite 57 * Berger04}. Not every graph has a strictly fundamental cycle basis the 58 * definition has come to mean the MCB. Due to the non-uniqueness of the 59 * MCB/SSSR its use is discouraged.</li> <li>{@link #relevant()} - relevant 60 * cycles of a graph, the smallest set of uniquely defined short cycles. If a 61 * graph has a single MCB then the relevant cycles and MCB are the same. If the 62 * graph has multiple MCB then the relevant cycles is the union of all MCBs. The 63 * number of relevant cycles may be exponential but it is possible to determine 64 * how many relevant cycles there are in polynomial time without generating 65 * them. For chemical graphs the number of relevant cycles is usually within 66 * manageable bounds. </li> <li>{@link #essential()} - essential cycles of a 67 * graph. Similar to the relevant cycles the set is unique for a graph. If a 68 * graph has a single MCB then the essential cycles and MCB are the same. If the 69 * graph has multiple MCB then the essential cycles is the intersect of all 70 * MCBs. That is the cycles which appear in every MCB. This means that is is 71 * possible to have no essential cycles in a molecule which clearly has cycles 72 * (e.g. bridged system like bicyclo[2.2.2]octane). </li> <li> {@link 73 * #tripletShort()} - the triple short cycles are the shortest cycle through 74 * each triple of vertices. This allows one to generate the envelope rings of 75 * some molecules (e.g. naphthalene) without generating all cycles. The cycles 76 * are primarily useful for the CACTVS Substructure Keys (PubChem fingerprint). 77 * </li> <li> {@link #vertexShort()} - the shortest cycles through each vertex. 78 * Unlike the MCB, linear independence is not checked and it may not be possible 79 * to generate all other cycles from this set. In practice the vertex/edge short 80 * cycles are similar to MCB. </li> <li> {@link #edgeShort()} - the shortest 81 * cycles through each edge. Unlike the MCB, linear independence is not checked 82 * and it may not be possible to generate all other cycles from this set. In 83 * practice the vertex/edge short cycles are similar to MCB. </li> </ul> 84 * 85 * @author John May 86 * @cdk.module core 87 * @cdk.githash 88 */ 89 public final class Cycles { 90 91 /** Vertex paths for each cycle. */ 92 private final int[][] paths; 93 94 /** The input container - allows us to create 'Ring' objects. */ 95 private final IAtomContainer container; 96 97 /** Mapping for quick lookup of bond mapping. */ 98 private final EdgeToBondMap bondMap; 99 100 /** 101 * Internal constructor - may change in future but currently just takes the 102 * cycle paths and the container from which they came. 103 * 104 * @param paths the cycle paths (closed vertex walks) 105 * @param container the input container 106 */ Cycles(int[][] paths, IAtomContainer container, EdgeToBondMap bondMap)107 private Cycles(int[][] paths, IAtomContainer container, EdgeToBondMap bondMap) { 108 this.paths = paths; 109 this.container = container; 110 this.bondMap = bondMap; 111 } 112 113 /** 114 * How many cycles are stored. 115 * 116 * @return number of cycles 117 */ numberOfCycles()118 public int numberOfCycles() { 119 return paths.length; 120 } 121 paths()122 public int[][] paths() { 123 int[][] cpy = new int[paths.length][]; 124 for (int i = 0; i < paths.length; i++) 125 cpy[i] = paths[i].clone(); 126 return cpy; 127 } 128 129 /** 130 * Convert the cycles to a {@link IRingSet} containing the {@link IAtom}s 131 * and {@link IBond}s of the input molecule. 132 * 133 * @return ringset for the cycles 134 */ toRingSet()135 public IRingSet toRingSet() { 136 return toRingSet(container, paths, bondMap); 137 } 138 139 /** 140 * Create a cycle finder which will compute all simple cycles in a molecule. 141 * The threshold values can not be tuned and is set at a value which will 142 * complete in reasonable time for most molecules. To change the threshold 143 * values please use the stand-alone {@link AllCycles} or {@link 144 * org.openscience.cdk.ringsearch.AllRingsFinder}. All cycles is every 145 * possible simple cycle (i.e. non-repeating vertices) in the chemical 146 * graph. As an example - all simple cycles of anthracene includes, 3 cycles 147 * of length 6, 2 of length 10 and 1 of length 14. 148 * 149 * <blockquote> 150 * <pre> 151 * CycleFinder cf = Cycles.all(); 152 * for (IAtomContainer m : ms) { 153 * try { 154 * Cycles cycles = cf.find(m); 155 * IRingSet rings = cycles.toRingSet(); 156 * } catch (Intractable e) { 157 * // handle error - note it is common that finding all simple 158 * // cycles in chemical graphs is intractable 159 * } 160 * } 161 * </pre> 162 * </blockquote> 163 * 164 * @return finder for all simple cycles 165 * @see #all(org.openscience.cdk.interfaces.IAtomContainer) 166 * @see AllCycles 167 * @see org.openscience.cdk.ringsearch.AllRingsFinder 168 */ all()169 public static CycleFinder all() { 170 return CycleComputation.ALL; 171 } 172 173 /** 174 * All cycles of smaller than or equal to the specified length. If a length 175 * is also provided to {@link CycleFinder#find(IAtomContainer, int)} the 176 * minimum of the two limits is used. 177 * 178 * @param length maximum size or cycle to find 179 * @return cycle finder 180 */ all(int length)181 public static CycleFinder all(int length) { 182 return new AllUpToLength(length); 183 } 184 185 /** 186 * Create a cycle finder which will compute the minimum cycle basis (MCB) of 187 * a molecule. 188 * 189 * <blockquote> 190 * <pre> 191 * CycleFinder cf = Cycles.mcb(); 192 * for (IAtomContainer m : ms) { 193 * try { 194 * Cycles cycles = cf.find(m); 195 * IRingSet rings = cycles.toRingSet(); 196 * } catch (Intractable e) { 197 * // ignore error - MCB should never be intractable 198 * } 199 * } 200 * </pre> 201 * </blockquote> 202 * 203 * @return finder for all simple cycles 204 * @see #mcb(org.openscience.cdk.interfaces.IAtomContainer) 205 * @see MinimumCycleBasis 206 */ mcb()207 public static CycleFinder mcb() { 208 return CycleComputation.MCB; 209 } 210 211 /** 212 * Create a cycle finder which will compute the relevant cycle basis (RC) of 213 * a molecule. 214 * 215 * <blockquote> 216 * <pre> 217 * CycleFinder cf = Cycles.relevant(); 218 * for (IAtomContainer m : ms) { 219 * try { 220 * Cycles cycles = cf.find(m); 221 * IRingSet rings = cycles.toRingSet(); 222 * } catch (Intractable e) { 223 * // ignore error - there may be an exponential number of cycles 224 * // but this is not currently checked 225 * } 226 * } 227 * </pre> 228 * </blockquote> 229 * 230 * @return finder for relevant cycles 231 * @see #relevant(org.openscience.cdk.interfaces.IAtomContainer) 232 * @see RelevantCycles 233 */ relevant()234 public static CycleFinder relevant() { 235 return CycleComputation.RELEVANT; 236 } 237 238 /** 239 * Create a cycle finder which will compute the essential cycles of a 240 * molecule. 241 * 242 * <blockquote> 243 * <pre> 244 * CycleFinder cf = Cycles.essential(); 245 * for (IAtomContainer m : ms) { 246 * try { 247 * Cycles cycles = cf.find(m); 248 * IRingSet rings = cycles.toRingSet(); 249 * } catch (Intractable e) { 250 * // ignore error - essential cycles do not check tractability 251 * } 252 * } 253 * </pre> 254 * </blockquote> 255 * 256 * @return finder for essential cycles 257 * @see #relevant(org.openscience.cdk.interfaces.IAtomContainer) 258 * @see RelevantCycles 259 */ essential()260 public static CycleFinder essential() { 261 return CycleComputation.ESSENTIAL; 262 } 263 264 /** 265 * Create a cycle finder which will compute the triplet short cycles of a 266 * molecule. These cycles are the shortest through each triplet of vertices 267 * are utilised in the generation of CACTVS Substructure Keys (PubChem 268 * Fingerprint). Currently the triplet cycles are non-canonical (which in 269 * this algorithms case means unique). For finer tuning of options please 270 * use the {@link TripletShortCycles}. 271 * 272 * <blockquote> 273 * <pre> 274 * CycleFinder cf = Cycles.tripletShort(); 275 * for (IAtomContainer m : ms) { 276 * try { 277 * Cycles cycles = cf.find(m); 278 * IRingSet rings = cycles.toRingSet(); 279 * } catch (Intractable e) { 280 * // ignore error - triple short cycles do not check tractability 281 * } 282 * } 283 * </pre> 284 * </blockquote> 285 * 286 * @return finder for triplet short cycles 287 * @see #tripletShort(org.openscience.cdk.interfaces.IAtomContainer) 288 * @see TripletShortCycles 289 */ tripletShort()290 public static CycleFinder tripletShort() { 291 return CycleComputation.TRIPLET_SHORT; 292 } 293 294 /** 295 * Create a cycle finder which will compute the shortest cycles of each 296 * vertex in a molecule. Unlike the SSSR/MCB computation linear independence 297 * is not required and provides some performance gain. In practise typical 298 * chemical graphs are small and the linear independence check is relatively 299 * fast. 300 * 301 * <blockquote> 302 * <pre> 303 * CycleFinder cf = Cycles.vertexShort(); 304 * for (IAtomContainer m : ms) { 305 * try { 306 * Cycles cycles = cf.find(m); 307 * IRingSet rings = cycles.toRingSet(); 308 * } catch (Intractable e) { 309 * // ignore error - vertex short cycles do not check tractability 310 * } 311 * } 312 * </pre> 313 * </blockquote> 314 * 315 * @return finder for vertex short cycles 316 * @see #vertexShort(org.openscience.cdk.interfaces.IAtomContainer) 317 */ vertexShort()318 public static CycleFinder vertexShort() { 319 return CycleComputation.VERTEX_SHORT; 320 } 321 322 /** 323 * Create a cycle finder which will compute the shortest cycles of each 324 * vertex in a molecule. Unlike the SSSR/MCB computation linear independence 325 * is not required and provides some performance gain. In practise typical 326 * chemical graphs are small and the linear independence check is relatively 327 * fast. 328 * 329 * <blockquote> 330 * <pre> 331 * CycleFinder cf = Cycles.edgeShort(); 332 * for (IAtomContainer m : ms) { 333 * try { 334 * Cycles cycles = cf.find(m); 335 * IRingSet rings = cycles.toRingSet(); 336 * } catch (Intractable e) { 337 * // ignore error - edge short cycles do not check tractability 338 * } 339 * } 340 * </pre> 341 * </blockquote> 342 * 343 * @return finder for edge short cycles 344 * @see #edgeShort(org.openscience.cdk.interfaces.IAtomContainer) 345 */ edgeShort()346 public static CycleFinder edgeShort() { 347 return CycleComputation.EDGE_SHORT; 348 } 349 350 /** 351 * Create a cycle finder which will compute a set of cycles traditionally 352 * used by the CDK to test for aromaticity. This set of cycles is the 353 * MCB/SSSR and {@link #all} cycles for fused systems with 3 or less rings. 354 * This allows on to test aromaticity of envelope rings in compounds such as 355 * azulene without generating an huge number of cycles for large fused 356 * systems (e.g. fullerenes). The use case was that computation of all 357 * cycles previously took a long time and ring systems with more than 2 358 * rings were too difficult. However it is now more efficient to simply 359 * check all cycles/rings without using the MCB/SSSR. This computation will 360 * fail for complex fused systems but the failure is fast and one can easily 361 * 'fall back' to a smaller set of cycles after catching the exception. 362 * 363 * <blockquote> 364 * <pre> 365 * CycleFinder cf = Cycles.cdkAromaticSet(); 366 * for (IAtomContainer m : ms) { 367 * try { 368 * Cycles cycles = cf.find(m); 369 * IRingSet rings = cycles.toRingSet(); 370 * } catch (Intractable e) { 371 * // ignore error - edge short cycles do not check tractability 372 * } 373 * } 374 * </pre> 375 * </blockquote> 376 * 377 * @return finder for cdk aromatic cycles 378 * @see #edgeShort(org.openscience.cdk.interfaces.IAtomContainer) 379 */ cdkAromaticSet()380 public static CycleFinder cdkAromaticSet() { 381 return CycleComputation.CDK_AROMATIC; 382 } 383 384 /** 385 * Find all cycles in a fused system or if there were too many cycles 386 * fallback and use the shortest cycles through each vertex. Typically the 387 * types of molecules which the vertex short cycles are provided for are 388 * fullerenes. This cycle finder is well suited to aromaticity. 389 * 390 * <blockquote> 391 * <pre> 392 * CycleFinder cf = Cycles.allOrVertexShort(); 393 * for (IAtomContainer m : ms) { 394 * try { 395 * Cycles cycles = cf.find(m); 396 * IRingSet rings = cycles.toRingSet(); 397 * } catch (Intractable e) { 398 * // ignore error - edge short cycles do not check tractability 399 * } 400 * } 401 * </pre> 402 * </blockquote> 403 * 404 * @return a cycle finder which computes all cycles if possible or provides 405 * the vertex short cycles 406 * @deprecated use {@link #or} to define a custom fall-back 407 */ 408 @Deprecated allOrVertexShort()409 public static CycleFinder allOrVertexShort() { 410 return or(all(), vertexShort()); 411 } 412 413 414 /** 415 * Find and mark all cyclic atoms and bonds in the provided molecule. 416 * 417 * @param mol molecule 418 * @see IBond#isInRing() 419 * @see IAtom#isInRing() 420 * @return Number of rings found (circuit rank) 421 * @see <a href="https://en.wikipedia.org/wiki/Circuit_rank">Circuit Rank</a> 422 */ markRingAtomsAndBonds(IAtomContainer mol)423 public static int markRingAtomsAndBonds(IAtomContainer mol) { 424 EdgeToBondMap bonds = EdgeToBondMap.withSpaceFor(mol); 425 return markRingAtomsAndBonds(mol, GraphUtil.toAdjList(mol, bonds), bonds); 426 } 427 428 /** 429 * Find and mark all cyclic atoms and bonds in the provided molecule. This optimised version 430 * allows the caller to optionally provided indexed fast access structure which would otherwise 431 * be created. 432 * 433 * @param mol molecule 434 * @see IBond#isInRing() 435 * @see IAtom#isInRing() 436 * @return Number of rings found (circuit rank) 437 * @see <a href="https://en.wikipedia.org/wiki/Circuit_rank">Circuit Rank</a> 438 */ markRingAtomsAndBonds(IAtomContainer mol, int[][] adjList, EdgeToBondMap bondMap)439 public static int markRingAtomsAndBonds(IAtomContainer mol, int[][] adjList, EdgeToBondMap bondMap) { 440 RingSearch ringSearch = new RingSearch(mol, adjList); 441 for (int v = 0; v < mol.getAtomCount(); v++) { 442 mol.getAtom(v).setIsInRing(false); 443 for (int w : adjList[v]) { 444 // note we only mark the bond on second visit (first v < w) and 445 // clear flag on first visit (or if non-cyclic) 446 if (v > w && ringSearch.cyclic(v, w)) { 447 bondMap.get(v, w).setIsInRing(true); 448 mol.getAtom(v).setIsInRing(true); 449 mol.getAtom(w).setIsInRing(true); 450 } else { 451 bondMap.get(v, w).setIsInRing(false); 452 } 453 } 454 } 455 return ringSearch.numRings(); 456 } 457 458 /** 459 * Use an auxiliary cycle finder if the primary method was intractable. 460 * 461 * <blockquote><pre>{@code 462 * // all cycles or all cycles size <= 6 463 * CycleFinder cf = Cycles.or(Cycles.all(), Cycles.all(6)); 464 * }</pre></blockquote> 465 * 466 * It is possible to nest multiple levels. 467 * 468 * <blockquote><pre> 469 * // all cycles or relevant or essential 470 * CycleFinder cf = Cycles.or(Cycles.all(), 471 * Cycles.or(Cycles.relevant(), 472 * Cycles.essential())); 473 * </pre></blockquote> 474 * 475 * @param primary primary cycle finding method 476 * @param auxiliary auxiliary cycle finding method if the primary failed 477 * @return a new cycle finder 478 */ or(CycleFinder primary, CycleFinder auxiliary)479 public static CycleFinder or(CycleFinder primary, CycleFinder auxiliary) { 480 return new Fallback(primary, auxiliary); 481 } 482 483 /** 484 * Find all simple cycles in a molecule. The threshold values can not be 485 * tuned and is set at a value which will complete in reasonable time for 486 * most molecules. To change the threshold values please use the stand-alone 487 * {@link AllCycles} or {@link org.openscience.cdk.ringsearch.AllRingsFinder}. 488 * All cycles is every possible simple cycle (i.e. non-repeating vertices) 489 * in the chemical graph. As an example - all simple cycles of anthracene 490 * includes, 3 cycles of length 6, 2 of length 10 and 1 of length 14. 491 * 492 * <blockquote> 493 * <pre> 494 * for (IAtomContainer m : ms) { 495 * try { 496 * Cycles cycles = Cycles.all(m); 497 * IRingSet rings = cycles.toRingSet(); 498 * } catch (Intractable e) { 499 * // handle error - note it is common that finding all simple 500 * // cycles in chemical graphs is intractable 501 * } 502 * } 503 * </pre> 504 * </blockquote> 505 * 506 * @return all simple cycles 507 * @throws Intractable the algorithm reached a limit which caused it to 508 * abort in reasonable time 509 * @see #all() 510 * @see AllCycles 511 * @see org.openscience.cdk.ringsearch.AllRingsFinder 512 */ all(IAtomContainer container)513 public static Cycles all(IAtomContainer container) throws Intractable { 514 return all().find(container, container.getAtomCount()); 515 } 516 517 /** 518 * All cycles of smaller than or equal to the specified length. 519 * 520 * @param container input container 521 * @param length maximum size or cycle to find 522 * @return all cycles 523 * @throws Intractable computation was not feasible 524 */ all(IAtomContainer container, int length)525 public static Cycles all(IAtomContainer container, int length) throws Intractable { 526 return all().find(container, length); 527 } 528 529 /** 530 * Find the minimum cycle basis (MCB) of a molecule. 531 * 532 * <blockquote> 533 * <pre> 534 * for (IAtomContainer m : ms) { 535 * Cycles cycles = Cycles.mcb(m); 536 * IRingSet rings = cycles.toRingSet(); 537 * } 538 * </pre> 539 * </blockquote> 540 * 541 * @return cycles belonging to the minimum cycle basis 542 * @see #mcb() 543 * @see MinimumCycleBasis 544 */ mcb(IAtomContainer container)545 public static Cycles mcb(IAtomContainer container) { 546 return _invoke(mcb(), container); 547 } 548 549 /** 550 * Find the smallest set of smallest rings (SSSR) - aka minimum cycle basis 551 * (MCB) of a molecule. 552 * 553 * <blockquote> 554 * <pre> 555 * for (IAtomContainer m : ms) { 556 * Cycles cycles = Cycles.sssr(m); 557 * IRingSet rings = cycles.toRingSet(); 558 * } 559 * </pre> 560 * </blockquote> 561 * 562 * @return cycles belonging to the minimum cycle basis 563 * @see #mcb() 564 * @see #mcb(org.openscience.cdk.interfaces.IAtomContainer) 565 * @see MinimumCycleBasis 566 */ sssr(IAtomContainer container)567 public static Cycles sssr(IAtomContainer container) { 568 return mcb(container); 569 } 570 571 /** 572 * Find the relevant cycles of a molecule. 573 * 574 * <blockquote> 575 * <pre> 576 * for (IAtomContainer m : ms) { 577 * Cycles cycles = Cycles.relevant(m); 578 * IRingSet rings = cycles.toRingSet(); 579 * } 580 * </pre> 581 * </blockquote> 582 * 583 * @return relevant cycles 584 * @see #relevant() 585 * @see RelevantCycles 586 */ relevant(IAtomContainer container)587 public static Cycles relevant(IAtomContainer container) { 588 return _invoke(relevant(), container); 589 } 590 591 /** 592 * Find the essential cycles of a molecule. 593 * 594 * <blockquote> 595 * <pre> 596 * for (IAtomContainer m : ms) { 597 * Cycles cycles = Cycles.essential(m); 598 * IRingSet rings = cycles.toRingSet(); 599 * } 600 * </pre> 601 * </blockquote> 602 * 603 * @return essential cycles 604 * @see #relevant() 605 * @see RelevantCycles 606 */ essential(IAtomContainer container)607 public static Cycles essential(IAtomContainer container) { 608 return _invoke(essential(), container); 609 } 610 611 /** 612 * Find the triplet short cycles of a molecule. 613 * 614 * <blockquote> 615 * <pre> 616 * for (IAtomContainer m : ms) { 617 * Cycles cycles = Cycles.tripletShort(m); 618 * IRingSet rings = cycles.toRingSet(); 619 * } 620 * </pre> 621 * </blockquote> 622 * 623 * @return triplet short cycles 624 * @see #tripletShort() 625 * @see TripletShortCycles 626 */ tripletShort(IAtomContainer container)627 public static Cycles tripletShort(IAtomContainer container) { 628 return _invoke(tripletShort(), container); 629 } 630 631 /** 632 * Find the vertex short cycles of a molecule. 633 * 634 * <blockquote> 635 * <pre> 636 * for (IAtomContainer m : ms) { 637 * Cycles cycles = Cycles.vertexShort(m); 638 * IRingSet rings = cycles.toRingSet(); 639 * } 640 * </pre> 641 * </blockquote> 642 * 643 * @return triplet short cycles 644 * @see #vertexShort() 645 * @see VertexShortCycles 646 */ vertexShort(IAtomContainer container)647 public static Cycles vertexShort(IAtomContainer container) { 648 return _invoke(vertexShort(), container); 649 } 650 651 /** 652 * Find the edge short cycles of a molecule. 653 * 654 * <blockquote> 655 * <pre> 656 * for (IAtomContainer m : ms) { 657 * Cycles cycles = Cycles.edgeShort(m); 658 * IRingSet rings = cycles.toRingSet(); 659 * } 660 * </pre> 661 * </blockquote> 662 * 663 * @return edge short cycles 664 * @see #edgeShort() 665 * @see EdgeShortCycles 666 */ edgeShort(IAtomContainer container)667 public static Cycles edgeShort(IAtomContainer container) { 668 return _invoke(edgeShort(), container); 669 } 670 671 /** 672 * Derive a new cycle finder that only provides cycles without a chord. 673 * 674 * @param original find the initial cycles before filtering 675 * @return cycles or the original without chords 676 */ unchorded(CycleFinder original)677 public static CycleFinder unchorded(CycleFinder original) { 678 return new Unchorded(original); 679 } 680 681 /** 682 * Internal method to wrap cycle computations which <i>should</i> be 683 * tractable. That is they currently won't throw the exception - if the 684 * method does throw an exception an internal error is triggered as a sanity 685 * check. 686 * 687 * @param finder the cycle finding method 688 * @param container the molecule to find the cycles of 689 * @return the cycles of the molecule 690 */ _invoke(CycleFinder finder, IAtomContainer container)691 private static Cycles _invoke(CycleFinder finder, IAtomContainer container) { 692 return _invoke(finder, container, container.getAtomCount()); 693 } 694 695 /** 696 * Internal method to wrap cycle computations which <i>should</i> be 697 * tractable. That is they currently won't throw the exception - if the 698 * method does throw an exception an internal error is triggered as a sanity 699 * check. 700 * 701 * @param finder the cycle finding method 702 * @param container the molecule to find the cycles of 703 * @param length maximum size or cycle to find 704 * @return the cycles of the molecule 705 */ _invoke(CycleFinder finder, IAtomContainer container, int length)706 private static Cycles _invoke(CycleFinder finder, IAtomContainer container, int length) { 707 try { 708 return finder.find(container, length); 709 } catch (Intractable e) { 710 throw new RuntimeException("Cycle computation should not be intractable: ", e); 711 } 712 } 713 714 /** Interbank enumeration of cycle finders. */ 715 private static enum CycleComputation implements CycleFinder { 716 MCB { 717 718 /** {@inheritDoc} */ 719 @Override apply(int[][] graph, int length)720 int[][] apply(int[][] graph, int length) { 721 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 722 return new MinimumCycleBasis(ic, true).paths(); 723 } 724 }, 725 ESSENTIAL { 726 727 /** {@inheritDoc} */ 728 @Override apply(int[][] graph, int length)729 int[][] apply(int[][] graph, int length) { 730 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 731 RelevantCycles rc = new RelevantCycles(ic); 732 return new EssentialCycles(rc, ic).paths(); 733 } 734 }, 735 RELEVANT { 736 737 /** {@inheritDoc} */ 738 @Override apply(int[][] graph, int length)739 int[][] apply(int[][] graph, int length) { 740 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 741 return new RelevantCycles(ic).paths(); 742 } 743 }, 744 ALL { 745 746 /** {@inheritDoc} */ 747 @Override apply(int[][] graph, int length)748 int[][] apply(int[][] graph, int length) throws Intractable { 749 final int threshold = 684; // see. AllRingsFinder.Threshold.Pubchem_99 750 AllCycles ac = new AllCycles(graph, Math.min(length, graph.length), threshold); 751 if (!ac.completed()) 752 throw new Intractable("A large number of cycles were being generated and the" 753 + " computation was aborted. Please use AllCycles/AllRingsFinder with" 754 + " and specify a larger threshold or use a CycleFinder with a fall-back" 755 + " to a set unique cycles: e.g. Cycles.allOrVertexShort()."); 756 return ac.paths(); 757 } 758 }, 759 TRIPLET_SHORT { 760 761 /** {@inheritDoc} */ 762 @Override apply(int[][] graph, int length)763 int[][] apply(int[][] graph, int length) throws Intractable { 764 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 765 return new TripletShortCycles(new MinimumCycleBasis(ic, true), false).paths(); 766 } 767 }, 768 VERTEX_SHORT { 769 770 /** {@inheritDoc} */ 771 @Override apply(int[][] graph, int length)772 int[][] apply(int[][] graph, int length) throws Intractable { 773 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 774 return new VertexShortCycles(ic).paths(); 775 } 776 }, 777 EDGE_SHORT { 778 779 /** {@inheritDoc} */ 780 @Override apply(int[][] graph, int length)781 int[][] apply(int[][] graph, int length) throws Intractable { 782 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 783 return new EdgeShortCycles(ic).paths(); 784 } 785 }, 786 CDK_AROMATIC { 787 788 /** {@inheritDoc} */ 789 @Override apply(int[][] graph, int length)790 int[][] apply(int[][] graph, int length) throws Intractable { 791 792 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length); 793 MinimumCycleBasis mcb = new MinimumCycleBasis(ic, true); 794 795 // As per the old aromaticity detector if the MCB/SSSR is made 796 // of 2 or 3 rings we check all rings for aromaticity - otherwise 797 // we just check the MCB/SSSR 798 if (mcb.size() > 3) { 799 return mcb.paths(); 800 } else { 801 return ALL.apply(graph, length); 802 } 803 } 804 }, 805 ALL_OR_VERTEX_SHORT { 806 807 /** {@inheritDoc} */ 808 @Override apply(int[][] graph, int length)809 int[][] apply(int[][] graph, int length) throws Intractable { 810 final int threshold = 684; // see. AllRingsFinder.Threshold.Pubchem_99 811 AllCycles ac = new AllCycles(graph, Math.min(length, graph.length), threshold); 812 813 return ac.completed() ? ac.paths() : VERTEX_SHORT.apply(graph, length); 814 } 815 }; 816 817 /** 818 * Apply cycle perception to the graph (g) - graph is expeced to be 819 * biconnected. 820 * 821 * @param graph the graph (adjacency list) 822 * @return the cycles of the graph 823 * @throws Intractable the computation reached a set limit 824 */ apply(int[][] graph, int length)825 abstract int[][] apply(int[][] graph, int length) throws Intractable; 826 827 /**{@inheritDoc} */ 828 @Override find(IAtomContainer molecule)829 public Cycles find(IAtomContainer molecule) throws Intractable { 830 return find(molecule, molecule.getAtomCount()); 831 } 832 833 /** {@inheritDoc} */ 834 @Override find(IAtomContainer molecule, int length)835 public Cycles find(IAtomContainer molecule, int length) throws Intractable { 836 837 EdgeToBondMap bondMap = EdgeToBondMap.withSpaceFor(molecule); 838 int[][] graph = GraphUtil.toAdjList(molecule, bondMap); 839 RingSearch ringSearch = new RingSearch(molecule, graph); 840 841 List<int[]> walks = new ArrayList<int[]>(6); 842 843 // all isolated cycles are relevant - all we need to do is walk around 844 // the vertices in the subset 'isolated' 845 for (int[] isolated : ringSearch.isolated()) { 846 if (isolated.length <= length) walks.add(GraphUtil.cycle(graph, isolated)); 847 } 848 849 // each biconnected component which isn't an isolated cycle is processed 850 // separately as a subgraph. 851 for (int[] fused : ringSearch.fused()) { 852 853 // make a subgraph and 'apply' the cycle computation - the walk 854 // (path) is then lifted to the original graph 855 for (int[] cycle : apply(GraphUtil.subgraph(graph, fused), length)) { 856 walks.add(lift(cycle, fused)); 857 } 858 } 859 860 return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, bondMap); 861 } 862 863 /**{@inheritDoc} */ 864 @Override find(IAtomContainer molecule, int[][] graph, int length)865 public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable { 866 867 RingSearch ringSearch = new RingSearch(molecule, graph); 868 869 List<int[]> walks = new ArrayList<int[]>(6); 870 871 // all isolated cycles are relevant - all we need to do is walk around 872 // the vertices in the subset 'isolated' 873 for (int[] isolated : ringSearch.isolated()) { 874 walks.add(GraphUtil.cycle(graph, isolated)); 875 } 876 877 // each biconnected component which isn't an isolated cycle is processed 878 // separately as a subgraph. 879 for (int[] fused : ringSearch.fused()) { 880 881 // make a subgraph and 'apply' the cycle computation - the walk 882 // (path) is then lifted to the original graph 883 for (int[] cycle : apply(GraphUtil.subgraph(graph, fused), length)) { 884 walks.add(lift(cycle, fused)); 885 } 886 } 887 888 return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, null); 889 } 890 } 891 892 /** 893 * Internal - lifts a path in a subgraph back to the original graph. 894 * 895 * @param path a path 896 * @param mapping vertex mapping 897 * @return the lifted path 898 */ lift(int[] path, int[] mapping)899 private static int[] lift(int[] path, int[] mapping) { 900 for (int i = 0; i < path.length; i++) { 901 path[i] = mapping[path[i]]; 902 } 903 return path; 904 } 905 906 /** 907 * Internal - convert a set of cycles to an ring set. 908 * 909 * @param container molecule 910 * @param cycles a cycle of the chemical graph 911 * @param bondMap mapping of the edges (int,int) to the bonds of the 912 * container 913 * @return the ring set 914 */ toRingSet(IAtomContainer container, int[][] cycles, EdgeToBondMap bondMap)915 private static IRingSet toRingSet(IAtomContainer container, int[][] cycles, EdgeToBondMap bondMap) { 916 917 // note currently no way to say the size of the RingSet 918 // even through we know it 919 IChemObjectBuilder builder = container.getBuilder(); 920 IRingSet rings = builder.newInstance(IRingSet.class); 921 922 for (int[] cycle : cycles) { 923 rings.addAtomContainer(toRing(container, cycle, bondMap)); 924 } 925 926 return rings; 927 } 928 929 /** 930 * Internal - convert a set of cycles to a ring. 931 * 932 * @param container molecule 933 * @param cycle a cycle of the chemical graph 934 * @param bondMap mapping of the edges (int,int) to the bonds of the 935 * container 936 * @return the ring for the specified cycle 937 */ toRing(IAtomContainer container, int[] cycle, EdgeToBondMap bondMap)938 private static IRing toRing(IAtomContainer container, int[] cycle, EdgeToBondMap bondMap) { 939 940 IAtom[] atoms = new IAtom[cycle.length - 1]; 941 IBond[] bonds = new IBond[cycle.length - 1]; 942 943 for (int i = 1; i < cycle.length; i++) { 944 int v = cycle[i]; 945 int u = cycle[i - 1]; 946 atoms[i - 1] = container.getAtom(u); 947 bonds[i - 1] = getBond(container, bondMap, u, v); 948 } 949 950 IChemObjectBuilder builder = container.getBuilder(); 951 IAtomContainer ring = builder.newInstance(IAtomContainer.class, 0, 0, 0, 0); 952 ring.setAtoms(atoms); 953 ring.setBonds(bonds); 954 955 return builder.newInstance(IRing.class, ring); 956 } 957 958 /** 959 * Obtain the bond between the atoms at index 'u' and 'v'. If the 'bondMap' 960 * is non-null it is used for direct lookup otherwise the slower linear 961 * lookup in 'container' is used. 962 * 963 * @param container a structure 964 * @param bondMap optimised map of atom indices to bond instances 965 * @param u an atom index 966 * @param v an atom index (connected to u) 967 * @return the bond between u and v 968 */ getBond(IAtomContainer container, EdgeToBondMap bondMap, int u, int v)969 private static IBond getBond(IAtomContainer container, EdgeToBondMap bondMap, int u, int v) { 970 if (bondMap != null) return bondMap.get(u, v); 971 return container.getBond(container.getAtom(u), container.getAtom(v)); 972 } 973 974 /** 975 * All cycles smaller than or equal to a specified length. 976 */ 977 private static final class AllUpToLength implements CycleFinder { 978 979 private final int predefinedLength; 980 981 // see. AllRingsFinder.Threshold.Pubchem_99 982 private final int threshold = 684; 983 AllUpToLength(int length)984 private AllUpToLength(int length) { 985 this.predefinedLength = length; 986 } 987 988 /**{@inheritDoc} */ 989 @Override find(IAtomContainer molecule)990 public Cycles find(IAtomContainer molecule) throws Intractable { 991 return find(molecule, molecule.getAtomCount()); 992 } 993 994 /**{@inheritDoc} */ 995 @Override find(IAtomContainer molecule, int length)996 public Cycles find(IAtomContainer molecule, int length) throws Intractable { 997 return find(molecule, GraphUtil.toAdjList(molecule), length); 998 } 999 1000 /**{@inheritDoc} */ 1001 @Override find(IAtomContainer molecule, int[][] graph, int length)1002 public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable { 1003 RingSearch ringSearch = new RingSearch(molecule, graph); 1004 1005 if (this.predefinedLength < length) length = this.predefinedLength; 1006 1007 List<int[]> walks = new ArrayList<int[]>(6); 1008 1009 // all isolated cycles are relevant - all we need to do is walk around 1010 // the vertices in the subset 'isolated' 1011 for (int[] isolated : ringSearch.isolated()) { 1012 if (isolated.length <= length) walks.add(GraphUtil.cycle(graph, isolated)); 1013 } 1014 1015 // each biconnected component which isn't an isolated cycle is processed 1016 // separately as a subgraph. 1017 for (int[] fused : ringSearch.fused()) { 1018 1019 // make a subgraph and 'apply' the cycle computation - the walk 1020 // (path) is then lifted to the original graph 1021 for (int[] cycle : findInFused(GraphUtil.subgraph(graph, fused), length)) { 1022 walks.add(lift(cycle, fused)); 1023 } 1024 } 1025 1026 return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, null); 1027 } 1028 1029 /** 1030 * Find rings in a biconnected component. 1031 * 1032 * @param g adjacency list 1033 * @return 1034 * @throws Intractable computation was not feasible 1035 */ findInFused(int[][] g, int length)1036 private int[][] findInFused(int[][] g, int length) throws Intractable { 1037 AllCycles allCycles = new AllCycles(g, Math.min(g.length, length), threshold); 1038 if (!allCycles.completed()) 1039 throw new Intractable("A large number of cycles were being generated and the" 1040 + " computation was aborted. Please us AllCycles/AllRingsFinder with" 1041 + " and specify a larger threshold or an alternative cycle set."); 1042 return allCycles.paths(); 1043 } 1044 } 1045 1046 /** 1047 * Find cycles using a primary cycle finder, if the computation was 1048 * intractable fallback to an auxiliary cycle finder. 1049 */ 1050 private static final class Fallback implements CycleFinder { 1051 1052 private CycleFinder primary, auxiliary; 1053 1054 /** 1055 * Create a fallback for two cycle finders. 1056 * 1057 * @param primary the primary cycle finder 1058 * @param auxiliary the auxiliary cycle finder 1059 */ Fallback(CycleFinder primary, CycleFinder auxiliary)1060 private Fallback(CycleFinder primary, CycleFinder auxiliary) { 1061 this.primary = primary; 1062 this.auxiliary = auxiliary; 1063 } 1064 1065 /**{@inheritDoc} */ 1066 @Override find(IAtomContainer molecule)1067 public Cycles find(IAtomContainer molecule) throws Intractable { 1068 return find(molecule, molecule.getAtomCount()); 1069 } 1070 1071 /**{@inheritDoc} */ 1072 @Override find(IAtomContainer molecule, int length)1073 public Cycles find(IAtomContainer molecule, int length) throws Intractable { 1074 return find(molecule, GraphUtil.toAdjList(molecule), length); 1075 } 1076 1077 /**{@inheritDoc} */ 1078 @Override find(IAtomContainer molecule, int[][] graph, int length)1079 public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable { 1080 try { 1081 return primary.find(molecule, graph, length); 1082 } catch (Intractable e) { 1083 // auxiliary may still thrown an exception 1084 return auxiliary.find(molecule, graph, length); 1085 } 1086 } 1087 } 1088 1089 /** 1090 * Remove cycles with a chord from an existing set of cycles. 1091 */ 1092 private static final class Unchorded implements CycleFinder { 1093 1094 private CycleFinder primary; 1095 1096 /** 1097 * Filter any cycles produced by the {@code primary} cycle finder and 1098 * only allow those without a chord. 1099 * 1100 * @param primary the primary cycle finder 1101 */ Unchorded(CycleFinder primary)1102 private Unchorded(CycleFinder primary) { 1103 this.primary = primary; 1104 } 1105 1106 /**{@inheritDoc} */ 1107 @Override find(IAtomContainer molecule)1108 public Cycles find(IAtomContainer molecule) throws Intractable { 1109 return find(molecule, molecule.getAtomCount()); 1110 } 1111 1112 /**{@inheritDoc} */ 1113 @Override find(IAtomContainer molecule, int length)1114 public Cycles find(IAtomContainer molecule, int length) throws Intractable { 1115 return find(molecule, GraphUtil.toAdjList(molecule), length); 1116 } 1117 1118 /**{@inheritDoc} */ 1119 @Override find(IAtomContainer molecule, int[][] graph, int length)1120 public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable { 1121 1122 Cycles inital = primary.find(molecule, graph, length); 1123 1124 int[][] filtered = new int[inital.numberOfCycles()][0]; 1125 int n = 0; 1126 1127 for (int[] path : inital.paths) { 1128 if (accept(path, graph)) filtered[n++] = path; 1129 } 1130 1131 return new Cycles(Arrays.copyOf(filtered, n), inital.container, inital.bondMap); 1132 } 1133 1134 /** 1135 * The cycle path is accepted if it does not have chord. 1136 * 1137 * @param path a path 1138 * @param graph the adjacency of atoms 1139 * @return accept the path as unchorded 1140 */ accept(int[] path, int[][] graph)1141 private boolean accept(int[] path, int[][] graph) { 1142 BitSet vertices = new BitSet(); 1143 1144 for (int v : path) 1145 vertices.set(v); 1146 1147 for (int j = 1; j < path.length; j++) { 1148 int v = path[j]; 1149 int prev = path[j - 1]; 1150 int next = path[(j + 1) % (path.length - 1)]; 1151 1152 for (int w : graph[v]) { 1153 // chord found 1154 if (w != prev && w != next && vertices.get(w)) return false; 1155 } 1156 1157 } 1158 1159 return true; 1160 } 1161 } 1162 } 1163