1 /*
2  * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
3  *                    John May <jwmay@users.sf.net>
4  *
5  * Contact: cdk-devel@lists.sourceforge.net
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or (at
10  * your option) any later version. All we ask is that proper credit is given
11  * for our work, which includes - but is not limited to - adding the above
12  * copyright notice to the beginning of your source code files, and to any
13  * copyright notice that you may distribute with programs based on this work.
14  *
15  * This program is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18  * License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
23  */
24 
25 package org.openscience.cdk.graph;
26 
27 import org.openscience.cdk.exception.Intractable;
28 import org.openscience.cdk.interfaces.IAtom;
29 import org.openscience.cdk.interfaces.IAtomContainer;
30 import org.openscience.cdk.interfaces.IBond;
31 import org.openscience.cdk.interfaces.IChemObjectBuilder;
32 import org.openscience.cdk.interfaces.IRing;
33 import org.openscience.cdk.interfaces.IRingSet;
34 import org.openscience.cdk.ringsearch.RingSearch;
35 
36 import java.util.ArrayList;
37 import java.util.Arrays;
38 import java.util.BitSet;
39 import java.util.List;
40 
41 import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;
42 
43 /**
44  * A utility class for storing and computing the cycles of a chemical graph.
45  * Utilities are also provided for converting the cycles to {@link IRing}s. A
46  * brief description of each cycle set is given below - for a more comprehensive
47  * review please see - {@cdk.cite Berger04}.
48  *
49  * <ul> <li>{@link #all()} - all simple cycles in the graph, the number of
50  * cycles generated may be very large and may not be feasible for some
51  * molecules, such as, fullerene.</li> <li>{@link #mcb()} (aka. SSSR) - minimum
52  * cycle basis (MCB) of a graph, these cycles are linearly independent and can
53  * be used to generate all of cycles in the graph. It is important to note the
54  * MCB is not unique and a that there may be multiple equally valid MCBs. The
55  * smallest set of smallest rings (SSSR) is often used to refer to the MCB but
56  * originally SSSR was defined as a strictly fundamental cycle basis {@cdk.cite
57  * Berger04}. Not every graph has a strictly fundamental cycle basis the
58  * definition has come to mean the MCB. Due to the non-uniqueness of the
59  * MCB/SSSR its use is discouraged.</li> <li>{@link #relevant()} - relevant
60  * cycles of a graph, the smallest set of uniquely defined short cycles. If a
61  * graph has a single MCB then the relevant cycles and MCB are the same. If the
62  * graph has multiple MCB then the relevant cycles is the union of all MCBs. The
63  * number of relevant cycles may be exponential but it is possible to determine
64  * how many relevant cycles there are in polynomial time without generating
65  * them. For chemical graphs the number of relevant cycles is usually within
66  * manageable bounds. </li> <li>{@link #essential()} - essential cycles of a
67  * graph. Similar to the relevant cycles the set is unique for a graph. If a
68  * graph has a single MCB then the essential cycles and MCB are the same. If the
69  * graph has multiple MCB then the essential cycles is the intersect of all
70  * MCBs. That is the cycles which appear in every MCB. This means that is is
71  * possible to have no essential cycles in a molecule which clearly has cycles
72  * (e.g. bridged system like bicyclo[2.2.2]octane). </li> <li> {@link
73  * #tripletShort()} - the triple short cycles are the shortest cycle through
74  * each triple of vertices. This allows one to generate the envelope rings of
75  * some molecules (e.g. naphthalene) without generating all cycles. The cycles
76  * are primarily useful for the CACTVS Substructure Keys (PubChem fingerprint).
77  * </li> <li> {@link #vertexShort()} - the shortest cycles through each vertex.
78  * Unlike the MCB, linear independence is not checked and it may not be possible
79  * to generate all other cycles from this set. In practice the vertex/edge short
80  * cycles are similar to MCB. </li> <li> {@link #edgeShort()} - the shortest
81  * cycles through each edge. Unlike the MCB, linear independence is not checked
82  * and it may not be possible to generate all other cycles from this set. In
83  * practice the vertex/edge short cycles are similar to MCB. </li> </ul>
84  *
85  * @author John May
86  * @cdk.module core
87  * @cdk.githash
88  */
89 public final class Cycles {
90 
91     /** Vertex paths for each cycle. */
92     private final int[][]        paths;
93 
94     /** The input container - allows us to create 'Ring' objects. */
95     private final IAtomContainer container;
96 
97     /** Mapping for quick lookup of bond mapping. */
98     private final EdgeToBondMap  bondMap;
99 
100     /**
101      * Internal constructor - may change in future but currently just takes the
102      * cycle paths and the container from which they came.
103      *
104      * @param paths     the cycle paths (closed vertex walks)
105      * @param container the input container
106      */
Cycles(int[][] paths, IAtomContainer container, EdgeToBondMap bondMap)107     private Cycles(int[][] paths, IAtomContainer container, EdgeToBondMap bondMap) {
108         this.paths = paths;
109         this.container = container;
110         this.bondMap = bondMap;
111     }
112 
113     /**
114      * How many cycles are stored.
115      *
116      * @return number of cycles
117      */
numberOfCycles()118     public int numberOfCycles() {
119         return paths.length;
120     }
121 
paths()122     public int[][] paths() {
123         int[][] cpy = new int[paths.length][];
124         for (int i = 0; i < paths.length; i++)
125             cpy[i] = paths[i].clone();
126         return cpy;
127     }
128 
129     /**
130      * Convert the cycles to a {@link IRingSet} containing the {@link IAtom}s
131      * and {@link IBond}s of the input molecule.
132      *
133      * @return ringset for the cycles
134      */
toRingSet()135     public IRingSet toRingSet() {
136         return toRingSet(container, paths, bondMap);
137     }
138 
139     /**
140      * Create a cycle finder which will compute all simple cycles in a molecule.
141      * The threshold values can not be tuned and is set at a value which will
142      * complete in reasonable time for most molecules. To change the threshold
143      * values please use the stand-alone {@link AllCycles} or {@link
144      * org.openscience.cdk.ringsearch.AllRingsFinder}. All cycles is every
145      * possible simple cycle (i.e. non-repeating vertices) in the chemical
146      * graph. As an example - all simple cycles of anthracene includes, 3 cycles
147      * of length 6, 2 of length 10 and 1 of length 14.
148      *
149      * <blockquote>
150      * <pre>
151      * CycleFinder cf = Cycles.all();
152      * for (IAtomContainer m : ms) {
153      *     try {
154      *         Cycles   cycles = cf.find(m);
155      *         IRingSet rings  = cycles.toRingSet();
156      *     } catch (Intractable e) {
157      *         // handle error - note it is common that finding all simple
158      *         // cycles in chemical graphs is intractable
159      *     }
160      * }
161      * </pre>
162      * </blockquote>
163      *
164      * @return finder for all simple cycles
165      * @see #all(org.openscience.cdk.interfaces.IAtomContainer)
166      * @see AllCycles
167      * @see org.openscience.cdk.ringsearch.AllRingsFinder
168      */
all()169     public static CycleFinder all() {
170         return CycleComputation.ALL;
171     }
172 
173     /**
174      * All cycles of smaller than or equal to the specified length. If a length
175      * is also provided to {@link CycleFinder#find(IAtomContainer, int)} the
176      * minimum of the two limits is used.
177      *
178      * @param length maximum size or cycle to find
179      * @return cycle finder
180      */
all(int length)181     public static CycleFinder all(int length) {
182         return new AllUpToLength(length);
183     }
184 
185     /**
186      * Create a cycle finder which will compute the minimum cycle basis (MCB) of
187      * a molecule.
188      *
189      * <blockquote>
190      * <pre>
191      * CycleFinder cf = Cycles.mcb();
192      * for (IAtomContainer m : ms) {
193      *     try {
194      *         Cycles   cycles = cf.find(m);
195      *         IRingSet rings  = cycles.toRingSet();
196      *     } catch (Intractable e) {
197      *         // ignore error - MCB should never be intractable
198      *     }
199      * }
200      * </pre>
201      * </blockquote>
202      *
203      * @return finder for all simple cycles
204      * @see #mcb(org.openscience.cdk.interfaces.IAtomContainer)
205      * @see MinimumCycleBasis
206      */
mcb()207     public static CycleFinder mcb() {
208         return CycleComputation.MCB;
209     }
210 
211     /**
212      * Create a cycle finder which will compute the relevant cycle basis (RC) of
213      * a molecule.
214      *
215      * <blockquote>
216      * <pre>
217      * CycleFinder cf = Cycles.relevant();
218      * for (IAtomContainer m : ms) {
219      *     try {
220      *         Cycles   cycles = cf.find(m);
221      *         IRingSet rings  = cycles.toRingSet();
222      *     } catch (Intractable e) {
223      *         // ignore error - there may be an exponential number of cycles
224      *         // but this is not currently checked
225      *     }
226      * }
227      * </pre>
228      * </blockquote>
229      *
230      * @return finder for relevant cycles
231      * @see #relevant(org.openscience.cdk.interfaces.IAtomContainer)
232      * @see RelevantCycles
233      */
relevant()234     public static CycleFinder relevant() {
235         return CycleComputation.RELEVANT;
236     }
237 
238     /**
239      * Create a cycle finder which will compute the essential cycles of a
240      * molecule.
241      *
242      * <blockquote>
243      * <pre>
244      * CycleFinder cf = Cycles.essential();
245      * for (IAtomContainer m : ms) {
246      *     try {
247      *         Cycles   cycles = cf.find(m);
248      *         IRingSet rings  = cycles.toRingSet();
249      *     } catch (Intractable e) {
250      *         // ignore error - essential cycles do not check tractability
251      *     }
252      * }
253      * </pre>
254      * </blockquote>
255      *
256      * @return finder for essential cycles
257      * @see #relevant(org.openscience.cdk.interfaces.IAtomContainer)
258      * @see RelevantCycles
259      */
essential()260     public static CycleFinder essential() {
261         return CycleComputation.ESSENTIAL;
262     }
263 
264     /**
265      * Create a cycle finder which will compute the triplet short cycles of a
266      * molecule. These cycles are the shortest through each triplet of vertices
267      * are utilised in the generation of CACTVS Substructure Keys (PubChem
268      * Fingerprint). Currently the triplet cycles are non-canonical (which in
269      * this algorithms case means unique). For finer tuning of options please
270      * use the {@link TripletShortCycles}.
271      *
272      * <blockquote>
273      * <pre>
274      * CycleFinder cf = Cycles.tripletShort();
275      * for (IAtomContainer m : ms) {
276      *     try {
277      *         Cycles   cycles = cf.find(m);
278      *         IRingSet rings  = cycles.toRingSet();
279      *     } catch (Intractable e) {
280      *         // ignore error - triple short cycles do not check tractability
281      *     }
282      * }
283      * </pre>
284      * </blockquote>
285      *
286      * @return finder for triplet short cycles
287      * @see #tripletShort(org.openscience.cdk.interfaces.IAtomContainer)
288      * @see TripletShortCycles
289      */
tripletShort()290     public static CycleFinder tripletShort() {
291         return CycleComputation.TRIPLET_SHORT;
292     }
293 
294     /**
295      * Create a cycle finder which will compute the shortest cycles of each
296      * vertex in a molecule. Unlike the SSSR/MCB computation linear independence
297      * is not required and provides some performance gain. In practise typical
298      * chemical graphs are small and the linear independence check is relatively
299      * fast.
300      *
301      * <blockquote>
302      * <pre>
303      * CycleFinder cf = Cycles.vertexShort();
304      * for (IAtomContainer m : ms) {
305      *     try {
306      *         Cycles   cycles = cf.find(m);
307      *         IRingSet rings  = cycles.toRingSet();
308      *     } catch (Intractable e) {
309      *         // ignore error - vertex short cycles do not check tractability
310      *     }
311      * }
312      * </pre>
313      * </blockquote>
314      *
315      * @return finder for vertex short cycles
316      * @see #vertexShort(org.openscience.cdk.interfaces.IAtomContainer)
317      */
vertexShort()318     public static CycleFinder vertexShort() {
319         return CycleComputation.VERTEX_SHORT;
320     }
321 
322     /**
323      * Create a cycle finder which will compute the shortest cycles of each
324      * vertex in a molecule. Unlike the SSSR/MCB computation linear independence
325      * is not required and provides some performance gain. In practise typical
326      * chemical graphs are small and the linear independence check is relatively
327      * fast.
328      *
329      * <blockquote>
330      * <pre>
331      * CycleFinder cf = Cycles.edgeShort();
332      * for (IAtomContainer m : ms) {
333      *     try {
334      *         Cycles   cycles = cf.find(m);
335      *         IRingSet rings  = cycles.toRingSet();
336      *     } catch (Intractable e) {
337      *         // ignore error - edge short cycles do not check tractability
338      *     }
339      * }
340      * </pre>
341      * </blockquote>
342      *
343      * @return finder for edge short cycles
344      * @see #edgeShort(org.openscience.cdk.interfaces.IAtomContainer)
345      */
edgeShort()346     public static CycleFinder edgeShort() {
347         return CycleComputation.EDGE_SHORT;
348     }
349 
350     /**
351      * Create a cycle finder which will compute a set of cycles traditionally
352      * used by the CDK to test for aromaticity. This set of cycles is the
353      * MCB/SSSR and {@link #all} cycles for fused systems with 3 or less rings.
354      * This allows on to test aromaticity of envelope rings in compounds such as
355      * azulene without generating an huge number of cycles for large fused
356      * systems (e.g. fullerenes). The use case was that computation of all
357      * cycles previously took a long time and ring systems with more than 2
358      * rings were too difficult. However it is now more efficient to simply
359      * check all cycles/rings without using the MCB/SSSR. This computation will
360      * fail for complex fused systems but the failure is fast and one can easily
361      * 'fall back' to a smaller set of cycles after catching the exception.
362      *
363      * <blockquote>
364      * <pre>
365      * CycleFinder cf = Cycles.cdkAromaticSet();
366      * for (IAtomContainer m : ms) {
367      *     try {
368      *         Cycles   cycles = cf.find(m);
369      *         IRingSet rings  = cycles.toRingSet();
370      *     } catch (Intractable e) {
371      *         // ignore error - edge short cycles do not check tractability
372      *     }
373      * }
374      * </pre>
375      * </blockquote>
376      *
377      * @return finder for cdk aromatic cycles
378      * @see #edgeShort(org.openscience.cdk.interfaces.IAtomContainer)
379      */
cdkAromaticSet()380     public static CycleFinder cdkAromaticSet() {
381         return CycleComputation.CDK_AROMATIC;
382     }
383 
384     /**
385      * Find all cycles in a fused system or if there were too many cycles
386      * fallback and use the shortest cycles through each vertex. Typically the
387      * types of molecules which the vertex short cycles are provided for are
388      * fullerenes. This cycle finder is well suited to aromaticity.
389      *
390      * <blockquote>
391      * <pre>
392      * CycleFinder cf = Cycles.allOrVertexShort();
393      * for (IAtomContainer m : ms) {
394      *     try {
395      *         Cycles   cycles = cf.find(m);
396      *         IRingSet rings  = cycles.toRingSet();
397      *     } catch (Intractable e) {
398      *         // ignore error - edge short cycles do not check tractability
399      *     }
400      * }
401      * </pre>
402      * </blockquote>
403      *
404      * @return a cycle finder which computes all cycles if possible or provides
405      * the vertex short cycles
406      * @deprecated use {@link #or} to define a custom fall-back
407      */
408     @Deprecated
allOrVertexShort()409     public static CycleFinder allOrVertexShort() {
410         return or(all(), vertexShort());
411     }
412 
413 
414     /**
415      * Find and mark all cyclic atoms and bonds in the provided molecule.
416      *
417      * @param mol molecule
418      * @see IBond#isInRing()
419      * @see IAtom#isInRing()
420      * @return Number of rings found (circuit rank)
421      * @see <a href="https://en.wikipedia.org/wiki/Circuit_rank">Circuit Rank</a>
422      */
markRingAtomsAndBonds(IAtomContainer mol)423     public static int markRingAtomsAndBonds(IAtomContainer mol) {
424         EdgeToBondMap bonds = EdgeToBondMap.withSpaceFor(mol);
425         return markRingAtomsAndBonds(mol, GraphUtil.toAdjList(mol, bonds), bonds);
426     }
427 
428     /**
429      * Find and mark all cyclic atoms and bonds in the provided molecule. This optimised version
430      * allows the caller to optionally provided indexed fast access structure which would otherwise
431      * be created.
432      *
433      * @param mol molecule
434      * @see IBond#isInRing()
435      * @see IAtom#isInRing()
436      * @return Number of rings found (circuit rank)
437      * @see <a href="https://en.wikipedia.org/wiki/Circuit_rank">Circuit Rank</a>
438      */
markRingAtomsAndBonds(IAtomContainer mol, int[][] adjList, EdgeToBondMap bondMap)439     public static int markRingAtomsAndBonds(IAtomContainer mol, int[][] adjList, EdgeToBondMap bondMap) {
440         RingSearch ringSearch = new RingSearch(mol, adjList);
441         for (int v = 0; v < mol.getAtomCount(); v++) {
442             mol.getAtom(v).setIsInRing(false);
443             for (int w : adjList[v]) {
444                 // note we only mark the bond on second visit (first v < w) and
445                 // clear flag on first visit (or if non-cyclic)
446                 if (v > w && ringSearch.cyclic(v, w)) {
447                     bondMap.get(v, w).setIsInRing(true);
448                     mol.getAtom(v).setIsInRing(true);
449                     mol.getAtom(w).setIsInRing(true);
450                 } else {
451                     bondMap.get(v, w).setIsInRing(false);
452                 }
453             }
454         }
455         return ringSearch.numRings();
456     }
457 
458     /**
459      * Use an auxiliary cycle finder if the primary method was intractable.
460      *
461      * <blockquote><pre>{@code
462      * // all cycles or all cycles size <= 6
463      * CycleFinder cf = Cycles.or(Cycles.all(), Cycles.all(6));
464      * }</pre></blockquote>
465      *
466      * It is possible to nest multiple levels.
467      *
468      * <blockquote><pre>
469      * // all cycles or relevant or essential
470      * CycleFinder cf = Cycles.or(Cycles.all(),
471      *                            Cycles.or(Cycles.relevant(),
472      *                                      Cycles.essential()));
473      * </pre></blockquote>
474      *
475      * @param primary   primary cycle finding method
476      * @param auxiliary auxiliary cycle finding method if the primary failed
477      * @return a new cycle finder
478      */
or(CycleFinder primary, CycleFinder auxiliary)479     public static CycleFinder or(CycleFinder primary, CycleFinder auxiliary) {
480         return new Fallback(primary, auxiliary);
481     }
482 
483     /**
484      * Find all simple cycles in a molecule. The threshold values can not be
485      * tuned and is set at a value which will complete in reasonable time for
486      * most molecules. To change the threshold values please use the stand-alone
487      * {@link AllCycles} or {@link org.openscience.cdk.ringsearch.AllRingsFinder}.
488      * All cycles is every possible simple cycle (i.e. non-repeating vertices)
489      * in the chemical graph. As an example - all simple cycles of anthracene
490      * includes, 3 cycles of length 6, 2 of length 10 and 1 of length 14.
491      *
492      * <blockquote>
493      * <pre>
494      * for (IAtomContainer m : ms) {
495      *     try {
496      *         Cycles   cycles = Cycles.all(m);
497      *         IRingSet rings  = cycles.toRingSet();
498      *     } catch (Intractable e) {
499      *         // handle error - note it is common that finding all simple
500      *         // cycles in chemical graphs is intractable
501      *     }
502      * }
503      * </pre>
504      * </blockquote>
505      *
506      * @return all simple cycles
507      * @throws Intractable the algorithm reached a limit which caused it to
508      *                     abort in reasonable time
509      * @see #all()
510      * @see AllCycles
511      * @see org.openscience.cdk.ringsearch.AllRingsFinder
512      */
all(IAtomContainer container)513     public static Cycles all(IAtomContainer container) throws Intractable {
514         return all().find(container, container.getAtomCount());
515     }
516 
517     /**
518      * All cycles of smaller than or equal to the specified length.
519      *
520      * @param container input container
521      * @param length    maximum size or cycle to find
522      * @return all cycles
523      * @throws Intractable computation was not feasible
524      */
all(IAtomContainer container, int length)525     public static Cycles all(IAtomContainer container, int length) throws Intractable {
526         return all().find(container, length);
527     }
528 
529     /**
530      * Find the minimum cycle basis (MCB) of a molecule.
531      *
532      * <blockquote>
533      * <pre>
534      * for (IAtomContainer m : ms) {
535      *     Cycles   cycles = Cycles.mcb(m);
536      *     IRingSet rings  = cycles.toRingSet();
537      * }
538      * </pre>
539      * </blockquote>
540      *
541      * @return cycles belonging to the minimum cycle basis
542      * @see #mcb()
543      * @see MinimumCycleBasis
544      */
mcb(IAtomContainer container)545     public static Cycles mcb(IAtomContainer container) {
546         return _invoke(mcb(), container);
547     }
548 
549     /**
550      * Find the smallest set of smallest rings (SSSR) - aka minimum cycle basis
551      * (MCB) of a molecule.
552      *
553      * <blockquote>
554      * <pre>
555      * for (IAtomContainer m : ms) {
556      *     Cycles   cycles = Cycles.sssr(m);
557      *     IRingSet rings  = cycles.toRingSet();
558      * }
559      * </pre>
560      * </blockquote>
561      *
562      * @return cycles belonging to the minimum cycle basis
563      * @see #mcb()
564      * @see #mcb(org.openscience.cdk.interfaces.IAtomContainer)
565      * @see MinimumCycleBasis
566      */
sssr(IAtomContainer container)567     public static Cycles sssr(IAtomContainer container) {
568         return mcb(container);
569     }
570 
571     /**
572      * Find the relevant cycles of a molecule.
573      *
574      * <blockquote>
575      * <pre>
576      * for (IAtomContainer m : ms) {
577      *     Cycles   cycles = Cycles.relevant(m);
578      *     IRingSet rings  = cycles.toRingSet();
579      * }
580      * </pre>
581      * </blockquote>
582      *
583      * @return relevant cycles
584      * @see #relevant()
585      * @see RelevantCycles
586      */
relevant(IAtomContainer container)587     public static Cycles relevant(IAtomContainer container) {
588         return _invoke(relevant(), container);
589     }
590 
591     /**
592      * Find the essential cycles of a molecule.
593      *
594      * <blockquote>
595      * <pre>
596      * for (IAtomContainer m : ms) {
597      *     Cycles   cycles = Cycles.essential(m);
598      *     IRingSet rings  = cycles.toRingSet();
599      * }
600      * </pre>
601      * </blockquote>
602      *
603      * @return essential cycles
604      * @see #relevant()
605      * @see RelevantCycles
606      */
essential(IAtomContainer container)607     public static Cycles essential(IAtomContainer container) {
608         return _invoke(essential(), container);
609     }
610 
611     /**
612      * Find the triplet short cycles of a molecule.
613      *
614      * <blockquote>
615      * <pre>
616      * for (IAtomContainer m : ms) {
617      *     Cycles   cycles = Cycles.tripletShort(m);
618      *     IRingSet rings  = cycles.toRingSet();
619      * }
620      * </pre>
621      * </blockquote>
622      *
623      * @return triplet short cycles
624      * @see #tripletShort()
625      * @see TripletShortCycles
626      */
tripletShort(IAtomContainer container)627     public static Cycles tripletShort(IAtomContainer container) {
628         return _invoke(tripletShort(), container);
629     }
630 
631     /**
632      * Find the vertex short cycles of a molecule.
633      *
634      * <blockquote>
635      * <pre>
636      * for (IAtomContainer m : ms) {
637      *     Cycles   cycles = Cycles.vertexShort(m);
638      *     IRingSet rings  = cycles.toRingSet();
639      * }
640      * </pre>
641      * </blockquote>
642      *
643      * @return triplet short cycles
644      * @see #vertexShort()
645      * @see VertexShortCycles
646      */
vertexShort(IAtomContainer container)647     public static Cycles vertexShort(IAtomContainer container) {
648         return _invoke(vertexShort(), container);
649     }
650 
651     /**
652      * Find the edge short cycles of a molecule.
653      *
654      * <blockquote>
655      * <pre>
656      * for (IAtomContainer m : ms) {
657      *     Cycles   cycles = Cycles.edgeShort(m);
658      *     IRingSet rings  = cycles.toRingSet();
659      * }
660      * </pre>
661      * </blockquote>
662      *
663      * @return edge short cycles
664      * @see #edgeShort()
665      * @see EdgeShortCycles
666      */
edgeShort(IAtomContainer container)667     public static Cycles edgeShort(IAtomContainer container) {
668         return _invoke(edgeShort(), container);
669     }
670 
671     /**
672      * Derive a new cycle finder that only provides cycles without a chord.
673      *
674      * @param original find the initial cycles before filtering
675      * @return cycles or the original without chords
676      */
unchorded(CycleFinder original)677     public static CycleFinder unchorded(CycleFinder original) {
678         return new Unchorded(original);
679     }
680 
681     /**
682      * Internal method to wrap cycle computations which <i>should</i> be
683      * tractable. That is they currently won't throw the exception - if the
684      * method does throw an exception an internal error is triggered as a sanity
685      * check.
686      *
687      * @param finder    the cycle finding method
688      * @param container the molecule to find the cycles of
689      * @return the cycles of the molecule
690      */
_invoke(CycleFinder finder, IAtomContainer container)691     private static Cycles _invoke(CycleFinder finder, IAtomContainer container) {
692         return _invoke(finder, container, container.getAtomCount());
693     }
694 
695     /**
696      * Internal method to wrap cycle computations which <i>should</i> be
697      * tractable. That is they currently won't throw the exception - if the
698      * method does throw an exception an internal error is triggered as a sanity
699      * check.
700      *
701      * @param finder    the cycle finding method
702      * @param container the molecule to find the cycles of
703      * @param length    maximum size or cycle to find
704      * @return the cycles of the molecule
705      */
_invoke(CycleFinder finder, IAtomContainer container, int length)706     private static Cycles _invoke(CycleFinder finder, IAtomContainer container, int length) {
707         try {
708             return finder.find(container, length);
709         } catch (Intractable e) {
710             throw new RuntimeException("Cycle computation should not be intractable: ", e);
711         }
712     }
713 
714     /** Interbank enumeration of cycle finders. */
715     private static enum CycleComputation implements CycleFinder {
716         MCB {
717 
718             /** {@inheritDoc} */
719             @Override
apply(int[][] graph, int length)720             int[][] apply(int[][] graph, int length) {
721                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
722                 return new MinimumCycleBasis(ic, true).paths();
723             }
724         },
725         ESSENTIAL {
726 
727             /** {@inheritDoc} */
728             @Override
apply(int[][] graph, int length)729             int[][] apply(int[][] graph, int length) {
730                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
731                 RelevantCycles rc = new RelevantCycles(ic);
732                 return new EssentialCycles(rc, ic).paths();
733             }
734         },
735         RELEVANT {
736 
737             /** {@inheritDoc} */
738             @Override
apply(int[][] graph, int length)739             int[][] apply(int[][] graph, int length) {
740                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
741                 return new RelevantCycles(ic).paths();
742             }
743         },
744         ALL {
745 
746             /** {@inheritDoc} */
747             @Override
apply(int[][] graph, int length)748             int[][] apply(int[][] graph, int length) throws Intractable {
749                 final int threshold = 684; // see. AllRingsFinder.Threshold.Pubchem_99
750                 AllCycles ac = new AllCycles(graph, Math.min(length, graph.length), threshold);
751                 if (!ac.completed())
752                     throw new Intractable("A large number of cycles were being generated and the"
753                             + " computation was aborted. Please use AllCycles/AllRingsFinder with"
754                             + " and specify a larger threshold or use a CycleFinder with a fall-back"
755                             + " to a set unique cycles: e.g. Cycles.allOrVertexShort().");
756                 return ac.paths();
757             }
758         },
759         TRIPLET_SHORT {
760 
761             /** {@inheritDoc} */
762             @Override
apply(int[][] graph, int length)763             int[][] apply(int[][] graph, int length) throws Intractable {
764                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
765                 return new TripletShortCycles(new MinimumCycleBasis(ic, true), false).paths();
766             }
767         },
768         VERTEX_SHORT {
769 
770             /** {@inheritDoc} */
771             @Override
apply(int[][] graph, int length)772             int[][] apply(int[][] graph, int length) throws Intractable {
773                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
774                 return new VertexShortCycles(ic).paths();
775             }
776         },
777         EDGE_SHORT {
778 
779             /** {@inheritDoc} */
780             @Override
apply(int[][] graph, int length)781             int[][] apply(int[][] graph, int length) throws Intractable {
782                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
783                 return new EdgeShortCycles(ic).paths();
784             }
785         },
786         CDK_AROMATIC {
787 
788             /** {@inheritDoc} */
789             @Override
apply(int[][] graph, int length)790             int[][] apply(int[][] graph, int length) throws Intractable {
791 
792                 InitialCycles ic = InitialCycles.ofBiconnectedComponent(graph, length);
793                 MinimumCycleBasis mcb = new MinimumCycleBasis(ic, true);
794 
795                 // As per the old aromaticity detector if the MCB/SSSR is made
796                 // of 2 or 3 rings we check all rings for aromaticity - otherwise
797                 // we just check the MCB/SSSR
798                 if (mcb.size() > 3) {
799                     return mcb.paths();
800                 } else {
801                     return ALL.apply(graph, length);
802                 }
803             }
804         },
805         ALL_OR_VERTEX_SHORT {
806 
807             /** {@inheritDoc} */
808             @Override
apply(int[][] graph, int length)809             int[][] apply(int[][] graph, int length) throws Intractable {
810                 final int threshold = 684; // see. AllRingsFinder.Threshold.Pubchem_99
811                 AllCycles ac = new AllCycles(graph, Math.min(length, graph.length), threshold);
812 
813                 return ac.completed() ? ac.paths() : VERTEX_SHORT.apply(graph, length);
814             }
815         };
816 
817         /**
818          * Apply cycle perception to the graph (g) - graph is expeced to be
819          * biconnected.
820          *
821          * @param graph the graph (adjacency list)
822          * @return the cycles of the graph
823          * @throws Intractable the computation reached a set limit
824          */
apply(int[][] graph, int length)825         abstract int[][] apply(int[][] graph, int length) throws Intractable;
826 
827         /**{@inheritDoc} */
828         @Override
find(IAtomContainer molecule)829         public Cycles find(IAtomContainer molecule) throws Intractable {
830             return find(molecule, molecule.getAtomCount());
831         }
832 
833         /** {@inheritDoc} */
834         @Override
find(IAtomContainer molecule, int length)835         public Cycles find(IAtomContainer molecule, int length) throws Intractable {
836 
837             EdgeToBondMap bondMap = EdgeToBondMap.withSpaceFor(molecule);
838             int[][] graph = GraphUtil.toAdjList(molecule, bondMap);
839             RingSearch ringSearch = new RingSearch(molecule, graph);
840 
841             List<int[]> walks = new ArrayList<int[]>(6);
842 
843             // all isolated cycles are relevant - all we need to do is walk around
844             // the vertices in the subset 'isolated'
845             for (int[] isolated : ringSearch.isolated()) {
846                 if (isolated.length <= length) walks.add(GraphUtil.cycle(graph, isolated));
847             }
848 
849             // each biconnected component which isn't an isolated cycle is processed
850             // separately as a subgraph.
851             for (int[] fused : ringSearch.fused()) {
852 
853                 // make a subgraph and 'apply' the cycle computation - the walk
854                 // (path) is then lifted to the original graph
855                 for (int[] cycle : apply(GraphUtil.subgraph(graph, fused), length)) {
856                     walks.add(lift(cycle, fused));
857                 }
858             }
859 
860             return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, bondMap);
861         }
862 
863         /**{@inheritDoc} */
864         @Override
find(IAtomContainer molecule, int[][] graph, int length)865         public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable {
866 
867             RingSearch ringSearch = new RingSearch(molecule, graph);
868 
869             List<int[]> walks = new ArrayList<int[]>(6);
870 
871             // all isolated cycles are relevant - all we need to do is walk around
872             // the vertices in the subset 'isolated'
873             for (int[] isolated : ringSearch.isolated()) {
874                 walks.add(GraphUtil.cycle(graph, isolated));
875             }
876 
877             // each biconnected component which isn't an isolated cycle is processed
878             // separately as a subgraph.
879             for (int[] fused : ringSearch.fused()) {
880 
881                 // make a subgraph and 'apply' the cycle computation - the walk
882                 // (path) is then lifted to the original graph
883                 for (int[] cycle : apply(GraphUtil.subgraph(graph, fused), length)) {
884                     walks.add(lift(cycle, fused));
885                 }
886             }
887 
888             return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, null);
889         }
890     }
891 
892     /**
893      * Internal - lifts a path in a subgraph back to the original graph.
894      *
895      * @param path    a path
896      * @param mapping vertex mapping
897      * @return the lifted path
898      */
lift(int[] path, int[] mapping)899     private static int[] lift(int[] path, int[] mapping) {
900         for (int i = 0; i < path.length; i++) {
901             path[i] = mapping[path[i]];
902         }
903         return path;
904     }
905 
906     /**
907      * Internal - convert a set of cycles to an ring set.
908      *
909      * @param container molecule
910      * @param cycles    a cycle of the chemical graph
911      * @param bondMap   mapping of the edges (int,int) to the bonds of the
912      *                  container
913      * @return the ring set
914      */
toRingSet(IAtomContainer container, int[][] cycles, EdgeToBondMap bondMap)915     private static IRingSet toRingSet(IAtomContainer container, int[][] cycles, EdgeToBondMap bondMap) {
916 
917         // note currently no way to say the size of the RingSet
918         // even through we know it
919         IChemObjectBuilder builder = container.getBuilder();
920         IRingSet rings = builder.newInstance(IRingSet.class);
921 
922         for (int[] cycle : cycles) {
923             rings.addAtomContainer(toRing(container, cycle, bondMap));
924         }
925 
926         return rings;
927     }
928 
929     /**
930      * Internal - convert a set of cycles to a ring.
931      *
932      * @param container molecule
933      * @param cycle     a cycle of the chemical graph
934      * @param bondMap   mapping of the edges (int,int) to the bonds of the
935      *                  container
936      * @return the ring for the specified cycle
937      */
toRing(IAtomContainer container, int[] cycle, EdgeToBondMap bondMap)938     private static IRing toRing(IAtomContainer container, int[] cycle, EdgeToBondMap bondMap) {
939 
940         IAtom[] atoms = new IAtom[cycle.length - 1];
941         IBond[] bonds = new IBond[cycle.length - 1];
942 
943         for (int i = 1; i < cycle.length; i++) {
944             int v = cycle[i];
945             int u = cycle[i - 1];
946             atoms[i - 1] = container.getAtom(u);
947             bonds[i - 1] = getBond(container, bondMap, u, v);
948         }
949 
950         IChemObjectBuilder builder = container.getBuilder();
951         IAtomContainer ring = builder.newInstance(IAtomContainer.class, 0, 0, 0, 0);
952         ring.setAtoms(atoms);
953         ring.setBonds(bonds);
954 
955         return builder.newInstance(IRing.class, ring);
956     }
957 
958     /**
959      * Obtain the bond between the atoms at index 'u' and 'v'. If the 'bondMap'
960      * is non-null it is used for direct lookup otherwise the slower linear
961      * lookup in 'container' is used.
962      *
963      * @param container a structure
964      * @param bondMap   optimised map of atom indices to bond instances
965      * @param u         an atom index
966      * @param v         an atom index (connected to u)
967      * @return the bond between u and v
968      */
getBond(IAtomContainer container, EdgeToBondMap bondMap, int u, int v)969     private static IBond getBond(IAtomContainer container, EdgeToBondMap bondMap, int u, int v) {
970         if (bondMap != null) return bondMap.get(u, v);
971         return container.getBond(container.getAtom(u), container.getAtom(v));
972     }
973 
974     /**
975      * All cycles smaller than or equal to a specified length.
976      */
977     private static final class AllUpToLength implements CycleFinder {
978 
979         private final int predefinedLength;
980 
981         // see. AllRingsFinder.Threshold.Pubchem_99
982         private final int threshold = 684;
983 
AllUpToLength(int length)984         private AllUpToLength(int length) {
985             this.predefinedLength = length;
986         }
987 
988         /**{@inheritDoc} */
989         @Override
find(IAtomContainer molecule)990         public Cycles find(IAtomContainer molecule) throws Intractable {
991             return find(molecule, molecule.getAtomCount());
992         }
993 
994         /**{@inheritDoc} */
995         @Override
find(IAtomContainer molecule, int length)996         public Cycles find(IAtomContainer molecule, int length) throws Intractable {
997             return find(molecule, GraphUtil.toAdjList(molecule), length);
998         }
999 
1000         /**{@inheritDoc} */
1001         @Override
find(IAtomContainer molecule, int[][] graph, int length)1002         public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable {
1003             RingSearch ringSearch = new RingSearch(molecule, graph);
1004 
1005             if (this.predefinedLength < length) length = this.predefinedLength;
1006 
1007             List<int[]> walks = new ArrayList<int[]>(6);
1008 
1009             // all isolated cycles are relevant - all we need to do is walk around
1010             // the vertices in the subset 'isolated'
1011             for (int[] isolated : ringSearch.isolated()) {
1012                 if (isolated.length <= length) walks.add(GraphUtil.cycle(graph, isolated));
1013             }
1014 
1015             // each biconnected component which isn't an isolated cycle is processed
1016             // separately as a subgraph.
1017             for (int[] fused : ringSearch.fused()) {
1018 
1019                 // make a subgraph and 'apply' the cycle computation - the walk
1020                 // (path) is then lifted to the original graph
1021                 for (int[] cycle : findInFused(GraphUtil.subgraph(graph, fused), length)) {
1022                     walks.add(lift(cycle, fused));
1023                 }
1024             }
1025 
1026             return new Cycles(walks.toArray(new int[walks.size()][0]), molecule, null);
1027         }
1028 
1029         /**
1030          * Find rings in a biconnected component.
1031          *
1032          * @param g adjacency list
1033          * @return
1034          * @throws Intractable computation was not feasible
1035          */
findInFused(int[][] g, int length)1036         private int[][] findInFused(int[][] g, int length) throws Intractable {
1037             AllCycles allCycles = new AllCycles(g, Math.min(g.length, length), threshold);
1038             if (!allCycles.completed())
1039                 throw new Intractable("A large number of cycles were being generated and the"
1040                         + " computation was aborted. Please us AllCycles/AllRingsFinder with"
1041                         + " and specify a larger threshold or an alternative cycle set.");
1042             return allCycles.paths();
1043         }
1044     }
1045 
1046     /**
1047      * Find cycles using a primary cycle finder, if the computation was
1048      * intractable fallback to an auxiliary cycle finder.
1049      */
1050     private static final class Fallback implements CycleFinder {
1051 
1052         private CycleFinder primary, auxiliary;
1053 
1054         /**
1055          * Create a fallback for two cycle finders.
1056          *
1057          * @param primary   the primary cycle finder
1058          * @param auxiliary the auxiliary cycle finder
1059          */
Fallback(CycleFinder primary, CycleFinder auxiliary)1060         private Fallback(CycleFinder primary, CycleFinder auxiliary) {
1061             this.primary = primary;
1062             this.auxiliary = auxiliary;
1063         }
1064 
1065         /**{@inheritDoc} */
1066         @Override
find(IAtomContainer molecule)1067         public Cycles find(IAtomContainer molecule) throws Intractable {
1068             return find(molecule, molecule.getAtomCount());
1069         }
1070 
1071         /**{@inheritDoc} */
1072         @Override
find(IAtomContainer molecule, int length)1073         public Cycles find(IAtomContainer molecule, int length) throws Intractable {
1074             return find(molecule, GraphUtil.toAdjList(molecule), length);
1075         }
1076 
1077         /**{@inheritDoc} */
1078         @Override
find(IAtomContainer molecule, int[][] graph, int length)1079         public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable {
1080             try {
1081                 return primary.find(molecule, graph, length);
1082             } catch (Intractable e) {
1083                 // auxiliary may still thrown an exception
1084                 return auxiliary.find(molecule, graph, length);
1085             }
1086         }
1087     }
1088 
1089     /**
1090      * Remove cycles with a chord from an existing set of cycles.
1091      */
1092     private static final class Unchorded implements CycleFinder {
1093 
1094         private CycleFinder primary;
1095 
1096         /**
1097          * Filter any cycles produced by the {@code primary} cycle finder and
1098          * only allow those without a chord.
1099          *
1100          * @param primary the primary cycle finder
1101          */
Unchorded(CycleFinder primary)1102         private Unchorded(CycleFinder primary) {
1103             this.primary = primary;
1104         }
1105 
1106         /**{@inheritDoc} */
1107         @Override
find(IAtomContainer molecule)1108         public Cycles find(IAtomContainer molecule) throws Intractable {
1109             return find(molecule, molecule.getAtomCount());
1110         }
1111 
1112         /**{@inheritDoc} */
1113         @Override
find(IAtomContainer molecule, int length)1114         public Cycles find(IAtomContainer molecule, int length) throws Intractable {
1115             return find(molecule, GraphUtil.toAdjList(molecule), length);
1116         }
1117 
1118         /**{@inheritDoc} */
1119         @Override
find(IAtomContainer molecule, int[][] graph, int length)1120         public Cycles find(IAtomContainer molecule, int[][] graph, int length) throws Intractable {
1121 
1122             Cycles inital = primary.find(molecule, graph, length);
1123 
1124             int[][] filtered = new int[inital.numberOfCycles()][0];
1125             int n = 0;
1126 
1127             for (int[] path : inital.paths) {
1128                 if (accept(path, graph)) filtered[n++] = path;
1129             }
1130 
1131             return new Cycles(Arrays.copyOf(filtered, n), inital.container, inital.bondMap);
1132         }
1133 
1134         /**
1135          * The cycle path is accepted if it does not have chord.
1136          *
1137          * @param path  a path
1138          * @param graph the adjacency of atoms
1139          * @return accept the path as unchorded
1140          */
accept(int[] path, int[][] graph)1141         private boolean accept(int[] path, int[][] graph) {
1142             BitSet vertices = new BitSet();
1143 
1144             for (int v : path)
1145                 vertices.set(v);
1146 
1147             for (int j = 1; j < path.length; j++) {
1148                 int v = path[j];
1149                 int prev = path[j - 1];
1150                 int next = path[(j + 1) % (path.length - 1)];
1151 
1152                 for (int w : graph[v]) {
1153                     // chord found
1154                     if (w != prev && w != next && vertices.get(w)) return false;
1155                 }
1156 
1157             }
1158 
1159             return true;
1160         }
1161     }
1162 }
1163