1 /* $RCSfile$
2  * $Author$
3  * $Date$
4  * $Revision$
5  *
6  * Copyright (C) 2017  The Jmol Development Team
7  *
8  *
9  * Contact: jmol-developers@lists.sf.net
10  *
11  *  This library is free software; you can redistribute it and/or
12  *  modify it under the terms of the GNU Lesser General Public
13  *  License as published by the Free Software Foundation; either
14  *  version 2.1 of the License, or (at your option) any later version.
15  *
16  *  This library is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  Lesser General Public License for more details.
20  *
21  *  You should have received a copy of the GNU Lesser General Public
22  *  License along with this library; if not, write to the Free Software
23  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24  *  02110-1301, USA.
25  */
26 
27 package org.jmol.symmetry;
28 
29 import java.util.Arrays;
30 import java.util.Collections;
31 import java.util.Hashtable;
32 import java.util.Map;
33 
34 import javajs.util.BS;
35 import javajs.util.Lst;
36 import javajs.util.PT;
37 
38 import org.jmol.util.Elements;
39 import org.jmol.util.Logger;
40 import org.jmol.util.SimpleEdge;
41 import org.jmol.util.SimpleNode;
42 import org.jmol.viewer.JC;
43 
44 /**
45  * A fully validated relatively efficient implementation of Cahn-Ingold-Prelog
46  * rules for assigning R/S, M/P, and E/Z stereochemical descriptors. Based on
47  * IUPAC Blue Book rules of 2013 and assorted corrections.
48  *
49  * IUPAC Project: Corrections, Revisions and Extension for the Nomenclature of
50  * Organic Chemistry - IUPAC Recommendations and Preferred Names 2013 (the IUPAC
51  * Blue Book)
52  * https://iupac.org/projects/project-details/?project_nr=2001-043-1-800
53  * http://www.sbcs.qmul.ac.uk/iupac/bibliog/BBerrors.html
54  *
55  * Settable options:
56  *
57  * set testflag1    use advanced in/out-sensitive Rule 6 (r,r-bicyclo[2.2.2]octane)
58  * set testflag2    turn off tracking (saving of _M.CIPInfo) for speed
59  *
60  * Features include:
61  *
62  * - deeply validated
63  *
64  * - includes revised Rules 1b, and 2
65  *
66  * - includes a proposed Rule 6
67  *
68  * - implemented in Java (Jmol) and JavaScript (JSmol)
69  *
70  * - only a few Java classes; < 1000 lines
71  *
72  * - efficient, one-pass process for each center using a single finite digraph
73  * for all auxiliary descriptors
74  *
75  * - exhaustive processing of all 9 sequence rules (1a, 1b, 2, 3, 4a, 4b, 4c, 5,
76  * 6)
77  *
78  * - includes R/S, r/s, M/P (axial, not planar), E/Z
79  *
80  * - covers any-length odd and even cumulenes
81  *
82  * - uses Jmol conformational SMARTS to detect atropisomers and helicenes
83  *
84  * - covers chiral phosphorus and sulfur, including trigonal pyramidal and
85  * tetrahedral
86  *
87  * - properly treats complex combinations of R/S, M/P, and seqCis/seqTrans
88  * centers (Rule 4b)
89  *
90  * - properly treats neutral-species resonance structures using fractional
91  * atomic mass and a modified Rule 1b
92  *
93  * - implements CIP spiro rule (BB P-93.5.3.1) as part of Rule 6
94  *
95  * - detects small rings (fewer than 8 members) and removes E/Z specifications
96  * for such
97  *
98  * - detects chiral bridgehead nitrogens and E/Z imines and diazines
99  *
100  * - reports atom descriptor along with the rule that ultimately decided it
101  *
102  * - fills _M.CIPInfo with detailed information about how each ligand was decided
103  *   (feature turned off by set testflag2)
104  *
105  * - generates advanced Rule 6 descriptors for cubane and the like. (Generally 'r')
106  *   using set testflag1
107  *
108  * Primary 236-compound Chapter-9 validation set (AY-236) provided by Andrey
109  * Yerin, ACD/Labs (Moscow).
110  *
111  * Mikko Vainio also supplied a 64-compound testing suite (MV-64), which is
112  * available on SourceForge in the Jmol-datafiles directory.
113  * (https://sourceforge.net/p/jmol/code/HEAD/tree/trunk/Jmol-datafiles/cip).
114  *
115  * Additional test structures provided by John Mayfield.
116  *
117  * Additional thanks to the IUPAC Blue Book Revision project, specifically
118  * Karl-Heinz Hellwich for alerting me to the errata page for the 2013 IUPAC
119  * specs (http://www.chem.qmul.ac.uk/iupac/bibliog/BBerrors.html), Gerry Moss
120  * for discussions, Andrey Yerin for discussion and digraph checking.
121  *
122  * Many thanks to the members of the BlueObelisk-Discuss group, particularly
123  * Mikko Vainio, John Mayfield (aka John May), Wolf Ihlenfeldt, and Egon
124  * Willighagen, for encouragement, examples, serious skepticism, and extremely
125  * helpful advice.
126  *
127  * References:
128  *
129  * CIP(1966) R.S. Cahn, C. Ingold, V. Prelog, Specification of Molecular
130  * Chirality, Angew.Chem. Internat. Edit. 5, 385ff
131  *
132  * Custer(1986) Roland H. Custer, Mathematical Statements About the Revised
133  * CIP-System, MATCH, 21, 1986, 3-31
134  * http://match.pmf.kg.ac.rs/electronic_versions/Match21/match21_3-31.pdf
135  *
136  * Mata(1993) Paulina Mata, Ana M. Lobo, Chris Marshall, A.Peter Johnson The CIP
137  * sequence rules: Analysis and proposal for a revision, Tetrahedron: Asymmetry,
138  * Volume 4, Issue 4, April 1993, Pages 657-668
139  *
140  * Mata(1994) Paulina Mata, Ana M. Lobo, Chris Marshall, and A. Peter Johnson,
141  * Implementation of the Cahn-Ingold-Prelog System for Stereochemical Perception
142  * in the LHASA Program, J. Chem. Inf. Comput. Sci. 1994, 34, 491-504 491
143  * http://pubs.acs.org/doi/abs/10.1021/ci00019a004
144  *
145  * Mata(2005) Paulina Mata, Ana M. Lobo, The Cahn, Ingold and Prelog System:
146  * eliminating ambiguity in the comparison of diastereomorphic and
147  * enantiomorphic ligands, Tetrahedron: Asymmetry, Volume 16, Issue 13, 4 July
148  * 2005, Pages 2215-2223
149  *
150  * Favre(2013) Henri A Favre, Warren H Powell, Nomenclature of Organic Chemistry
151  * : IUPAC Recommendations and Preferred Names 2013 DOI:10.1039/9781849733069
152  * http://pubs.rsc.org/en/content/ebook/9780854041824#!divbookcontent
153  *
154  * code history:
155  *
156  * 5/12/18 Jmol 14.29.14 fixes minor Rule 5 bug and adds advanced Rule 6 in/out testflag1 option (857 lines)
157  *
158  * 5/1/18  Jmol 14.29.14 fixes enantiomorphic Rule 5 R/S check for BH64_85 and BH64_86
159  *
160  * 4/25/18 Jmol 14.29.14 fixes spiroallene Rule 6 issue for BH64_84
161  *
162  * 4/23/18 Jmol 14.29.14 fixes Rule 2 for JM_008, involving mass and duplicates (824 lines)
163  *
164  * 4/11/18 Jmol 14.29.13 adds optional CIPDataTracker class (822 lines)
165  *
166  * 4/2/18 Jmol 14.29.13 adds optional CIPDataSmiles class
167  *
168  * 4/2/18 Jmol 14.29.13 adds John's "mancude-like" cyclic conjugated ene Kekule
169  * averaging
170  *
171  * 12/10/17 Jmol 14.29.9 adds CIPData, mancude Kekule averaging
172  *
173  * 11/11/17 Jmol 14.25.1 adds "duplicate over terminal" in Rule 1b; streamlined
174  * (777 lines)
175  *
176  * 11/05/17 Jmol 14.24.1 fixes a problem with seqCis/seqTrans and also with Rule
177  * 2 (799 lines)
178  *
179  * 10/17/17 Jmol 14.20.10 adds S4 check in Rule 6 and also fixes bug in aux
180  * descriptors being skipped when two ligands are equivalent for the root (798
181  * lines)
182  *
183  * 9/19/17 CIPChirality code simplification (778 lines)
184  *
185  * 9/14/17 Jmol 14.20.6 switching to Mikko's idea for Rule 4b and 5. Abandons
186  * "thread" idea. Uses breadth-first algorithm for generating bitsets for R and
187  * S. Processing time reduced by 50%. Still could be optimized some. (820 lines)
188  *
189  * 7/25/17 Jmol 14.20.4 consolidates all ene determinations; moves auxiliary
190  * descriptor generation to prior to Rule 3 (850 lines) 7/23/17 Jmol 14.20.4
191  * adds Rule 6; rewrite/consolidate spiro, C3, double spiran code (853 lines)
192  *
193  * 7/19/17 Jmol 14.20.3 fixing Rule 2 (880 lines) 7/13/17 Jmol 14.20.3 more
194  * thorough spiro testing (858 lines) 7/10/17 Jmol 14.20.2 adding check for C3
195  * and double spiran (CIP 1966 #32 and #33) 7/8/17 Jmol 14.20.2 adding presort
196  * for Rules 4a and 4c (test12.mol; 828 lines)
197  *
198  * 7/7/17 Jmol 14.20.1 minor coding efficiencies (833 lines)
199  *
200  * 7/6/17 Jmol 14.20.1 major rewrite to correct and simplify logic; full
201  * validation for 433 structures (many duplicates) in AY236, BH64, MV64, MV116,
202  * JM, and L (836 lines)
203  *
204  * 6/30/17 Jmol 14.20.1 major rewrite of Rule 4b (999 lines)
205  *
206  * 6/25/17 Jmol 14.19.1 minor fixes for Rule 4b and 5 for BH64_012-015; better
207  * atropisomer check
208  *
209  * 6/12/2017 Jmol 14.18.2 tested for Rule 1b sphere (AY236.53, 163, 173, 192);
210  * 957 lines
211  *
212  * 6/8/2017 Jmol 14.18.2 removed unnecessary presort for Rule 1b
213  *
214  * 5/27/17 Jmol 14.17.2 fully interfaced using SimpleNode and SimpleEdge
215  *
216  * 5/27/17 Jmol 14.17.1 fully validated; simplified code; 978 lines
217  *
218  * 5/17/17 Jmol 14.16.1. adds helicene M/P chirality; 959 lines validated using
219  * CCDC structures HEXHEL02 HEXHEL03 HEXHEL04 ODAGOS ODAHAF
220  * http://pubs.rsc.org/en/content/articlehtml/2017/CP/C6CP07552E
221  *
222  * 5/14/17 Jmol 14.15.5. trimmed up and documented; no need for lone pairs; 948
223  * lines
224  *
225  * 5/13/17 Jmol 14.15.4. algorithm simplified; validated for mixed Rule 4b
226  * systems involving auxiliary R/S, M/P, and seqCis/seqTrans; 959 lines
227  *
228  * 5/06/17 validated for 236 compound set AY-236.
229  *
230  * 5/02/17 validated for 161 compounds, including M/P, m/p (axial chirality for
231  * biaryls and odd-number cumulenes)
232  *
233  * 4/29/17 validated for 160 compounds, including M/P, m/p (axial chirality for
234  * biaryls and odd-number cumulenes)
235  *
236  * 4/28/17 Validated for 146 compounds, including imines and diazines, sulfur,
237  * phosphorus
238  *
239  * 4/27/17 Rules 3-5 preliminary version 14.15.1
240  *
241  * 4/6/17 Introduced in Jmol 14.12.0; validated for Rules 1 and 2 in Jmol
242  * 14.13.2; 100 lines
243  *
244  *
245  * NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE!
246  *
247  * Added logic to Rule 1b:
248  *
249  * Rule 1b: In comparing duplicate atoms, the one with lower root distance has
250  * precedence, where root distance is defined as: (a) in the case of
251  * ring-closure duplicates, the sphere of the duplicated atom; and (b) in the
252  * case of multiple-bond duplicates, the sphere of the atom to which the
253  * duplicate atom is attached.
254  *
255  * Rationale: Using only the distance of the duplicated atom (current
256  * definition) introduces a Kekule bias, which can be illustrated with various
257  * simple models. By moving that distance to be the sphere of the parent atom of
258  * the duplicate, the problem is resolved.
259  *
260  * Added clarification to Rule 2:
261  *
262  * Rule 2: Higher mass precedes lower mass, where mass is defined in the case of
263  * nonduplicate atoms with identified isotopes for elements as their exact
264  * isotopic mass and, in all other cases, as their element's atomic weight.
265  *
266  * Rationale: BB is not self-consistent, including both "mass number" (in the
267  * rule) and "atomic mass" in the description, where "79Br < Br < 81Br". And
268  * again we have the same Kekule-ambiguous issue as in Rule 1b. The added
269  * clarification fixes the Kekule issue (not using isotope mass number for
270  * duplicate atoms), solves the problem that F < 19F (though 100% nat.
271  * abundance), and is easily programmable.
272  *
273  * In Jmol the logic is very simple, actually using the isotope mass number, but
274  * doing two checks:
275  *
276  * a) if one of four specific isotopes (16O, 52Cr, 96Mo, 175Lu), reverse the test, and
277  *
278  * b) if on the list of 100% natural isotopes or one of the non-natural
279  * elements, use the element's accepted atomic weight.
280  *
281  * See CIPAtom.getMass();
282  *
283  * PROPOSED Rule 6: An undifferentiated reference node has priority over any
284  * other undifferentiated node.
285  *
286  * Rationale: This rule is stated in CIP(1966) p. 357.
287  *
288  *
289  * @author Bob Hanson hansonr@stolaf.edu
290  */
291 public class CIPChirality {
292 
293   //The rules:
294   //
295   //  P-92.1.3.1 Sequence Rule 1 has two parts:
296   //
297   //    (a) Higher atomic number precedes lower;
298   //    (b) A duplicate atom node whose corresponding nonduplicated atom
299   //    node is the root or is closer to the root ranks higher than a
300   //    duplicate atom node whose corresponding nonduplicated node is
301   //    farther from the root.
302   //
303   //  P-92.1.3.2 Sequence Rule 2
304   //
305   //    Higher atomic mass number precedes lower;
306   //
307   //  P-92.1.3.3 Sequence Rule 3
308   //
309   //    When considering double bonds and planar tetraligand atoms, 'seqcis' = 'Z'
310   //    precedes 'seqtrans' = 'E' and this precedes nonstereogenic double bonds.
311   //
312   //  P-92.1.3.4 Sequence Rule 4 is best considered in three parts:
313   //
314   //    (a) Chiral stereogenic units precede pseudoasymmetric stereogenic units and
315   //        these precede nonstereogenic units.
316   //    (b) When two ligands have different descriptor pairs, then the one with the
317   //        first chosen like descriptor pairs has priority over the one with a
318   //        corresponding unlike descriptor pair.
319   //      (i) Like descriptor pairs are: 'RR', 'SS', 'MM', 'PP', 'RM', 'SP',
320   //          'seqCis/seqCis', 'seqTran/sseqTrans', 'RseqCis',
321   //          'SseqTrans', 'MseqCis', 'PseqTrans' ...;
322   //      (ii) Unlike discriptor pairs are 'RS', 'MP', 'RP', 'SM',
323   //           'seqCis/seqTrans', 'RseqTrans', 'SseqCis', 'PseqCis',
324   //           'MseqTrans'....
325   //    (c) 'r' precedes 's' and 'm' precedes 'p'
326   //
327   //  P-92.1.3.5 Sequence Rule 5
328   //
329   //    An atom or group with descriptor 'R', 'M' and 'seqCis' has priority over its
330   //    enantiomorph 'S', 'P' or 'seqTrans', 'seqCis' or 'seqTrans'.
331 
332   //
333   // Rule 1b proposal
334   //
335 
336   // Rule 1b: In comparing duplicate atoms, the one with lower root distance has
337   // precedence, where root distance is defined as: (a) in the case of ring-closure
338   // duplicates, the sphere of the duplicated atom; and (b) in the case of
339   // multiple-bond duplicates, the sphere of the atom to which the duplicate atom
340   // is attached.
341 
342   // [0]---1---2---3==4
343   //
344   //
345   // current:
346   //
347   //                (4)(3)
348   //                /  /
349   // [0]---1---2---3--4
350   //
351   //
352   // proposed:
353   //
354   //                (3)(4)
355   //                /  /
356   // [0]---1---2---3--4
357   //
358   //
359   //               7--6                        7==6
360   //              //   \                      /    \
361   //              8     5                    8      5
362   //           (ri-ng) /                  (ri=ng) //
363   // [0]---1---2---3==4        [0]---1---2---3---4
364   //
365   //
366   // current:
367   //
368   //                (4)(3)                    (?)(5)
369   //                /  /                      /  /
370   // [0]---1---2---3--4        [0]---1---2---3--4
371   //
372   //
373   // proposed:
374   //
375   //                (3)(4)                    (3)(4)
376   //                /  /                      /  /
377   // [0]---1---2---3--4        [0]---1---2---3--4
378   //
379 
380   //
381   // Implementation Notes
382   //
383   //
384 
385   // "Scoring" a vs. b involves returning 0 (TIE) or +/-n, where n>0 indicates b won, n < 0
386   // indicates a won, and the |n| indicates in which sphere the decision was made.
387   //
388   // This implementation is somewhat inefficient. It digs deep, looking for a first difference
389   // but only caches the sphere so that in the end the lowest sphere change is chosen.
390   // I don't think it has to do this. A breadth-first queue should work better. I just
391   // did not take the time to implement that.
392   //
393   // The basic strategy is to loop through all nine sequence rules (1a, 1b, 2, 3, 4a, 4b, 4c, 5, and 6)
394   // in order and exhaustively prior to applying the next rule:
395   //
396   // Rule 1a (atomic number -- note that this requires an aromaticity check first)
397   // Rule 1b (duplicated atom progenitor root-distance check; revised as described above
398   //         for aromatic systems.
399   // Rule 2  (nominal isotopic mass)
400   // Rule 3  (E/Z, not including enantiomorphic seqCis/seqTrans designations)
401   // Rule 4a (chiral precedes pseudochiral precedes nonstereochemical)
402   // Rule 4b (like precedes unlike)
403   // Rule 4c (r precedes s)
404   // Rule 5  (R precedes S; M precedes P; C precedes T)
405   // Rule 6  (promoted undifferentiated ligand precedes unpromoted undifferentiated ligand)
406   //
407   // Some nuances I've learned along the way here, some of which are still being checked:
408   //
409   // 1. Rule 1a Had to be revised to account for Kekule bias using averaging.
410   // 2. Rule 1b Had to be revised to account for Kekule bias (AY-236.215). Note that this
411   //            rule may only be applied AFTER Rule 1a has been applied exhaustively. In
412   //            my mind it deserves its own number for this reason. See AY-236.53,
413   //            (1S,5R)-bicyclo[3.1.0]hex-2-ene, for example.
414   // 3. Rule 2  This rule is simple to implement; must be executed only for ties from 1a and 1b.
415   //            However, it still needs Kekule consideration.
416   // 4. Rule 3  Requires the concept of "auxiliary" (temporary, digraph-specific) descriptors.
417   //            This involves the initial generation of all auxiliary descriptors, including r/s and R/S at
418   //            branching points. In the course of doing this, all rules, 1-6, must be employed
419   //            at these auxiliary centers using the already-created digraph.
420   //            Auxiliary descriptors must be generated in a depth-first manner -- that is,
421   //            from highest sphere to lowest within a branch. This is the key to not having
422   //            an analysis blow up or somehow require complex, impossible iteration.
423   // 5. Rule 4a needs to be addressed exhaustively prior to Rules 4b and 4c. This rule serves to
424   //            avoid the need for Rule 4b for all except the most unusual cases, where, for example,
425   //            there are two otherwise identical branches, but one can be identified as S and the
426   //            other only r or no-stereo, but not R. Thus, only branches that end up as R/R, R/S, S/S,
427   //            r/r, r/s, s/s, or ns/ns comparisons need be investigated by Rule 4b.
428   // 6. Rule 4b This rule filters out all diastereomorphic differences that do not involve r/s issues.
429   //            Somehow missed in the discussion is that the reference descriptor is determined
430   //            once and only once for each branch from the center under scrutiny. The key is to
431   //            determine two "Mata sequence" like/unlike lists of R and S descriptors, one for each
432   //            pair of branches being considered. This same reference carries through all future
433   //            iterations of the algorithm for that branch.
434   // 7. Rule 4c Again, this subrule must be invoked only after Rule 4b is completed.
435   //            It is implemented as for Rule 4a.
436   // 8. Rule 5  Setting of pseudoasymmetry (r/s, m/p) and checking for special cases requiring R/S
437   //            is done along the same line as Rule 4b, but in this case by setting the reference
438   //            descriptor to "R" for both sequences. The S-reference must also be checked to see
439   //            if these happen to be self-enantiomorphic ligands. Note that if any changes have
440   //            been made by Rule 4c, all descriptors must be recalculated; otherwise the ones
441   //            used in Rule 4b can be reused.
442   // 9. Rule 6  This new rule takes care of all spiro and high-symmetry cases. An optional
443   //            "full" version (set testflag1 TRUE) assigns all 'r' to bridgehead carbons of
444   //            bicyclo[2.2.2]octane, adamantane, tetrahedrane, cubane, and like-topology compounds,
445   //            and all 's' if half of the stereocenters are pointing "in" instead of "out".
446 
447   /**
448    * A useful idea is to switch from a tree metaphor to a "twisted strand" or
449    * "thread" metaphor. For example:
450    *
451    * (a) In Rule 1b, all ring-duplicates terminate on one of the nodes in the
452    * sequence of parent nodes going back to the root. This has nothing to do
453    * with branching.
454    *
455    * (b) Generation of auxiliary descriptors prior to implementation of Rule 3
456    * must start from the highest sphere, proceeding toward the root. In this
457    * process the path leading back to the root will have no stereodescriptors,
458    * but that does not matter, as its priority is guaranteed to be set by Rule
459    * 1a.
460    *
461    * (c) All rule 4b auxiliary nodes can be normalized by labeling them one of R
462    * or S; there is no need to consider them to be C/T (seqCis or seqTrans) or
463    * M/P, other than to immediately equate that to R/S. Note that C/T and M/P
464    * must be assigned to the sp2 node closest to the root.
465    *
466    * (d) Rule 4b comparisons can be analyzed using a straightforward ranking
467    * process as follows:
468    *
469    * (d1) Run through all nodes of a ligand branch, checking for descriptors
470    * (cipAtom.rule4Type). Save a list of ASCII strings that represent the
471    * priority of the chiral nodes relative to the given reference. Do this
472    * twice, first using R and second using S. The priority character takes the
473    * form of:
474    *
475    * (char)(64 + (a.cipPriority<<2) + (a.rule4Type == 0 ? 0 : a.rule4Type ==
476    * root.rule4Ref ? 1 : 2))
477    *
478    * Such that, for example, "@D@@@A" indicates a node in Sphere 5 with
479    * priorities 010000 and a "like" relationship to the reference. In the end,
480    * we might have something like this:
481    *
482    * "@@@A"
483    *
484    * "@@@A@@@A"
485    *
486    * "@@@A@@@B"
487    *
488    * indicating "AAB" -- "llu" (reading off only the last character for each
489    * thread).
490    *
491    * (d2) Compare these four lists of data. Sort these lists numerically. The
492    * ligand associated with the highest numerical value is preferred. In Jmol,
493    * this comparison is simply an XOR of pairs of bitsets. The bitset that has
494    * the lowest set bit of the XOR is the winner. Simple as that.
495    *
496    * The "like/unlike" business can be thought of as a simple diastereomorphic
497    * test. Thus, the lists are tested for the first point at which they become
498    * neither identical nor opposites, and only that point need be checked for
499    * likeness or unlikeness to the reference. I like thinking about it this way
500    * because it focuses on what Rule 4b's role is -- the identification of
501    * diastereomorphism.
502    *
503    * (e) Rule 4c takes care of diasteriomorphism related to enantiomorphic (r/s,
504    * m/p) sub-paths. It is analyzed exactly as for Rule 4a.
505    *
506    * (f) Rule 5 processing is a comparison similar to Rule 4b, where only the
507    * reference descriptor "R" is used. It tkes care of any remaining
508    * enantiomorphic issues, including the possibilty that some even-number
509    * combination of a combination of enantiomorphic pairs and self-enantiomeric
510    * ligands are present.
511    *
512    * (g) Rule 6 tests for high-symmetry cases, including cubane, adamantane,
513    * bicyclo[2.2.2]octane, etc. If auxiliary centers are recalculated (set
514    * testflag1 true), then these simple "all-out" isomers will be given all-r
515    * descriptors.
516    *
517    *
518    */
519 
520   // The algorithm:
521   //
522   //
523   //  getChirality(molecule) {
524   //    prefilterAtoms()
525   //    checkForAlkenes()
526   //    checkForSmallRings()
527   //    checkForHelicenes()
528   //    checkForBridgeheadNitrogens()
529   //    checkForKekuleIssues()
530   //    checkForAtropisomerism()
531   //    for(all filtered atoms) getAtomChirality(atom)
532   //    if (haveAlkenes) {
533   //      for(all double bonds) getBondChirality(a1, a2)
534   //      removeUnnecessaryEZDesignations()
535   //      indicateHeliceneChirality()
536   //    }
537   //  }
538   //
539   // getAtomChirality(atom) {
540   //   for (each Rule){
541   //     sortSubstituents()
542   //     if (done) return checkHandedness();
543   //   }
544   //   return NO_CHIRALITY
545   // }
546   //
547   //  getBondChirality(a1, a2) {
548   //    atop = getAlkeneEndTopPriority(a1)
549   //    btop = getAlkeneEndTopPriority(a2)
550   //    return (atop >= 0 && btop >= 0 ? getEneChirality(atop, a1, a2, btop) : NO_CHIRALITY)
551   //  }
552   //
553   // sortSubstituents() {
554   //   for (all pairs of substituents a1 and a2) {
555   //     score = a1.compareTo(a2, currentRule)
556   //     if (score == TIED)
557   //       score = breakTie(a1,a2)
558   // }
559   //
560   // breakTie(a,b) {
561   //    score = compareShallowly(a, b)
562   //    if (score != TIED) return score
563   //    a.sortSubstituents(), b.sortSubstituents()
564   //    return compareDeeply(a, b)
565   // }
566   //
567   // compareShallowly(a, b) {
568   //    for (each substituent pairing i in a and b) {
569   //      score = applyCurrentRule(a_i, b_i)
570   //      if (score != TIED) return score
571   //    }
572   //    return TIED
573   // }
574   //
575   // compareDeeply(a, b) {
576   //    bestScore = Integer.MAX_VALUE
577   //    for (each substituent pairing i in a and b) {
578   //      bestScore = min(bestScore, breakTie(a_i, b_i))
579   //    }
580   //    return bestScore
581   // }
582   //
583   // Of course, the actual code is a bit more complex than that.
584   //
585   // approximate line count calculated using MSDOS batch script:
586   //
587   //  copy ..\..\workspace\jmol\src\org\jmol\symmetry\CIPChirality.java+..\..\workspace\jmol\src\org\jmol\symmetry\CIPData.java tt
588   //  type tt | find /V "}"|find /V "//" |find /V "import" | find /V /I "track" | find /V "*" |find ";"|find /V "Logger"|find /V "System.out" |find /V "TEST" > t
589   //  type tt | find /V "}"|find /V "//" |find /V "import" | find /V /I "track" | find /V "*" |find "if"|find /V "Logger"|find /V "System.out" |find /V "TEST" >> t
590   //  type tt | find "COUNT_LINE"  >> t
591   //  type t |find " " /C
592 
593   /**
594    * These elements have 100% natural abundance; we will use their isotope mass number instead of their actual average mass, since there is no difference
595    */
596   static final String RULE_2_nXX_EQ_XX = ";9Be;19F;23Na;27Al;31P;45Sc;55Mn;59Co;75As;89Y;93Nb;98Tc;103Rh;127I;133Cs;141Pr;145Pm;159Tb;165Ho;169Tm;197Au;209Bi;209Po;210At;222Rn;223Fr;226Ra;227Ac;231Pa;232Th;and all > U (atomno > 92)";
597 
598   /**
599    * These elements have an isotope number that is a bit higher than the average
600    * mass, even though their actual isotope mass is a bit lower. We will change
601    * 16 to 15.9, 52 to 51.9, 96 to 95.9, 175 to 174.9 so as to force the unspecified
602    * mass atom to be higher priority than the specified one.
603    *
604    * All other isotopes can use their integer isotope mass number instead of looking up
605    * their exact isotope mass.
606    *
607    */
608   static final String RULE_2_REDUCE_ISOTOPE_MASS_NUMBER = ";16O;52Cr;96Mo;175Lu;";
609 
610   static final int NO_CHIRALITY = 0, TIED = 0;
611   static final int A_WINS = -1, B_WINS = 1;
612 
613   static final int IGNORE = Integer.MIN_VALUE;
614 
615   static final int UNDETERMINED = -1;
616 
617   static final int STEREO_R = JC.CIP_CHIRALITY_R_FLAG,
618       STEREO_S = JC.CIP_CHIRALITY_S_FLAG;
619 
620   static final int STEREO_M = JC.CIP_CHIRALITY_M_FLAG,
621       STEREO_P = JC.CIP_CHIRALITY_P_FLAG;
622 
623   static final int STEREO_Z = JC.CIP_CHIRALITY_seqcis_FLAG,
624       STEREO_E = JC.CIP_CHIRALITY_seqtrans_FLAG;
625 
626   static final int STEREO_BOTH_RS = STEREO_R | STEREO_S; // must be the number 3
627 
628   static final int STEREO_BOTH_EZ = STEREO_E | STEREO_Z;
629 
630   static final int RULE_1a = 1, RULE_1b = 2, RULE_2 = 3, RULE_3 = 4,
631       RULE_4a = 5, RULE_4b = 6, RULE_4c = 7, RULE_5 = 8, RULE_6 = 9;
632 
633   final static String[] ruleNames = { "", "1a", "1b", "2", "3", "4a", "4b",
634       "4c", "5", "6" }; // Logger only
635 
getRuleName(int rule)636   public String getRuleName(int rule) { // Logger only
637     return ruleNames[rule]; // Logger only
638   }
639 
640   /**
641    * maximum path to display for debugging only using SET DEBUG in Jmol
642    */
643   static final int MAX_PATH = 50;  // Logger
644 
645   /**
646    * maximum ring size that can have a double bond with no E/Z designation; also
647    * used for identifying aromatic rings and bridgehead nitrogens
648    */
649   static final int SMALL_RING_MAX = 7;
650 
651   /**
652    * the current rule being applied exhaustively
653    *
654    */
655   int currentRule = RULE_1a;
656 
657   /**
658    * The atom for which we are determining the stereochemistry
659    *
660    */
661   CIPAtom root;
662 
663   /**
664    * collected bitsets and more specialized SMILES/SMARTS searches and vwr references
665    *
666    */
667   CIPData data;
668 
669   /**
670    * are we tracking pathways for _M.CIPInfo?
671    *
672    */
673   boolean doTrack;
674 
675   /**
676    * are we in the midst of auxiliary center creation?
677    *
678    */
679   boolean isAux;
680 
681   /**
682    * set bits RULE_1a - RULE_6 to indicate a need for that rule based on what is in the model
683    */
684   BS bsNeedRule = new BS();
685 
686   /**
687    * do we have r or s and so will need to recalculate Mata like/unlike lists in Rule 5?
688    */
689   boolean havePseudoAuxiliary;
690 
691   /**
692    * incremental pointer providing a unique ID to every CIPAtom for debugging
693    *
694    */
695   int ptIDLogger;
696 
CIPChirality()697   public CIPChirality() {
698     // for reflection
699   }
700 
701   /**
702    * A general determination of chirality that involves ultimately all of Rules
703    * 1-6.
704    *
705    * @param data
706    *
707    */
getChiralityForAtoms(CIPData data)708   public void getChiralityForAtoms(CIPData data) {
709 
710     if (data.bsAtoms.cardinality() == 0)
711       return;
712 
713     this.data = data;
714 
715     doTrack = data.isTracker();
716 
717     ptIDLogger = 0;
718 
719     // using BSAtoms here because we need the entire graph,
720     // including multiple molecular units (AY-236.93
721 
722     BS bsToDo = (BS) data.bsMolecule.clone();
723     boolean haveAlkenes = preFilterAtomList(data.atoms, bsToDo, data.bsEnes);
724     if (!data.bsEnes.isEmpty())
725       data.getEneKekule();
726     Logger.info("bsKekule:" + data.bsKekuleAmbiguous);
727 
728     // set atom chiralities
729 
730     bsToDo = (BS) data.bsAtoms.clone();
731 
732     for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1)) {
733       SimpleNode a = data.atoms[i];
734       a.setCIPChirality(0);
735       ptIDLogger = 0;
736       int c = getAtomChiralityLimited(a, null, null);
737       a.setCIPChirality(c == CIPChirality.NO_CHIRALITY ? JC.CIP_CHIRALITY_NONE : c
738           | ((currentRule - 1) << JC.CIP_CHIRALITY_NAME_OFFSET));
739       if (doTrack && c != CIPChirality.NO_CHIRALITY)
740         data.getRootTrackerResult(root);
741     }
742     if (haveAlkenes) {
743 
744       // set bond chiralities E/Z and M/P
745 
746       Lst<int[]> lstEZ = new Lst<int[]>();
747       for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1))
748         getAtomBondChirality(data.atoms[i], lstEZ, bsToDo);
749       if (data.lstSmallRings.length > 0 && lstEZ.size() > 0)
750         clearSmallRingEZ(data.atoms, lstEZ);
751 
752       // Add helicene chiralities -- predetermined using a Jmol SMARTS conformational search.
753       //
754       // M: A{a}(.t:-10,-40)a(.t:-10,-40)aaa
755       // P: A{a}(.t:10,40)a(.t:10,40)aaa
756       //
757       // Note that indicators are on the first and last aromatic atoms {a}.
758 
759       setStereoFromSmiles(data.bsHelixM, STEREO_M, data.atoms);
760       setStereoFromSmiles(data.bsHelixP, STEREO_P, data.atoms);
761     }
762 
763     if (Logger.debugging) {
764       Logger.info("Kekule ambiguous = " + data.bsKekuleAmbiguous);
765       Logger.info("small rings = " + PT.toJSON(null, data.lstSmallRings));
766     }
767 
768   }
769 
setStereoFromSmiles(BS bsHelix, int stereo, SimpleNode[] atoms)770   private void setStereoFromSmiles(BS bsHelix, int stereo, SimpleNode[] atoms) {
771     if (bsHelix != null)
772       for (int i = bsHelix.nextSetBit(0); i >= 0; i = bsHelix
773           .nextSetBit(i + 1))
774         atoms[i].setCIPChirality(stereo);
775   }
776 
777   /**
778    * Remove unnecessary atoms from the list and let us know if we have alkenes
779    * to consider.
780    *
781    * @param atoms
782    * @param bsToDo
783    * @param bsEnes
784    * @return whether we have any alkenes that could be EZ
785    */
preFilterAtomList(SimpleNode[] atoms, BS bsToDo, BS bsEnes)786   private boolean preFilterAtomList(SimpleNode[] atoms, BS bsToDo, BS bsEnes) {
787     boolean haveAlkenes = false;
788     for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1)) {
789       if (!data.couldBeChiralAtom(atoms[i])) {
790         bsToDo.clear(i);
791         continue;
792       }
793       switch (data.couldBeChiralAlkene(atoms[i], null)) {
794       case UNDETERMINED:
795         break;
796       case STEREO_Z:
797         bsEnes.set(i);
798         //$FALL-THROUGH$
799       case STEREO_M:
800         haveAlkenes = true;
801         break;
802       }
803     }
804     return haveAlkenes;
805   }
806 
807   /**
808    * Check if an atom is 1st row.
809    *
810    * @param a
811    * @return elemno > 2 && elemno <= 10
812    */
isFirstRow(SimpleNode a)813   static boolean isFirstRow(SimpleNode a) {
814     int n = a.getElementNumber();
815     return (n > 2 && n <= 10);
816   }
817 
818   /**
819    * Remove E/Z designations for small-rings double bonds (IUPAC
820    * 2013.P-93.5.1.4.1).
821    *
822    * @param atoms
823    * @param lstEZ
824    */
clearSmallRingEZ(SimpleNode[] atoms, Lst<int[]> lstEZ)825   private void clearSmallRingEZ(SimpleNode[] atoms, Lst<int[]> lstEZ) {
826     for (int j = data.lstSmallRings.length; --j >= 0;)
827       data.lstSmallRings[j].andNot(data.bsAtropisomeric);
828     for (int i = lstEZ.size(); --i >= 0;) {
829       int[] ab = lstEZ.get(i);
830       for (int j = data.lstSmallRings.length; --j >= 0;) {
831         BS ring = data.lstSmallRings[j];
832         if (ring.get(ab[0]) && ring.get(ab[1])) {
833           atoms[ab[0]].setCIPChirality(JC.CIP_CHIRALITY_NONE);
834           atoms[ab[1]].setCIPChirality(JC.CIP_CHIRALITY_NONE);
835         }
836       }
837     }
838   }
839 
840   /**
841    * Get E/Z characteristics for specific atoms. Also check here for
842    * atropisomeric M/P designations
843    *
844    * @param atom
845    * @param lstEZ
846    * @param bsToDo
847    */
848 
getAtomBondChirality(SimpleNode atom, Lst<int[]> lstEZ, BS bsToDo)849   private void getAtomBondChirality(SimpleNode atom, Lst<int[]> lstEZ, BS bsToDo) {
850     int index = atom.getIndex();
851     SimpleEdge[] bonds = atom.getEdges();
852     int c = NO_CHIRALITY;
853     boolean isAtropic = data.bsAtropisomeric.get(index);
854     for (int j = bonds.length; --j >= 0;) {
855       SimpleEdge bond = bonds[j];
856       SimpleNode atom1;
857       int index1;
858       if (isAtropic) {
859         atom1 = bonds[j].getOtherNode(atom);
860         index1 = atom1.getIndex();
861         if (!data.bsAtropisomeric.get(index1))
862           continue;
863         c = setBondChirality(atom, atom1, atom, atom1, true);
864       } else if (data.getBondOrder(bond) == 2) {
865         atom1 = getLastCumuleneAtom(bond, atom, null, null);
866         index1 = atom1.getIndex();
867         if (index1 < index)
868           continue;
869         c = getBondChiralityLimited(bond, atom);
870       } else {
871         continue;
872       }
873       if (c != NO_CHIRALITY) {
874         if (!isAtropic)
875           lstEZ.addLast(new int[] { index, index1 });
876         bsToDo.clear(index);
877         bsToDo.clear(index1);
878       }
879       if (isAtropic)
880         break;
881     }
882   }
883 
884   /**
885    *
886    * @param bond
887    * @param atom
888    * @param nSP2
889    *        returns the number of sp2 carbons in this alkene or cumulene
890    * @param parents
891    * @return the terminal atom of this alkene or cumulene
892    */
getLastCumuleneAtom(SimpleEdge bond, SimpleNode atom, int[] nSP2, SimpleNode[] parents)893   private SimpleNode getLastCumuleneAtom(SimpleEdge bond, SimpleNode atom,
894                                          int[] nSP2, SimpleNode[] parents) {
895     // we know this is a double bond
896     SimpleNode atom2 = bond.getOtherNode(atom);
897     if (parents != null) {
898       parents[0] = atom2;
899       parents[1] = atom;
900     }
901     // connected atom must have only two covalent bonds
902     if (nSP2 != null)
903       nSP2[0] = 2;
904     int ppt = 0;
905     while (true) { // COUNT_LINE
906       if (atom2.getCovalentBondCount() != 2)
907         return atom2;
908       SimpleEdge[] edges = atom2.getEdges();
909       for (int i = edges.length; --i >= 0;) {
910         SimpleNode atom3 = (bond = edges[i]).getOtherNode(atom2);
911         if (atom3 == atom)
912           continue;
913         // connected atom must only have one other bond, and it must be double to continue
914         if (data.getBondOrder(bond) != 2)
915           return atom2; // was atom3
916         if (parents != null) {
917           if (ppt == 0) {
918             parents[0] = atom2;
919             ppt = 1;
920           }
921           parents[1] = atom2;
922         }
923         // a=2=3
924         if (nSP2 != null)
925           nSP2[0]++;
926         atom = atom2;
927         atom2 = atom3;
928         // we know we only have two covalent bonds
929         break;
930       }
931     }
932   }
933 
934   /**
935    * Determine R/S or one half of E/Z determination
936    *
937    * @param atom
938    *        ignored if a is not null (just checking ene end top priority)
939    * @param cipAtom
940    *        ignored if atom is not null
941    * @param parentAtom
942    *        null for tetrahedral, other alkene carbon for E/Z
943    *
944    * @return if and E/Z test, [0:none, 1: atoms[0] is higher, 2: atoms[1] is
945    *         higher] otherwise [0:none, 1:R, 2:S]
946    */
getAtomChiralityLimited(SimpleNode atom, CIPAtom cipAtom, SimpleNode parentAtom)947   int getAtomChiralityLimited(SimpleNode atom, CIPAtom cipAtom,
948                                       SimpleNode parentAtom) {
949     int rs = NO_CHIRALITY;
950     bsNeedRule.clearAll();
951     bsNeedRule.set(RULE_1a);
952     try {
953       boolean isAlkeneEndCheck = (atom == null);
954       if (isAlkeneEndCheck) {
955         // This is an alkene end determination.
956         atom = (root = cipAtom).atom;
957         cipAtom.htPathPoints = (cipAtom.parent = new CIPAtom().create(
958             parentAtom, null, true, false, false)).htPathPoints;
959       } else {
960         if (!(root = cipAtom = (cipAtom == null ? new CIPAtom().create(atom, null, false, false,
961             false) : cipAtom)).isSP3) {
962           // This is a root-atom call.
963           // Just checking here that center has 4 covalent bonds or is trigonal pyramidal.
964           return NO_CHIRALITY;
965         }
966       }
967       if (cipAtom.setNode()) {
968         for (currentRule = RULE_1a; currentRule <= RULE_6; currentRule++) {
969           //          if (Logger.debugging)
970           //            Logger.info("-Rule " + getRuleName(currentRule)
971           //                + " CIPChirality for " + cipAtom + "-----"); // Logger
972           int nPrioritiesPrev = cipAtom.nPriorities;
973           switch (currentRule) {
974           case RULE_2:
975             if (cipAtom.rule6refIndex >= 0)
976               bsNeedRule.set(RULE_2);
977             break;
978           case RULE_3:
979             // We need to create auxiliary descriptors PRIOR to Rule 3,
980             // as seqcis and seqtrans are auxiliary only
981             //  BH64_037
982             isAux = true;
983             doTrack = false;
984             havePseudoAuxiliary = false;
985             cipAtom.createAuxiliaryDescriptors(null, null);
986             doTrack = data.isTracker();
987             isAux = false;
988             break;
989           case RULE_4a:
990             if (!bsNeedRule.get(RULE_4a)) {
991               // We can skip Rules 4a - 5 if there are no chirality centers.
992               currentRule = RULE_5;
993               continue;
994             }
995             //$FALL-THROUGH$
996           case RULE_4b:
997           case RULE_4c:
998             // We need to presort with no tie-breaking for Rules 4a, 4b, and 4c.
999             cipAtom.sortSubstituents(Integer.MIN_VALUE);
1000             bsNeedRule.set(currentRule);
1001             break;
1002           case RULE_5:
1003             // We need to presort with no tie-breaking for Rule 5,
1004             // clearing the Rule4List is advisable, as Rule 4c may have changed orders.
1005             if (havePseudoAuxiliary)
1006               cipAtom.clearRule4Lists();
1007             cipAtom.sortSubstituents(Integer.MIN_VALUE);
1008             bsNeedRule.set(currentRule);
1009             break;
1010           case RULE_6:
1011             // We only need to do Rule 6 under certain conditions.
1012             bsNeedRule.setBitTo(RULE_6,
1013                 (cipAtom.rule6refIndex < 0 && (rs = cipAtom.getRule6Descriptor(false)) != NO_CHIRALITY));
1014             break;
1015           }
1016           if (!bsNeedRule.get(currentRule))
1017             continue;
1018 
1019           // initial call to sortSubstituents does all, recursively
1020 
1021           if (rs == NO_CHIRALITY && cipAtom.sortSubstituents(0)) {
1022             if (Logger.debuggingHigh && cipAtom.h1Count < 2) {
1023               for (int i = 0; i < cipAtom.bondCount; i++) { // Logger
1024                 if (cipAtom.atoms[i] != null) // Logger
1025                   Logger.info(cipAtom.atoms[i] + " " + cipAtom.priorities[i]); // Logger
1026               }
1027             }
1028 
1029             // If this is an alkene end check, we just use STEREO_S and STEREO_R as markers
1030 
1031             if (isAlkeneEndCheck)
1032               return cipAtom.getEneTop();
1033 
1034             rs = data.checkHandedness(cipAtom);
1035             if (currentRule == RULE_5) {
1036               if (cipAtom.nPriorities == 4 && nPrioritiesPrev == 2)
1037                 cipAtom.isRule5Pseudo = !cipAtom.isRule5Pseudo;
1038               if (cipAtom.isRule5Pseudo)
1039                 rs |= JC.CIP_CHIRALITY_PSEUDO_FLAG;
1040 
1041               // Exclude special cases:
1042 
1043               //  P-92.2.1.1(c) pseudoasymmetric centers must have
1044               //                two and only two enantiomorphic ligands
1045               //
1046 
1047               // Rule 5 has decided the issue, but how many decisions did we make?
1048               // If priorities [0 0 2 2] went to [0 1 2 3] then
1049               // we have two Rule-5 decisions -- R,S,R',S'.
1050               // In that case, Rule 5 results in R/S, not r/s.
1051               //
1052               //     S
1053               //     -
1054               //     -
1055               // R---C---R'      despite this being Rule 5, the results is R, not r.
1056               //     -
1057               //     -
1058               //     S'
1059               //
1060               // --------- mirror plane
1061               //
1062               //     R'
1063               //     -
1064               //     -
1065               // S---C---S'     not superimposible
1066               //     -
1067               //     -
1068               //     R
1069               //
1070               // in addition, Rule 4c may not have caught a case where a ligand is self-enantiomorphic.
1071               //
1072               // s'          r'
1073               //  \         /
1074               //   N---C---N
1075               //  /         \
1076               // r           s
1077               //
1078               // Neither ligand changes chirality with
1079               // since there are no descriptors on N, we won't catch this there.
1080             }
1081             if (Logger.debugging)
1082               Logger.info(atom + " " + JC.getCIPChiralityName(rs) + " by Rule "
1083                   + getRuleName(currentRule)
1084                   + "\n----------------------------------"); // Logger
1085             return rs;
1086           }
1087         }
1088       }
1089     } catch (Throwable e) {
1090       System.out.println(e + " in CIPChirality " + currentRule);
1091       /**
1092        * @j2sNative alert(e);
1093        */
1094       {
1095         e.printStackTrace();
1096       }
1097       return STEREO_BOTH_RS;
1098     }
1099     return rs;
1100   }
1101 
1102   /**
1103    * Determine the axial or E/Z chirality for this bond, with the given starting
1104    * atom a
1105    *
1106    * @param bond
1107    * @param a
1108    *        first atom to consider, or null
1109    * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_Ra | STEREO_Sa
1110    *         | STEREO_ra | STEREO_sa}
1111    */
getBondChiralityLimited(SimpleEdge bond, SimpleNode a)1112   private int getBondChiralityLimited(SimpleEdge bond, SimpleNode a) {
1113     if (a == null)
1114       a = bond.getOtherNode(null);
1115     if (data.couldBeChiralAlkene(a, bond) == UNDETERMINED)
1116       return NO_CHIRALITY;
1117     int[] nSP2 = new int[1];
1118     SimpleNode[] parents = new SimpleNode[2];
1119     SimpleNode b = getLastCumuleneAtom(bond, a, nSP2, parents);
1120     boolean isAxial = nSP2[0] % 2 == 1;
1121     if (!isAxial && data.bsAromatic.get(a.getIndex()))
1122       return UNDETERMINED;
1123     int c = setBondChirality(a, parents[0], parents[1], b, isAxial);
1124     if (Logger.debugging)
1125       Logger.info("get Bond Chirality " + JC.getCIPChiralityName(c) + " " + bond);
1126     return c;
1127   }
1128 
1129   /**
1130    * Determine the axial or E/Z chirality for the a-b bond.
1131    *
1132    * @param a
1133    * @param pa
1134    * @param pb
1135    * @param b
1136    * @param isAxial
1137    * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_M | STEREO_P |
1138    *         STEREO_m | STEREO_p}
1139    */
setBondChirality(SimpleNode a, SimpleNode pa, SimpleNode pb, SimpleNode b, boolean isAxial)1140   private int setBondChirality(SimpleNode a, SimpleNode pa, SimpleNode pb,
1141                                SimpleNode b, boolean isAxial) {
1142     CIPAtom a1 = new CIPAtom().create(a, null, true, false, false);
1143     CIPAtom b2 = new CIPAtom().create(b, null, true, false, false);
1144     int atop = getAtomChiralityLimited(null, a1, pa) - 1;
1145     int ruleA = currentRule;
1146     int btop = getAtomChiralityLimited(null, b2, pb) - 1;
1147     int ruleB = currentRule;
1148     if (isAxial && a1.nRootDuplicates > 3 && atop < 0 && btop < 0) {
1149       // No resolution for odd cumulene, but we have multiple root duplicates.
1150       // Quick check of Rule 6.
1151       ruleA = ruleB = currentRule = RULE_6;
1152       b2.rule6refIndex = a1.atoms[atop = a1.getEneTop() - 1].atomIndex;
1153       if (b2.sortSubstituents(0))
1154         btop = b2.getEneTop() - 1;
1155     }
1156     int c = (atop >= 0 && btop >= 0 ? getEneChirality(b2.atoms[btop], b2, a1,
1157         a1.atoms[atop], isAxial, true) : NO_CHIRALITY);
1158     if (c != NO_CHIRALITY
1159         && (isAxial || !data.bsAtropisomeric.get(a.getIndex())
1160             && !data.bsAtropisomeric.get(b.getIndex()))) {
1161       //
1162       // We must check maxRules.
1163       //
1164       // nonaxial:
1165       //
1166       //    a     c
1167       //     \   /
1168       //      C=C
1169       //     /   \
1170       //    b     d
1171       //
1172       //    R     R'
1173       //     \   /
1174       //      C=C
1175       //     /   \
1176       //    S     S'
1177       //
1178       // planar flip is unchanged, and this is seqcis, seqtrans
1179       //
1180       //    a     R'
1181       //     \   /
1182       //      C=C
1183       //     /   \
1184       //    b     S'
1185       //
1186       // planar flip is changed, and this is seqCis, seqTrans
1187       //
1188       //
1189       // axial is the opposite:
1190       //
1191       // see AY236.70 and AY236.170
1192       //
1193       //
1194       //    a       c
1195       //     \     /
1196       //      C=C=C
1197       //     /     \
1198       //    b       d
1199       //
1200       //    R       R'
1201       //     \     /
1202       //      C=C=C
1203       //     /     \
1204       //    S       S'
1205       //
1206       // planar flip is changed, and this is M, P
1207       //
1208       //    a       R
1209       //     \     /
1210       //      C=C=C
1211       //     /     \
1212       //    b       S
1213       //
1214       // planar flip is unchanged, and this is m, p
1215 
1216       if (isAxial == (ruleA == RULE_5) == (ruleB == RULE_5))
1217         c &= ~JC.CIP_CHIRALITY_PSEUDO_FLAG;
1218       else
1219         c |= JC.CIP_CHIRALITY_PSEUDO_FLAG;
1220 
1221       a.setCIPChirality(c | ((ruleA - 1) << JC.CIP_CHIRALITY_NAME_OFFSET));
1222       b.setCIPChirality(c | ((ruleB - 1) << JC.CIP_CHIRALITY_NAME_OFFSET));
1223       if (Logger.debugging)
1224         Logger.info(a + "-" + b + " " + JC.getCIPChiralityName(c));
1225     }
1226     return c;
1227   }
1228 
1229   /**
1230    * Determine the stereochemistry of a bond
1231    *
1232    * @param winner1
1233    * @param end1
1234    * @param end2
1235    * @param winner2
1236    * @param isAxial
1237    *        if an odd-cumulene
1238    * @param allowPseudo
1239    *        if we are working from a high-level bond stereochemistry method
1240    * @return STEREO_M, STEREO_P, STEREO_Z, STEREO_E, or NO_CHIRALITY
1241    */
getEneChirality(CIPAtom winner1, CIPAtom end1, CIPAtom end2, CIPAtom winner2, boolean isAxial, boolean allowPseudo)1242   int getEneChirality(CIPAtom winner1, CIPAtom end1, CIPAtom end2,
1243                       CIPAtom winner2, boolean isAxial, boolean allowPseudo) {
1244     return (winner1 == null || winner2 == null || winner1.atom == null
1245         || winner2.atom == null ? NO_CHIRALITY : isAxial ? data.isPositiveTorsion(winner1,
1246         end1, end2, winner2) : data.isCis(winner1, end1, end2, winner2));
1247   }
1248 
1249   class CIPAtom implements Comparable<CIPAtom>, Cloneable {
1250 
1251     /**
1252      * odd/even toggle for comparisons of Rule5 results that would make for R/S, not r/s, if this flag is false.
1253      *
1254      */
1255     boolean isRule5Pseudo = true;
1256 
1257     /**
1258      * unique ID for this CIPAtom for debugging only
1259      *
1260      */
1261     private int id;
1262 
1263     /**
1264      * bond distance from the root atom to this atom
1265      */
1266     private int sphere;
1267 
1268     /**
1269      * Rule 1b measure: Distance from root of the corresponding nonduplicated
1270      * atom (duplicate nodes only).
1271      *
1272      * AMENDED HERE for duplicate nodes associated with a double-bond in a
1273      * 6-membered-ring benzenoid (benzene, naphthalene, pyridine, pyrazoline,
1274      * etc.) "Kekule-ambiguous" system to be its sphere.
1275      *
1276      */
1277 
1278     private int rootDistance;
1279 
1280     /**
1281      * a flag to prevent finalization of an atom node more than once
1282      *
1283      */
1284     private boolean isSet;
1285 
1286     /**
1287      * a flag to indicate atom that is a duplicate, either due to
1288      * ring closure or multiple bonding -- element number and mass, but no
1289      * substituents; slightly lower in priority than standard atoms.
1290      *
1291      */
1292     boolean isDuplicate = true;
1293 
1294     /**
1295      * a flag to indicate an atom that has no substituents; a branch end point;
1296      * typically H or a halogen (F, Cl, Br, I); also set TRUE if there is a problem
1297      * setting an atom; does not include duplicates
1298      *
1299      */
1300     boolean isTerminal;
1301 
1302     /**
1303      * is one atom of a double bond
1304      */
1305 
1306     private boolean isAlkene;
1307 
1308     /**
1309      * the associated Jmol (or otherwise) atom; use of the SimpleNode interface
1310      * allows us to implement this in SMILES or Jmol as well as providing an
1311      * interface other programs could use if implementing this code
1312      *
1313      */
1314     SimpleNode atom;
1315 
1316     /**
1317      * the application-assigned unique atom index for this atom; used in
1318      * updating lstSmallRings
1319      *
1320      */
1321     int atomIndex = -1;
1322 
1323     /**
1324      * true atom covalent bond count; cached for better performance
1325      */
1326     int bondCount;
1327 
1328     /**
1329      * Rule 1a nominal element number; may be fractional for Kekule issues
1330      */
1331     float elemNo;
1332 
1333     /**
1334      * Rule 2 isotope mass number if identified or average atomic mass if not
1335      *
1336      * C (12.011) > 12C, O (15.999) < 16O, and F (18.998) < 19F
1337      *
1338      * Source:
1339      *
1340      */
1341     private float mass = UNDETERMINED;
1342 
1343     ///// SUBSTITUENTS ////
1344 
1345     /**
1346      * direct ancestor of this atom
1347      *
1348      */
1349     CIPAtom parent;
1350 
1351     /**
1352      * sphere-1 node in this atom's root branch
1353      */
1354     CIPAtom rootSubstituent;
1355 
1356     /**
1357      * a count of how many 1H atoms we have found on this atom; used to halt
1358      * further processing of this atom
1359      */
1360     int h1Count;
1361 
1362     /**
1363      * the substituents -- up to four supported here at this time
1364      *
1365      */
1366     CIPAtom[] atoms = new CIPAtom[4];
1367 
1368     /**
1369      * number of substituent atoms (non-null atoms[] entries)
1370      */
1371     private int nAtoms;
1372 
1373     /**
1374      * bitset of indices of the associated atoms in the path to this atom node
1375      * from the root; used to keep track of the path to this atom in order to
1376      * prevent infinite cycling; the last atom in the path when cyclic is a
1377      * duplicate atom.
1378      *
1379      */
1380     private BS bsPath;
1381 
1382     /**
1383      * String path, for debugging
1384      *
1385      */
1386     String myPath = "";
1387 
1388     /**
1389      * priorities associated with each subsituent from high (0) to low (3); due
1390      * to equivaliencies at a given rule level, these numbers may duplicate and
1391      * have gaps - for example, [0 2 0 3]
1392      */
1393     int[] oldPriorities, priorities = new int[4];
1394 
1395     /**
1396      * the number of distinct priorities determined for this atom for the
1397      * current rule; 0-4 for the root atom; 0-3 for all others
1398      */
1399     int oldNPriorities, nPriorities;
1400 
1401     /**
1402      * current priority 0-3; used for Rule 4b and 5 priority sorting
1403      *
1404      */
1405     int priority;
1406 
1407     /**
1408      * ASCII-encoded priority string
1409      */
1410     private String chiralPath;
1411 
1412     /**
1413      * number of root-duplicate atoms (root atom only
1414      */
1415 
1416     int nRootDuplicates;
1417 
1418     /**
1419      * Rule 1b hash table that maintains distance of the associated
1420      * nonduplicated atom node
1421      *
1422      */
1423     Map<Integer, Integer> htPathPoints;
1424 
1425     /**
1426      * reference index for Rule 6 -- root atom only
1427      */
1428     int rule6refIndex = -1;
1429 
1430     /**
1431      * a list of only the undifferentiated ligands in Rule 6 -- root atom only
1432      */
1433     private BS bsRule6Subs;
1434 
1435     /////// double and triple bonds ///////
1436 
1437     /**
1438      * first atom of an alkene or cumulene atom
1439      */
1440 
1441     private CIPAtom alkeneParent;
1442 
1443     /**
1444      * last atom of an alkene or cumulene atom
1445      */
1446 
1447     private CIPAtom alkeneChild;
1448 
1449     /**
1450      * a flag used in Rule 3 to indicate the second carbon of a double bond
1451      */
1452 
1453     private boolean isAlkeneAtom2;
1454 
1455     /**
1456      * is an atom that is involved in more than one Kekule form
1457      */
1458     private boolean isKekuleAmbiguous;
1459 
1460     /**
1461      * first =X= atom in a string of =X=X=X=...
1462      */
1463     private CIPAtom nextSP2;
1464 
1465     /**
1466      * potentially useful information that this duplicate is from an double- or
1467      * triple-bond, not a ring closure
1468      */
1469     private boolean multipleBondDuplicate;
1470 
1471     /**
1472      * alkene or even cumulene, so chirality will be EZ, not MP
1473      */
1474     private boolean isEvenEne = true;
1475 
1476     //// AUXILIARY CHIRALITY (SET JUST PRIOR TO RULE 3) /////
1477 
1478     /**
1479      * auxiliary chirality E/Z for this atom node
1480      */
1481     private int auxEZ = UNDETERMINED;
1482 
1483     /**
1484      * a flag set false in evaluation of Rule 5 to indicate that there was more
1485      * than one R/S decision made, so this center cannot be r/s; initially just
1486      * indicates that the atom has 4 covalent bonds or is trigonal pyriamidal
1487      */
1488     boolean isSP3 = true;
1489 
1490     /**
1491      * auxiliary chirality as determined in createAuxiliaryRule4Data;
1492      * possibilities include R/S, r/s, M/P, m/p, C/T (but not c/t), or ~ (ASCII
1493      * 126, no stereochemistry); for sorting purposes C=M=R < p=r=s < ~
1494      */
1495     private char auxChirality = '~';
1496 
1497     /**
1498      * points to next branching point that has two or more chiral branches
1499      */
1500     private CIPAtom nextChiralBranch;
1501 
1502     /**
1503      * a check for downstream chirality
1504      *
1505      */
1506     private boolean isChiralPath;
1507 
1508     /**
1509      * the auxiiary chirality type: 0: ~, 1: R, 2: S; normalized to R/S even if
1510      * M/P or C/T
1511      */
1512     int rule4Type;
1513 
1514     /**
1515      * used to count the number of priorities
1516      */
1517     private BS bsTemp = new BS();
1518 
1519     /**
1520      * Rule 4b reference chirality (R or S, 1 or 2); root only
1521      */
1522     private int rule4Ref;
1523 
1524     /**
1525      * bitsets for tracking R and S for Rule 4b
1526      */
1527     BS[] listRS;
1528 
CIPAtom()1529     CIPAtom() {
1530       // had a problem in JavaScript that the constructor of an inner function cannot
1531       // access this.b$ yet. That assignment is made after construction.
1532     }
1533 
1534     /**
1535      *
1536      * @param atom
1537      *        or null to indicate a null placeholder
1538      * @param parent
1539      * @param isAlkene
1540      * @param isDuplicate
1541      * @param isParentBond
1542      * @return this
1543      */
1544     @SuppressWarnings("unchecked")
create(SimpleNode atom, CIPAtom parent, boolean isAlkene, boolean isDuplicate, boolean isParentBond)1545     CIPAtom create(SimpleNode atom, CIPAtom parent, boolean isAlkene,
1546                    boolean isDuplicate, boolean isParentBond) {
1547       this.id = ++ptIDLogger;
1548       this.parent = parent;
1549       if (atom == null)
1550         return this;
1551       this.isAlkene = isAlkene;
1552       this.atom = atom;
1553       atomIndex = atom.getIndex();
1554       if (atom.getIsotopeNumber() > 0)
1555         bsNeedRule.set(RULE_2);
1556       this.isDuplicate = multipleBondDuplicate = isDuplicate;
1557       isKekuleAmbiguous = (data.bsKekuleAmbiguous != null && data.bsKekuleAmbiguous
1558           .get(atomIndex));
1559       elemNo = (isDuplicate && isKekuleAmbiguous ?
1560           parent.getKekuleElementNumber()
1561           : atom.getElementNumber());
1562       bondCount = atom.getCovalentBondCount();
1563       isSP3 = (bondCount == 4 || bondCount == 3 && !isAlkene
1564           && (elemNo > 10 || data.bsAzacyclic != null && data.bsAzacyclic.get(atomIndex)));
1565       if (parent != null)
1566         sphere = parent.sphere + 1;
1567       if (sphere == 1) {
1568         rootSubstituent = this;
1569         htPathPoints = new Hashtable<Integer, Integer>();
1570       } else if (parent != null) {
1571         rootSubstituent = parent.rootSubstituent;
1572         htPathPoints = (Map<Integer, Integer>) ((Hashtable<Integer, Integer>) parent.htPathPoints)
1573             .clone();
1574       }
1575       bsPath = (parent == null ? new BS() : (BS) parent.bsPath.clone());
1576 
1577       if (isDuplicate)
1578         bsNeedRule.set(RULE_3);
1579       rootDistance = sphere;
1580       // The rootDistance for a nonDuplicate atom is just its sphere.
1581       // The rootDistance for a duplicate atom is (by IUPAC) the sphere of its duplicated atom.
1582       // I argue that for aromatic compounds, this introduces a Kekule problem and that for
1583       // those cases, the rootDistance should be its own sphere, not the sphere of its duplicated atom.
1584       // This shows up in AV-360#215.
1585 
1586       if (parent == null) {
1587         // original atom
1588         bsPath.set(atomIndex);
1589       } else if (multipleBondDuplicate
1590           ) {
1591         rootDistance--;
1592       } else if (bsPath.get(atomIndex)) {
1593         bsNeedRule.setBitTo(RULE_1b, (this.isDuplicate = true));
1594         if ((rootDistance = (atom == root.atom ? 0 : isParentBond ? parent.sphere
1595             : htPathPoints.get(Integer.valueOf(atomIndex)).intValue())) == 0) {
1596           root.nRootDuplicates++;
1597         }
1598       } else {
1599         bsPath.set(atomIndex);
1600         htPathPoints.put(Integer.valueOf(atomIndex), Integer.valueOf(rootDistance));
1601       }
1602       if (doTrack) {
1603         if (sphere < MAX_PATH) // Logger
1604           myPath = (parent != null ? parent.myPath + "-" : "") + this; // Logger
1605         if (Logger.debuggingHigh)
1606           Logger.info("new CIPAtom " + myPath);
1607       }
1608       return this;
1609     }
1610 
1611     /**
1612      * Check ene for first nonduplicate.
1613      *
1614      * @return 1 or 2
1615      */
getEneTop()1616     int getEneTop() {
1617       return (atoms[0].isDuplicate ? 2 : 1);
1618     }
1619 
1620     /**
1621      * The original Rule 6 implementation; allows cubane, tetrahedrane, and
1622      * bicyclo[2.2.2]octane to be ns.
1623      *
1624      *
1625      * @param isAux
1626      *
1627      * @return NO_CHIRALITY or descriptor
1628      */
getRule6Descriptor(boolean isAux)1629     int getRule6Descriptor(boolean isAux) {
1630       if (nPriorities > 2
1631           || (isAux ? countAuxDuplicates(atomIndex) : nRootDuplicates) <= 2)
1632         return NO_CHIRALITY;
1633 
1634       // we have more than two root-duplicates and priorities array is one of:
1635       // [0 0 0 0] CIP Helv Chim. Acta 1966 #33 -- double spiran
1636       // [0 0 0 0] CIP 1982 S4
1637       // [0 0 2 2] P-93.5.3.2 spiro
1638       // [0 1 1 1] or [0 0 0 3] CIP Helv. Chim. Acta 1966 #32 -- C3-symmetric
1639       int i1 = (priorities[0] == priorities[1] ? 0 : 1);
1640       int i2 = (priorities[2] != priorities[3] ? 3 : 4);
1641       int istep = (priorities[2] == priorities[1] ? 1 : 2);
1642       int rsRM = 0, rsSP = 0;
1643       BS bsSubs = new BS();
1644       for (int i = i1; i < i2; i++)
1645         bsSubs.set(atoms[i].atomIndex);
1646       if (nPriorities == 1)
1647         i2 = 2; // actually, we only want two for double spiran or S4
1648 
1649       // Check result of promoting each undifferentiated ligand.
1650       // Exit if two R or two S are found.
1651 
1652       CIPAtom cipAtom = null;
1653       int rs;
1654       for (int i = i1; i < i2; i += istep) {
1655         if (data.testRule6Full) {
1656           // Full Rule 6 -- allow for new auxiliary descriptors in Rule 6.
1657           // This will cause bicyclo[2.2.2]octane to be rr.
1658           cipAtom = new CIPAtom().create(atom, null, false, false, false);
1659           cipAtom.rule6refIndex = atoms[i].atomIndex;
1660           cipAtom.setNode();
1661           for (int j = 0; j < 4; j++) {
1662             cipAtom.atoms[j] = (CIPAtom) atoms[j].clone();
1663             cipAtom.priorities[j] = priorities[j];
1664           }
1665           cipAtom.bsRule6Subs = bsSubs;
1666           rs = getAtomChiralityLimited(atom, cipAtom, null);
1667           currentRule = RULE_6;
1668           if (rs == NO_CHIRALITY)
1669             return NO_CHIRALITY;
1670         } else {
1671           // Original Rule 6 -- just check for switches.
1672           // Bicyclo[2.2.2]octane will be ns.
1673           root.bsRule6Subs = new BS();
1674           root.rule6refIndex = atoms[i].atomIndex;
1675           saveRestorePriorities(false);
1676           sortSubstituents(Integer.MIN_VALUE);
1677           if (!sortSubstituents(0))
1678             return NO_CHIRALITY;
1679           rs = data.checkHandedness(this);
1680           saveRestorePriorities(true);
1681         }
1682         // If the descriptor is r or s, that is our descriptor.
1683         if ((rs & JC.CIP_CHIRALITY_PSEUDO_FLAG) == 0) {
1684           // If it is not r/s, check for more than one R or more than one S.
1685           // If more than one R or more than one S, that is our descriptor.
1686           if (rs == STEREO_R || rs == STEREO_M) {
1687             if (rsRM == 0) {
1688               rsRM = rs;
1689               continue;
1690             }
1691           } else if (rsSP == 0) {
1692             rsSP = rs;
1693             continue;
1694           }
1695         }
1696         // We have a determination.
1697         return rs;
1698       }
1699       return NO_CHIRALITY;
1700     }
1701 
1702     /**
1703      * Reset priorities after each Rule 6 test.
1704      *
1705      * @param isRestore
1706      */
saveRestorePriorities(boolean isRestore)1707     private void saveRestorePriorities(boolean isRestore) {
1708       if (isRestore) {
1709         priorities = oldPriorities;
1710         nPriorities = oldNPriorities;
1711       } else {
1712         oldPriorities = Arrays.copyOf(priorities, 4);
1713         oldNPriorities = nPriorities;
1714       }
1715       for (int i = 0; i < nAtoms; i++)
1716           atoms[i].saveRestorePriorities(isRestore);
1717     }
1718 
1719     /**
1720      * Get a count of the number of duplicate nodes to the auxiliary atom.
1721      *
1722      * @param index
1723      *
1724      * @return the number of dublicates to the auxiliary center
1725      */
countAuxDuplicates(int index)1726     private int countAuxDuplicates(int index) {
1727       int n = 0;
1728       for (int i = 0; i < 4; i++) {
1729         if (atoms[i] == null)
1730           continue;
1731         if (atoms[i].isDuplicate) {
1732           if (atoms[i].atomIndex == index)
1733             n++;
1734         } else {
1735           n += atoms[i].countAuxDuplicates(index);
1736         }
1737       }
1738       return n;
1739     }
1740 
1741     /**
1742      * get the atomic mass only if needed by Rule 2, testing for three special
1743      * conditions in the case of isotopes:
1744      *
1745      * If a duplicate, or an isotope that is 100% nat abundant is specified, or
1746      * isotopic mass is not specified, just use the average mass.
1747      *
1748      * Otherwise, use the integer isotope mass number, taking care in the cases
1749      * of 16O, 52Cr, 96Mo, and 175Lu, to reduce this integer by 0.1 so as to put
1750      * it in the correct relationship to the element's average mass.
1751      *
1752      * @return mass or mass surrogate
1753      */
getMass()1754     private float getMass() {
1755       if (isDuplicate)
1756         return 0;
1757       if (mass == UNDETERMINED) {
1758         if (isDuplicate || (mass = atom.getMass()) != (int) mass
1759             || isType(RULE_2_nXX_EQ_XX))
1760           return (mass == UNDETERMINED ? mass = Elements.getAtomicMass((int) elemNo)
1761               : mass);
1762         // for 16O;52Cr;96Mo;175Lu;
1763         // adjust integer mass number down just a bit to account
1764         // for average mass being slightly higher than actual isotope mass, not lower
1765         if (isType(RULE_2_REDUCE_ISOTOPE_MASS_NUMBER))
1766           mass -= 0.1f;
1767       }
1768       return mass;
1769     }
1770 
isType(String rule2Type)1771     private boolean isType(String rule2Type) {
1772       return PT.isOneOf(
1773           (int) mass + Elements.elementSymbolFromNumber((int) elemNo),
1774           rule2Type);
1775     }
1776 
1777     /**
1778      * Calculate the average element numbers of associated double-bond atoms
1779      * weighted by their most significant Kekule resonance contributor(s). We
1780      * only consider simple benzenoid systems -- 6-membered rings and their
1781      * 6-memebered rings fused to them. Calculated for the parent of an
1782      * sp2-duplicate atom.
1783      *
1784      * @return an averaged element number
1785      */
getKekuleElementNumber()1786     private float getKekuleElementNumber() {
1787       SimpleEdge[] edges = atom.getEdges();
1788       SimpleEdge bond;
1789       float ave = 0;//atom.getElementNumber();
1790       int n = 0;//1;
1791       for (int i = edges.length; --i >= 0;)
1792         if ((bond = edges[i]).isCovalent()) {
1793           SimpleNode other = bond.getOtherNode(atom);
1794           if (data.bsKekuleAmbiguous.get(other.getIndex())) {
1795 //            System.out.println(this + " adding " + other + " " + ave + " " + n);
1796             n++;
1797             ave += other.getElementNumber();
1798           }
1799         }
1800       return ave / n;
1801     }
1802 
1803     /**
1804      * Set the atom to have substituents.
1805      *
1806      * @return true if a valid atom for consideration
1807      *
1808      */
setNode()1809     boolean setNode() {
1810       // notice we are setting isSet TRUE here, not just testing it.
1811       if (isSet || (isSet = true) && isDuplicate)
1812         return true;
1813       int index = atom.getIndex();
1814       SimpleEdge[] bonds = atom.getEdges();
1815       int nBonds = bonds.length;
1816       if (Logger.debuggingHigh)
1817         Logger.info("set " + this);
1818       int pt = 0;
1819       for (int i = 0; i < nBonds; i++) {
1820         SimpleEdge bond = bonds[i];
1821         if (!bond.isCovalent())
1822           continue;
1823         SimpleNode other = bond.getOtherNode(atom);
1824         boolean isParentBond = (parent != null && parent.atom == other);
1825         int order = data.getBondOrder(bond);
1826         if (order == 2) {
1827           if (elemNo > 10 || !isFirstRow(other))
1828             order = 1;
1829           else {
1830             isAlkene = true;
1831             if (isParentBond)
1832               setEne();
1833           }
1834         }
1835         if (nBonds == 1 && order == 1 && isParentBond)
1836           return isTerminal = true;
1837         // from here on, isTerminal indicates an error condition
1838         switch (order) {
1839         case 3:
1840           if (addAtom(pt++, other, isParentBond, false, isParentBond) == null)
1841             return !(isTerminal = true);
1842           //$FALL-THROUGH$
1843         case 2:
1844           if (addAtom(pt++, other, order != 2 || isParentBond, order == 2,
1845               isParentBond) == null)
1846             return !(isTerminal = true);
1847           //$FALL-THROUGH$
1848         case 1:
1849           if (isParentBond
1850               || addAtom(pt++, other, order != 1 && elemNo <= 10, false, false) != null)
1851             break;
1852           //$FALL-THROUGH$
1853         default:
1854           return !(isTerminal = true);
1855         }
1856       }
1857       nAtoms = pt;
1858       switch (pt) {
1859       case 2:
1860       case 3:
1861         // [c-,r5d3n+0,r5d2o+0]
1862         if (elemNo == 6 && data.bsNegativeAromatic.get(index) || data.bsXAromatic.get(index)) {
1863           nAtoms++;
1864           addAtom(pt++, this.atom, true, false, false);
1865         }
1866         break;
1867 
1868       }
1869       if (pt < 4)
1870         for (; pt < atoms.length; pt++)
1871           atoms[pt] = new CIPAtom().create(null, this, false, true, false);
1872 
1873       // Do an initial very shallow atom-only Rule 1 sort using a.compareTo(b)
1874 
1875       try {
1876         Arrays.sort(atoms);
1877       } catch (Exception e) {
1878         e.printStackTrace();
1879       }
1880       return true;
1881     }
1882 
1883     /**
1884      * Set all ene-related fields upon finding the second atom.
1885      *
1886      */
setEne()1887     private void setEne() {
1888       parent.alkeneChild = null;
1889       alkeneParent = (parent.alkeneParent == null ? parent
1890           : parent.alkeneParent);
1891       alkeneParent.alkeneChild = this;
1892       nextSP2 = parent;
1893       if (parent.alkeneParent == null)
1894         parent.nextSP2 = this;
1895       if (atom.getCovalentBondCount() == 2 && atom.getValence() == 4) {
1896         parent.isAlkeneAtom2 = false;
1897         alkeneParent.isEvenEne = !alkeneParent.isEvenEne;
1898       } else {
1899         isAlkeneAtom2 = true;
1900       }
1901     }
1902 
1903     /**
1904      * Add a new atom or return null
1905      *
1906      * @param i
1907      * @param other
1908      * @param isDuplicate
1909      * @param isAlkene
1910      * @param isParentBond
1911      * @return new atom or null
1912      */
addAtom(int i, SimpleNode other, boolean isDuplicate, boolean isAlkene, boolean isParentBond)1913     private CIPAtom addAtom(int i, SimpleNode other, boolean isDuplicate,
1914                     boolean isAlkene, boolean isParentBond) {
1915       if (i >= atoms.length) {
1916         if (Logger.debugging)
1917           Logger.info(" too many bonds on " + atom);
1918         return null;
1919       }
1920       if (other.getElementNumber() == 1 && other.getIsotopeNumber() == 0) {
1921         if (++h1Count > 1) {
1922           if (parent == null) {
1923             // For top level, we do not allow two 1H atoms.
1924             if (Logger.debuggingHigh)
1925               Logger.info(" second H atom found on " + atom);
1926             return null;
1927           }
1928         }
1929       }
1930       return atoms[i] = new CIPAtom().create(other, this, isAlkene,
1931           isDuplicate, isParentBond);
1932     }
1933 
1934     ///// sorting methods /////
1935 
1936     /**
1937      * Deep-Sort the substituents of an atom, setting the node's atoms[] and
1938      * priorities[] arrays. Checking for "ties" that will lead to
1939      * pseudochirality is also done here.
1940      *
1941      * @param sphere
1942      *        current working sphere; Integer.MIN_VALUE to not break ties
1943      * @return all priorities assigned
1944      *
1945      */
sortSubstituents(int sphere)1946     boolean sortSubstituents(int sphere) {
1947 
1948       // runs about 20% faster with this check
1949       if (nPriorities == (sphere < 1 ? 4 : 3))
1950         return true;
1951 
1952       // Note that this method calls breakTie and is called recursively from breakTie.
1953 
1954       boolean ignoreTies = (sphere == Integer.MIN_VALUE);
1955       if (ignoreTies) {
1956         // If this is Rule 4a, 4c, or 6, we must presort the full tree without breaking ties
1957         if (isTerminal)
1958           return false;
1959         switch (currentRule) {
1960         case RULE_4a:
1961         case RULE_4c:
1962           for (int i = 0; i < 4; i++)
1963             if (atoms[i] != null && (atoms[i].isChiralPath || atoms[i].nextChiralBranch != null))
1964               atoms[i].sortSubstituents(Integer.MIN_VALUE);
1965           if (isAlkene) // was isSP3
1966             return false;
1967           break;
1968         case RULE_6:
1969           for (int i = 0; i < 4; i++)
1970             if (atoms[i] != null && !atoms[i].isDuplicate
1971                 && atoms[i].atom != null && atoms[i].setNode())
1972               atoms[i].sortSubstituents(Integer.MIN_VALUE);
1973           break;
1974        }
1975       }
1976 
1977       ignoreTies |= (currentRule == RULE_4b || currentRule == RULE_5);
1978 
1979       int[] indices = new int[4];
1980 
1981       int[] newPriorities = new int[4];
1982 
1983       if (Logger.debuggingHigh && h1Count < 2) {
1984         Logger.info(root + "---sortSubstituents---" + this);
1985         for (int i = 0; i < 4; i++) { // Logger
1986           Logger.info(getRuleName(currentRule) + ": " + this + "[" + i + "]="
1987               + atoms[i].myPath + " " + Integer.toHexString(priorities[i])); // Logger
1988         }
1989         Logger.info("---" + nPriorities);
1990       }
1991 
1992       int loser;
1993       for (int i = 0; i < 3; i++) {
1994         CIPAtom a = atoms[i];
1995         boolean aLoses = a.isDuplicate && currentRule > RULE_1b;
1996         for (int j = i + 1; j < 4; j++) {
1997           CIPAtom b = atoms[loser = j];
1998           int score = TIED;
1999 
2000           // Check:
2001 
2002           // (a) if one of the atoms is a phantom atom (P-92.1.4.1); if not, then
2003           // (b) if the prioritiy has already been set; if not, then
2004           // (c) if the current rule decides; if not, then
2005           // (d) if the tie can be broken in the next sphere
2006           switch (b.atom == null || priorities[i] < priorities[j] ? A_WINS
2007               : aLoses || a.atom == null || priorities[j] < priorities[i] ? B_WINS
2008                   : (score = a.checkCurrentRule(b)) != TIED  && score != IGNORE || ignoreTies ? score
2009                           : sign(a.breakTie(b, sphere + 1))) {
2010           case B_WINS:
2011             loser = i;
2012             //$FALL-THROUGH$
2013           case A_WINS:
2014             newPriorities[loser]++;
2015             if (doTrack && score != TIED && (sphere == 0 || ignoreTies))
2016               data.track(CIPChirality.this, a, b, 1, score, false);
2017             //$FALL-THROUGH$
2018           case IGNORE:
2019           case TIED:
2020             indices[loser]++;
2021             continue;
2022           }
2023         }
2024       }
2025 
2026       // update nPriorities and all arrays
2027 
2028       bsTemp.clearAll(); // track number of priorities
2029       CIPAtom[] newAtoms = new CIPAtom[4];
2030       for (int i = 0; i < 4; i++) {
2031         int pt = indices[i];
2032         CIPAtom a = newAtoms[pt] = atoms[i];
2033         int p = newPriorities[i];
2034         if (a.atom != null)
2035           bsTemp.set(p);
2036         a.priority = priorities[pt] = p;
2037       }
2038       atoms = newAtoms;
2039       nPriorities = bsTemp.cardinality();
2040       if (Logger.debuggingHigh && atoms[2].atom != null && atoms[2].elemNo != 1) { // Logger
2041         Logger.info(dots() + atom + " nPriorities = " + nPriorities);
2042         for (int i = 0; i < 4; i++) { // Logger
2043           Logger.info(dots() + myPath + "[" + i + "]=" + atoms[i] + " "
2044               + priorities[i] + " " + Integer.toHexString(priorities[i])); // Logger
2045         }
2046         Logger.info(dots() + "-------" + nPriorities);
2047       }
2048 
2049       // We are done if the number of priorities equals the bond count.
2050 
2051       return (nPriorities == bondCount);
2052     }
2053 
2054     /**
2055      * Provide an indent for clarity in debugging messages
2056      *
2057      * @return a string of dots based on the value of atom.sphere.
2058      */
dots()2059     private String dots() {
2060       return ".....................".substring(0, Math.min(20, sphere)); // Logger
2061     }
2062 
2063     /**
2064      * Break a tie at any level in the iteration between to atoms that otherwise
2065      * are the same by sorting their substituents.
2066      *
2067      * @param b
2068      * @param sphere
2069      *        current working sphere
2070      * @return [0 (TIED), -1 (A_WINS), or 1 (B_WINS)] * (sphere where broken)
2071      */
breakTie(CIPAtom b, int sphere)2072     private int breakTie(CIPAtom b, int sphere) {
2073 
2074       int finalScore = TIED;
2075 
2076       while (true) {
2077         //System.out.println("breaking tie for " + this + " " + b);
2078 
2079         // Phase I: Quick check of this atom itself
2080 
2081         // return TIED if:
2082 
2083         // a) this is a duplicate, and we are done with Rule 1b
2084         // b) two duplicates are of the same node (atom and root distance)
2085         // c) one or the other can't be set (because it has too many connections), or
2086         // d) both are terminal or both are duplicates (no atoms to check)
2087 
2088         if (isDuplicate
2089             && (currentRule > RULE_1b || b.isDuplicate && atom == b.atom
2090                 && rootDistance == b.rootDistance)
2091                 || !setNode() || !b.setNode()
2092                 || isTerminal && b.isTerminal
2093                 || isDuplicate && b.isDuplicate)
2094           break;
2095 
2096 
2097         // We are done if one of these is terminal
2098         // (for the next sphere, unless one is a duplicate -- Custer Rule 1b "duplicate > nonduplicate").
2099 
2100         if (isTerminal != b.isTerminal) {
2101           finalScore = (isTerminal ? B_WINS : A_WINS)
2102               * (sphere + (b.isDuplicate || isDuplicate ? 0 : 1)); // COUNT_LINE
2103           if (doTrack)
2104             data.track(CIPChirality.this, this, b, sphere, finalScore, true);
2105           break;
2106         }
2107         // Do a duplicate check.
2108 
2109         // If this is not a TIE, we do not have to go any further.
2110 
2111         // NOTE THAT THIS CHECK IS NOT EXPLICIT IN THE RULES
2112         // BECAUSE DUPLICATES LOSE IN THE NEXT SPHERE, NOT THIS ONE.
2113         // THE NEED FOR (sphere+1) in AY236.53, 163, 173, 192 SHOWS THAT
2114         // SUBRULE 1a MUST BE COMPLETED EXHAUSTIVELY PRIOR TO SUBRULE 1b.
2115         //
2116         // Note that this check must return "N+1", because that is where the actual difference is.
2117         // This nuance is not clear from the "simplified" digraphs found in Chapter 9.
2118         //
2119         // Say we have {O (O) C} and {O O H}
2120         //
2121         // The rules require that we first only look at just the atoms, so OOC beats OOH in this sphere,
2122         // but there are no atoms to check on (O), so we can do the check here to save time, reporting back
2123         // to breatTie that we found a difference, but not in this sphere.
2124         // This allows C to still beat H in this sphere, but had it been {O (O} C} and {O O C}, then
2125         // we would be done.
2126 
2127         int score = (currentRule > RULE_1b ? TIED : unlikeDuplicates(b));
2128         if (score != TIED) {
2129           finalScore = score * (sphere + 1); // COUNT_LINE
2130           if (doTrack)
2131             data.track(CIPChirality.this, this, b, sphere, finalScore, false);
2132           break;
2133         }
2134         // Phase II -- shallow check only
2135         //
2136         // Compare only in the current substitutent sphere.
2137         //
2138         // Check to see if any of the three connections to a and b are
2139         // different themselves, without any deeper check.
2140         //
2141         // This requires that both a annd b have their ligands sorted
2142         // at least in a preliminary fashion, using Array.sort() and compareTo()
2143         // for Rules 1a, 1b, and 2.
2144         // But if we do not do a presort, then we have to do those first.
2145         // Doing the presort saves considerably on run time.
2146 
2147         for (int i = 0; i < nAtoms; i++)
2148           if ((score = atoms[i].checkCurrentRule(b.atoms[i])) != TIED) {
2149             finalScore = score * (sphere + 1); // COUNT_LINE
2150             if (doTrack)
2151               data.track(CIPChirality.this, atoms[i], b.atoms[i], sphere, finalScore, false);
2152             break;
2153           }
2154 
2155         if (finalScore != TIED) {
2156           break;
2157         }
2158         // Time to do a full sort of eash ligand, including breaking ties
2159 
2160         sortSubstituents(sphere);
2161         b.sortSubstituents(sphere);
2162 
2163         // Phase III -- check deeply using re-entrant call to breakTie
2164         //
2165         // Now iteratively deep-sort each list based on substituents
2166         // and then check them one by one to see if the tie can be broken.
2167 
2168         // Note that if not catching duplicates early, we must check for
2169         // nAtoms == 0 and set finalScore in that case to B_WINS * (sphere + 1)
2170         // but we are checking for duplicates early in this implementation.
2171 
2172         for (int i = 0, abs, absScore = Integer.MAX_VALUE; i < nAtoms; i++) {
2173           if ((score = atoms[i].breakTie(b.atoms[i], sphere + 1)) != TIED
2174               && (abs = Math.abs(score)) < absScore) {
2175             absScore = abs;
2176             finalScore = score;
2177           }
2178         }
2179         break;
2180       }
2181       return finalScore;
2182     }
2183 
2184     ///// atom comparison methods //////
2185 
2186     /**
2187      * Used in Array.sort when an atom is set and Collection.sort when
2188      * determining the Mata like/unlike sequence for Rules 4b and 5. Includes a
2189      * preliminary check for duplicates, since we know that that atom will
2190      * ultimately be lower priority if all other rules are tied.
2191      *      *
2192      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2193      */
2194     @Override
compareTo(CIPAtom b)2195     public int compareTo(CIPAtom b) {
2196       int score;
2197       return (root.rule4Ref == NO_CHIRALITY ?
2198          // standard constitutional presort - Rules 1a, 1b, and 2
2199           (b == null ? A_WINS
2200               : (atom == null) != (b.atom == null) ? (atom == null ? B_WINS
2201                   : A_WINS) : (score = compareRule1a(b)) != TIED ? score
2202                   : (score = unlikeDuplicates(b)) != TIED ? score
2203                       : isDuplicate ? compareRule1b(b) : compareRule2(b))
2204          :
2205          // Rule 4b/5 Mata list referenced sort -- first by sphere, then a
2206          // simple string sort
2207            sphere < b.sphere ? -1 : sphere > b.sphere ? 1 : chiralPath
2208               .compareTo(b.chiralPath));
2209     }
2210 
2211     /**
2212      * Check this atom "A" vs a challenger "B" against the current rule.
2213      *
2214      * Note that example BB 2013 page 1258, P93.5.2.3 requires that RULE_1b be
2215      * applied only after RULE_1a has been applied exhaustively for a sphere;
2216      * otherwise C1 is R, not S (AY-236.53).
2217      *
2218      * @param b
2219      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS), or Intege.MIN_VALUE
2220      *         (IGNORE)
2221      */
checkCurrentRule(CIPAtom b)2222     private int checkCurrentRule(CIPAtom b) {
2223       switch (currentRule) {
2224       default:
2225       case RULE_1a:
2226         return compareRule1a(b);
2227       case RULE_1b:
2228         return compareRule1b(b);
2229       case RULE_2:
2230         return compareRule2(b);
2231       case RULE_3:
2232         return compareRule3(b); // can be IGNORE
2233       case RULE_4a:
2234         return compareRules4ac(b, " sr SR PM");
2235       case RULE_4b:
2236       case RULE_5:
2237         // can be terminal when we are checking the two groups on an alkene end
2238         return (isTerminal || b.isTerminal ? TIED : compareRule4b5(b));
2239       case RULE_4c:
2240         return compareRules4ac(b, " s r p m");
2241       case RULE_6:
2242         return compareRule6(b);
2243       }
2244     }
2245 
2246     /**
2247      * This check is not technically one of those listed in the rules, but it is
2248      * useful when preparing to check substituents because if one of the atoms
2249      * has substituents and the other doesn't, we are done -- there is no reason
2250      * to check substituents.
2251      *
2252      *
2253      * @param b
2254      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2255      */
unlikeDuplicates(CIPAtom b)2256     private int unlikeDuplicates(CIPAtom b) {
2257       return b.isDuplicate == isDuplicate ? TIED : isDuplicate ? B_WINS : A_WINS;
2258     }
2259 
2260     /**
2261      * Looking for phantom atom (atom number 0) or element number
2262      *
2263      * @param b
2264      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2265      */
compareRule1a(CIPAtom b)2266     private int compareRule1a(CIPAtom b) {
2267       return b.atom == null ? A_WINS
2268           : atom == null ? B_WINS
2269           : b.elemNo < elemNo ? A_WINS
2270               : b.elemNo > elemNo ? B_WINS
2271                   : TIED;
2272     }
2273 
2274     /**
2275      * Looking for root distance -- duplicate atoms only.
2276      * (We have checked for
2277      *
2278      * @param b
2279      *
2280      *
2281      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2282      */
2283 
compareRule1b(CIPAtom b)2284     private int compareRule1b(CIPAtom b) {
2285       return Integer.compare(rootDistance, b.rootDistance);
2286     }
2287 
2288     /**
2289      * Chapter 9 Rule 2. atomic mass, with possible reversal due to use of mass
2290      * numbers.
2291      *
2292      * Also checks for temporary Rule 6 promotion for full Rule 6 implemenation
2293      * (rr bicyclo[2.2.2]octane)
2294      *
2295      * @param b
2296      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2297      */
compareRule2(CIPAtom b)2298     private int compareRule2(CIPAtom b) {
2299       return (atomIndex == b.atomIndex? TIED : getMass() > b.getMass() ? A_WINS
2300           : mass < b.mass ? B_WINS
2301               // full Rule 6 priority check is done here
2302               : root.rule6refIndex < 0 ? TIED
2303               : !root.bsRule6Subs.get(atomIndex)
2304               || !root.bsRule6Subs.get(b.atomIndex) ?
2305                   TIED
2306               : root.rule6refIndex == atomIndex ? A_WINS : root.rule6refIndex == b.atomIndex ? B_WINS : TIED);
2307     }
2308 
2309     /**
2310      * Chapter 9 Rule 3. E/Z.
2311      *
2312      * We carry out this step only as a tie in the sphere AFTER the final atom
2313      * of the alkene or even-cumulene.
2314      *
2315      * If the this atom and the comparison atom b are on the same parent, then
2316      * this is simply a request to break their tie based on Rule 3; otherwise
2317      * two paths are being checked for one being seqCis and one being seqTrans.
2318      *
2319      * @param b
2320      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2321      */
compareRule3(CIPAtom b)2322     private int compareRule3(CIPAtom b) {
2323       return isDuplicate || b.isDuplicate || !parent.isAlkeneAtom2
2324           || !b.parent.isAlkeneAtom2 || !parent.alkeneParent.isEvenEne
2325           || !b.parent.alkeneParent.isEvenEne || parent == b.parent ? TIED
2326           : Integer.compare(parent.auxEZ, b.parent.auxEZ);
2327     }
2328 
2329     /**
2330      * Chapter 9 Rules 4a and 4c. This method allows for "RS" to be checked as
2331      * "either R or S". See AY236.66, AY236.67,
2332      * AY236.147,148,156,170,171,201,202, etc. (4a)
2333      *
2334      * @param b
2335      * @param test
2336      *        String to test against; depends upon subrule being checked
2337      * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS)
2338      */
compareRules4ac(CIPAtom b, String test)2339     private int compareRules4ac(CIPAtom b, String test) {
2340       if (isTerminal || isDuplicate)
2341         return TIED;
2342       int isRa = test.indexOf(auxChirality), isRb = test
2343           .indexOf(b.auxChirality);
2344       return (isRa > isRb + 1 ? A_WINS : isRb > isRa + 1 ? B_WINS : TIED);
2345     }
2346 
2347     /**
2348      * Compare the better R-ref or S-ref list for A with the same for B.
2349      *
2350      * @param b
2351      * @return A_WINS, B_WINS, or IGNORE
2352      */
compareRule4b5(CIPAtom b)2353     private int compareRule4b5(CIPAtom b) {
2354       BS bsA = getBetter4bList();
2355       BS bsB = b.getBetter4bList();
2356       BS best = compareLikeUnlike(bsA, bsB);
2357       int score = (best == null ? IGNORE : best == bsA ? A_WINS : B_WINS);
2358       if (best != null) {
2359         if (currentRule == RULE_5) {
2360           // this is our check for self-enantiomorphic ligands
2361           // if the two ligands are each self-enatiomorphic (first chiral sphere r,s only, no R,S),
2362           // then we need to switch to R/S mode.
2363           // This is tested as (A(R) > B(R)) == (A(S) > B(S))
2364           if ((compareLikeUnlike(listRS[STEREO_S], b.listRS[STEREO_S]) == listRS[STEREO_S])
2365               == (best == bsA))
2366           parent.isRule5Pseudo = !parent.isRule5Pseudo;
2367         }
2368         if (doTrack)
2369           data.track(CIPChirality.this, this, b, 1, score, false);
2370       }
2371       return score;
2372     }
2373 
2374     /**
2375      * Just look for match with the Rule 6 reference atom index
2376      *
2377      * @param b
2378      * @return A_WINS, B_WINS, or TIED
2379      */
compareRule6(CIPAtom b)2380     private int compareRule6(CIPAtom b) {
2381       return ((atomIndex == root.rule6refIndex) == (b.atomIndex == root.rule6refIndex) ? TIED
2382           : atomIndex == root.rule6refIndex ? A_WINS : B_WINS);
2383     }
2384 
2385     ////// Rule 4b, 5 processsing //////
2386 
2387     /**
2388      * Clear Rule 4b information if Rule-5 pseudochiral centers have been found,
2389      * as that could change the order of descriptors in the Mata list.
2390      */
clearRule4Lists()2391     void clearRule4Lists() {
2392       listRS = null;
2393       for (int i = 0; i < 4 && atoms[i] != null; i++)
2394         atoms[i].clearRule4Lists();
2395     }
2396 
2397     /**
2398      * Create Mata-style linear atom.listRS and return the better of R-ref or S-ref.
2399      * Used first in Rule 4b and again in Rule 5.
2400      * @return the better 4b list
2401      */
getBetter4bList()2402     private BS getBetter4bList() {
2403       if (listRS != null)
2404         return listRS[currentRule == RULE_5 ? STEREO_R : 0];
2405       BS bs;
2406       listRS = new BS[] { null, bs = rank4bAndRead(null), rank4bAndRead(bs)};
2407       Logger.info("getBest " + currentRule + " " + this + " " + listRS[STEREO_R] + listRS[STEREO_S] + " " + myPath);
2408       bs = compareLikeUnlike(listRS[STEREO_R], listRS[STEREO_S]);
2409       return listRS[0] = (currentRule == RULE_5 || bs == null ? listRS[STEREO_R] : bs);
2410     }
2411 
2412     /**
2413      * A thread-based sphere-ordered implementation that
2414      * takes into account that lists cross the boundaries of branches.
2415      *
2416      * @param bsR
2417      *        null if this is for R or the R-ref list if this is for S
2418      * @return ranked list
2419      */
rank4bAndRead(BS bsR)2420     private BS rank4bAndRead(BS bsR) {
2421       boolean isS = (bsR != null);
2422       int ref = (isS ? STEREO_S : STEREO_R);
2423       BS list = new BS();
2424       Lst<CIPAtom> chiralAtoms = new Lst<CIPAtom>();
2425       root.rule4Ref = ref;
2426       addChiralAtoms(chiralAtoms, ref);
2427       Collections.sort(chiralAtoms);
2428       root.rule4Ref = NO_CHIRALITY;
2429       for (int i = 0, n = chiralAtoms.size(); i < n; i++) {
2430         if (Logger.debugging)
2431           Logger.info("" + ref + " " + this + " " + chiralAtoms.get(i).chiralPath);
2432         if (chiralAtoms.get(i).rule4Type == ref)
2433           list.set(i);
2434       }
2435       return list;
2436     }
2437 
2438     /**
2439      * Create an ASCII string that allows the list of descriptors to be
2440      * generated in order. This list must take into account both the
2441      * current (Rule 4a or Rule 4c) priority group (0 highest to 3 lowest)
2442      * of each point along the path back to the root for this node.
2443      * This is done by multiplying the priority group by 4 and adding in
2444      * the like- unlike-ness of the node as one of 0 (ns), 1 (like), or 2 (unlike).
2445      *
2446      * In this way, we can get cross-branch sorting, not just the typical
2447      * breadth-first sorting that we usually use for ranking. For example,
2448      * in BH_64_085, two branches of a ligand each have two branches that
2449      * have identical Rule 4a priorities, leading to four branches that must be
2450      * sorted as the "highest-priority" set in Rule 4b.
2451      *
2452      * @param chiralAtoms
2453      * @param ref
2454      */
addChiralAtoms(Lst<CIPAtom> chiralAtoms, int ref)2455     private void addChiralAtoms(Lst<CIPAtom> chiralAtoms, int ref) {
2456       // encodings:
2457       //
2458       // @: 0-priority ns
2459       // A: 0-priority LIKE
2460       // B: 0-priority UNLIKE
2461       // C: n/a
2462       // D: 1-priority ns
2463       // E: 1-priority LIKE
2464       // F: 1-priority UNLIKE
2465       // G: n/a
2466       // H: 2-priority ns
2467       // I: 2-priority LIKE
2468       // J: 2-priority UNLIKE
2469       // K: n/a
2470       // L: 3-priority ns
2471       // M: 3-priority LIKE
2472       // N: 3-priority UNLIKE
2473 
2474       // For example, in the processing of BH64_073 C28, we get for ligand C89
2475       // in Rule 4b:
2476       // R-ref:
2477       //      1 C89 @DA
2478       //      1 C89 @DADA
2479       //      1 C89 @DADB
2480       //      1 C89 @DADADA
2481       //      1 C89 @DADADB
2482       //      1 C89 @DADBDA
2483       //      1 C89 @DADBDB
2484       // S-ref:
2485       //      2 C89 @DB
2486       //      2 C89 @DBDA
2487       //      2 C89 @DBDB
2488       //      2 C89 @DBDADA
2489       //      2 C89 @DBDADB
2490       //      2 C89 @DBDBDA
2491       //      2 C89 @DBDBDB
2492       //
2493       // resulting R- and S-lists, from last descriptor only:
2494       // (R)llululu and (S)ulululu
2495       //
2496       // and in BH64_085, we get different results for Rule 4b than for Rule 5:
2497       //
2498       // Rule 4b:
2499       // R-ref:
2500       //      1 N17 @D@@A
2501       //      1 N17 @D@@A
2502       //      1 N17 @D@@B
2503       //      1 N17 @D@@B
2504       //      1 N17 @D@@AE
2505       //      1 N17 @D@@AF
2506       //      1 N17 @D@@BE
2507       //      1 N17 @D@@BF
2508       //
2509       // giving AABBEFEF, or lluululu
2510       //
2511       // Rule 5, after a Rule 4c sort:
2512       // R-ref:
2513       //      1 N17 @D@@A
2514       //      1 N17 @D@@B
2515       //      1 N17 @DD@A
2516       //      1 N17 @DD@B
2517       //      1 N17 @D@@AE
2518       //      1 N17 @D@@BF
2519       //      1 N17 @DD@AF
2520       //      1 N17 @DD@BE
2521       //
2522       // giving ABABEFFE or lululuul
2523 
2524       if (atom == null || isTerminal || isDuplicate)
2525         return;
2526       if (rule4Type != 0) {
2527         String s = "";
2528         CIPAtom a = this;
2529         while (a != null) {
2530           s = (char)(64 + (a.priority<<2) + (a.rule4Type == 0 ? 0 : a.rule4Type == ref ? 1 : 2)) + s;
2531           if ((a = a.parent) != null && a.chiralPath != null) {
2532             s = a.chiralPath + s;
2533             break;
2534           }
2535         }
2536         chiralPath = s;
2537         chiralAtoms.addLast(this);
2538       }
2539       for (int i = 0; i < 4; i++)
2540         if (atoms[i] != null)
2541           atoms[i].addChiralAtoms(chiralAtoms, ref);
2542     }
2543 
compareLikeUnlike(BS bsA, BS bsB)2544     private BS compareLikeUnlike(BS bsA, BS bsB) {
2545       BS bsXOR = (BS) bsB.clone();
2546       // A   = 1101111   // llullll
2547       // B   = 1100111   // lluulll
2548       // xor = 0001000
2549       bsXOR.xor(bsA);
2550       int l = bsXOR.nextSetBit(0);
2551       return (l < 0 ? null : bsA.get(l) ? bsA : bsB);
2552     }
2553 
2554     ///// auxiliary processing
2555 
2556     /**
2557      * By far the most complex of the methods, this method creates a list of
2558      * downstream (higher-sphere) auxiliary chirality designators, starting with
2559      * those furthest from the root and moving in, toward the root.
2560      *
2561      * @param node1
2562      *        first node; sphere 1
2563      * @param ret
2564      *        CIPAtom of next stereochemical branching point
2565      *
2566      * @return collective string, with setting of rule4List
2567      */
createAuxiliaryDescriptors(CIPAtom node1, CIPAtom[] ret)2568     boolean createAuxiliaryDescriptors(CIPAtom node1, CIPAtom[] ret) {
2569       boolean isChiralPath = false;
2570       char c = '~';
2571       if (atom == null)
2572         return false;
2573       setNode();
2574       int rs = -1, nRS = 0;
2575       CIPAtom[] ret1 = new CIPAtom[1];
2576       boolean skipRules4And5 = false;
2577       boolean prevIsChiral = true;
2578       // have to allow two same because could be a C3-symmetric subunit
2579       boolean allowTwoSame = (!isAlkene && nPriorities <= (node1 == null ? 2
2580           : 1));
2581       for (int i = 0; i < 4; i++) {
2582         CIPAtom a = atoms[i];
2583         if (a != null && !a.isDuplicate && !a.isTerminal) {
2584           // we use ret1 to pass a reference to the next branch with two or more chiral paths
2585           ret1[0] = null;
2586           boolean aIsChiralPath = a.createAuxiliaryDescriptors(
2587               node1 == null ? a : node1, ret1);
2588           if (ret1[0] != null && ret != null)
2589             ret[0] = nextChiralBranch = a.nextChiralBranch;
2590           if (a.nextChiralBranch != null || aIsChiralPath) {
2591             nRS++;
2592             isChiralPath = aIsChiralPath;
2593             prevIsChiral = true;
2594           } else {
2595             if (!allowTwoSame && !prevIsChiral
2596                 && priorities[i] == priorities[i - 1]) {
2597               return false;
2598             }
2599             prevIsChiral = false;
2600           }
2601         }
2602       }
2603       boolean isBranch = (nRS >= 2);
2604       switch (nRS) {
2605       case 0:
2606         isChiralPath = false;
2607         //$FALL-THROUGH$
2608       case 1:
2609         skipRules4And5 = true;
2610         break;
2611       case 2:
2612       case 3:
2613       case 4:
2614         isChiralPath = false;
2615         if (ret != null)
2616           ret[0] = nextChiralBranch = this;
2617         break;
2618       }
2619       if (isAlkene) {
2620         if (alkeneChild != null) {
2621           // must be alkeneParent -- first C of an alkene -- this is where C/T is recorded
2622           // All odd cumulenes need to be checked.
2623           // If it is an alkene or even cumulene, we must do an auxiliary check
2624           // only if it is not already a defined stereochemistry, because in that
2625           // case we have a simple E/Z (c/t), and there is no need to check AND
2626           // it does not contribute to the Mata sequence (similar to r/s or m/p).
2627           //
2628 
2629           if (!isEvenEne || (auxEZ == STEREO_BOTH_EZ || auxEZ == UNDETERMINED)
2630               && !isKekuleAmbiguous && alkeneChild.bondCount >= 2) {
2631             int[] rule2 = (isEvenEne ? new int[1] : null);
2632             rs = getAuxEneWinnerChirality(this, alkeneChild, !isEvenEne, rule2);
2633 
2634             //
2635             // Note that we can have C/T (rule4Type = R/S):
2636             //
2637             //    R      x
2638             //     \    /
2639             //       ==
2640             //     /    \
2641             //    S      root
2642             //
2643             // flips sense upon planar inversion; determination was Rule 5.
2644             //
2645             // and ALSO we can have c/t here that has not been discovered yet
2646             //
2647             //
2648             //   SR      x
2649             //     \    /
2650             //       ==
2651             //     /    \
2652             //   SS      root
2653 
2654             if (rs == NO_CHIRALITY) {
2655               auxEZ = alkeneChild.auxEZ = STEREO_BOTH_EZ;
2656             } else {
2657               isChiralPath = true;
2658               if (rule2 != null && rule2[0] != RULE_5) {
2659                 // normal even-ene seqcis/seqtrans
2660                 auxEZ = alkeneChild.auxEZ = rs;
2661                 if (Logger.debuggingHigh)
2662                   Logger.info("alkene type " + this + " " + (auxEZ == STEREO_E ? "E" : "Z"));
2663               } else if (!isBranch) {
2664                 // Normalize M/P and Z/E to R/S
2665                 switch (rs) {
2666                 case STEREO_M:
2667                 case STEREO_Z:
2668                   rs = STEREO_R;
2669                   c = 'R';
2670                   isChiralPath = true;
2671                   break;
2672                 case STEREO_P:
2673                 case STEREO_E:
2674                   rs = STEREO_S;
2675                   c = 'S';
2676                   isChiralPath = true;
2677                   break;
2678                 }
2679                 auxChirality = c;
2680                 rule4Type = rs;
2681               }
2682             }
2683           }
2684         }
2685       } else if (isSP3 && ret != null) {
2686         // if here, adj is TIED (0) or NOT_RELEVANT
2687         CIPAtom atom1 = (CIPAtom) clone();
2688         if (atom1.setNode()) {
2689           atom1.addReturnPath(null, this);
2690           int rule = RULE_1a;
2691           for (; rule <= RULE_6; rule++)
2692             // two C3 groups....
2693             if ((!skipRules4And5 || rule < RULE_4a || rule > RULE_5)
2694                 && atom1.auxSort(rule))
2695               break;
2696           if (rule > RULE_6) {
2697             c = '~';
2698           } else {
2699             rs = data.checkHandedness(atom1);
2700             isChiralPath |= (rs != NO_CHIRALITY);
2701             c = (rs == STEREO_R ? 'R' : rs == STEREO_S ? 'S' : '~');
2702             if (rule == RULE_5) {
2703               c = (c == 'R' ? 'r' : c == 'S' ? 's' : '~');
2704               if (rs != NO_CHIRALITY)
2705                 havePseudoAuxiliary = true;
2706             } else {
2707               rule4Type = rs;
2708             }
2709           }
2710         }
2711         auxChirality = c;
2712       }
2713       if (node1 == null)
2714         bsNeedRule.setBitTo(RULE_4a, nRS > 0);
2715       if (c != '~') {
2716         Logger.info("creating aux " + c + " for " + this + (myPath.length() == 0 ? "" : " = " + myPath));
2717       }
2718       return (this.isChiralPath = isChiralPath);
2719     }
2720 
2721     /**
2722      * Sort by a given rule, preserving currentRule, which could be 4 or 5
2723      *
2724      * @param rule
2725      * @return true if a decision has been made
2726      */
auxSort(int rule)2727     private boolean auxSort(int rule) {
2728       int current = currentRule;
2729       currentRule = rule;
2730       int rule6ref = root.rule6refIndex;
2731       int nDup = root.nRootDuplicates;
2732       boolean isChiral = (rule == RULE_6 ?
2733         getRule6Descriptor(true) != NO_CHIRALITY : sortSubstituents(0));
2734       root.nRootDuplicates = nDup;
2735       root.rule6refIndex = rule6ref;
2736       currentRule = current;
2737       return isChiral;
2738     }
2739 
2740     /**
2741      * Determine the winner on one end of an alkene or cumulene and return also
2742      * the rule by which that was determined.
2743      *
2744      * @param end1
2745      * @param end2
2746      * @param isAxial
2747      * @param retRule2
2748      *        return for rule found for child end (furthest from root)
2749      * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_M |
2750      *         STEREO_P}
2751      */
getAuxEneWinnerChirality(CIPAtom end1, CIPAtom end2, boolean isAxial, int[] retRule2)2752     private int getAuxEneWinnerChirality(CIPAtom end1, CIPAtom end2,
2753                                          boolean isAxial, int[] retRule2) {
2754       if (isAxial && end1.nextSP2 == end2)
2755         return  NO_CHIRALITY; // allene terminating on root
2756       CIPAtom winner1 = getAuxEneEndWinner(end1, end1.nextSP2, null);
2757       CIPAtom winner2 = (winner1 == null || winner1.atom == null ? null
2758           : getAuxEneEndWinner(end2, end2.nextSP2, retRule2));
2759       if (Logger.debuggingHigh)
2760         Logger.info(this + " alkene end winners " + winner1 + winner2);
2761       return getEneChirality(winner1, end1, end2, winner2, isAxial, false);
2762     }
2763 
2764     /**
2765      * Get the atom that is the highest priority of two atoms on the end of a
2766      * double bond after sorting from Rule 1a through a given rule (Rule 3 or
2767      * Rule 5)
2768      *
2769      * @param end
2770      * @param prevSP2
2771      * @param retRule
2772      *        return for deciding rule
2773      * @return higher-priority atom, or null if they are equivalent
2774      */
getAuxEneEndWinner(CIPAtom end, CIPAtom prevSP2, int[] retRule)2775     private CIPAtom getAuxEneEndWinner(CIPAtom end, CIPAtom prevSP2,
2776                                        int[] retRule) {
2777       CIPAtom atom1 = (CIPAtom) end.clone();
2778       if (atom1.parent != prevSP2)
2779         atom1.addReturnPath(prevSP2, end);
2780       CIPAtom a;
2781       for (int rule = RULE_1a; rule <= RULE_6; rule++) {
2782         if (atom1.auxSort(rule)) {
2783           for (int i = 0; i < 4; i++) {
2784             a = atom1.atoms[i];
2785             if (!a.multipleBondDuplicate) {
2786               if (atom1.priorities[i] != atom1.priorities[i + 1]) {
2787                 if (retRule != null)
2788                   retRule[0] = rule;
2789                 return (a.atom == null ? null : a);
2790               }
2791             }
2792           }
2793         }
2794       }
2795       return null;
2796     }
2797 
2798     /**
2799      * Add the path back to the root for an auxiliary center.
2800      *
2801      * @param newParent
2802      * @param fromAtom
2803      */
addReturnPath(CIPAtom newParent, CIPAtom fromAtom)2804     private void addReturnPath(CIPAtom newParent, CIPAtom fromAtom) {
2805       Lst<CIPAtom> path = new Lst<CIPAtom>();
2806       CIPAtom thisAtom = this, newSub, oldParent = fromAtom, oldSub = newParent;
2807       // create path back to root
2808       while (oldParent.parent != null && oldParent.parent.atoms[0] != null) { // COUNT_LINE
2809         if (Logger.debuggingHigh)
2810           Logger.info("path:" + oldParent.parent + "->" + oldParent);
2811         path.addLast(oldParent = oldParent.parent);
2812       }
2813       path.addLast(null);
2814       for (int i = 0, n = path.size(); i < n; i++) {
2815         oldParent = path.get(i);
2816         newSub = (oldParent == null ? new CIPAtom().create(null, this,
2817             isAlkene, true, false) : (CIPAtom) oldParent.clone());
2818         newSub.nPriorities = 0;
2819         newSub.sphere = thisAtom.sphere + 1;
2820         thisAtom.replaceParentSubstituent(oldSub, newParent, newSub);
2821         if (i > 0 && thisAtom.isAlkene && !thisAtom.isAlkeneAtom2) {
2822           // reverse senses of alkenes
2823           if (newParent.isAlkeneAtom2) {
2824             newParent.isAlkeneAtom2 = false;
2825             thisAtom.alkeneParent = newParent;
2826           }
2827           thisAtom.setEne();
2828         }
2829         newParent = thisAtom;
2830         thisAtom = newSub;
2831         oldSub = fromAtom;
2832         fromAtom = oldParent;
2833       }
2834     }
2835 
2836     /**
2837      * Swap a substituent and the parent in preparation for reverse traversal of
2838      * this path back to the root atom.
2839      *
2840      * @param oldSub
2841      * @param newParent
2842      * @param newSub
2843      */
replaceParentSubstituent(CIPAtom oldSub, CIPAtom newParent, CIPAtom newSub)2844     private void replaceParentSubstituent(CIPAtom oldSub, CIPAtom newParent,
2845                                           CIPAtom newSub) {
2846       for (int i = 0; i < 4; i++)
2847         if (atoms[i] == oldSub || newParent == null && atoms[i].atom == null) {
2848           if (Logger.debuggingHigh)
2849             Logger.info("reversed: " + newParent + "->" + this + "->" + newSub);
2850           parent = newParent;
2851           atoms[i] = newSub;
2852           Arrays.sort(atoms);
2853           break;
2854         }
2855     }
2856 
2857     ///// general utility methods
2858 
2859     /**
2860      * Just a simple signum for integers
2861      *
2862      * @param score
2863      * @return 0, -1, or 1
2864      */
sign(int score)2865     private int sign(int score) {
2866       return (score < 0 ? -1 : score > 0 ? 1 : 0);
2867     }
2868 
2869     @Override
clone()2870     public Object clone() {
2871       CIPAtom a = null;
2872       try {
2873         a = (CIPAtom) super.clone();
2874       } catch (CloneNotSupportedException e) {
2875       }
2876       a.id = ptIDLogger++;
2877       a.atoms = new CIPAtom[4];
2878       for (int i = 0; i < 4; i++)
2879         a.atoms[i] = atoms[i];
2880       a.priorities = new int[4];
2881       a.htPathPoints = htPathPoints;
2882       a.alkeneParent = null;
2883       a.auxEZ = UNDETERMINED;
2884       a.rule4Type = NO_CHIRALITY;
2885       a.listRS = null;
2886       if (Logger.debuggingHigh)
2887         a.myPath = a.toString();
2888       return a;
2889     }
2890 
2891     @Override
toString()2892     public String toString() {
2893       if (atom == null)
2894         return "<null>";
2895       if (Logger.debuggingHigh)
2896         return ("[" + currentRule + "." + sphere
2897             + "," + id + "." + (isDuplicate ? parent.atom : atom).getAtomName()
2898             + (isDuplicate ? "*(" + rootDistance + ")" : "")
2899             + (auxChirality == '~' ? "" : "" + auxChirality) + " " + elemNo
2900             + "]");
2901       return (isDuplicate ? "(" + atom.getAtomName() + "."  + rootDistance + ")"
2902           : atom.getAtomName());
2903 
2904     }
2905   }
2906 
2907 }
2908