1 /* $RCSfile$ 2 * $Author$ 3 * $Date$ 4 * $Revision$ 5 * 6 * Copyright (C) 2017 The Jmol Development Team 7 * 8 * 9 * Contact: jmol-developers@lists.sf.net 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 24 * 02110-1301, USA. 25 */ 26 27 package org.jmol.symmetry; 28 29 import java.util.Arrays; 30 import java.util.Collections; 31 import java.util.Hashtable; 32 import java.util.Map; 33 34 import javajs.util.BS; 35 import javajs.util.Lst; 36 import javajs.util.PT; 37 38 import org.jmol.util.Elements; 39 import org.jmol.util.Logger; 40 import org.jmol.util.SimpleEdge; 41 import org.jmol.util.SimpleNode; 42 import org.jmol.viewer.JC; 43 44 /** 45 * A fully validated relatively efficient implementation of Cahn-Ingold-Prelog 46 * rules for assigning R/S, M/P, and E/Z stereochemical descriptors. Based on 47 * IUPAC Blue Book rules of 2013 and assorted corrections. 48 * 49 * IUPAC Project: Corrections, Revisions and Extension for the Nomenclature of 50 * Organic Chemistry - IUPAC Recommendations and Preferred Names 2013 (the IUPAC 51 * Blue Book) 52 * https://iupac.org/projects/project-details/?project_nr=2001-043-1-800 53 * http://www.sbcs.qmul.ac.uk/iupac/bibliog/BBerrors.html 54 * 55 * Settable options: 56 * 57 * set testflag1 use advanced in/out-sensitive Rule 6 (r,r-bicyclo[2.2.2]octane) 58 * set testflag2 turn off tracking (saving of _M.CIPInfo) for speed 59 * 60 * Features include: 61 * 62 * - deeply validated 63 * 64 * - includes revised Rules 1b, and 2 65 * 66 * - includes a proposed Rule 6 67 * 68 * - implemented in Java (Jmol) and JavaScript (JSmol) 69 * 70 * - only a few Java classes; < 1000 lines 71 * 72 * - efficient, one-pass process for each center using a single finite digraph 73 * for all auxiliary descriptors 74 * 75 * - exhaustive processing of all 9 sequence rules (1a, 1b, 2, 3, 4a, 4b, 4c, 5, 76 * 6) 77 * 78 * - includes R/S, r/s, M/P (axial, not planar), E/Z 79 * 80 * - covers any-length odd and even cumulenes 81 * 82 * - uses Jmol conformational SMARTS to detect atropisomers and helicenes 83 * 84 * - covers chiral phosphorus and sulfur, including trigonal pyramidal and 85 * tetrahedral 86 * 87 * - properly treats complex combinations of R/S, M/P, and seqCis/seqTrans 88 * centers (Rule 4b) 89 * 90 * - properly treats neutral-species resonance structures using fractional 91 * atomic mass and a modified Rule 1b 92 * 93 * - implements CIP spiro rule (BB P-93.5.3.1) as part of Rule 6 94 * 95 * - detects small rings (fewer than 8 members) and removes E/Z specifications 96 * for such 97 * 98 * - detects chiral bridgehead nitrogens and E/Z imines and diazines 99 * 100 * - reports atom descriptor along with the rule that ultimately decided it 101 * 102 * - fills _M.CIPInfo with detailed information about how each ligand was decided 103 * (feature turned off by set testflag2) 104 * 105 * - generates advanced Rule 6 descriptors for cubane and the like. (Generally 'r') 106 * using set testflag1 107 * 108 * Primary 236-compound Chapter-9 validation set (AY-236) provided by Andrey 109 * Yerin, ACD/Labs (Moscow). 110 * 111 * Mikko Vainio also supplied a 64-compound testing suite (MV-64), which is 112 * available on SourceForge in the Jmol-datafiles directory. 113 * (https://sourceforge.net/p/jmol/code/HEAD/tree/trunk/Jmol-datafiles/cip). 114 * 115 * Additional test structures provided by John Mayfield. 116 * 117 * Additional thanks to the IUPAC Blue Book Revision project, specifically 118 * Karl-Heinz Hellwich for alerting me to the errata page for the 2013 IUPAC 119 * specs (http://www.chem.qmul.ac.uk/iupac/bibliog/BBerrors.html), Gerry Moss 120 * for discussions, Andrey Yerin for discussion and digraph checking. 121 * 122 * Many thanks to the members of the BlueObelisk-Discuss group, particularly 123 * Mikko Vainio, John Mayfield (aka John May), Wolf Ihlenfeldt, and Egon 124 * Willighagen, for encouragement, examples, serious skepticism, and extremely 125 * helpful advice. 126 * 127 * References: 128 * 129 * CIP(1966) R.S. Cahn, C. Ingold, V. Prelog, Specification of Molecular 130 * Chirality, Angew.Chem. Internat. Edit. 5, 385ff 131 * 132 * Custer(1986) Roland H. Custer, Mathematical Statements About the Revised 133 * CIP-System, MATCH, 21, 1986, 3-31 134 * http://match.pmf.kg.ac.rs/electronic_versions/Match21/match21_3-31.pdf 135 * 136 * Mata(1993) Paulina Mata, Ana M. Lobo, Chris Marshall, A.Peter Johnson The CIP 137 * sequence rules: Analysis and proposal for a revision, Tetrahedron: Asymmetry, 138 * Volume 4, Issue 4, April 1993, Pages 657-668 139 * 140 * Mata(1994) Paulina Mata, Ana M. Lobo, Chris Marshall, and A. Peter Johnson, 141 * Implementation of the Cahn-Ingold-Prelog System for Stereochemical Perception 142 * in the LHASA Program, J. Chem. Inf. Comput. Sci. 1994, 34, 491-504 491 143 * http://pubs.acs.org/doi/abs/10.1021/ci00019a004 144 * 145 * Mata(2005) Paulina Mata, Ana M. Lobo, The Cahn, Ingold and Prelog System: 146 * eliminating ambiguity in the comparison of diastereomorphic and 147 * enantiomorphic ligands, Tetrahedron: Asymmetry, Volume 16, Issue 13, 4 July 148 * 2005, Pages 2215-2223 149 * 150 * Favre(2013) Henri A Favre, Warren H Powell, Nomenclature of Organic Chemistry 151 * : IUPAC Recommendations and Preferred Names 2013 DOI:10.1039/9781849733069 152 * http://pubs.rsc.org/en/content/ebook/9780854041824#!divbookcontent 153 * 154 * code history: 155 * 156 * 5/12/18 Jmol 14.29.14 fixes minor Rule 5 bug and adds advanced Rule 6 in/out testflag1 option (857 lines) 157 * 158 * 5/1/18 Jmol 14.29.14 fixes enantiomorphic Rule 5 R/S check for BH64_85 and BH64_86 159 * 160 * 4/25/18 Jmol 14.29.14 fixes spiroallene Rule 6 issue for BH64_84 161 * 162 * 4/23/18 Jmol 14.29.14 fixes Rule 2 for JM_008, involving mass and duplicates (824 lines) 163 * 164 * 4/11/18 Jmol 14.29.13 adds optional CIPDataTracker class (822 lines) 165 * 166 * 4/2/18 Jmol 14.29.13 adds optional CIPDataSmiles class 167 * 168 * 4/2/18 Jmol 14.29.13 adds John's "mancude-like" cyclic conjugated ene Kekule 169 * averaging 170 * 171 * 12/10/17 Jmol 14.29.9 adds CIPData, mancude Kekule averaging 172 * 173 * 11/11/17 Jmol 14.25.1 adds "duplicate over terminal" in Rule 1b; streamlined 174 * (777 lines) 175 * 176 * 11/05/17 Jmol 14.24.1 fixes a problem with seqCis/seqTrans and also with Rule 177 * 2 (799 lines) 178 * 179 * 10/17/17 Jmol 14.20.10 adds S4 check in Rule 6 and also fixes bug in aux 180 * descriptors being skipped when two ligands are equivalent for the root (798 181 * lines) 182 * 183 * 9/19/17 CIPChirality code simplification (778 lines) 184 * 185 * 9/14/17 Jmol 14.20.6 switching to Mikko's idea for Rule 4b and 5. Abandons 186 * "thread" idea. Uses breadth-first algorithm for generating bitsets for R and 187 * S. Processing time reduced by 50%. Still could be optimized some. (820 lines) 188 * 189 * 7/25/17 Jmol 14.20.4 consolidates all ene determinations; moves auxiliary 190 * descriptor generation to prior to Rule 3 (850 lines) 7/23/17 Jmol 14.20.4 191 * adds Rule 6; rewrite/consolidate spiro, C3, double spiran code (853 lines) 192 * 193 * 7/19/17 Jmol 14.20.3 fixing Rule 2 (880 lines) 7/13/17 Jmol 14.20.3 more 194 * thorough spiro testing (858 lines) 7/10/17 Jmol 14.20.2 adding check for C3 195 * and double spiran (CIP 1966 #32 and #33) 7/8/17 Jmol 14.20.2 adding presort 196 * for Rules 4a and 4c (test12.mol; 828 lines) 197 * 198 * 7/7/17 Jmol 14.20.1 minor coding efficiencies (833 lines) 199 * 200 * 7/6/17 Jmol 14.20.1 major rewrite to correct and simplify logic; full 201 * validation for 433 structures (many duplicates) in AY236, BH64, MV64, MV116, 202 * JM, and L (836 lines) 203 * 204 * 6/30/17 Jmol 14.20.1 major rewrite of Rule 4b (999 lines) 205 * 206 * 6/25/17 Jmol 14.19.1 minor fixes for Rule 4b and 5 for BH64_012-015; better 207 * atropisomer check 208 * 209 * 6/12/2017 Jmol 14.18.2 tested for Rule 1b sphere (AY236.53, 163, 173, 192); 210 * 957 lines 211 * 212 * 6/8/2017 Jmol 14.18.2 removed unnecessary presort for Rule 1b 213 * 214 * 5/27/17 Jmol 14.17.2 fully interfaced using SimpleNode and SimpleEdge 215 * 216 * 5/27/17 Jmol 14.17.1 fully validated; simplified code; 978 lines 217 * 218 * 5/17/17 Jmol 14.16.1. adds helicene M/P chirality; 959 lines validated using 219 * CCDC structures HEXHEL02 HEXHEL03 HEXHEL04 ODAGOS ODAHAF 220 * http://pubs.rsc.org/en/content/articlehtml/2017/CP/C6CP07552E 221 * 222 * 5/14/17 Jmol 14.15.5. trimmed up and documented; no need for lone pairs; 948 223 * lines 224 * 225 * 5/13/17 Jmol 14.15.4. algorithm simplified; validated for mixed Rule 4b 226 * systems involving auxiliary R/S, M/P, and seqCis/seqTrans; 959 lines 227 * 228 * 5/06/17 validated for 236 compound set AY-236. 229 * 230 * 5/02/17 validated for 161 compounds, including M/P, m/p (axial chirality for 231 * biaryls and odd-number cumulenes) 232 * 233 * 4/29/17 validated for 160 compounds, including M/P, m/p (axial chirality for 234 * biaryls and odd-number cumulenes) 235 * 236 * 4/28/17 Validated for 146 compounds, including imines and diazines, sulfur, 237 * phosphorus 238 * 239 * 4/27/17 Rules 3-5 preliminary version 14.15.1 240 * 241 * 4/6/17 Introduced in Jmol 14.12.0; validated for Rules 1 and 2 in Jmol 242 * 14.13.2; 100 lines 243 * 244 * 245 * NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! NOTE! 246 * 247 * Added logic to Rule 1b: 248 * 249 * Rule 1b: In comparing duplicate atoms, the one with lower root distance has 250 * precedence, where root distance is defined as: (a) in the case of 251 * ring-closure duplicates, the sphere of the duplicated atom; and (b) in the 252 * case of multiple-bond duplicates, the sphere of the atom to which the 253 * duplicate atom is attached. 254 * 255 * Rationale: Using only the distance of the duplicated atom (current 256 * definition) introduces a Kekule bias, which can be illustrated with various 257 * simple models. By moving that distance to be the sphere of the parent atom of 258 * the duplicate, the problem is resolved. 259 * 260 * Added clarification to Rule 2: 261 * 262 * Rule 2: Higher mass precedes lower mass, where mass is defined in the case of 263 * nonduplicate atoms with identified isotopes for elements as their exact 264 * isotopic mass and, in all other cases, as their element's atomic weight. 265 * 266 * Rationale: BB is not self-consistent, including both "mass number" (in the 267 * rule) and "atomic mass" in the description, where "79Br < Br < 81Br". And 268 * again we have the same Kekule-ambiguous issue as in Rule 1b. The added 269 * clarification fixes the Kekule issue (not using isotope mass number for 270 * duplicate atoms), solves the problem that F < 19F (though 100% nat. 271 * abundance), and is easily programmable. 272 * 273 * In Jmol the logic is very simple, actually using the isotope mass number, but 274 * doing two checks: 275 * 276 * a) if one of four specific isotopes (16O, 52Cr, 96Mo, 175Lu), reverse the test, and 277 * 278 * b) if on the list of 100% natural isotopes or one of the non-natural 279 * elements, use the element's accepted atomic weight. 280 * 281 * See CIPAtom.getMass(); 282 * 283 * PROPOSED Rule 6: An undifferentiated reference node has priority over any 284 * other undifferentiated node. 285 * 286 * Rationale: This rule is stated in CIP(1966) p. 357. 287 * 288 * 289 * @author Bob Hanson hansonr@stolaf.edu 290 */ 291 public class CIPChirality { 292 293 //The rules: 294 // 295 // P-92.1.3.1 Sequence Rule 1 has two parts: 296 // 297 // (a) Higher atomic number precedes lower; 298 // (b) A duplicate atom node whose corresponding nonduplicated atom 299 // node is the root or is closer to the root ranks higher than a 300 // duplicate atom node whose corresponding nonduplicated node is 301 // farther from the root. 302 // 303 // P-92.1.3.2 Sequence Rule 2 304 // 305 // Higher atomic mass number precedes lower; 306 // 307 // P-92.1.3.3 Sequence Rule 3 308 // 309 // When considering double bonds and planar tetraligand atoms, 'seqcis' = 'Z' 310 // precedes 'seqtrans' = 'E' and this precedes nonstereogenic double bonds. 311 // 312 // P-92.1.3.4 Sequence Rule 4 is best considered in three parts: 313 // 314 // (a) Chiral stereogenic units precede pseudoasymmetric stereogenic units and 315 // these precede nonstereogenic units. 316 // (b) When two ligands have different descriptor pairs, then the one with the 317 // first chosen like descriptor pairs has priority over the one with a 318 // corresponding unlike descriptor pair. 319 // (i) Like descriptor pairs are: 'RR', 'SS', 'MM', 'PP', 'RM', 'SP', 320 // 'seqCis/seqCis', 'seqTran/sseqTrans', 'RseqCis', 321 // 'SseqTrans', 'MseqCis', 'PseqTrans' ...; 322 // (ii) Unlike discriptor pairs are 'RS', 'MP', 'RP', 'SM', 323 // 'seqCis/seqTrans', 'RseqTrans', 'SseqCis', 'PseqCis', 324 // 'MseqTrans'.... 325 // (c) 'r' precedes 's' and 'm' precedes 'p' 326 // 327 // P-92.1.3.5 Sequence Rule 5 328 // 329 // An atom or group with descriptor 'R', 'M' and 'seqCis' has priority over its 330 // enantiomorph 'S', 'P' or 'seqTrans', 'seqCis' or 'seqTrans'. 331 332 // 333 // Rule 1b proposal 334 // 335 336 // Rule 1b: In comparing duplicate atoms, the one with lower root distance has 337 // precedence, where root distance is defined as: (a) in the case of ring-closure 338 // duplicates, the sphere of the duplicated atom; and (b) in the case of 339 // multiple-bond duplicates, the sphere of the atom to which the duplicate atom 340 // is attached. 341 342 // [0]---1---2---3==4 343 // 344 // 345 // current: 346 // 347 // (4)(3) 348 // / / 349 // [0]---1---2---3--4 350 // 351 // 352 // proposed: 353 // 354 // (3)(4) 355 // / / 356 // [0]---1---2---3--4 357 // 358 // 359 // 7--6 7==6 360 // // \ / \ 361 // 8 5 8 5 362 // (ri-ng) / (ri=ng) // 363 // [0]---1---2---3==4 [0]---1---2---3---4 364 // 365 // 366 // current: 367 // 368 // (4)(3) (?)(5) 369 // / / / / 370 // [0]---1---2---3--4 [0]---1---2---3--4 371 // 372 // 373 // proposed: 374 // 375 // (3)(4) (3)(4) 376 // / / / / 377 // [0]---1---2---3--4 [0]---1---2---3--4 378 // 379 380 // 381 // Implementation Notes 382 // 383 // 384 385 // "Scoring" a vs. b involves returning 0 (TIE) or +/-n, where n>0 indicates b won, n < 0 386 // indicates a won, and the |n| indicates in which sphere the decision was made. 387 // 388 // This implementation is somewhat inefficient. It digs deep, looking for a first difference 389 // but only caches the sphere so that in the end the lowest sphere change is chosen. 390 // I don't think it has to do this. A breadth-first queue should work better. I just 391 // did not take the time to implement that. 392 // 393 // The basic strategy is to loop through all nine sequence rules (1a, 1b, 2, 3, 4a, 4b, 4c, 5, and 6) 394 // in order and exhaustively prior to applying the next rule: 395 // 396 // Rule 1a (atomic number -- note that this requires an aromaticity check first) 397 // Rule 1b (duplicated atom progenitor root-distance check; revised as described above 398 // for aromatic systems. 399 // Rule 2 (nominal isotopic mass) 400 // Rule 3 (E/Z, not including enantiomorphic seqCis/seqTrans designations) 401 // Rule 4a (chiral precedes pseudochiral precedes nonstereochemical) 402 // Rule 4b (like precedes unlike) 403 // Rule 4c (r precedes s) 404 // Rule 5 (R precedes S; M precedes P; C precedes T) 405 // Rule 6 (promoted undifferentiated ligand precedes unpromoted undifferentiated ligand) 406 // 407 // Some nuances I've learned along the way here, some of which are still being checked: 408 // 409 // 1. Rule 1a Had to be revised to account for Kekule bias using averaging. 410 // 2. Rule 1b Had to be revised to account for Kekule bias (AY-236.215). Note that this 411 // rule may only be applied AFTER Rule 1a has been applied exhaustively. In 412 // my mind it deserves its own number for this reason. See AY-236.53, 413 // (1S,5R)-bicyclo[3.1.0]hex-2-ene, for example. 414 // 3. Rule 2 This rule is simple to implement; must be executed only for ties from 1a and 1b. 415 // However, it still needs Kekule consideration. 416 // 4. Rule 3 Requires the concept of "auxiliary" (temporary, digraph-specific) descriptors. 417 // This involves the initial generation of all auxiliary descriptors, including r/s and R/S at 418 // branching points. In the course of doing this, all rules, 1-6, must be employed 419 // at these auxiliary centers using the already-created digraph. 420 // Auxiliary descriptors must be generated in a depth-first manner -- that is, 421 // from highest sphere to lowest within a branch. This is the key to not having 422 // an analysis blow up or somehow require complex, impossible iteration. 423 // 5. Rule 4a needs to be addressed exhaustively prior to Rules 4b and 4c. This rule serves to 424 // avoid the need for Rule 4b for all except the most unusual cases, where, for example, 425 // there are two otherwise identical branches, but one can be identified as S and the 426 // other only r or no-stereo, but not R. Thus, only branches that end up as R/R, R/S, S/S, 427 // r/r, r/s, s/s, or ns/ns comparisons need be investigated by Rule 4b. 428 // 6. Rule 4b This rule filters out all diastereomorphic differences that do not involve r/s issues. 429 // Somehow missed in the discussion is that the reference descriptor is determined 430 // once and only once for each branch from the center under scrutiny. The key is to 431 // determine two "Mata sequence" like/unlike lists of R and S descriptors, one for each 432 // pair of branches being considered. This same reference carries through all future 433 // iterations of the algorithm for that branch. 434 // 7. Rule 4c Again, this subrule must be invoked only after Rule 4b is completed. 435 // It is implemented as for Rule 4a. 436 // 8. Rule 5 Setting of pseudoasymmetry (r/s, m/p) and checking for special cases requiring R/S 437 // is done along the same line as Rule 4b, but in this case by setting the reference 438 // descriptor to "R" for both sequences. The S-reference must also be checked to see 439 // if these happen to be self-enantiomorphic ligands. Note that if any changes have 440 // been made by Rule 4c, all descriptors must be recalculated; otherwise the ones 441 // used in Rule 4b can be reused. 442 // 9. Rule 6 This new rule takes care of all spiro and high-symmetry cases. An optional 443 // "full" version (set testflag1 TRUE) assigns all 'r' to bridgehead carbons of 444 // bicyclo[2.2.2]octane, adamantane, tetrahedrane, cubane, and like-topology compounds, 445 // and all 's' if half of the stereocenters are pointing "in" instead of "out". 446 447 /** 448 * A useful idea is to switch from a tree metaphor to a "twisted strand" or 449 * "thread" metaphor. For example: 450 * 451 * (a) In Rule 1b, all ring-duplicates terminate on one of the nodes in the 452 * sequence of parent nodes going back to the root. This has nothing to do 453 * with branching. 454 * 455 * (b) Generation of auxiliary descriptors prior to implementation of Rule 3 456 * must start from the highest sphere, proceeding toward the root. In this 457 * process the path leading back to the root will have no stereodescriptors, 458 * but that does not matter, as its priority is guaranteed to be set by Rule 459 * 1a. 460 * 461 * (c) All rule 4b auxiliary nodes can be normalized by labeling them one of R 462 * or S; there is no need to consider them to be C/T (seqCis or seqTrans) or 463 * M/P, other than to immediately equate that to R/S. Note that C/T and M/P 464 * must be assigned to the sp2 node closest to the root. 465 * 466 * (d) Rule 4b comparisons can be analyzed using a straightforward ranking 467 * process as follows: 468 * 469 * (d1) Run through all nodes of a ligand branch, checking for descriptors 470 * (cipAtom.rule4Type). Save a list of ASCII strings that represent the 471 * priority of the chiral nodes relative to the given reference. Do this 472 * twice, first using R and second using S. The priority character takes the 473 * form of: 474 * 475 * (char)(64 + (a.cipPriority<<2) + (a.rule4Type == 0 ? 0 : a.rule4Type == 476 * root.rule4Ref ? 1 : 2)) 477 * 478 * Such that, for example, "@D@@@A" indicates a node in Sphere 5 with 479 * priorities 010000 and a "like" relationship to the reference. In the end, 480 * we might have something like this: 481 * 482 * "@@@A" 483 * 484 * "@@@A@@@A" 485 * 486 * "@@@A@@@B" 487 * 488 * indicating "AAB" -- "llu" (reading off only the last character for each 489 * thread). 490 * 491 * (d2) Compare these four lists of data. Sort these lists numerically. The 492 * ligand associated with the highest numerical value is preferred. In Jmol, 493 * this comparison is simply an XOR of pairs of bitsets. The bitset that has 494 * the lowest set bit of the XOR is the winner. Simple as that. 495 * 496 * The "like/unlike" business can be thought of as a simple diastereomorphic 497 * test. Thus, the lists are tested for the first point at which they become 498 * neither identical nor opposites, and only that point need be checked for 499 * likeness or unlikeness to the reference. I like thinking about it this way 500 * because it focuses on what Rule 4b's role is -- the identification of 501 * diastereomorphism. 502 * 503 * (e) Rule 4c takes care of diasteriomorphism related to enantiomorphic (r/s, 504 * m/p) sub-paths. It is analyzed exactly as for Rule 4a. 505 * 506 * (f) Rule 5 processing is a comparison similar to Rule 4b, where only the 507 * reference descriptor "R" is used. It tkes care of any remaining 508 * enantiomorphic issues, including the possibilty that some even-number 509 * combination of a combination of enantiomorphic pairs and self-enantiomeric 510 * ligands are present. 511 * 512 * (g) Rule 6 tests for high-symmetry cases, including cubane, adamantane, 513 * bicyclo[2.2.2]octane, etc. If auxiliary centers are recalculated (set 514 * testflag1 true), then these simple "all-out" isomers will be given all-r 515 * descriptors. 516 * 517 * 518 */ 519 520 // The algorithm: 521 // 522 // 523 // getChirality(molecule) { 524 // prefilterAtoms() 525 // checkForAlkenes() 526 // checkForSmallRings() 527 // checkForHelicenes() 528 // checkForBridgeheadNitrogens() 529 // checkForKekuleIssues() 530 // checkForAtropisomerism() 531 // for(all filtered atoms) getAtomChirality(atom) 532 // if (haveAlkenes) { 533 // for(all double bonds) getBondChirality(a1, a2) 534 // removeUnnecessaryEZDesignations() 535 // indicateHeliceneChirality() 536 // } 537 // } 538 // 539 // getAtomChirality(atom) { 540 // for (each Rule){ 541 // sortSubstituents() 542 // if (done) return checkHandedness(); 543 // } 544 // return NO_CHIRALITY 545 // } 546 // 547 // getBondChirality(a1, a2) { 548 // atop = getAlkeneEndTopPriority(a1) 549 // btop = getAlkeneEndTopPriority(a2) 550 // return (atop >= 0 && btop >= 0 ? getEneChirality(atop, a1, a2, btop) : NO_CHIRALITY) 551 // } 552 // 553 // sortSubstituents() { 554 // for (all pairs of substituents a1 and a2) { 555 // score = a1.compareTo(a2, currentRule) 556 // if (score == TIED) 557 // score = breakTie(a1,a2) 558 // } 559 // 560 // breakTie(a,b) { 561 // score = compareShallowly(a, b) 562 // if (score != TIED) return score 563 // a.sortSubstituents(), b.sortSubstituents() 564 // return compareDeeply(a, b) 565 // } 566 // 567 // compareShallowly(a, b) { 568 // for (each substituent pairing i in a and b) { 569 // score = applyCurrentRule(a_i, b_i) 570 // if (score != TIED) return score 571 // } 572 // return TIED 573 // } 574 // 575 // compareDeeply(a, b) { 576 // bestScore = Integer.MAX_VALUE 577 // for (each substituent pairing i in a and b) { 578 // bestScore = min(bestScore, breakTie(a_i, b_i)) 579 // } 580 // return bestScore 581 // } 582 // 583 // Of course, the actual code is a bit more complex than that. 584 // 585 // approximate line count calculated using MSDOS batch script: 586 // 587 // copy ..\..\workspace\jmol\src\org\jmol\symmetry\CIPChirality.java+..\..\workspace\jmol\src\org\jmol\symmetry\CIPData.java tt 588 // type tt | find /V "}"|find /V "//" |find /V "import" | find /V /I "track" | find /V "*" |find ";"|find /V "Logger"|find /V "System.out" |find /V "TEST" > t 589 // type tt | find /V "}"|find /V "//" |find /V "import" | find /V /I "track" | find /V "*" |find "if"|find /V "Logger"|find /V "System.out" |find /V "TEST" >> t 590 // type tt | find "COUNT_LINE" >> t 591 // type t |find " " /C 592 593 /** 594 * These elements have 100% natural abundance; we will use their isotope mass number instead of their actual average mass, since there is no difference 595 */ 596 static final String RULE_2_nXX_EQ_XX = ";9Be;19F;23Na;27Al;31P;45Sc;55Mn;59Co;75As;89Y;93Nb;98Tc;103Rh;127I;133Cs;141Pr;145Pm;159Tb;165Ho;169Tm;197Au;209Bi;209Po;210At;222Rn;223Fr;226Ra;227Ac;231Pa;232Th;and all > U (atomno > 92)"; 597 598 /** 599 * These elements have an isotope number that is a bit higher than the average 600 * mass, even though their actual isotope mass is a bit lower. We will change 601 * 16 to 15.9, 52 to 51.9, 96 to 95.9, 175 to 174.9 so as to force the unspecified 602 * mass atom to be higher priority than the specified one. 603 * 604 * All other isotopes can use their integer isotope mass number instead of looking up 605 * their exact isotope mass. 606 * 607 */ 608 static final String RULE_2_REDUCE_ISOTOPE_MASS_NUMBER = ";16O;52Cr;96Mo;175Lu;"; 609 610 static final int NO_CHIRALITY = 0, TIED = 0; 611 static final int A_WINS = -1, B_WINS = 1; 612 613 static final int IGNORE = Integer.MIN_VALUE; 614 615 static final int UNDETERMINED = -1; 616 617 static final int STEREO_R = JC.CIP_CHIRALITY_R_FLAG, 618 STEREO_S = JC.CIP_CHIRALITY_S_FLAG; 619 620 static final int STEREO_M = JC.CIP_CHIRALITY_M_FLAG, 621 STEREO_P = JC.CIP_CHIRALITY_P_FLAG; 622 623 static final int STEREO_Z = JC.CIP_CHIRALITY_seqcis_FLAG, 624 STEREO_E = JC.CIP_CHIRALITY_seqtrans_FLAG; 625 626 static final int STEREO_BOTH_RS = STEREO_R | STEREO_S; // must be the number 3 627 628 static final int STEREO_BOTH_EZ = STEREO_E | STEREO_Z; 629 630 static final int RULE_1a = 1, RULE_1b = 2, RULE_2 = 3, RULE_3 = 4, 631 RULE_4a = 5, RULE_4b = 6, RULE_4c = 7, RULE_5 = 8, RULE_6 = 9; 632 633 final static String[] ruleNames = { "", "1a", "1b", "2", "3", "4a", "4b", 634 "4c", "5", "6" }; // Logger only 635 getRuleName(int rule)636 public String getRuleName(int rule) { // Logger only 637 return ruleNames[rule]; // Logger only 638 } 639 640 /** 641 * maximum path to display for debugging only using SET DEBUG in Jmol 642 */ 643 static final int MAX_PATH = 50; // Logger 644 645 /** 646 * maximum ring size that can have a double bond with no E/Z designation; also 647 * used for identifying aromatic rings and bridgehead nitrogens 648 */ 649 static final int SMALL_RING_MAX = 7; 650 651 /** 652 * the current rule being applied exhaustively 653 * 654 */ 655 int currentRule = RULE_1a; 656 657 /** 658 * The atom for which we are determining the stereochemistry 659 * 660 */ 661 CIPAtom root; 662 663 /** 664 * collected bitsets and more specialized SMILES/SMARTS searches and vwr references 665 * 666 */ 667 CIPData data; 668 669 /** 670 * are we tracking pathways for _M.CIPInfo? 671 * 672 */ 673 boolean doTrack; 674 675 /** 676 * are we in the midst of auxiliary center creation? 677 * 678 */ 679 boolean isAux; 680 681 /** 682 * set bits RULE_1a - RULE_6 to indicate a need for that rule based on what is in the model 683 */ 684 BS bsNeedRule = new BS(); 685 686 /** 687 * do we have r or s and so will need to recalculate Mata like/unlike lists in Rule 5? 688 */ 689 boolean havePseudoAuxiliary; 690 691 /** 692 * incremental pointer providing a unique ID to every CIPAtom for debugging 693 * 694 */ 695 int ptIDLogger; 696 CIPChirality()697 public CIPChirality() { 698 // for reflection 699 } 700 701 /** 702 * A general determination of chirality that involves ultimately all of Rules 703 * 1-6. 704 * 705 * @param data 706 * 707 */ getChiralityForAtoms(CIPData data)708 public void getChiralityForAtoms(CIPData data) { 709 710 if (data.bsAtoms.cardinality() == 0) 711 return; 712 713 this.data = data; 714 715 doTrack = data.isTracker(); 716 717 ptIDLogger = 0; 718 719 // using BSAtoms here because we need the entire graph, 720 // including multiple molecular units (AY-236.93 721 722 BS bsToDo = (BS) data.bsMolecule.clone(); 723 boolean haveAlkenes = preFilterAtomList(data.atoms, bsToDo, data.bsEnes); 724 if (!data.bsEnes.isEmpty()) 725 data.getEneKekule(); 726 Logger.info("bsKekule:" + data.bsKekuleAmbiguous); 727 728 // set atom chiralities 729 730 bsToDo = (BS) data.bsAtoms.clone(); 731 732 for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1)) { 733 SimpleNode a = data.atoms[i]; 734 a.setCIPChirality(0); 735 ptIDLogger = 0; 736 int c = getAtomChiralityLimited(a, null, null); 737 a.setCIPChirality(c == CIPChirality.NO_CHIRALITY ? JC.CIP_CHIRALITY_NONE : c 738 | ((currentRule - 1) << JC.CIP_CHIRALITY_NAME_OFFSET)); 739 if (doTrack && c != CIPChirality.NO_CHIRALITY) 740 data.getRootTrackerResult(root); 741 } 742 if (haveAlkenes) { 743 744 // set bond chiralities E/Z and M/P 745 746 Lst<int[]> lstEZ = new Lst<int[]>(); 747 for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1)) 748 getAtomBondChirality(data.atoms[i], lstEZ, bsToDo); 749 if (data.lstSmallRings.length > 0 && lstEZ.size() > 0) 750 clearSmallRingEZ(data.atoms, lstEZ); 751 752 // Add helicene chiralities -- predetermined using a Jmol SMARTS conformational search. 753 // 754 // M: A{a}(.t:-10,-40)a(.t:-10,-40)aaa 755 // P: A{a}(.t:10,40)a(.t:10,40)aaa 756 // 757 // Note that indicators are on the first and last aromatic atoms {a}. 758 759 setStereoFromSmiles(data.bsHelixM, STEREO_M, data.atoms); 760 setStereoFromSmiles(data.bsHelixP, STEREO_P, data.atoms); 761 } 762 763 if (Logger.debugging) { 764 Logger.info("Kekule ambiguous = " + data.bsKekuleAmbiguous); 765 Logger.info("small rings = " + PT.toJSON(null, data.lstSmallRings)); 766 } 767 768 } 769 setStereoFromSmiles(BS bsHelix, int stereo, SimpleNode[] atoms)770 private void setStereoFromSmiles(BS bsHelix, int stereo, SimpleNode[] atoms) { 771 if (bsHelix != null) 772 for (int i = bsHelix.nextSetBit(0); i >= 0; i = bsHelix 773 .nextSetBit(i + 1)) 774 atoms[i].setCIPChirality(stereo); 775 } 776 777 /** 778 * Remove unnecessary atoms from the list and let us know if we have alkenes 779 * to consider. 780 * 781 * @param atoms 782 * @param bsToDo 783 * @param bsEnes 784 * @return whether we have any alkenes that could be EZ 785 */ preFilterAtomList(SimpleNode[] atoms, BS bsToDo, BS bsEnes)786 private boolean preFilterAtomList(SimpleNode[] atoms, BS bsToDo, BS bsEnes) { 787 boolean haveAlkenes = false; 788 for (int i = bsToDo.nextSetBit(0); i >= 0; i = bsToDo.nextSetBit(i + 1)) { 789 if (!data.couldBeChiralAtom(atoms[i])) { 790 bsToDo.clear(i); 791 continue; 792 } 793 switch (data.couldBeChiralAlkene(atoms[i], null)) { 794 case UNDETERMINED: 795 break; 796 case STEREO_Z: 797 bsEnes.set(i); 798 //$FALL-THROUGH$ 799 case STEREO_M: 800 haveAlkenes = true; 801 break; 802 } 803 } 804 return haveAlkenes; 805 } 806 807 /** 808 * Check if an atom is 1st row. 809 * 810 * @param a 811 * @return elemno > 2 && elemno <= 10 812 */ isFirstRow(SimpleNode a)813 static boolean isFirstRow(SimpleNode a) { 814 int n = a.getElementNumber(); 815 return (n > 2 && n <= 10); 816 } 817 818 /** 819 * Remove E/Z designations for small-rings double bonds (IUPAC 820 * 2013.P-93.5.1.4.1). 821 * 822 * @param atoms 823 * @param lstEZ 824 */ clearSmallRingEZ(SimpleNode[] atoms, Lst<int[]> lstEZ)825 private void clearSmallRingEZ(SimpleNode[] atoms, Lst<int[]> lstEZ) { 826 for (int j = data.lstSmallRings.length; --j >= 0;) 827 data.lstSmallRings[j].andNot(data.bsAtropisomeric); 828 for (int i = lstEZ.size(); --i >= 0;) { 829 int[] ab = lstEZ.get(i); 830 for (int j = data.lstSmallRings.length; --j >= 0;) { 831 BS ring = data.lstSmallRings[j]; 832 if (ring.get(ab[0]) && ring.get(ab[1])) { 833 atoms[ab[0]].setCIPChirality(JC.CIP_CHIRALITY_NONE); 834 atoms[ab[1]].setCIPChirality(JC.CIP_CHIRALITY_NONE); 835 } 836 } 837 } 838 } 839 840 /** 841 * Get E/Z characteristics for specific atoms. Also check here for 842 * atropisomeric M/P designations 843 * 844 * @param atom 845 * @param lstEZ 846 * @param bsToDo 847 */ 848 getAtomBondChirality(SimpleNode atom, Lst<int[]> lstEZ, BS bsToDo)849 private void getAtomBondChirality(SimpleNode atom, Lst<int[]> lstEZ, BS bsToDo) { 850 int index = atom.getIndex(); 851 SimpleEdge[] bonds = atom.getEdges(); 852 int c = NO_CHIRALITY; 853 boolean isAtropic = data.bsAtropisomeric.get(index); 854 for (int j = bonds.length; --j >= 0;) { 855 SimpleEdge bond = bonds[j]; 856 SimpleNode atom1; 857 int index1; 858 if (isAtropic) { 859 atom1 = bonds[j].getOtherNode(atom); 860 index1 = atom1.getIndex(); 861 if (!data.bsAtropisomeric.get(index1)) 862 continue; 863 c = setBondChirality(atom, atom1, atom, atom1, true); 864 } else if (data.getBondOrder(bond) == 2) { 865 atom1 = getLastCumuleneAtom(bond, atom, null, null); 866 index1 = atom1.getIndex(); 867 if (index1 < index) 868 continue; 869 c = getBondChiralityLimited(bond, atom); 870 } else { 871 continue; 872 } 873 if (c != NO_CHIRALITY) { 874 if (!isAtropic) 875 lstEZ.addLast(new int[] { index, index1 }); 876 bsToDo.clear(index); 877 bsToDo.clear(index1); 878 } 879 if (isAtropic) 880 break; 881 } 882 } 883 884 /** 885 * 886 * @param bond 887 * @param atom 888 * @param nSP2 889 * returns the number of sp2 carbons in this alkene or cumulene 890 * @param parents 891 * @return the terminal atom of this alkene or cumulene 892 */ getLastCumuleneAtom(SimpleEdge bond, SimpleNode atom, int[] nSP2, SimpleNode[] parents)893 private SimpleNode getLastCumuleneAtom(SimpleEdge bond, SimpleNode atom, 894 int[] nSP2, SimpleNode[] parents) { 895 // we know this is a double bond 896 SimpleNode atom2 = bond.getOtherNode(atom); 897 if (parents != null) { 898 parents[0] = atom2; 899 parents[1] = atom; 900 } 901 // connected atom must have only two covalent bonds 902 if (nSP2 != null) 903 nSP2[0] = 2; 904 int ppt = 0; 905 while (true) { // COUNT_LINE 906 if (atom2.getCovalentBondCount() != 2) 907 return atom2; 908 SimpleEdge[] edges = atom2.getEdges(); 909 for (int i = edges.length; --i >= 0;) { 910 SimpleNode atom3 = (bond = edges[i]).getOtherNode(atom2); 911 if (atom3 == atom) 912 continue; 913 // connected atom must only have one other bond, and it must be double to continue 914 if (data.getBondOrder(bond) != 2) 915 return atom2; // was atom3 916 if (parents != null) { 917 if (ppt == 0) { 918 parents[0] = atom2; 919 ppt = 1; 920 } 921 parents[1] = atom2; 922 } 923 // a=2=3 924 if (nSP2 != null) 925 nSP2[0]++; 926 atom = atom2; 927 atom2 = atom3; 928 // we know we only have two covalent bonds 929 break; 930 } 931 } 932 } 933 934 /** 935 * Determine R/S or one half of E/Z determination 936 * 937 * @param atom 938 * ignored if a is not null (just checking ene end top priority) 939 * @param cipAtom 940 * ignored if atom is not null 941 * @param parentAtom 942 * null for tetrahedral, other alkene carbon for E/Z 943 * 944 * @return if and E/Z test, [0:none, 1: atoms[0] is higher, 2: atoms[1] is 945 * higher] otherwise [0:none, 1:R, 2:S] 946 */ getAtomChiralityLimited(SimpleNode atom, CIPAtom cipAtom, SimpleNode parentAtom)947 int getAtomChiralityLimited(SimpleNode atom, CIPAtom cipAtom, 948 SimpleNode parentAtom) { 949 int rs = NO_CHIRALITY; 950 bsNeedRule.clearAll(); 951 bsNeedRule.set(RULE_1a); 952 try { 953 boolean isAlkeneEndCheck = (atom == null); 954 if (isAlkeneEndCheck) { 955 // This is an alkene end determination. 956 atom = (root = cipAtom).atom; 957 cipAtom.htPathPoints = (cipAtom.parent = new CIPAtom().create( 958 parentAtom, null, true, false, false)).htPathPoints; 959 } else { 960 if (!(root = cipAtom = (cipAtom == null ? new CIPAtom().create(atom, null, false, false, 961 false) : cipAtom)).isSP3) { 962 // This is a root-atom call. 963 // Just checking here that center has 4 covalent bonds or is trigonal pyramidal. 964 return NO_CHIRALITY; 965 } 966 } 967 if (cipAtom.setNode()) { 968 for (currentRule = RULE_1a; currentRule <= RULE_6; currentRule++) { 969 // if (Logger.debugging) 970 // Logger.info("-Rule " + getRuleName(currentRule) 971 // + " CIPChirality for " + cipAtom + "-----"); // Logger 972 int nPrioritiesPrev = cipAtom.nPriorities; 973 switch (currentRule) { 974 case RULE_2: 975 if (cipAtom.rule6refIndex >= 0) 976 bsNeedRule.set(RULE_2); 977 break; 978 case RULE_3: 979 // We need to create auxiliary descriptors PRIOR to Rule 3, 980 // as seqcis and seqtrans are auxiliary only 981 // BH64_037 982 isAux = true; 983 doTrack = false; 984 havePseudoAuxiliary = false; 985 cipAtom.createAuxiliaryDescriptors(null, null); 986 doTrack = data.isTracker(); 987 isAux = false; 988 break; 989 case RULE_4a: 990 if (!bsNeedRule.get(RULE_4a)) { 991 // We can skip Rules 4a - 5 if there are no chirality centers. 992 currentRule = RULE_5; 993 continue; 994 } 995 //$FALL-THROUGH$ 996 case RULE_4b: 997 case RULE_4c: 998 // We need to presort with no tie-breaking for Rules 4a, 4b, and 4c. 999 cipAtom.sortSubstituents(Integer.MIN_VALUE); 1000 bsNeedRule.set(currentRule); 1001 break; 1002 case RULE_5: 1003 // We need to presort with no tie-breaking for Rule 5, 1004 // clearing the Rule4List is advisable, as Rule 4c may have changed orders. 1005 if (havePseudoAuxiliary) 1006 cipAtom.clearRule4Lists(); 1007 cipAtom.sortSubstituents(Integer.MIN_VALUE); 1008 bsNeedRule.set(currentRule); 1009 break; 1010 case RULE_6: 1011 // We only need to do Rule 6 under certain conditions. 1012 bsNeedRule.setBitTo(RULE_6, 1013 (cipAtom.rule6refIndex < 0 && (rs = cipAtom.getRule6Descriptor(false)) != NO_CHIRALITY)); 1014 break; 1015 } 1016 if (!bsNeedRule.get(currentRule)) 1017 continue; 1018 1019 // initial call to sortSubstituents does all, recursively 1020 1021 if (rs == NO_CHIRALITY && cipAtom.sortSubstituents(0)) { 1022 if (Logger.debuggingHigh && cipAtom.h1Count < 2) { 1023 for (int i = 0; i < cipAtom.bondCount; i++) { // Logger 1024 if (cipAtom.atoms[i] != null) // Logger 1025 Logger.info(cipAtom.atoms[i] + " " + cipAtom.priorities[i]); // Logger 1026 } 1027 } 1028 1029 // If this is an alkene end check, we just use STEREO_S and STEREO_R as markers 1030 1031 if (isAlkeneEndCheck) 1032 return cipAtom.getEneTop(); 1033 1034 rs = data.checkHandedness(cipAtom); 1035 if (currentRule == RULE_5) { 1036 if (cipAtom.nPriorities == 4 && nPrioritiesPrev == 2) 1037 cipAtom.isRule5Pseudo = !cipAtom.isRule5Pseudo; 1038 if (cipAtom.isRule5Pseudo) 1039 rs |= JC.CIP_CHIRALITY_PSEUDO_FLAG; 1040 1041 // Exclude special cases: 1042 1043 // P-92.2.1.1(c) pseudoasymmetric centers must have 1044 // two and only two enantiomorphic ligands 1045 // 1046 1047 // Rule 5 has decided the issue, but how many decisions did we make? 1048 // If priorities [0 0 2 2] went to [0 1 2 3] then 1049 // we have two Rule-5 decisions -- R,S,R',S'. 1050 // In that case, Rule 5 results in R/S, not r/s. 1051 // 1052 // S 1053 // - 1054 // - 1055 // R---C---R' despite this being Rule 5, the results is R, not r. 1056 // - 1057 // - 1058 // S' 1059 // 1060 // --------- mirror plane 1061 // 1062 // R' 1063 // - 1064 // - 1065 // S---C---S' not superimposible 1066 // - 1067 // - 1068 // R 1069 // 1070 // in addition, Rule 4c may not have caught a case where a ligand is self-enantiomorphic. 1071 // 1072 // s' r' 1073 // \ / 1074 // N---C---N 1075 // / \ 1076 // r s 1077 // 1078 // Neither ligand changes chirality with 1079 // since there are no descriptors on N, we won't catch this there. 1080 } 1081 if (Logger.debugging) 1082 Logger.info(atom + " " + JC.getCIPChiralityName(rs) + " by Rule " 1083 + getRuleName(currentRule) 1084 + "\n----------------------------------"); // Logger 1085 return rs; 1086 } 1087 } 1088 } 1089 } catch (Throwable e) { 1090 System.out.println(e + " in CIPChirality " + currentRule); 1091 /** 1092 * @j2sNative alert(e); 1093 */ 1094 { 1095 e.printStackTrace(); 1096 } 1097 return STEREO_BOTH_RS; 1098 } 1099 return rs; 1100 } 1101 1102 /** 1103 * Determine the axial or E/Z chirality for this bond, with the given starting 1104 * atom a 1105 * 1106 * @param bond 1107 * @param a 1108 * first atom to consider, or null 1109 * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_Ra | STEREO_Sa 1110 * | STEREO_ra | STEREO_sa} 1111 */ getBondChiralityLimited(SimpleEdge bond, SimpleNode a)1112 private int getBondChiralityLimited(SimpleEdge bond, SimpleNode a) { 1113 if (a == null) 1114 a = bond.getOtherNode(null); 1115 if (data.couldBeChiralAlkene(a, bond) == UNDETERMINED) 1116 return NO_CHIRALITY; 1117 int[] nSP2 = new int[1]; 1118 SimpleNode[] parents = new SimpleNode[2]; 1119 SimpleNode b = getLastCumuleneAtom(bond, a, nSP2, parents); 1120 boolean isAxial = nSP2[0] % 2 == 1; 1121 if (!isAxial && data.bsAromatic.get(a.getIndex())) 1122 return UNDETERMINED; 1123 int c = setBondChirality(a, parents[0], parents[1], b, isAxial); 1124 if (Logger.debugging) 1125 Logger.info("get Bond Chirality " + JC.getCIPChiralityName(c) + " " + bond); 1126 return c; 1127 } 1128 1129 /** 1130 * Determine the axial or E/Z chirality for the a-b bond. 1131 * 1132 * @param a 1133 * @param pa 1134 * @param pb 1135 * @param b 1136 * @param isAxial 1137 * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_M | STEREO_P | 1138 * STEREO_m | STEREO_p} 1139 */ setBondChirality(SimpleNode a, SimpleNode pa, SimpleNode pb, SimpleNode b, boolean isAxial)1140 private int setBondChirality(SimpleNode a, SimpleNode pa, SimpleNode pb, 1141 SimpleNode b, boolean isAxial) { 1142 CIPAtom a1 = new CIPAtom().create(a, null, true, false, false); 1143 CIPAtom b2 = new CIPAtom().create(b, null, true, false, false); 1144 int atop = getAtomChiralityLimited(null, a1, pa) - 1; 1145 int ruleA = currentRule; 1146 int btop = getAtomChiralityLimited(null, b2, pb) - 1; 1147 int ruleB = currentRule; 1148 if (isAxial && a1.nRootDuplicates > 3 && atop < 0 && btop < 0) { 1149 // No resolution for odd cumulene, but we have multiple root duplicates. 1150 // Quick check of Rule 6. 1151 ruleA = ruleB = currentRule = RULE_6; 1152 b2.rule6refIndex = a1.atoms[atop = a1.getEneTop() - 1].atomIndex; 1153 if (b2.sortSubstituents(0)) 1154 btop = b2.getEneTop() - 1; 1155 } 1156 int c = (atop >= 0 && btop >= 0 ? getEneChirality(b2.atoms[btop], b2, a1, 1157 a1.atoms[atop], isAxial, true) : NO_CHIRALITY); 1158 if (c != NO_CHIRALITY 1159 && (isAxial || !data.bsAtropisomeric.get(a.getIndex()) 1160 && !data.bsAtropisomeric.get(b.getIndex()))) { 1161 // 1162 // We must check maxRules. 1163 // 1164 // nonaxial: 1165 // 1166 // a c 1167 // \ / 1168 // C=C 1169 // / \ 1170 // b d 1171 // 1172 // R R' 1173 // \ / 1174 // C=C 1175 // / \ 1176 // S S' 1177 // 1178 // planar flip is unchanged, and this is seqcis, seqtrans 1179 // 1180 // a R' 1181 // \ / 1182 // C=C 1183 // / \ 1184 // b S' 1185 // 1186 // planar flip is changed, and this is seqCis, seqTrans 1187 // 1188 // 1189 // axial is the opposite: 1190 // 1191 // see AY236.70 and AY236.170 1192 // 1193 // 1194 // a c 1195 // \ / 1196 // C=C=C 1197 // / \ 1198 // b d 1199 // 1200 // R R' 1201 // \ / 1202 // C=C=C 1203 // / \ 1204 // S S' 1205 // 1206 // planar flip is changed, and this is M, P 1207 // 1208 // a R 1209 // \ / 1210 // C=C=C 1211 // / \ 1212 // b S 1213 // 1214 // planar flip is unchanged, and this is m, p 1215 1216 if (isAxial == (ruleA == RULE_5) == (ruleB == RULE_5)) 1217 c &= ~JC.CIP_CHIRALITY_PSEUDO_FLAG; 1218 else 1219 c |= JC.CIP_CHIRALITY_PSEUDO_FLAG; 1220 1221 a.setCIPChirality(c | ((ruleA - 1) << JC.CIP_CHIRALITY_NAME_OFFSET)); 1222 b.setCIPChirality(c | ((ruleB - 1) << JC.CIP_CHIRALITY_NAME_OFFSET)); 1223 if (Logger.debugging) 1224 Logger.info(a + "-" + b + " " + JC.getCIPChiralityName(c)); 1225 } 1226 return c; 1227 } 1228 1229 /** 1230 * Determine the stereochemistry of a bond 1231 * 1232 * @param winner1 1233 * @param end1 1234 * @param end2 1235 * @param winner2 1236 * @param isAxial 1237 * if an odd-cumulene 1238 * @param allowPseudo 1239 * if we are working from a high-level bond stereochemistry method 1240 * @return STEREO_M, STEREO_P, STEREO_Z, STEREO_E, or NO_CHIRALITY 1241 */ getEneChirality(CIPAtom winner1, CIPAtom end1, CIPAtom end2, CIPAtom winner2, boolean isAxial, boolean allowPseudo)1242 int getEneChirality(CIPAtom winner1, CIPAtom end1, CIPAtom end2, 1243 CIPAtom winner2, boolean isAxial, boolean allowPseudo) { 1244 return (winner1 == null || winner2 == null || winner1.atom == null 1245 || winner2.atom == null ? NO_CHIRALITY : isAxial ? data.isPositiveTorsion(winner1, 1246 end1, end2, winner2) : data.isCis(winner1, end1, end2, winner2)); 1247 } 1248 1249 class CIPAtom implements Comparable<CIPAtom>, Cloneable { 1250 1251 /** 1252 * odd/even toggle for comparisons of Rule5 results that would make for R/S, not r/s, if this flag is false. 1253 * 1254 */ 1255 boolean isRule5Pseudo = true; 1256 1257 /** 1258 * unique ID for this CIPAtom for debugging only 1259 * 1260 */ 1261 private int id; 1262 1263 /** 1264 * bond distance from the root atom to this atom 1265 */ 1266 private int sphere; 1267 1268 /** 1269 * Rule 1b measure: Distance from root of the corresponding nonduplicated 1270 * atom (duplicate nodes only). 1271 * 1272 * AMENDED HERE for duplicate nodes associated with a double-bond in a 1273 * 6-membered-ring benzenoid (benzene, naphthalene, pyridine, pyrazoline, 1274 * etc.) "Kekule-ambiguous" system to be its sphere. 1275 * 1276 */ 1277 1278 private int rootDistance; 1279 1280 /** 1281 * a flag to prevent finalization of an atom node more than once 1282 * 1283 */ 1284 private boolean isSet; 1285 1286 /** 1287 * a flag to indicate atom that is a duplicate, either due to 1288 * ring closure or multiple bonding -- element number and mass, but no 1289 * substituents; slightly lower in priority than standard atoms. 1290 * 1291 */ 1292 boolean isDuplicate = true; 1293 1294 /** 1295 * a flag to indicate an atom that has no substituents; a branch end point; 1296 * typically H or a halogen (F, Cl, Br, I); also set TRUE if there is a problem 1297 * setting an atom; does not include duplicates 1298 * 1299 */ 1300 boolean isTerminal; 1301 1302 /** 1303 * is one atom of a double bond 1304 */ 1305 1306 private boolean isAlkene; 1307 1308 /** 1309 * the associated Jmol (or otherwise) atom; use of the SimpleNode interface 1310 * allows us to implement this in SMILES or Jmol as well as providing an 1311 * interface other programs could use if implementing this code 1312 * 1313 */ 1314 SimpleNode atom; 1315 1316 /** 1317 * the application-assigned unique atom index for this atom; used in 1318 * updating lstSmallRings 1319 * 1320 */ 1321 int atomIndex = -1; 1322 1323 /** 1324 * true atom covalent bond count; cached for better performance 1325 */ 1326 int bondCount; 1327 1328 /** 1329 * Rule 1a nominal element number; may be fractional for Kekule issues 1330 */ 1331 float elemNo; 1332 1333 /** 1334 * Rule 2 isotope mass number if identified or average atomic mass if not 1335 * 1336 * C (12.011) > 12C, O (15.999) < 16O, and F (18.998) < 19F 1337 * 1338 * Source: 1339 * 1340 */ 1341 private float mass = UNDETERMINED; 1342 1343 ///// SUBSTITUENTS //// 1344 1345 /** 1346 * direct ancestor of this atom 1347 * 1348 */ 1349 CIPAtom parent; 1350 1351 /** 1352 * sphere-1 node in this atom's root branch 1353 */ 1354 CIPAtom rootSubstituent; 1355 1356 /** 1357 * a count of how many 1H atoms we have found on this atom; used to halt 1358 * further processing of this atom 1359 */ 1360 int h1Count; 1361 1362 /** 1363 * the substituents -- up to four supported here at this time 1364 * 1365 */ 1366 CIPAtom[] atoms = new CIPAtom[4]; 1367 1368 /** 1369 * number of substituent atoms (non-null atoms[] entries) 1370 */ 1371 private int nAtoms; 1372 1373 /** 1374 * bitset of indices of the associated atoms in the path to this atom node 1375 * from the root; used to keep track of the path to this atom in order to 1376 * prevent infinite cycling; the last atom in the path when cyclic is a 1377 * duplicate atom. 1378 * 1379 */ 1380 private BS bsPath; 1381 1382 /** 1383 * String path, for debugging 1384 * 1385 */ 1386 String myPath = ""; 1387 1388 /** 1389 * priorities associated with each subsituent from high (0) to low (3); due 1390 * to equivaliencies at a given rule level, these numbers may duplicate and 1391 * have gaps - for example, [0 2 0 3] 1392 */ 1393 int[] oldPriorities, priorities = new int[4]; 1394 1395 /** 1396 * the number of distinct priorities determined for this atom for the 1397 * current rule; 0-4 for the root atom; 0-3 for all others 1398 */ 1399 int oldNPriorities, nPriorities; 1400 1401 /** 1402 * current priority 0-3; used for Rule 4b and 5 priority sorting 1403 * 1404 */ 1405 int priority; 1406 1407 /** 1408 * ASCII-encoded priority string 1409 */ 1410 private String chiralPath; 1411 1412 /** 1413 * number of root-duplicate atoms (root atom only 1414 */ 1415 1416 int nRootDuplicates; 1417 1418 /** 1419 * Rule 1b hash table that maintains distance of the associated 1420 * nonduplicated atom node 1421 * 1422 */ 1423 Map<Integer, Integer> htPathPoints; 1424 1425 /** 1426 * reference index for Rule 6 -- root atom only 1427 */ 1428 int rule6refIndex = -1; 1429 1430 /** 1431 * a list of only the undifferentiated ligands in Rule 6 -- root atom only 1432 */ 1433 private BS bsRule6Subs; 1434 1435 /////// double and triple bonds /////// 1436 1437 /** 1438 * first atom of an alkene or cumulene atom 1439 */ 1440 1441 private CIPAtom alkeneParent; 1442 1443 /** 1444 * last atom of an alkene or cumulene atom 1445 */ 1446 1447 private CIPAtom alkeneChild; 1448 1449 /** 1450 * a flag used in Rule 3 to indicate the second carbon of a double bond 1451 */ 1452 1453 private boolean isAlkeneAtom2; 1454 1455 /** 1456 * is an atom that is involved in more than one Kekule form 1457 */ 1458 private boolean isKekuleAmbiguous; 1459 1460 /** 1461 * first =X= atom in a string of =X=X=X=... 1462 */ 1463 private CIPAtom nextSP2; 1464 1465 /** 1466 * potentially useful information that this duplicate is from an double- or 1467 * triple-bond, not a ring closure 1468 */ 1469 private boolean multipleBondDuplicate; 1470 1471 /** 1472 * alkene or even cumulene, so chirality will be EZ, not MP 1473 */ 1474 private boolean isEvenEne = true; 1475 1476 //// AUXILIARY CHIRALITY (SET JUST PRIOR TO RULE 3) ///// 1477 1478 /** 1479 * auxiliary chirality E/Z for this atom node 1480 */ 1481 private int auxEZ = UNDETERMINED; 1482 1483 /** 1484 * a flag set false in evaluation of Rule 5 to indicate that there was more 1485 * than one R/S decision made, so this center cannot be r/s; initially just 1486 * indicates that the atom has 4 covalent bonds or is trigonal pyriamidal 1487 */ 1488 boolean isSP3 = true; 1489 1490 /** 1491 * auxiliary chirality as determined in createAuxiliaryRule4Data; 1492 * possibilities include R/S, r/s, M/P, m/p, C/T (but not c/t), or ~ (ASCII 1493 * 126, no stereochemistry); for sorting purposes C=M=R < p=r=s < ~ 1494 */ 1495 private char auxChirality = '~'; 1496 1497 /** 1498 * points to next branching point that has two or more chiral branches 1499 */ 1500 private CIPAtom nextChiralBranch; 1501 1502 /** 1503 * a check for downstream chirality 1504 * 1505 */ 1506 private boolean isChiralPath; 1507 1508 /** 1509 * the auxiiary chirality type: 0: ~, 1: R, 2: S; normalized to R/S even if 1510 * M/P or C/T 1511 */ 1512 int rule4Type; 1513 1514 /** 1515 * used to count the number of priorities 1516 */ 1517 private BS bsTemp = new BS(); 1518 1519 /** 1520 * Rule 4b reference chirality (R or S, 1 or 2); root only 1521 */ 1522 private int rule4Ref; 1523 1524 /** 1525 * bitsets for tracking R and S for Rule 4b 1526 */ 1527 BS[] listRS; 1528 CIPAtom()1529 CIPAtom() { 1530 // had a problem in JavaScript that the constructor of an inner function cannot 1531 // access this.b$ yet. That assignment is made after construction. 1532 } 1533 1534 /** 1535 * 1536 * @param atom 1537 * or null to indicate a null placeholder 1538 * @param parent 1539 * @param isAlkene 1540 * @param isDuplicate 1541 * @param isParentBond 1542 * @return this 1543 */ 1544 @SuppressWarnings("unchecked") create(SimpleNode atom, CIPAtom parent, boolean isAlkene, boolean isDuplicate, boolean isParentBond)1545 CIPAtom create(SimpleNode atom, CIPAtom parent, boolean isAlkene, 1546 boolean isDuplicate, boolean isParentBond) { 1547 this.id = ++ptIDLogger; 1548 this.parent = parent; 1549 if (atom == null) 1550 return this; 1551 this.isAlkene = isAlkene; 1552 this.atom = atom; 1553 atomIndex = atom.getIndex(); 1554 if (atom.getIsotopeNumber() > 0) 1555 bsNeedRule.set(RULE_2); 1556 this.isDuplicate = multipleBondDuplicate = isDuplicate; 1557 isKekuleAmbiguous = (data.bsKekuleAmbiguous != null && data.bsKekuleAmbiguous 1558 .get(atomIndex)); 1559 elemNo = (isDuplicate && isKekuleAmbiguous ? 1560 parent.getKekuleElementNumber() 1561 : atom.getElementNumber()); 1562 bondCount = atom.getCovalentBondCount(); 1563 isSP3 = (bondCount == 4 || bondCount == 3 && !isAlkene 1564 && (elemNo > 10 || data.bsAzacyclic != null && data.bsAzacyclic.get(atomIndex))); 1565 if (parent != null) 1566 sphere = parent.sphere + 1; 1567 if (sphere == 1) { 1568 rootSubstituent = this; 1569 htPathPoints = new Hashtable<Integer, Integer>(); 1570 } else if (parent != null) { 1571 rootSubstituent = parent.rootSubstituent; 1572 htPathPoints = (Map<Integer, Integer>) ((Hashtable<Integer, Integer>) parent.htPathPoints) 1573 .clone(); 1574 } 1575 bsPath = (parent == null ? new BS() : (BS) parent.bsPath.clone()); 1576 1577 if (isDuplicate) 1578 bsNeedRule.set(RULE_3); 1579 rootDistance = sphere; 1580 // The rootDistance for a nonDuplicate atom is just its sphere. 1581 // The rootDistance for a duplicate atom is (by IUPAC) the sphere of its duplicated atom. 1582 // I argue that for aromatic compounds, this introduces a Kekule problem and that for 1583 // those cases, the rootDistance should be its own sphere, not the sphere of its duplicated atom. 1584 // This shows up in AV-360#215. 1585 1586 if (parent == null) { 1587 // original atom 1588 bsPath.set(atomIndex); 1589 } else if (multipleBondDuplicate 1590 ) { 1591 rootDistance--; 1592 } else if (bsPath.get(atomIndex)) { 1593 bsNeedRule.setBitTo(RULE_1b, (this.isDuplicate = true)); 1594 if ((rootDistance = (atom == root.atom ? 0 : isParentBond ? parent.sphere 1595 : htPathPoints.get(Integer.valueOf(atomIndex)).intValue())) == 0) { 1596 root.nRootDuplicates++; 1597 } 1598 } else { 1599 bsPath.set(atomIndex); 1600 htPathPoints.put(Integer.valueOf(atomIndex), Integer.valueOf(rootDistance)); 1601 } 1602 if (doTrack) { 1603 if (sphere < MAX_PATH) // Logger 1604 myPath = (parent != null ? parent.myPath + "-" : "") + this; // Logger 1605 if (Logger.debuggingHigh) 1606 Logger.info("new CIPAtom " + myPath); 1607 } 1608 return this; 1609 } 1610 1611 /** 1612 * Check ene for first nonduplicate. 1613 * 1614 * @return 1 or 2 1615 */ getEneTop()1616 int getEneTop() { 1617 return (atoms[0].isDuplicate ? 2 : 1); 1618 } 1619 1620 /** 1621 * The original Rule 6 implementation; allows cubane, tetrahedrane, and 1622 * bicyclo[2.2.2]octane to be ns. 1623 * 1624 * 1625 * @param isAux 1626 * 1627 * @return NO_CHIRALITY or descriptor 1628 */ getRule6Descriptor(boolean isAux)1629 int getRule6Descriptor(boolean isAux) { 1630 if (nPriorities > 2 1631 || (isAux ? countAuxDuplicates(atomIndex) : nRootDuplicates) <= 2) 1632 return NO_CHIRALITY; 1633 1634 // we have more than two root-duplicates and priorities array is one of: 1635 // [0 0 0 0] CIP Helv Chim. Acta 1966 #33 -- double spiran 1636 // [0 0 0 0] CIP 1982 S4 1637 // [0 0 2 2] P-93.5.3.2 spiro 1638 // [0 1 1 1] or [0 0 0 3] CIP Helv. Chim. Acta 1966 #32 -- C3-symmetric 1639 int i1 = (priorities[0] == priorities[1] ? 0 : 1); 1640 int i2 = (priorities[2] != priorities[3] ? 3 : 4); 1641 int istep = (priorities[2] == priorities[1] ? 1 : 2); 1642 int rsRM = 0, rsSP = 0; 1643 BS bsSubs = new BS(); 1644 for (int i = i1; i < i2; i++) 1645 bsSubs.set(atoms[i].atomIndex); 1646 if (nPriorities == 1) 1647 i2 = 2; // actually, we only want two for double spiran or S4 1648 1649 // Check result of promoting each undifferentiated ligand. 1650 // Exit if two R or two S are found. 1651 1652 CIPAtom cipAtom = null; 1653 int rs; 1654 for (int i = i1; i < i2; i += istep) { 1655 if (data.testRule6Full) { 1656 // Full Rule 6 -- allow for new auxiliary descriptors in Rule 6. 1657 // This will cause bicyclo[2.2.2]octane to be rr. 1658 cipAtom = new CIPAtom().create(atom, null, false, false, false); 1659 cipAtom.rule6refIndex = atoms[i].atomIndex; 1660 cipAtom.setNode(); 1661 for (int j = 0; j < 4; j++) { 1662 cipAtom.atoms[j] = (CIPAtom) atoms[j].clone(); 1663 cipAtom.priorities[j] = priorities[j]; 1664 } 1665 cipAtom.bsRule6Subs = bsSubs; 1666 rs = getAtomChiralityLimited(atom, cipAtom, null); 1667 currentRule = RULE_6; 1668 if (rs == NO_CHIRALITY) 1669 return NO_CHIRALITY; 1670 } else { 1671 // Original Rule 6 -- just check for switches. 1672 // Bicyclo[2.2.2]octane will be ns. 1673 root.bsRule6Subs = new BS(); 1674 root.rule6refIndex = atoms[i].atomIndex; 1675 saveRestorePriorities(false); 1676 sortSubstituents(Integer.MIN_VALUE); 1677 if (!sortSubstituents(0)) 1678 return NO_CHIRALITY; 1679 rs = data.checkHandedness(this); 1680 saveRestorePriorities(true); 1681 } 1682 // If the descriptor is r or s, that is our descriptor. 1683 if ((rs & JC.CIP_CHIRALITY_PSEUDO_FLAG) == 0) { 1684 // If it is not r/s, check for more than one R or more than one S. 1685 // If more than one R or more than one S, that is our descriptor. 1686 if (rs == STEREO_R || rs == STEREO_M) { 1687 if (rsRM == 0) { 1688 rsRM = rs; 1689 continue; 1690 } 1691 } else if (rsSP == 0) { 1692 rsSP = rs; 1693 continue; 1694 } 1695 } 1696 // We have a determination. 1697 return rs; 1698 } 1699 return NO_CHIRALITY; 1700 } 1701 1702 /** 1703 * Reset priorities after each Rule 6 test. 1704 * 1705 * @param isRestore 1706 */ saveRestorePriorities(boolean isRestore)1707 private void saveRestorePriorities(boolean isRestore) { 1708 if (isRestore) { 1709 priorities = oldPriorities; 1710 nPriorities = oldNPriorities; 1711 } else { 1712 oldPriorities = Arrays.copyOf(priorities, 4); 1713 oldNPriorities = nPriorities; 1714 } 1715 for (int i = 0; i < nAtoms; i++) 1716 atoms[i].saveRestorePriorities(isRestore); 1717 } 1718 1719 /** 1720 * Get a count of the number of duplicate nodes to the auxiliary atom. 1721 * 1722 * @param index 1723 * 1724 * @return the number of dublicates to the auxiliary center 1725 */ countAuxDuplicates(int index)1726 private int countAuxDuplicates(int index) { 1727 int n = 0; 1728 for (int i = 0; i < 4; i++) { 1729 if (atoms[i] == null) 1730 continue; 1731 if (atoms[i].isDuplicate) { 1732 if (atoms[i].atomIndex == index) 1733 n++; 1734 } else { 1735 n += atoms[i].countAuxDuplicates(index); 1736 } 1737 } 1738 return n; 1739 } 1740 1741 /** 1742 * get the atomic mass only if needed by Rule 2, testing for three special 1743 * conditions in the case of isotopes: 1744 * 1745 * If a duplicate, or an isotope that is 100% nat abundant is specified, or 1746 * isotopic mass is not specified, just use the average mass. 1747 * 1748 * Otherwise, use the integer isotope mass number, taking care in the cases 1749 * of 16O, 52Cr, 96Mo, and 175Lu, to reduce this integer by 0.1 so as to put 1750 * it in the correct relationship to the element's average mass. 1751 * 1752 * @return mass or mass surrogate 1753 */ getMass()1754 private float getMass() { 1755 if (isDuplicate) 1756 return 0; 1757 if (mass == UNDETERMINED) { 1758 if (isDuplicate || (mass = atom.getMass()) != (int) mass 1759 || isType(RULE_2_nXX_EQ_XX)) 1760 return (mass == UNDETERMINED ? mass = Elements.getAtomicMass((int) elemNo) 1761 : mass); 1762 // for 16O;52Cr;96Mo;175Lu; 1763 // adjust integer mass number down just a bit to account 1764 // for average mass being slightly higher than actual isotope mass, not lower 1765 if (isType(RULE_2_REDUCE_ISOTOPE_MASS_NUMBER)) 1766 mass -= 0.1f; 1767 } 1768 return mass; 1769 } 1770 isType(String rule2Type)1771 private boolean isType(String rule2Type) { 1772 return PT.isOneOf( 1773 (int) mass + Elements.elementSymbolFromNumber((int) elemNo), 1774 rule2Type); 1775 } 1776 1777 /** 1778 * Calculate the average element numbers of associated double-bond atoms 1779 * weighted by their most significant Kekule resonance contributor(s). We 1780 * only consider simple benzenoid systems -- 6-membered rings and their 1781 * 6-memebered rings fused to them. Calculated for the parent of an 1782 * sp2-duplicate atom. 1783 * 1784 * @return an averaged element number 1785 */ getKekuleElementNumber()1786 private float getKekuleElementNumber() { 1787 SimpleEdge[] edges = atom.getEdges(); 1788 SimpleEdge bond; 1789 float ave = 0;//atom.getElementNumber(); 1790 int n = 0;//1; 1791 for (int i = edges.length; --i >= 0;) 1792 if ((bond = edges[i]).isCovalent()) { 1793 SimpleNode other = bond.getOtherNode(atom); 1794 if (data.bsKekuleAmbiguous.get(other.getIndex())) { 1795 // System.out.println(this + " adding " + other + " " + ave + " " + n); 1796 n++; 1797 ave += other.getElementNumber(); 1798 } 1799 } 1800 return ave / n; 1801 } 1802 1803 /** 1804 * Set the atom to have substituents. 1805 * 1806 * @return true if a valid atom for consideration 1807 * 1808 */ setNode()1809 boolean setNode() { 1810 // notice we are setting isSet TRUE here, not just testing it. 1811 if (isSet || (isSet = true) && isDuplicate) 1812 return true; 1813 int index = atom.getIndex(); 1814 SimpleEdge[] bonds = atom.getEdges(); 1815 int nBonds = bonds.length; 1816 if (Logger.debuggingHigh) 1817 Logger.info("set " + this); 1818 int pt = 0; 1819 for (int i = 0; i < nBonds; i++) { 1820 SimpleEdge bond = bonds[i]; 1821 if (!bond.isCovalent()) 1822 continue; 1823 SimpleNode other = bond.getOtherNode(atom); 1824 boolean isParentBond = (parent != null && parent.atom == other); 1825 int order = data.getBondOrder(bond); 1826 if (order == 2) { 1827 if (elemNo > 10 || !isFirstRow(other)) 1828 order = 1; 1829 else { 1830 isAlkene = true; 1831 if (isParentBond) 1832 setEne(); 1833 } 1834 } 1835 if (nBonds == 1 && order == 1 && isParentBond) 1836 return isTerminal = true; 1837 // from here on, isTerminal indicates an error condition 1838 switch (order) { 1839 case 3: 1840 if (addAtom(pt++, other, isParentBond, false, isParentBond) == null) 1841 return !(isTerminal = true); 1842 //$FALL-THROUGH$ 1843 case 2: 1844 if (addAtom(pt++, other, order != 2 || isParentBond, order == 2, 1845 isParentBond) == null) 1846 return !(isTerminal = true); 1847 //$FALL-THROUGH$ 1848 case 1: 1849 if (isParentBond 1850 || addAtom(pt++, other, order != 1 && elemNo <= 10, false, false) != null) 1851 break; 1852 //$FALL-THROUGH$ 1853 default: 1854 return !(isTerminal = true); 1855 } 1856 } 1857 nAtoms = pt; 1858 switch (pt) { 1859 case 2: 1860 case 3: 1861 // [c-,r5d3n+0,r5d2o+0] 1862 if (elemNo == 6 && data.bsNegativeAromatic.get(index) || data.bsXAromatic.get(index)) { 1863 nAtoms++; 1864 addAtom(pt++, this.atom, true, false, false); 1865 } 1866 break; 1867 1868 } 1869 if (pt < 4) 1870 for (; pt < atoms.length; pt++) 1871 atoms[pt] = new CIPAtom().create(null, this, false, true, false); 1872 1873 // Do an initial very shallow atom-only Rule 1 sort using a.compareTo(b) 1874 1875 try { 1876 Arrays.sort(atoms); 1877 } catch (Exception e) { 1878 e.printStackTrace(); 1879 } 1880 return true; 1881 } 1882 1883 /** 1884 * Set all ene-related fields upon finding the second atom. 1885 * 1886 */ setEne()1887 private void setEne() { 1888 parent.alkeneChild = null; 1889 alkeneParent = (parent.alkeneParent == null ? parent 1890 : parent.alkeneParent); 1891 alkeneParent.alkeneChild = this; 1892 nextSP2 = parent; 1893 if (parent.alkeneParent == null) 1894 parent.nextSP2 = this; 1895 if (atom.getCovalentBondCount() == 2 && atom.getValence() == 4) { 1896 parent.isAlkeneAtom2 = false; 1897 alkeneParent.isEvenEne = !alkeneParent.isEvenEne; 1898 } else { 1899 isAlkeneAtom2 = true; 1900 } 1901 } 1902 1903 /** 1904 * Add a new atom or return null 1905 * 1906 * @param i 1907 * @param other 1908 * @param isDuplicate 1909 * @param isAlkene 1910 * @param isParentBond 1911 * @return new atom or null 1912 */ addAtom(int i, SimpleNode other, boolean isDuplicate, boolean isAlkene, boolean isParentBond)1913 private CIPAtom addAtom(int i, SimpleNode other, boolean isDuplicate, 1914 boolean isAlkene, boolean isParentBond) { 1915 if (i >= atoms.length) { 1916 if (Logger.debugging) 1917 Logger.info(" too many bonds on " + atom); 1918 return null; 1919 } 1920 if (other.getElementNumber() == 1 && other.getIsotopeNumber() == 0) { 1921 if (++h1Count > 1) { 1922 if (parent == null) { 1923 // For top level, we do not allow two 1H atoms. 1924 if (Logger.debuggingHigh) 1925 Logger.info(" second H atom found on " + atom); 1926 return null; 1927 } 1928 } 1929 } 1930 return atoms[i] = new CIPAtom().create(other, this, isAlkene, 1931 isDuplicate, isParentBond); 1932 } 1933 1934 ///// sorting methods ///// 1935 1936 /** 1937 * Deep-Sort the substituents of an atom, setting the node's atoms[] and 1938 * priorities[] arrays. Checking for "ties" that will lead to 1939 * pseudochirality is also done here. 1940 * 1941 * @param sphere 1942 * current working sphere; Integer.MIN_VALUE to not break ties 1943 * @return all priorities assigned 1944 * 1945 */ sortSubstituents(int sphere)1946 boolean sortSubstituents(int sphere) { 1947 1948 // runs about 20% faster with this check 1949 if (nPriorities == (sphere < 1 ? 4 : 3)) 1950 return true; 1951 1952 // Note that this method calls breakTie and is called recursively from breakTie. 1953 1954 boolean ignoreTies = (sphere == Integer.MIN_VALUE); 1955 if (ignoreTies) { 1956 // If this is Rule 4a, 4c, or 6, we must presort the full tree without breaking ties 1957 if (isTerminal) 1958 return false; 1959 switch (currentRule) { 1960 case RULE_4a: 1961 case RULE_4c: 1962 for (int i = 0; i < 4; i++) 1963 if (atoms[i] != null && (atoms[i].isChiralPath || atoms[i].nextChiralBranch != null)) 1964 atoms[i].sortSubstituents(Integer.MIN_VALUE); 1965 if (isAlkene) // was isSP3 1966 return false; 1967 break; 1968 case RULE_6: 1969 for (int i = 0; i < 4; i++) 1970 if (atoms[i] != null && !atoms[i].isDuplicate 1971 && atoms[i].atom != null && atoms[i].setNode()) 1972 atoms[i].sortSubstituents(Integer.MIN_VALUE); 1973 break; 1974 } 1975 } 1976 1977 ignoreTies |= (currentRule == RULE_4b || currentRule == RULE_5); 1978 1979 int[] indices = new int[4]; 1980 1981 int[] newPriorities = new int[4]; 1982 1983 if (Logger.debuggingHigh && h1Count < 2) { 1984 Logger.info(root + "---sortSubstituents---" + this); 1985 for (int i = 0; i < 4; i++) { // Logger 1986 Logger.info(getRuleName(currentRule) + ": " + this + "[" + i + "]=" 1987 + atoms[i].myPath + " " + Integer.toHexString(priorities[i])); // Logger 1988 } 1989 Logger.info("---" + nPriorities); 1990 } 1991 1992 int loser; 1993 for (int i = 0; i < 3; i++) { 1994 CIPAtom a = atoms[i]; 1995 boolean aLoses = a.isDuplicate && currentRule > RULE_1b; 1996 for (int j = i + 1; j < 4; j++) { 1997 CIPAtom b = atoms[loser = j]; 1998 int score = TIED; 1999 2000 // Check: 2001 2002 // (a) if one of the atoms is a phantom atom (P-92.1.4.1); if not, then 2003 // (b) if the prioritiy has already been set; if not, then 2004 // (c) if the current rule decides; if not, then 2005 // (d) if the tie can be broken in the next sphere 2006 switch (b.atom == null || priorities[i] < priorities[j] ? A_WINS 2007 : aLoses || a.atom == null || priorities[j] < priorities[i] ? B_WINS 2008 : (score = a.checkCurrentRule(b)) != TIED && score != IGNORE || ignoreTies ? score 2009 : sign(a.breakTie(b, sphere + 1))) { 2010 case B_WINS: 2011 loser = i; 2012 //$FALL-THROUGH$ 2013 case A_WINS: 2014 newPriorities[loser]++; 2015 if (doTrack && score != TIED && (sphere == 0 || ignoreTies)) 2016 data.track(CIPChirality.this, a, b, 1, score, false); 2017 //$FALL-THROUGH$ 2018 case IGNORE: 2019 case TIED: 2020 indices[loser]++; 2021 continue; 2022 } 2023 } 2024 } 2025 2026 // update nPriorities and all arrays 2027 2028 bsTemp.clearAll(); // track number of priorities 2029 CIPAtom[] newAtoms = new CIPAtom[4]; 2030 for (int i = 0; i < 4; i++) { 2031 int pt = indices[i]; 2032 CIPAtom a = newAtoms[pt] = atoms[i]; 2033 int p = newPriorities[i]; 2034 if (a.atom != null) 2035 bsTemp.set(p); 2036 a.priority = priorities[pt] = p; 2037 } 2038 atoms = newAtoms; 2039 nPriorities = bsTemp.cardinality(); 2040 if (Logger.debuggingHigh && atoms[2].atom != null && atoms[2].elemNo != 1) { // Logger 2041 Logger.info(dots() + atom + " nPriorities = " + nPriorities); 2042 for (int i = 0; i < 4; i++) { // Logger 2043 Logger.info(dots() + myPath + "[" + i + "]=" + atoms[i] + " " 2044 + priorities[i] + " " + Integer.toHexString(priorities[i])); // Logger 2045 } 2046 Logger.info(dots() + "-------" + nPriorities); 2047 } 2048 2049 // We are done if the number of priorities equals the bond count. 2050 2051 return (nPriorities == bondCount); 2052 } 2053 2054 /** 2055 * Provide an indent for clarity in debugging messages 2056 * 2057 * @return a string of dots based on the value of atom.sphere. 2058 */ dots()2059 private String dots() { 2060 return ".....................".substring(0, Math.min(20, sphere)); // Logger 2061 } 2062 2063 /** 2064 * Break a tie at any level in the iteration between to atoms that otherwise 2065 * are the same by sorting their substituents. 2066 * 2067 * @param b 2068 * @param sphere 2069 * current working sphere 2070 * @return [0 (TIED), -1 (A_WINS), or 1 (B_WINS)] * (sphere where broken) 2071 */ breakTie(CIPAtom b, int sphere)2072 private int breakTie(CIPAtom b, int sphere) { 2073 2074 int finalScore = TIED; 2075 2076 while (true) { 2077 //System.out.println("breaking tie for " + this + " " + b); 2078 2079 // Phase I: Quick check of this atom itself 2080 2081 // return TIED if: 2082 2083 // a) this is a duplicate, and we are done with Rule 1b 2084 // b) two duplicates are of the same node (atom and root distance) 2085 // c) one or the other can't be set (because it has too many connections), or 2086 // d) both are terminal or both are duplicates (no atoms to check) 2087 2088 if (isDuplicate 2089 && (currentRule > RULE_1b || b.isDuplicate && atom == b.atom 2090 && rootDistance == b.rootDistance) 2091 || !setNode() || !b.setNode() 2092 || isTerminal && b.isTerminal 2093 || isDuplicate && b.isDuplicate) 2094 break; 2095 2096 2097 // We are done if one of these is terminal 2098 // (for the next sphere, unless one is a duplicate -- Custer Rule 1b "duplicate > nonduplicate"). 2099 2100 if (isTerminal != b.isTerminal) { 2101 finalScore = (isTerminal ? B_WINS : A_WINS) 2102 * (sphere + (b.isDuplicate || isDuplicate ? 0 : 1)); // COUNT_LINE 2103 if (doTrack) 2104 data.track(CIPChirality.this, this, b, sphere, finalScore, true); 2105 break; 2106 } 2107 // Do a duplicate check. 2108 2109 // If this is not a TIE, we do not have to go any further. 2110 2111 // NOTE THAT THIS CHECK IS NOT EXPLICIT IN THE RULES 2112 // BECAUSE DUPLICATES LOSE IN THE NEXT SPHERE, NOT THIS ONE. 2113 // THE NEED FOR (sphere+1) in AY236.53, 163, 173, 192 SHOWS THAT 2114 // SUBRULE 1a MUST BE COMPLETED EXHAUSTIVELY PRIOR TO SUBRULE 1b. 2115 // 2116 // Note that this check must return "N+1", because that is where the actual difference is. 2117 // This nuance is not clear from the "simplified" digraphs found in Chapter 9. 2118 // 2119 // Say we have {O (O) C} and {O O H} 2120 // 2121 // The rules require that we first only look at just the atoms, so OOC beats OOH in this sphere, 2122 // but there are no atoms to check on (O), so we can do the check here to save time, reporting back 2123 // to breatTie that we found a difference, but not in this sphere. 2124 // This allows C to still beat H in this sphere, but had it been {O (O} C} and {O O C}, then 2125 // we would be done. 2126 2127 int score = (currentRule > RULE_1b ? TIED : unlikeDuplicates(b)); 2128 if (score != TIED) { 2129 finalScore = score * (sphere + 1); // COUNT_LINE 2130 if (doTrack) 2131 data.track(CIPChirality.this, this, b, sphere, finalScore, false); 2132 break; 2133 } 2134 // Phase II -- shallow check only 2135 // 2136 // Compare only in the current substitutent sphere. 2137 // 2138 // Check to see if any of the three connections to a and b are 2139 // different themselves, without any deeper check. 2140 // 2141 // This requires that both a annd b have their ligands sorted 2142 // at least in a preliminary fashion, using Array.sort() and compareTo() 2143 // for Rules 1a, 1b, and 2. 2144 // But if we do not do a presort, then we have to do those first. 2145 // Doing the presort saves considerably on run time. 2146 2147 for (int i = 0; i < nAtoms; i++) 2148 if ((score = atoms[i].checkCurrentRule(b.atoms[i])) != TIED) { 2149 finalScore = score * (sphere + 1); // COUNT_LINE 2150 if (doTrack) 2151 data.track(CIPChirality.this, atoms[i], b.atoms[i], sphere, finalScore, false); 2152 break; 2153 } 2154 2155 if (finalScore != TIED) { 2156 break; 2157 } 2158 // Time to do a full sort of eash ligand, including breaking ties 2159 2160 sortSubstituents(sphere); 2161 b.sortSubstituents(sphere); 2162 2163 // Phase III -- check deeply using re-entrant call to breakTie 2164 // 2165 // Now iteratively deep-sort each list based on substituents 2166 // and then check them one by one to see if the tie can be broken. 2167 2168 // Note that if not catching duplicates early, we must check for 2169 // nAtoms == 0 and set finalScore in that case to B_WINS * (sphere + 1) 2170 // but we are checking for duplicates early in this implementation. 2171 2172 for (int i = 0, abs, absScore = Integer.MAX_VALUE; i < nAtoms; i++) { 2173 if ((score = atoms[i].breakTie(b.atoms[i], sphere + 1)) != TIED 2174 && (abs = Math.abs(score)) < absScore) { 2175 absScore = abs; 2176 finalScore = score; 2177 } 2178 } 2179 break; 2180 } 2181 return finalScore; 2182 } 2183 2184 ///// atom comparison methods ////// 2185 2186 /** 2187 * Used in Array.sort when an atom is set and Collection.sort when 2188 * determining the Mata like/unlike sequence for Rules 4b and 5. Includes a 2189 * preliminary check for duplicates, since we know that that atom will 2190 * ultimately be lower priority if all other rules are tied. 2191 * * 2192 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2193 */ 2194 @Override compareTo(CIPAtom b)2195 public int compareTo(CIPAtom b) { 2196 int score; 2197 return (root.rule4Ref == NO_CHIRALITY ? 2198 // standard constitutional presort - Rules 1a, 1b, and 2 2199 (b == null ? A_WINS 2200 : (atom == null) != (b.atom == null) ? (atom == null ? B_WINS 2201 : A_WINS) : (score = compareRule1a(b)) != TIED ? score 2202 : (score = unlikeDuplicates(b)) != TIED ? score 2203 : isDuplicate ? compareRule1b(b) : compareRule2(b)) 2204 : 2205 // Rule 4b/5 Mata list referenced sort -- first by sphere, then a 2206 // simple string sort 2207 sphere < b.sphere ? -1 : sphere > b.sphere ? 1 : chiralPath 2208 .compareTo(b.chiralPath)); 2209 } 2210 2211 /** 2212 * Check this atom "A" vs a challenger "B" against the current rule. 2213 * 2214 * Note that example BB 2013 page 1258, P93.5.2.3 requires that RULE_1b be 2215 * applied only after RULE_1a has been applied exhaustively for a sphere; 2216 * otherwise C1 is R, not S (AY-236.53). 2217 * 2218 * @param b 2219 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS), or Intege.MIN_VALUE 2220 * (IGNORE) 2221 */ checkCurrentRule(CIPAtom b)2222 private int checkCurrentRule(CIPAtom b) { 2223 switch (currentRule) { 2224 default: 2225 case RULE_1a: 2226 return compareRule1a(b); 2227 case RULE_1b: 2228 return compareRule1b(b); 2229 case RULE_2: 2230 return compareRule2(b); 2231 case RULE_3: 2232 return compareRule3(b); // can be IGNORE 2233 case RULE_4a: 2234 return compareRules4ac(b, " sr SR PM"); 2235 case RULE_4b: 2236 case RULE_5: 2237 // can be terminal when we are checking the two groups on an alkene end 2238 return (isTerminal || b.isTerminal ? TIED : compareRule4b5(b)); 2239 case RULE_4c: 2240 return compareRules4ac(b, " s r p m"); 2241 case RULE_6: 2242 return compareRule6(b); 2243 } 2244 } 2245 2246 /** 2247 * This check is not technically one of those listed in the rules, but it is 2248 * useful when preparing to check substituents because if one of the atoms 2249 * has substituents and the other doesn't, we are done -- there is no reason 2250 * to check substituents. 2251 * 2252 * 2253 * @param b 2254 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2255 */ unlikeDuplicates(CIPAtom b)2256 private int unlikeDuplicates(CIPAtom b) { 2257 return b.isDuplicate == isDuplicate ? TIED : isDuplicate ? B_WINS : A_WINS; 2258 } 2259 2260 /** 2261 * Looking for phantom atom (atom number 0) or element number 2262 * 2263 * @param b 2264 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2265 */ compareRule1a(CIPAtom b)2266 private int compareRule1a(CIPAtom b) { 2267 return b.atom == null ? A_WINS 2268 : atom == null ? B_WINS 2269 : b.elemNo < elemNo ? A_WINS 2270 : b.elemNo > elemNo ? B_WINS 2271 : TIED; 2272 } 2273 2274 /** 2275 * Looking for root distance -- duplicate atoms only. 2276 * (We have checked for 2277 * 2278 * @param b 2279 * 2280 * 2281 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2282 */ 2283 compareRule1b(CIPAtom b)2284 private int compareRule1b(CIPAtom b) { 2285 return Integer.compare(rootDistance, b.rootDistance); 2286 } 2287 2288 /** 2289 * Chapter 9 Rule 2. atomic mass, with possible reversal due to use of mass 2290 * numbers. 2291 * 2292 * Also checks for temporary Rule 6 promotion for full Rule 6 implemenation 2293 * (rr bicyclo[2.2.2]octane) 2294 * 2295 * @param b 2296 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2297 */ compareRule2(CIPAtom b)2298 private int compareRule2(CIPAtom b) { 2299 return (atomIndex == b.atomIndex? TIED : getMass() > b.getMass() ? A_WINS 2300 : mass < b.mass ? B_WINS 2301 // full Rule 6 priority check is done here 2302 : root.rule6refIndex < 0 ? TIED 2303 : !root.bsRule6Subs.get(atomIndex) 2304 || !root.bsRule6Subs.get(b.atomIndex) ? 2305 TIED 2306 : root.rule6refIndex == atomIndex ? A_WINS : root.rule6refIndex == b.atomIndex ? B_WINS : TIED); 2307 } 2308 2309 /** 2310 * Chapter 9 Rule 3. E/Z. 2311 * 2312 * We carry out this step only as a tie in the sphere AFTER the final atom 2313 * of the alkene or even-cumulene. 2314 * 2315 * If the this atom and the comparison atom b are on the same parent, then 2316 * this is simply a request to break their tie based on Rule 3; otherwise 2317 * two paths are being checked for one being seqCis and one being seqTrans. 2318 * 2319 * @param b 2320 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2321 */ compareRule3(CIPAtom b)2322 private int compareRule3(CIPAtom b) { 2323 return isDuplicate || b.isDuplicate || !parent.isAlkeneAtom2 2324 || !b.parent.isAlkeneAtom2 || !parent.alkeneParent.isEvenEne 2325 || !b.parent.alkeneParent.isEvenEne || parent == b.parent ? TIED 2326 : Integer.compare(parent.auxEZ, b.parent.auxEZ); 2327 } 2328 2329 /** 2330 * Chapter 9 Rules 4a and 4c. This method allows for "RS" to be checked as 2331 * "either R or S". See AY236.66, AY236.67, 2332 * AY236.147,148,156,170,171,201,202, etc. (4a) 2333 * 2334 * @param b 2335 * @param test 2336 * String to test against; depends upon subrule being checked 2337 * @return 0 (TIED), -1 (A_WINS), or 1 (B_WINS) 2338 */ compareRules4ac(CIPAtom b, String test)2339 private int compareRules4ac(CIPAtom b, String test) { 2340 if (isTerminal || isDuplicate) 2341 return TIED; 2342 int isRa = test.indexOf(auxChirality), isRb = test 2343 .indexOf(b.auxChirality); 2344 return (isRa > isRb + 1 ? A_WINS : isRb > isRa + 1 ? B_WINS : TIED); 2345 } 2346 2347 /** 2348 * Compare the better R-ref or S-ref list for A with the same for B. 2349 * 2350 * @param b 2351 * @return A_WINS, B_WINS, or IGNORE 2352 */ compareRule4b5(CIPAtom b)2353 private int compareRule4b5(CIPAtom b) { 2354 BS bsA = getBetter4bList(); 2355 BS bsB = b.getBetter4bList(); 2356 BS best = compareLikeUnlike(bsA, bsB); 2357 int score = (best == null ? IGNORE : best == bsA ? A_WINS : B_WINS); 2358 if (best != null) { 2359 if (currentRule == RULE_5) { 2360 // this is our check for self-enantiomorphic ligands 2361 // if the two ligands are each self-enatiomorphic (first chiral sphere r,s only, no R,S), 2362 // then we need to switch to R/S mode. 2363 // This is tested as (A(R) > B(R)) == (A(S) > B(S)) 2364 if ((compareLikeUnlike(listRS[STEREO_S], b.listRS[STEREO_S]) == listRS[STEREO_S]) 2365 == (best == bsA)) 2366 parent.isRule5Pseudo = !parent.isRule5Pseudo; 2367 } 2368 if (doTrack) 2369 data.track(CIPChirality.this, this, b, 1, score, false); 2370 } 2371 return score; 2372 } 2373 2374 /** 2375 * Just look for match with the Rule 6 reference atom index 2376 * 2377 * @param b 2378 * @return A_WINS, B_WINS, or TIED 2379 */ compareRule6(CIPAtom b)2380 private int compareRule6(CIPAtom b) { 2381 return ((atomIndex == root.rule6refIndex) == (b.atomIndex == root.rule6refIndex) ? TIED 2382 : atomIndex == root.rule6refIndex ? A_WINS : B_WINS); 2383 } 2384 2385 ////// Rule 4b, 5 processsing ////// 2386 2387 /** 2388 * Clear Rule 4b information if Rule-5 pseudochiral centers have been found, 2389 * as that could change the order of descriptors in the Mata list. 2390 */ clearRule4Lists()2391 void clearRule4Lists() { 2392 listRS = null; 2393 for (int i = 0; i < 4 && atoms[i] != null; i++) 2394 atoms[i].clearRule4Lists(); 2395 } 2396 2397 /** 2398 * Create Mata-style linear atom.listRS and return the better of R-ref or S-ref. 2399 * Used first in Rule 4b and again in Rule 5. 2400 * @return the better 4b list 2401 */ getBetter4bList()2402 private BS getBetter4bList() { 2403 if (listRS != null) 2404 return listRS[currentRule == RULE_5 ? STEREO_R : 0]; 2405 BS bs; 2406 listRS = new BS[] { null, bs = rank4bAndRead(null), rank4bAndRead(bs)}; 2407 Logger.info("getBest " + currentRule + " " + this + " " + listRS[STEREO_R] + listRS[STEREO_S] + " " + myPath); 2408 bs = compareLikeUnlike(listRS[STEREO_R], listRS[STEREO_S]); 2409 return listRS[0] = (currentRule == RULE_5 || bs == null ? listRS[STEREO_R] : bs); 2410 } 2411 2412 /** 2413 * A thread-based sphere-ordered implementation that 2414 * takes into account that lists cross the boundaries of branches. 2415 * 2416 * @param bsR 2417 * null if this is for R or the R-ref list if this is for S 2418 * @return ranked list 2419 */ rank4bAndRead(BS bsR)2420 private BS rank4bAndRead(BS bsR) { 2421 boolean isS = (bsR != null); 2422 int ref = (isS ? STEREO_S : STEREO_R); 2423 BS list = new BS(); 2424 Lst<CIPAtom> chiralAtoms = new Lst<CIPAtom>(); 2425 root.rule4Ref = ref; 2426 addChiralAtoms(chiralAtoms, ref); 2427 Collections.sort(chiralAtoms); 2428 root.rule4Ref = NO_CHIRALITY; 2429 for (int i = 0, n = chiralAtoms.size(); i < n; i++) { 2430 if (Logger.debugging) 2431 Logger.info("" + ref + " " + this + " " + chiralAtoms.get(i).chiralPath); 2432 if (chiralAtoms.get(i).rule4Type == ref) 2433 list.set(i); 2434 } 2435 return list; 2436 } 2437 2438 /** 2439 * Create an ASCII string that allows the list of descriptors to be 2440 * generated in order. This list must take into account both the 2441 * current (Rule 4a or Rule 4c) priority group (0 highest to 3 lowest) 2442 * of each point along the path back to the root for this node. 2443 * This is done by multiplying the priority group by 4 and adding in 2444 * the like- unlike-ness of the node as one of 0 (ns), 1 (like), or 2 (unlike). 2445 * 2446 * In this way, we can get cross-branch sorting, not just the typical 2447 * breadth-first sorting that we usually use for ranking. For example, 2448 * in BH_64_085, two branches of a ligand each have two branches that 2449 * have identical Rule 4a priorities, leading to four branches that must be 2450 * sorted as the "highest-priority" set in Rule 4b. 2451 * 2452 * @param chiralAtoms 2453 * @param ref 2454 */ addChiralAtoms(Lst<CIPAtom> chiralAtoms, int ref)2455 private void addChiralAtoms(Lst<CIPAtom> chiralAtoms, int ref) { 2456 // encodings: 2457 // 2458 // @: 0-priority ns 2459 // A: 0-priority LIKE 2460 // B: 0-priority UNLIKE 2461 // C: n/a 2462 // D: 1-priority ns 2463 // E: 1-priority LIKE 2464 // F: 1-priority UNLIKE 2465 // G: n/a 2466 // H: 2-priority ns 2467 // I: 2-priority LIKE 2468 // J: 2-priority UNLIKE 2469 // K: n/a 2470 // L: 3-priority ns 2471 // M: 3-priority LIKE 2472 // N: 3-priority UNLIKE 2473 2474 // For example, in the processing of BH64_073 C28, we get for ligand C89 2475 // in Rule 4b: 2476 // R-ref: 2477 // 1 C89 @DA 2478 // 1 C89 @DADA 2479 // 1 C89 @DADB 2480 // 1 C89 @DADADA 2481 // 1 C89 @DADADB 2482 // 1 C89 @DADBDA 2483 // 1 C89 @DADBDB 2484 // S-ref: 2485 // 2 C89 @DB 2486 // 2 C89 @DBDA 2487 // 2 C89 @DBDB 2488 // 2 C89 @DBDADA 2489 // 2 C89 @DBDADB 2490 // 2 C89 @DBDBDA 2491 // 2 C89 @DBDBDB 2492 // 2493 // resulting R- and S-lists, from last descriptor only: 2494 // (R)llululu and (S)ulululu 2495 // 2496 // and in BH64_085, we get different results for Rule 4b than for Rule 5: 2497 // 2498 // Rule 4b: 2499 // R-ref: 2500 // 1 N17 @D@@A 2501 // 1 N17 @D@@A 2502 // 1 N17 @D@@B 2503 // 1 N17 @D@@B 2504 // 1 N17 @D@@AE 2505 // 1 N17 @D@@AF 2506 // 1 N17 @D@@BE 2507 // 1 N17 @D@@BF 2508 // 2509 // giving AABBEFEF, or lluululu 2510 // 2511 // Rule 5, after a Rule 4c sort: 2512 // R-ref: 2513 // 1 N17 @D@@A 2514 // 1 N17 @D@@B 2515 // 1 N17 @DD@A 2516 // 1 N17 @DD@B 2517 // 1 N17 @D@@AE 2518 // 1 N17 @D@@BF 2519 // 1 N17 @DD@AF 2520 // 1 N17 @DD@BE 2521 // 2522 // giving ABABEFFE or lululuul 2523 2524 if (atom == null || isTerminal || isDuplicate) 2525 return; 2526 if (rule4Type != 0) { 2527 String s = ""; 2528 CIPAtom a = this; 2529 while (a != null) { 2530 s = (char)(64 + (a.priority<<2) + (a.rule4Type == 0 ? 0 : a.rule4Type == ref ? 1 : 2)) + s; 2531 if ((a = a.parent) != null && a.chiralPath != null) { 2532 s = a.chiralPath + s; 2533 break; 2534 } 2535 } 2536 chiralPath = s; 2537 chiralAtoms.addLast(this); 2538 } 2539 for (int i = 0; i < 4; i++) 2540 if (atoms[i] != null) 2541 atoms[i].addChiralAtoms(chiralAtoms, ref); 2542 } 2543 compareLikeUnlike(BS bsA, BS bsB)2544 private BS compareLikeUnlike(BS bsA, BS bsB) { 2545 BS bsXOR = (BS) bsB.clone(); 2546 // A = 1101111 // llullll 2547 // B = 1100111 // lluulll 2548 // xor = 0001000 2549 bsXOR.xor(bsA); 2550 int l = bsXOR.nextSetBit(0); 2551 return (l < 0 ? null : bsA.get(l) ? bsA : bsB); 2552 } 2553 2554 ///// auxiliary processing 2555 2556 /** 2557 * By far the most complex of the methods, this method creates a list of 2558 * downstream (higher-sphere) auxiliary chirality designators, starting with 2559 * those furthest from the root and moving in, toward the root. 2560 * 2561 * @param node1 2562 * first node; sphere 1 2563 * @param ret 2564 * CIPAtom of next stereochemical branching point 2565 * 2566 * @return collective string, with setting of rule4List 2567 */ createAuxiliaryDescriptors(CIPAtom node1, CIPAtom[] ret)2568 boolean createAuxiliaryDescriptors(CIPAtom node1, CIPAtom[] ret) { 2569 boolean isChiralPath = false; 2570 char c = '~'; 2571 if (atom == null) 2572 return false; 2573 setNode(); 2574 int rs = -1, nRS = 0; 2575 CIPAtom[] ret1 = new CIPAtom[1]; 2576 boolean skipRules4And5 = false; 2577 boolean prevIsChiral = true; 2578 // have to allow two same because could be a C3-symmetric subunit 2579 boolean allowTwoSame = (!isAlkene && nPriorities <= (node1 == null ? 2 2580 : 1)); 2581 for (int i = 0; i < 4; i++) { 2582 CIPAtom a = atoms[i]; 2583 if (a != null && !a.isDuplicate && !a.isTerminal) { 2584 // we use ret1 to pass a reference to the next branch with two or more chiral paths 2585 ret1[0] = null; 2586 boolean aIsChiralPath = a.createAuxiliaryDescriptors( 2587 node1 == null ? a : node1, ret1); 2588 if (ret1[0] != null && ret != null) 2589 ret[0] = nextChiralBranch = a.nextChiralBranch; 2590 if (a.nextChiralBranch != null || aIsChiralPath) { 2591 nRS++; 2592 isChiralPath = aIsChiralPath; 2593 prevIsChiral = true; 2594 } else { 2595 if (!allowTwoSame && !prevIsChiral 2596 && priorities[i] == priorities[i - 1]) { 2597 return false; 2598 } 2599 prevIsChiral = false; 2600 } 2601 } 2602 } 2603 boolean isBranch = (nRS >= 2); 2604 switch (nRS) { 2605 case 0: 2606 isChiralPath = false; 2607 //$FALL-THROUGH$ 2608 case 1: 2609 skipRules4And5 = true; 2610 break; 2611 case 2: 2612 case 3: 2613 case 4: 2614 isChiralPath = false; 2615 if (ret != null) 2616 ret[0] = nextChiralBranch = this; 2617 break; 2618 } 2619 if (isAlkene) { 2620 if (alkeneChild != null) { 2621 // must be alkeneParent -- first C of an alkene -- this is where C/T is recorded 2622 // All odd cumulenes need to be checked. 2623 // If it is an alkene or even cumulene, we must do an auxiliary check 2624 // only if it is not already a defined stereochemistry, because in that 2625 // case we have a simple E/Z (c/t), and there is no need to check AND 2626 // it does not contribute to the Mata sequence (similar to r/s or m/p). 2627 // 2628 2629 if (!isEvenEne || (auxEZ == STEREO_BOTH_EZ || auxEZ == UNDETERMINED) 2630 && !isKekuleAmbiguous && alkeneChild.bondCount >= 2) { 2631 int[] rule2 = (isEvenEne ? new int[1] : null); 2632 rs = getAuxEneWinnerChirality(this, alkeneChild, !isEvenEne, rule2); 2633 2634 // 2635 // Note that we can have C/T (rule4Type = R/S): 2636 // 2637 // R x 2638 // \ / 2639 // == 2640 // / \ 2641 // S root 2642 // 2643 // flips sense upon planar inversion; determination was Rule 5. 2644 // 2645 // and ALSO we can have c/t here that has not been discovered yet 2646 // 2647 // 2648 // SR x 2649 // \ / 2650 // == 2651 // / \ 2652 // SS root 2653 2654 if (rs == NO_CHIRALITY) { 2655 auxEZ = alkeneChild.auxEZ = STEREO_BOTH_EZ; 2656 } else { 2657 isChiralPath = true; 2658 if (rule2 != null && rule2[0] != RULE_5) { 2659 // normal even-ene seqcis/seqtrans 2660 auxEZ = alkeneChild.auxEZ = rs; 2661 if (Logger.debuggingHigh) 2662 Logger.info("alkene type " + this + " " + (auxEZ == STEREO_E ? "E" : "Z")); 2663 } else if (!isBranch) { 2664 // Normalize M/P and Z/E to R/S 2665 switch (rs) { 2666 case STEREO_M: 2667 case STEREO_Z: 2668 rs = STEREO_R; 2669 c = 'R'; 2670 isChiralPath = true; 2671 break; 2672 case STEREO_P: 2673 case STEREO_E: 2674 rs = STEREO_S; 2675 c = 'S'; 2676 isChiralPath = true; 2677 break; 2678 } 2679 auxChirality = c; 2680 rule4Type = rs; 2681 } 2682 } 2683 } 2684 } 2685 } else if (isSP3 && ret != null) { 2686 // if here, adj is TIED (0) or NOT_RELEVANT 2687 CIPAtom atom1 = (CIPAtom) clone(); 2688 if (atom1.setNode()) { 2689 atom1.addReturnPath(null, this); 2690 int rule = RULE_1a; 2691 for (; rule <= RULE_6; rule++) 2692 // two C3 groups.... 2693 if ((!skipRules4And5 || rule < RULE_4a || rule > RULE_5) 2694 && atom1.auxSort(rule)) 2695 break; 2696 if (rule > RULE_6) { 2697 c = '~'; 2698 } else { 2699 rs = data.checkHandedness(atom1); 2700 isChiralPath |= (rs != NO_CHIRALITY); 2701 c = (rs == STEREO_R ? 'R' : rs == STEREO_S ? 'S' : '~'); 2702 if (rule == RULE_5) { 2703 c = (c == 'R' ? 'r' : c == 'S' ? 's' : '~'); 2704 if (rs != NO_CHIRALITY) 2705 havePseudoAuxiliary = true; 2706 } else { 2707 rule4Type = rs; 2708 } 2709 } 2710 } 2711 auxChirality = c; 2712 } 2713 if (node1 == null) 2714 bsNeedRule.setBitTo(RULE_4a, nRS > 0); 2715 if (c != '~') { 2716 Logger.info("creating aux " + c + " for " + this + (myPath.length() == 0 ? "" : " = " + myPath)); 2717 } 2718 return (this.isChiralPath = isChiralPath); 2719 } 2720 2721 /** 2722 * Sort by a given rule, preserving currentRule, which could be 4 or 5 2723 * 2724 * @param rule 2725 * @return true if a decision has been made 2726 */ auxSort(int rule)2727 private boolean auxSort(int rule) { 2728 int current = currentRule; 2729 currentRule = rule; 2730 int rule6ref = root.rule6refIndex; 2731 int nDup = root.nRootDuplicates; 2732 boolean isChiral = (rule == RULE_6 ? 2733 getRule6Descriptor(true) != NO_CHIRALITY : sortSubstituents(0)); 2734 root.nRootDuplicates = nDup; 2735 root.rule6refIndex = rule6ref; 2736 currentRule = current; 2737 return isChiral; 2738 } 2739 2740 /** 2741 * Determine the winner on one end of an alkene or cumulene and return also 2742 * the rule by which that was determined. 2743 * 2744 * @param end1 2745 * @param end2 2746 * @param isAxial 2747 * @param retRule2 2748 * return for rule found for child end (furthest from root) 2749 * @return one of: {NO_CHIRALITY | STEREO_Z | STEREO_E | STEREO_M | 2750 * STEREO_P} 2751 */ getAuxEneWinnerChirality(CIPAtom end1, CIPAtom end2, boolean isAxial, int[] retRule2)2752 private int getAuxEneWinnerChirality(CIPAtom end1, CIPAtom end2, 2753 boolean isAxial, int[] retRule2) { 2754 if (isAxial && end1.nextSP2 == end2) 2755 return NO_CHIRALITY; // allene terminating on root 2756 CIPAtom winner1 = getAuxEneEndWinner(end1, end1.nextSP2, null); 2757 CIPAtom winner2 = (winner1 == null || winner1.atom == null ? null 2758 : getAuxEneEndWinner(end2, end2.nextSP2, retRule2)); 2759 if (Logger.debuggingHigh) 2760 Logger.info(this + " alkene end winners " + winner1 + winner2); 2761 return getEneChirality(winner1, end1, end2, winner2, isAxial, false); 2762 } 2763 2764 /** 2765 * Get the atom that is the highest priority of two atoms on the end of a 2766 * double bond after sorting from Rule 1a through a given rule (Rule 3 or 2767 * Rule 5) 2768 * 2769 * @param end 2770 * @param prevSP2 2771 * @param retRule 2772 * return for deciding rule 2773 * @return higher-priority atom, or null if they are equivalent 2774 */ getAuxEneEndWinner(CIPAtom end, CIPAtom prevSP2, int[] retRule)2775 private CIPAtom getAuxEneEndWinner(CIPAtom end, CIPAtom prevSP2, 2776 int[] retRule) { 2777 CIPAtom atom1 = (CIPAtom) end.clone(); 2778 if (atom1.parent != prevSP2) 2779 atom1.addReturnPath(prevSP2, end); 2780 CIPAtom a; 2781 for (int rule = RULE_1a; rule <= RULE_6; rule++) { 2782 if (atom1.auxSort(rule)) { 2783 for (int i = 0; i < 4; i++) { 2784 a = atom1.atoms[i]; 2785 if (!a.multipleBondDuplicate) { 2786 if (atom1.priorities[i] != atom1.priorities[i + 1]) { 2787 if (retRule != null) 2788 retRule[0] = rule; 2789 return (a.atom == null ? null : a); 2790 } 2791 } 2792 } 2793 } 2794 } 2795 return null; 2796 } 2797 2798 /** 2799 * Add the path back to the root for an auxiliary center. 2800 * 2801 * @param newParent 2802 * @param fromAtom 2803 */ addReturnPath(CIPAtom newParent, CIPAtom fromAtom)2804 private void addReturnPath(CIPAtom newParent, CIPAtom fromAtom) { 2805 Lst<CIPAtom> path = new Lst<CIPAtom>(); 2806 CIPAtom thisAtom = this, newSub, oldParent = fromAtom, oldSub = newParent; 2807 // create path back to root 2808 while (oldParent.parent != null && oldParent.parent.atoms[0] != null) { // COUNT_LINE 2809 if (Logger.debuggingHigh) 2810 Logger.info("path:" + oldParent.parent + "->" + oldParent); 2811 path.addLast(oldParent = oldParent.parent); 2812 } 2813 path.addLast(null); 2814 for (int i = 0, n = path.size(); i < n; i++) { 2815 oldParent = path.get(i); 2816 newSub = (oldParent == null ? new CIPAtom().create(null, this, 2817 isAlkene, true, false) : (CIPAtom) oldParent.clone()); 2818 newSub.nPriorities = 0; 2819 newSub.sphere = thisAtom.sphere + 1; 2820 thisAtom.replaceParentSubstituent(oldSub, newParent, newSub); 2821 if (i > 0 && thisAtom.isAlkene && !thisAtom.isAlkeneAtom2) { 2822 // reverse senses of alkenes 2823 if (newParent.isAlkeneAtom2) { 2824 newParent.isAlkeneAtom2 = false; 2825 thisAtom.alkeneParent = newParent; 2826 } 2827 thisAtom.setEne(); 2828 } 2829 newParent = thisAtom; 2830 thisAtom = newSub; 2831 oldSub = fromAtom; 2832 fromAtom = oldParent; 2833 } 2834 } 2835 2836 /** 2837 * Swap a substituent and the parent in preparation for reverse traversal of 2838 * this path back to the root atom. 2839 * 2840 * @param oldSub 2841 * @param newParent 2842 * @param newSub 2843 */ replaceParentSubstituent(CIPAtom oldSub, CIPAtom newParent, CIPAtom newSub)2844 private void replaceParentSubstituent(CIPAtom oldSub, CIPAtom newParent, 2845 CIPAtom newSub) { 2846 for (int i = 0; i < 4; i++) 2847 if (atoms[i] == oldSub || newParent == null && atoms[i].atom == null) { 2848 if (Logger.debuggingHigh) 2849 Logger.info("reversed: " + newParent + "->" + this + "->" + newSub); 2850 parent = newParent; 2851 atoms[i] = newSub; 2852 Arrays.sort(atoms); 2853 break; 2854 } 2855 } 2856 2857 ///// general utility methods 2858 2859 /** 2860 * Just a simple signum for integers 2861 * 2862 * @param score 2863 * @return 0, -1, or 1 2864 */ sign(int score)2865 private int sign(int score) { 2866 return (score < 0 ? -1 : score > 0 ? 1 : 0); 2867 } 2868 2869 @Override clone()2870 public Object clone() { 2871 CIPAtom a = null; 2872 try { 2873 a = (CIPAtom) super.clone(); 2874 } catch (CloneNotSupportedException e) { 2875 } 2876 a.id = ptIDLogger++; 2877 a.atoms = new CIPAtom[4]; 2878 for (int i = 0; i < 4; i++) 2879 a.atoms[i] = atoms[i]; 2880 a.priorities = new int[4]; 2881 a.htPathPoints = htPathPoints; 2882 a.alkeneParent = null; 2883 a.auxEZ = UNDETERMINED; 2884 a.rule4Type = NO_CHIRALITY; 2885 a.listRS = null; 2886 if (Logger.debuggingHigh) 2887 a.myPath = a.toString(); 2888 return a; 2889 } 2890 2891 @Override toString()2892 public String toString() { 2893 if (atom == null) 2894 return "<null>"; 2895 if (Logger.debuggingHigh) 2896 return ("[" + currentRule + "." + sphere 2897 + "," + id + "." + (isDuplicate ? parent.atom : atom).getAtomName() 2898 + (isDuplicate ? "*(" + rootDistance + ")" : "") 2899 + (auxChirality == '~' ? "" : "" + auxChirality) + " " + elemNo 2900 + "]"); 2901 return (isDuplicate ? "(" + atom.getAtomName() + "." + rootDistance + ")" 2902 : atom.getAtomName()); 2903 2904 } 2905 } 2906 2907 } 2908