1 //
2 //  Copyright (C) 2004-2021 Greg Landrum and other RDKit contributors
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <GraphMol/RDKitBase.h>
11 #include <RDGeneral/Ranking.h>
12 #include <GraphMol/new_canon.h>
13 #include <GraphMol/QueryOps.h>
14 #include <RDGeneral/types.h>
15 #include <sstream>
16 #include <set>
17 #include <algorithm>
18 #include <RDGeneral/utils.h>
19 #include <RDGeneral/Invariant.h>
20 #include <RDGeneral/RDLog.h>
21 
22 #include <boost/dynamic_bitset.hpp>
23 #include <Geometry/point.h>
24 #include "Chirality.h"
25 
26 // #define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 
30 namespace {
shouldDetectDoubleBondStereo(const Bond * bond)31 bool shouldDetectDoubleBondStereo(const Bond *bond) {
32   const RingInfo *ri = bond->getOwningMol().getRingInfo();
33   return (!ri->numBondRings(bond->getIdx()) ||
34           ri->minBondRingSize(bond->getIdx()) > 7);
35 }
36 
37 // ----------------------------------- -----------------------------------
38 // This algorithm is identical to that used in the CombiCode Mol file
39 //  parser (also developed by RD).
40 //
41 //
42 // SUMMARY:
43 //   Derive a chiral code for an atom that has a wedged (or dashed) bond
44 //   drawn to it.
45 //
46 // RETURNS:
47 //   The chiral type
48 //
49 // CAVEATS:
50 //   This is careful to ensure that the central atom has 4 neighbors and
51 //   only single bonds to it, but that's about it.
52 //
53 // NOTE: this isn't careful at all about checking to make sure that
54 // things actually *should* be chiral. e.g. if the file has a
55 // 3-coordinate N with a wedged bond, it will make some erroneous
56 // assumptions about the chirality.
57 //
58 // ----------------------------------- -----------------------------------
59 
atomChiralTypeFromBondDir(const ROMol & mol,const Bond * bond,const Conformer * conf)60 Atom::ChiralType atomChiralTypeFromBondDir(const ROMol &mol, const Bond *bond,
61                                            const Conformer *conf) {
62   PRECONDITION(bond, "no bond");
63   PRECONDITION(conf, "no conformer");
64   Bond::BondDir bondDir = bond->getBondDir();
65   PRECONDITION(bondDir == Bond::BEGINWEDGE || bondDir == Bond::BEGINDASH,
66                "bad bond direction");
67 
68   // NOTE that according to the CT file spec, wedging assigns chirality
69   // to the atom at the point of the wedge, (atom 1 in the bond).
70   const Atom *atom = bond->getBeginAtom();
71   PRECONDITION(atom, "no atom");
72 
73   // we can't do anything with atoms that have more than 4 neighbors:
74   if (atom->getDegree() > 4) {
75     return Atom::CHI_UNSPECIFIED;
76   }
77   const Atom *bondAtom = bond->getEndAtom();
78 
79   Atom::ChiralType res = Atom::CHI_UNSPECIFIED;
80 
81   INT_LIST neighborBondIndices;
82   RDGeom::Point3D centerLoc, tmpPt;
83   centerLoc = conf->getAtomPos(atom->getIdx());
84   tmpPt = conf->getAtomPos(bondAtom->getIdx());
85   centerLoc.z = 0.0;
86   tmpPt.z = 0.0;
87 
88   RDGeom::Point3D refVect = centerLoc.directionVector(tmpPt);
89 
90   //----------------------------------------------------------
91   //
92   //  start by ensuring that all the bonds to neighboring atoms
93   //  are single bonds and collecting a list of neighbor indices:
94   //
95   //----------------------------------------------------------
96   bool hSeen = false;
97 
98   neighborBondIndices.push_back(bond->getIdx());
99   if (bondAtom->getAtomicNum() == 1 && bondAtom->getIsotope() == 0) {
100     hSeen = true;
101   }
102 
103   bool allSingle = true;
104   ROMol::OEDGE_ITER beg, end;
105   boost::tie(beg, end) = mol.getAtomBonds(atom);
106   while (beg != end) {
107     const Bond *nbrBond = mol[*beg];
108     if (nbrBond->getBondType() != Bond::SINGLE) {
109       allSingle = false;
110       // break;
111     }
112     if (nbrBond != bond) {
113       if ((nbrBond->getOtherAtom(atom)->getAtomicNum() == 1 &&
114            nbrBond->getOtherAtom(atom)->getIsotope() == 0)) {
115         hSeen = true;
116       }
117       neighborBondIndices.push_back(nbrBond->getIdx());
118     }
119     ++beg;
120   }
121   size_t nNbrs = neighborBondIndices.size();
122 
123   //----------------------------------------------------------
124   //
125   //  Return now if there aren't at least 3 non-H bonds to the atom.
126   //  (we can implicitly add a single H to 3 coordinate atoms, but
127   //  we're horked otherwise).
128   //
129   //----------------------------------------------------------
130   if (nNbrs < 3 || (hSeen && nNbrs < 4)) {
131     return Atom::CHI_UNSPECIFIED;
132   }
133 
134   //----------------------------------------------------------
135   //
136   //  Continue if there are all single bonds or if we're considering
137   //  4-coordinate P or S
138   //
139   //----------------------------------------------------------
140   if (allSingle || atom->getAtomicNum() == 15 || atom->getAtomicNum() == 16) {
141     //------------------------------------------------------------
142     //
143     //  Here we need to figure out the rotation direction between
144     //  the neighbor bonds and the wedged bond:
145     //
146     //------------------------------------------------------------
147     bool isCCW = true;
148     INT_LIST::const_iterator bondIter = neighborBondIndices.begin();
149     ++bondIter;
150     auto bond1 = mol.getBondWithIdx(*bondIter);
151     int oaid = bond1->getOtherAtom(atom)->getIdx();
152     tmpPt = conf->getAtomPos(oaid);
153     tmpPt.z = 0;
154     auto atomVect0 = centerLoc.directionVector(tmpPt);
155     auto angle01 = refVect.signedAngleTo(atomVect0);
156 
157     ++bondIter;
158     auto bond2 = mol.getBondWithIdx(*bondIter);
159     oaid = bond2->getOtherAtom(atom)->getIdx();
160     tmpPt = conf->getAtomPos(oaid);
161     tmpPt.z = 0;
162     auto atomVect1 = centerLoc.directionVector(tmpPt);
163     auto angle02 = refVect.signedAngleTo(atomVect1);
164 
165     // order everything so that looking from the top in a CCW direction we
166     // have 0, 1, 2
167     bool swappedIt = false;
168     if (angle01 > angle02) {
169       // std::cerr << " swap because " << angle01 << " " << angle02 << " "
170       //           << bond1->getIdx() << "->" << bond2->getIdx() << std::endl;
171       std::swap(angle01, angle02);
172       std::swap(bond1, bond2);
173       std::swap(atomVect0, atomVect1);
174       swappedIt = true;
175     }
176 
177     double firstAngle, secondAngle;
178     // We proceed differently for 3 and 4 coordinate atoms:
179     double angle12;
180     if (nNbrs == 4) {
181       bool flipIt = false;
182       // grab the angle to the last neighbor:
183       ++bondIter;
184       auto bond3 = mol.getBondWithIdx(*bondIter);
185       oaid = bond3->getOtherAtom(atom)->getIdx();
186       tmpPt = conf->getAtomPos(oaid);
187       tmpPt.z = 0;
188       auto atomVect2 = centerLoc.directionVector(tmpPt);
189       angle12 = refVect.signedAngleTo(atomVect2);
190 
191       // find the lowest and second-lowest angle and keep track of
192       // whether or not we have to do a non-cyclic permutation to
193       // get there:
194       if (angle01 < angle02) {
195         if (angle02 < angle12) {
196           // order is angle01 -> angle02 -> angle12
197           firstAngle = angle01;
198           secondAngle = angle02;
199         } else if (angle01 < angle12) {
200           // order is angle01 -> angle12 -> angle02
201           firstAngle = angle01;
202           secondAngle = angle12;
203           flipIt = true;
204         } else {
205           // order is angle12 -> angle01 -> angle02
206           firstAngle = angle12;
207           secondAngle = angle01;
208         }
209       } else if (angle01 < angle12) {
210         // order is angle02 -> angle01 -> angle12
211         firstAngle = angle02;
212         secondAngle = angle01;
213         flipIt = true;
214       } else {
215         if (angle02 < angle12) {
216           // order is angle02 -> angle12 -> angle01
217           firstAngle = angle02;
218           secondAngle = angle12;
219         } else {
220           // order is angle12 -> angle02 -> angle01
221           firstAngle = angle12;
222           secondAngle = angle02;
223           flipIt = true;
224         }
225       }
226       if (flipIt) {
227         isCCW = !isCCW;
228       }
229     } else {
230       // it's three coordinate.  Things are a bit different here
231       // because we have to at least kind of figure out where the
232       // hydrogen might be.
233 
234       // before getting started with that, use some of the inchi rules
235       // for contradictory stereochemistry
236       // (Table 10 in the InChi v1 technical manual)
237 
238       angle12 = atomVect0.signedAngleTo(atomVect1);
239       double angle20 = atomVect1.signedAngleTo(refVect);
240 
241       // to simplify the code below, pick out the directions of the bonds
242       // if and only if they start at our atom:
243       auto dir0 = bondDir;
244       auto dir1 = (bond1->getBeginAtomIdx() == bond->getBeginAtomIdx())
245                       ? bond1->getBondDir()
246                       : Bond::NONE;
247       auto dir2 = (bond2->getBeginAtomIdx() == bond->getBeginAtomIdx())
248                       ? bond2->getBondDir()
249                       : Bond::NONE;
250 
251       // we know bond 0 has the direction set
252 
253       //  this one is never allowed with different directions:
254       //     0   2
255       //      \ /
256       //       C
257       //       *
258       //       1
259       if (angle01 < (M_PI - 1e-3) && angle12 < (M_PI - 1e-3) &&
260           angle20 < (M_PI - 1e-3)) {
261         if ((dir1 != Bond::NONE && dir1 != dir0) ||
262             (dir2 != Bond::NONE && dir2 != dir0)) {
263           BOOST_LOG(rdWarningLog)
264               << "Warning: conflicting stereochemistry at atom "
265               << bond->getBeginAtomIdx() << " ignored."
266               << " by rule 1a." << std::endl;
267           return Atom::CHI_UNSPECIFIED;
268         }
269       } else {
270         // otherwise they cannot all be the same
271         if (dir1 == dir0 && dir1 == dir2) {
272           BOOST_LOG(rdWarningLog)
273               << "Warning: conflicting stereochemistry at atom "
274               << bond->getBeginAtomIdx() << " ignored."
275               << " by rule 1b." << std::endl;
276           return Atom::CHI_UNSPECIFIED;
277         }
278 
279         // the remaining cases where stereo is allowed are:
280         //      0        1 0 2
281         //      *         \*/
282         //  1 - C - 2      C      for these two bond1 and bond2 must match
283         //    and
284         //      1        2 1 0
285         //      *         \*/
286         //  2 - C - 0      C      for these two bond0 and bond2 must match
287         //    and
288         //      2        0 2 1
289         //      *         \*/
290         //  0 - C - 1      C      for these two bond0 and bond1 must match
291         //
292 
293         if (dir1 != Bond::NONE && dir1 != dir0) {
294           if ((angle01 >= M_PI) ||  // last two examples
295               (angle02 > M_PI && dir2 != Bond::NONE &&
296                dir1 != dir2) ||  // top two examples
297               (angle02 <= M_PI && dir2 != Bond::NONE &&
298                dir0 != dir2)) {  // middle two examples
299             BOOST_LOG(rdWarningLog)
300                 << "Warning: conflicting stereochemistry at atom "
301                 << bond->getBeginAtomIdx() << " ignored."
302                 << " by rule 2a." << std::endl;
303             return Atom::CHI_UNSPECIFIED;
304           }
305         } else if (dir0 == dir2) {
306           // all bonds are the same and we already removed the "all
307           // angles less than 180" case above
308           BOOST_LOG(rdWarningLog)
309               << "Warning: conflicting stereochemistry at atom "
310               << bond->getBeginAtomIdx() << " ignored."
311               << " by rule 2b." << std::endl;
312           return Atom::CHI_UNSPECIFIED;
313         }
314 
315         if (angle01 < angle02) {
316           firstAngle = angle01;
317           secondAngle = angle02;
318           isCCW = true;
319         } else {
320           firstAngle = angle02;
321           secondAngle = angle01;
322           isCCW = false;
323         }
324         if (secondAngle - firstAngle >= (M_PI - 1e-4)) {
325           // it's a situation like one of these:
326           //
327           //      0        1 0 2
328           //      *         \*/
329           //  1 - C - 2      C
330           //
331           // In each of these cases, the implicit H is between atoms 1
332           // and 2, so we need to flip the rotation direction (go
333           // around the back).
334           isCCW = !isCCW;
335         }
336       }
337     }
338     // reverse the rotation direction if the reference is wedged down:
339     if (bondDir == Bond::BEGINDASH) {
340       isCCW = !isCCW;
341     }
342     if (swappedIt) {
343       // we swapped the order of the bonds to simplify the analysis above:
344       isCCW = !isCCW;
345     }
346 
347     // ----------------
348     //
349     // We now have the rotation direction using mol-file order.
350     // We need to convert that into the appropriate label for the
351     // central atom
352     //
353     // ----------------
354     int nSwaps = atom->getPerturbationOrder(neighborBondIndices);
355     if (nSwaps % 2) {
356       isCCW = !isCCW;
357     }
358     if (isCCW) {
359       res = Atom::CHI_TETRAHEDRAL_CCW;
360     } else {
361       res = Atom::CHI_TETRAHEDRAL_CW;
362     }
363   }
364 
365   return res;
366 }  // namespace RDKit
367 
getOppositeBondDir(Bond::BondDir dir)368 Bond::BondDir getOppositeBondDir(Bond::BondDir dir) {
369   PRECONDITION(dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT,
370                "bad bond direction");
371   switch (dir) {
372     case Bond::ENDDOWNRIGHT:
373       return Bond::ENDUPRIGHT;
374     case Bond::ENDUPRIGHT:
375       return Bond::ENDDOWNRIGHT;
376     default:
377       return Bond::NONE;
378   }
379 }
380 
setBondDirRelativeToAtom(Bond * bond,Atom * atom,Bond::BondDir dir,bool reverse,boost::dynamic_bitset<> & needsDir)381 void setBondDirRelativeToAtom(Bond *bond, Atom *atom, Bond::BondDir dir,
382                               bool reverse, boost::dynamic_bitset<> &needsDir) {
383   PRECONDITION(bond, "bad bond");
384   PRECONDITION(atom, "bad atom");
385   PRECONDITION(dir == Bond::ENDUPRIGHT || dir == Bond::ENDDOWNRIGHT, "bad dir");
386   PRECONDITION(atom == bond->getBeginAtom() || atom == bond->getEndAtom(),
387                "atom doesn't belong to bond");
388   RDUNUSED_PARAM(needsDir);
389 
390   if (bond->getBeginAtom() != atom) {
391     reverse = !reverse;
392   }
393 
394   if (reverse) {
395     dir = (dir == Bond::ENDUPRIGHT ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT);
396   }
397   // to ensure maximum compatibility, even when a bond has unknown stereo (set
398   // explicitly and recorded in _UnknownStereo property), I will still let a
399   // direction to be computed. You must check the _UnknownStereo property to
400   // make sure whether this bond is explicitly set to have no direction info.
401   // This makes sense because the direction info are all derived from
402   // coordinates, the _UnknownStereo property is like extra metadata to be
403   // used with the direction info.
404   bond->setBondDir(dir);
405 }
406 
isLinearArrangement(const RDGeom::Point3D & v1,const RDGeom::Point3D & v2)407 bool isLinearArrangement(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2) {
408   double lsq = v1.lengthSq() * v2.lengthSq();
409 
410   // treat zero length vectors as linear
411   if (lsq < 1.0e-6) return true;
412 
413   double dotProd = v1.dotProduct(v2);
414 
415   double cos178 =
416       -0.999388;  // == cos(M_PI-0.035), corresponds to a tolerance of 2 degrees
417   return dotProd < cos178 * sqrt(lsq);
418 }
419 
updateDoubleBondNeighbors(ROMol & mol,Bond * dblBond,const Conformer * conf,boost::dynamic_bitset<> & needsDir,std::vector<unsigned int> & singleBondCounts,const VECT_INT_VECT & singleBondNbrs)420 void updateDoubleBondNeighbors(ROMol &mol, Bond *dblBond, const Conformer *conf,
421                                boost::dynamic_bitset<> &needsDir,
422                                std::vector<unsigned int> &singleBondCounts,
423                                const VECT_INT_VECT &singleBondNbrs) {
424   // we want to deal only with double bonds:
425   PRECONDITION(dblBond, "bad bond");
426   PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "not a double bond");
427   if (!needsDir[dblBond->getIdx()]) {
428     return;
429   }
430   needsDir.set(dblBond->getIdx(), 0);
431 #if 0
432   std::cerr << "**********************\n";
433   std::cerr << "**********************\n";
434   std::cerr << "**********************\n";
435   std::cerr << "UDBN: " << dblBond->getIdx() << " "
436             << dblBond->getBeginAtomIdx() << "=" << dblBond->getEndAtomIdx()
437             << "\n";
438 #endif
439 
440   ROMol::OEDGE_ITER beg, end;
441   std::vector<Bond *> followupBonds;
442 
443   Bond *bond1 = nullptr, *obond1 = nullptr;
444   bool squiggleBondSeen = false;
445   bool doubleBondSeen = false;
446   boost::tie(beg, end) = mol.getAtomBonds(dblBond->getBeginAtom());
447   while (beg != end) {
448     Bond *tBond = mol[*beg];
449     if (tBond == dblBond) {
450       ++beg;
451       continue;
452     }
453     if (tBond->getBondType() == Bond::SINGLE ||
454         tBond->getBondType() == Bond::AROMATIC) {
455       // prefer bonds that already have their directionality set
456       // or that are adjacent to more double bonds:
457       if (!bond1) {
458         bond1 = tBond;
459       } else if (needsDir[tBond->getIdx()]) {
460         if (singleBondCounts[tBond->getIdx()] >
461             singleBondCounts[bond1->getIdx()]) {
462           obond1 = bond1;
463           bond1 = tBond;
464         } else {
465           obond1 = tBond;
466         }
467       } else {
468         obond1 = bond1;
469         bond1 = tBond;
470       }
471     } else if (tBond->getBondType() == Bond::DOUBLE) {
472       doubleBondSeen = true;
473     }
474     int explicit_unknown_stereo;
475     if ((tBond->getBondType() == Bond::SINGLE ||
476          tBond->getBondType() == Bond::AROMATIC) &&
477         (tBond->getBondDir() == Bond::UNKNOWN ||
478          ((tBond->getPropIfPresent<int>(common_properties::_UnknownStereo,
479                                         explicit_unknown_stereo) &&
480            explicit_unknown_stereo)))) {
481       squiggleBondSeen = true;
482       break;
483     }
484 
485     ++beg;
486   }
487   // Don't do any direction setting if we've seen a squiggle bond, but do mark
488   // the double bond as a crossed bond and return
489   if (!bond1 || squiggleBondSeen || doubleBondSeen) {
490     if (!doubleBondSeen) {
491       // FIX: This is the fix for #2649, but it will need to be modified once we
492       // decide to properly handle allenese
493       dblBond->setBondDir(Bond::EITHERDOUBLE);
494     }
495     return;
496   }
497 
498   Bond *bond2 = nullptr, *obond2 = nullptr;
499   boost::tie(beg, end) = mol.getAtomBonds(dblBond->getEndAtom());
500   while (beg != end) {
501     Bond *tBond = mol[*beg];
502     if (tBond == dblBond) {
503       ++beg;
504       continue;
505     }
506     if (tBond->getBondType() == Bond::SINGLE ||
507         tBond->getBondType() == Bond::AROMATIC) {
508       if (!bond2) {
509         bond2 = tBond;
510       } else if (needsDir[tBond->getIdx()]) {
511         if (singleBondCounts[tBond->getIdx()] >
512             singleBondCounts[bond2->getIdx()]) {
513           obond2 = bond2;
514           bond2 = tBond;
515         } else {
516           obond2 = tBond;
517         }
518       } else {
519         // we already had a bond2 and we don't need to set the direction
520         // on the new one, so swap.
521         obond2 = bond2;
522         bond2 = tBond;
523       }
524     } else if (tBond->getBondType() == Bond::DOUBLE) {
525       doubleBondSeen = true;
526     }
527     int explicit_unknown_stereo;
528     if (tBond->getBondType() == Bond::SINGLE &&
529         (tBond->getBondDir() == Bond::UNKNOWN ||
530          ((tBond->getPropIfPresent<int>(common_properties::_UnknownStereo,
531                                         explicit_unknown_stereo) &&
532            explicit_unknown_stereo)))) {
533       squiggleBondSeen = true;
534       break;
535     }
536     ++beg;
537   }
538   // Don't do any direction setting if we've seen a squiggle bond, but do mark
539   // the double bond as a crossed bond and return
540   if (!bond2 || squiggleBondSeen || doubleBondSeen) {
541     if (!doubleBondSeen) {
542       // FIX: This is the fix for #2649, but it will need to be modified once we
543       // decide to properly handle allenese
544       dblBond->setBondDir(Bond::EITHERDOUBLE);
545     }
546     return;
547   }
548 
549   CHECK_INVARIANT(bond1 && bond2, "no bonds found");
550 
551   bool sameTorsionDir = false;
552   if (conf) {
553     RDGeom::Point3D beginP = conf->getAtomPos(dblBond->getBeginAtomIdx());
554     RDGeom::Point3D endP = conf->getAtomPos(dblBond->getEndAtomIdx());
555     RDGeom::Point3D bond1P =
556         conf->getAtomPos(bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx()));
557     RDGeom::Point3D bond2P =
558         conf->getAtomPos(bond2->getOtherAtomIdx(dblBond->getEndAtomIdx()));
559     // check for a linear arrangement of atoms on either end:
560     bool linear = false;
561     RDGeom::Point3D p1;
562     RDGeom::Point3D p2;
563     p1 = bond1P - beginP;
564     p2 = endP - beginP;
565     if (isLinearArrangement(p1, p2)) {
566       if (!obond1) {
567         linear = true;
568       } else {
569         // one of the bonds was linear; what about the other one?
570         Bond *tBond = bond1;
571         bond1 = obond1;
572         obond1 = tBond;
573         bond1P = conf->getAtomPos(
574             bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx()));
575         p1 = bond1P - beginP;
576         if (isLinearArrangement(p1, p2)) {
577           linear = true;
578         }
579       }
580     }
581     if (!linear) {
582       p1 = bond2P - endP;
583       p2 = beginP - endP;
584       if (isLinearArrangement(p1, p2)) {
585         if (!obond2) {
586           linear = true;
587         } else {
588           Bond *tBond = bond2;
589           bond2 = obond2;
590           obond2 = tBond;
591           bond2P = conf->getAtomPos(
592               bond2->getOtherAtomIdx(dblBond->getEndAtomIdx()));
593           p1 = bond2P - beginP;
594           if (isLinearArrangement(p1, p2)) {
595             linear = true;
596           }
597         }
598       }
599     }
600     if (linear) {
601       dblBond->setBondDir(Bond::EITHERDOUBLE);
602       return;
603     }
604 
605     double ang = RDGeom::computeDihedralAngle(bond1P, beginP, endP, bond2P);
606     if (ang < M_PI / 2) {
607       sameTorsionDir = false;
608     } else {
609       sameTorsionDir = true;
610     }
611     // std::cerr << "   angle: " << ang << " sameTorsionDir: " << sameTorsionDir
612     // << "\n";
613   } else {
614     if (dblBond->getStereo() == Bond::STEREOCIS ||
615         dblBond->getStereo() == Bond::STEREOZ) {
616       sameTorsionDir = false;
617     } else if (dblBond->getStereo() == Bond::STEREOTRANS ||
618                dblBond->getStereo() == Bond::STEREOE) {
619       sameTorsionDir = true;
620     } else {
621       return;
622     }
623     // if bond1 or bond2 are not to the stereo-controlling atoms, flip
624     // our expections of the torsion dir
625     int bond1AtomIdx = bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx());
626     if (bond1AtomIdx != dblBond->getStereoAtoms()[0] &&
627         bond1AtomIdx != dblBond->getStereoAtoms()[1]) {
628       sameTorsionDir = !sameTorsionDir;
629     }
630     int bond2AtomIdx = bond2->getOtherAtomIdx(dblBond->getEndAtomIdx());
631     if (bond2AtomIdx != dblBond->getStereoAtoms()[0] &&
632         bond2AtomIdx != dblBond->getStereoAtoms()[1]) {
633       sameTorsionDir = !sameTorsionDir;
634     }
635   }
636 
637   /*
638      Time for some clarificatory text, because this gets really
639      confusing really fast.
640 
641      The dihedral angle analysis above is based on viewing things
642      with an atom order as follows:
643 
644      1
645       \
646        2 = 3
647             \
648              4
649 
650      so dihedrals > 90 correspond to sameDir=true
651 
652      however, the stereochemistry representation is
653      based on something more like this:
654 
655      2
656       \
657        1 = 3
658             \
659              4
660      (i.e. we consider the direction-setting single bonds to be
661       starting at the double-bonded atom)
662 
663   */
664   bool reverseBondDir = sameTorsionDir;
665 
666   Atom *atom1 = dblBond->getBeginAtom(), *atom2 = dblBond->getEndAtom();
667   if (needsDir[bond1->getIdx()]) {
668     for (auto bidx : singleBondNbrs[bond1->getIdx()]) {
669       // std::cerr << "       neighbor from: " << bond1->getIdx() << " " << bidx
670       //           << ": " << needsDir[bidx] << std::endl;
671       if (needsDir[bidx]) {
672         followupBonds.push_back(mol.getBondWithIdx(bidx));
673       }
674     }
675   }
676   if (needsDir[bond2->getIdx()]) {
677     for (auto bidx : singleBondNbrs[bond2->getIdx()]) {
678       // std::cerr << "       neighbor from: " << bond2->getIdx() << " " << bidx
679       //           << ": " << needsDir[bidx] << std::endl;
680       if (needsDir[bidx]) {
681         followupBonds.push_back(mol.getBondWithIdx(bidx));
682       }
683     }
684   }
685   if (!needsDir[bond1->getIdx()]) {
686     if (!needsDir[bond2->getIdx()]) {
687       // check that we agree
688     } else {
689       if (bond1->getBeginAtom() != atom1) {
690         reverseBondDir = !reverseBondDir;
691       }
692       setBondDirRelativeToAtom(bond2, atom2, bond1->getBondDir(),
693                                reverseBondDir, needsDir);
694     }
695   } else if (!needsDir[bond2->getIdx()]) {
696     if (bond2->getBeginAtom() != atom2) {
697       reverseBondDir = !reverseBondDir;
698     }
699     setBondDirRelativeToAtom(bond1, atom1, bond2->getBondDir(), reverseBondDir,
700                              needsDir);
701   } else {
702     setBondDirRelativeToAtom(bond1, atom1, Bond::ENDDOWNRIGHT, false, needsDir);
703     setBondDirRelativeToAtom(bond2, atom2, Bond::ENDDOWNRIGHT, reverseBondDir,
704                              needsDir);
705   }
706   needsDir[bond1->getIdx()] = 0;
707   needsDir[bond2->getIdx()] = 0;
708   if (obond1 && needsDir[obond1->getIdx()]) {
709     setBondDirRelativeToAtom(obond1, atom1, bond1->getBondDir(),
710                              bond1->getBeginAtom() == atom1, needsDir);
711     needsDir[obond1->getIdx()] = 0;
712   }
713   if (obond2 && needsDir[obond2->getIdx()]) {
714     setBondDirRelativeToAtom(obond2, atom2, bond2->getBondDir(),
715                              bond2->getBeginAtom() == atom2, needsDir);
716     needsDir[obond2->getIdx()] = 0;
717   }
718 #if 0
719   std::cerr << "  1:" << bond1->getIdx() << " ";
720   if (obond1)
721     std::cerr << obond1->getIdx() << std::endl;
722   else
723     std::cerr << "N/A" << std::endl;
724   std::cerr << "  2:" << bond2->getIdx() << " ";
725   if (obond2)
726     std::cerr << obond2->getIdx() << std::endl;
727   else
728     std::cerr << "N/A" << std::endl;
729   std::cerr << "**********************\n";
730   std::cerr << "**********************\n";
731   std::cerr << "**********************\n";
732 #endif
733   for (Bond *oDblBond : followupBonds) {
734     // std::cerr << "FOLLOWUP: " << oDblBond->getIdx() << " "
735     //           << needsDir[oDblBond->getIdx()] << std::endl;
736     updateDoubleBondNeighbors(mol, oDblBond, conf, needsDir, singleBondCounts,
737                               singleBondNbrs);
738   }
739 }
740 
isBondCandidateForStereo(const Bond * bond)741 bool isBondCandidateForStereo(const Bond *bond) {
742   PRECONDITION(bond, "no bond");
743   if (bond->getBondType() == Bond::DOUBLE &&
744       bond->getStereo() != Bond::STEREOANY &&
745       bond->getBondDir() != Bond::EITHERDOUBLE &&
746       bond->getBeginAtom()->getDegree() > 1 &&
747       bond->getEndAtom()->getDegree() > 1 &&
748       shouldDetectDoubleBondStereo(bond)) {
749     return true;
750   }
751   return false;
752 }
753 
findHighestCIPNeighbor(const Atom * atom,const Atom * skipAtom)754 const Atom *findHighestCIPNeighbor(const Atom *atom, const Atom *skipAtom) {
755   PRECONDITION(atom, "bad atom");
756 
757   unsigned bestCipRank = 0;
758   const Atom *bestCipRankedAtom = nullptr;
759   const auto &mol = atom->getOwningMol();
760 
761   for (const auto &index :
762        boost::make_iterator_range(mol.getAtomNeighbors(atom))) {
763     const auto neighbor = mol[index];
764     if (neighbor == skipAtom) {
765       continue;
766     }
767     unsigned cip = 0;
768     if (!neighbor->getPropIfPresent(common_properties::_CIPRank, cip)) {
769       // If at least one of the atoms doesn't have a CIP rank, the highest rank
770       // does not make sense, so return a nullptr.
771       return nullptr;
772     } else if (cip > bestCipRank || bestCipRankedAtom == nullptr) {
773       bestCipRank = cip;
774       bestCipRankedAtom = neighbor;
775     } else if (cip == bestCipRank) {
776       // This also doesn't make sense if there is a tie (if that's possible).
777       // We still keep the best CIP rank in case something better comes around
778       // (also not sure if that's possible).
779       BOOST_LOG(rdWarningLog)
780           << "Warning: duplicate CIP ranks found in findHighestCIPNeighbor()"
781           << std::endl;
782       bestCipRankedAtom = nullptr;
783     }
784   }
785   return bestCipRankedAtom;
786 }
787 
788 }  // namespace
789 
790 namespace Chirality {
791 typedef std::pair<int, int> INT_PAIR;
792 typedef std::vector<INT_PAIR> INT_PAIR_VECT;
793 typedef std::vector<INT_PAIR>::iterator INT_PAIR_VECT_I;
794 typedef std::vector<INT_PAIR>::const_iterator INT_PAIR_VECT_CI;
795 
796 typedef INT_VECT CIP_ENTRY;
797 typedef std::vector<CIP_ENTRY> CIP_ENTRY_VECT;
798 
799 template <typename T>
debugVect(const std::vector<T> arg)800 void debugVect(const std::vector<T> arg) {
801   typename std::vector<T>::const_iterator viIt;
802   std::stringstream outS;
803   for (viIt = arg.begin(); viIt != arg.end(); viIt++) {
804     outS << *viIt << " ";
805   }
806   BOOST_LOG(rdDebugLog) << outS.str() << std::endl;
807 }
808 
809 // --------------------------------------------------
810 //
811 // Calculates chiral invariants for the atoms of a molecule
812 //  These are based on Labute's proposal in:
813 //  "An Efficient Algorithm for the Determination of Topological
814 //   RS Chirality" Journal of the CCG (1996)
815 //
816 // --------------------------------------------------
buildCIPInvariants(const ROMol & mol,DOUBLE_VECT & res)817 void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) {
818   PRECONDITION(res.size() >= mol.getNumAtoms(), "res vect too small");
819   int atsSoFar = 0;
820   //
821   // NOTE:
822   // If you make modifications to this, keep in mind that it is
823   // essential that the initial comparison of ranks behave properly.
824   // So, though it seems like it would makes sense to include
825   // information about the number of Hs (or charge, etc) in the CIP
826   // invariants, this will result in bad rankings.  For example, in
827   // this molecule: OC[C@H](C)O, including the number of Hs would
828   // cause the methyl group (atom 3) to be ranked higher than the CH2
829   // connected to O (atom 1).  This is totally wrong.
830   //
831   // We also don't include any pre-existing stereochemistry information.
832   // Though R and S assignments do factor in to the priorities of atoms,
833   // we're starting here from scratch and we'll let the R and S stuff
834   // be taken into account during the iterations.
835   //
836   for (ROMol::ConstAtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
837        ++atIt) {
838     const unsigned short nMassBits = 10;
839     const unsigned short maxMass = 1 << nMassBits;
840     Atom const *atom = *atIt;
841     unsigned long invariant = 0;
842     int num = atom->getAtomicNum() % 128;
843     // get an int with the deviation in the mass from the default:
844     int mass = 0;
845     if (atom->getIsotope()) {
846       mass =
847           atom->getIsotope() -
848           PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum());
849       if (mass >= 0) {
850         mass += 1;
851       }
852     }
853     mass += maxMass / 2;
854     if (mass < 0) {
855       mass = 0;
856     } else {
857       mass = mass % maxMass;
858     }
859 
860 #if 0
861         // NOTE: the inclusion of hybridization in the invariant (as
862         // suggested in the original paper), leads to the situation
863         // that
864         //   C[C@@](O)(C=C)C(C)CC
865         // and
866         //   C[C@@](O)(C=C)C(C)CO
867         // are assigned S chirality even though the rest of the world
868         // seems to agree that they ought to be R (atom 3, sp2, is ranked
869         // higher than atom 5, sp3, no matter what their environments)
870         int hyb=0;
871         switch(atom->getHybridization()) {
872         case Atom::SP: hyb=6;break;
873         case Atom::SP2: hyb=5;break;
874         case Atom::SP3: hyb=1;break;
875         case Atom::SP3D: hyb=3;break;
876         case Atom::SP3D2: hyb=2;break;
877         default: break;
878         }
879 #endif
880 
881     invariant = num;  // 7 bits here
882     invariant = (invariant << nMassBits) | mass;
883 
884     int mapnum = -1;
885     atom->getPropIfPresent(common_properties::molAtomMapNumber, mapnum);
886     mapnum = (mapnum + 1) % 1024;  // increment to allow map numbers of zero
887                                    // (though that would be stupid)
888     invariant = (invariant << 10) | mapnum;
889 
890     res[atsSoFar++] = invariant;
891   }
892 }
893 
iterateCIPRanks(const ROMol & mol,const DOUBLE_VECT & invars,UINT_VECT & ranks,bool seedWithInvars)894 void iterateCIPRanks(const ROMol &mol, const DOUBLE_VECT &invars,
895                      UINT_VECT &ranks, bool seedWithInvars) {
896   PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size");
897   PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size");
898 
899   unsigned int numAtoms = mol.getNumAtoms();
900   CIP_ENTRY_VECT cipEntries(numAtoms);
901   for (auto &vec : cipEntries) {
902     vec.reserve(16);
903   }
904 #ifdef VERBOSE_CANON
905   BOOST_LOG(rdDebugLog) << "invariants:" << std::endl;
906   for (unsigned int i = 0; i < numAtoms; i++) {
907     BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl;
908   }
909 #endif
910 
911   // rank those:
912   Rankers::rankVect(invars, ranks);
913 #ifdef VERBOSE_CANON
914   BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl;
915   for (unsigned int i = 0; i < numAtoms; ++i) {
916     BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl;
917   }
918 #endif
919   // Start each atom's rank vector with its atomic number:
920   //  Note: in general one should avoid the temptation to
921   //  use invariants here, those lead to incorrect answers
922   for (unsigned int i = 0; i < numAtoms; i++) {
923     if (seedWithInvars) {
924       cipEntries[i].push_back(static_cast<int>(invars[i]));
925     } else {
926       cipEntries[i].push_back(mol[i]->getAtomicNum());
927       cipEntries[i].push_back(static_cast<int>(ranks[i]));
928     }
929   }
930 
931   // Loop until either:
932   //   1) all classes are uniquified
933   //   2) the number of ranks doesn't change from one iteration to
934   //      the next
935   //   3) we've gone through maxIts times
936   //      maxIts is calculated by dividing the number of atoms
937   //      by 2. That's a pessimal version of the
938   //      maximum number of steps required for two atoms to
939   //      "feel" each other (each influences one additional
940   //      neighbor shell per iteration).
941   unsigned int maxIts = numAtoms / 2 + 1;
942   unsigned int numIts = 0;
943   int lastNumRanks = -1;
944   unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
945   std::vector<unsigned int> counts(ranks.size());
946   std::vector<unsigned int> updatedNbrIdxs;
947   updatedNbrIdxs.reserve(8);
948   while (numRanks < numAtoms && numIts < maxIts &&
949          (lastNumRanks < 0 ||
950           static_cast<unsigned int>(lastNumRanks) < numRanks)) {
951     unsigned int longestEntry = 0;
952     // ----------------------------------------------------
953     //
954     // for each atom, get a sorted list of its neighbors' ranks:
955     //
956     for (unsigned int index = 0; index < numAtoms; ++index) {
957       // Note: counts is cleaned up when we drain into cipEntries.
958       updatedNbrIdxs.clear();
959 
960       // start by pushing on our neighbors' ranks:
961       ROMol::OEDGE_ITER beg, end;
962       boost::tie(beg, end) = mol.getAtomBonds(mol[index]);
963       while (beg != end) {
964         const Bond *bond = mol[*beg];
965         ++beg;
966         unsigned int nbrIdx = bond->getOtherAtomIdx(index);
967         updatedNbrIdxs.push_back(nbrIdx);
968 
969         // put the neighbor in 2N times where N is the bond order as a double.
970         // this is to treat aromatic linkages on fair footing. i.e. at least in
971         // the first iteration --c(:c):c and --C(=C)-C should look the same.
972         // this was part of issue 3009911
973 
974         // a special case for chiral phosphorus compounds
975         // (this was leading to incorrect assignment of R/S labels ):
976         bool isChiralPhosphorusSpecialCase = false;
977         if (bond->getBondType() == Bond::DOUBLE) {
978           const Atom *nbr = mol[nbrIdx];
979           if (nbr->getAtomicNum() == 15) {
980             unsigned int nbrDeg = nbr->getDegree();
981             isChiralPhosphorusSpecialCase = nbrDeg == 3 || nbrDeg == 4;
982           }
983         };
984 
985         // general justification of this is:
986         // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
987         // Multiple-Bond Unsaturation and Aromaticity". It contains several
988         // conventions of which convention (b) is the one applying here:
989         // "(b) Contributions by d orbitals to bonds of quadriligant atoms are
990         // neglected."
991         // FIX: this applies to more than just P
992         if (isChiralPhosphorusSpecialCase) {
993           counts[nbrIdx] += 1;
994         } else {
995           counts[nbrIdx] += getTwiceBondType(*bond);
996         }
997       }
998 
999       // For each of our neighbors' ranks weighted by bond type, copy it N times
1000       // to our cipEntry in reverse rank order, where N is the weight.
1001       if (updatedNbrIdxs.size() > 1) {  // compare vs 1 for performance.
1002         std::sort(std::begin(updatedNbrIdxs), std::end(updatedNbrIdxs),
1003                   [&ranks](unsigned int idx1, unsigned int idx2) {
1004                     return ranks[idx1] > ranks[idx2];
1005                   });
1006       }
1007       auto &cipEntry = cipEntries[index];
1008       for (auto nbrIdx : updatedNbrIdxs) {
1009         unsigned int count = counts[nbrIdx];
1010         cipEntry.insert(cipEntry.end(), count, ranks[nbrIdx] + 1);
1011         counts[nbrIdx] = 0;
1012       }
1013       // add a zero for each coordinated H as long as we're not a query atom
1014       if (!mol[index]->hasQuery()) {
1015         cipEntry.insert(cipEntry.end(), mol[index]->getTotalNumHs(), 0);
1016       }
1017 
1018       if (cipEntry.size() > longestEntry) {
1019         longestEntry = rdcast<unsigned int>(cipEntry.size());
1020       }
1021     }
1022     // ----------------------------------------------------
1023     //
1024     // pad the entries so that we compare rounds to themselves:
1025     //
1026     for (unsigned int index = 0; index < numAtoms; ++index) {
1027       auto sz = rdcast<unsigned int>(cipEntries[index].size());
1028       if (sz < longestEntry) {
1029         cipEntries[index].insert(cipEntries[index].end(), longestEntry - sz,
1030                                  -1);
1031       }
1032     }
1033     // ----------------------------------------------------
1034     //
1035     // sort the new ranks and update the list of active indices:
1036     //
1037     lastNumRanks = numRanks;
1038 
1039     Rankers::rankVect(cipEntries, ranks);
1040     numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
1041 
1042     // now truncate each vector and stick the rank at the end
1043     for (unsigned int i = 0; i < numAtoms; ++i) {
1044       cipEntries[i][numIts + 1] = ranks[i];
1045       cipEntries[i].erase(cipEntries[i].begin() + numIts + 2,
1046                           cipEntries[i].end());
1047     }
1048 
1049     ++numIts;
1050 #ifdef VERBOSE_CANON
1051     BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl;
1052     for (unsigned int i = 0; i < numAtoms; i++) {
1053       BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > ";
1054       debugVect(cipEntries[i]);
1055     }
1056 #endif
1057   }
1058 }
1059 // Figure out the CIP ranks for the atoms of a molecule
assignAtomCIPRanks(const ROMol & mol,UINT_VECT & ranks)1060 void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) {
1061   PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()),
1062                "bad ranks size");
1063   if (!ranks.size()) {
1064     ranks.resize(mol.getNumAtoms());
1065   }
1066   unsigned int numAtoms = mol.getNumAtoms();
1067 #ifndef USE_NEW_STEREOCHEMISTRY
1068   // get the initial invariants:
1069   DOUBLE_VECT invars(numAtoms, 0);
1070   buildCIPInvariants(mol, invars);
1071   iterateCIPRanks(mol, invars, ranks, false);
1072 #else
1073   Canon::chiralRankMolAtoms(mol, ranks);
1074 #endif
1075 
1076   // copy the ranks onto the atoms:
1077   for (unsigned int i = 0; i < numAtoms; ++i) {
1078     mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1);
1079   }
1080 }
1081 
1082 // construct a vector with <atomIdx,direction> pairs for
1083 // neighbors of a given atom.  This list will only be
1084 // non-empty if at least one of the bonds has its direction
1085 // set.
findAtomNeighborDirHelper(const ROMol & mol,const Atom * atom,const Bond * refBond,UINT_VECT & ranks,INT_PAIR_VECT & neighbors,bool & hasExplicitUnknownStereo)1086 void findAtomNeighborDirHelper(const ROMol &mol, const Atom *atom,
1087                                const Bond *refBond, UINT_VECT &ranks,
1088                                INT_PAIR_VECT &neighbors,
1089                                bool &hasExplicitUnknownStereo) {
1090   PRECONDITION(atom, "bad atom");
1091   PRECONDITION(refBond, "bad bond");
1092 
1093   bool seenDir = false;
1094   ROMol::OEDGE_ITER beg, end;
1095   boost::tie(beg, end) = mol.getAtomBonds(atom);
1096   while (beg != end) {
1097     const Bond *bond = mol[*beg];
1098     // check whether this bond is explicitly set to have unknown stereo
1099     if (!hasExplicitUnknownStereo) {
1100       int explicit_unknown_stereo;
1101       if (bond->getBondDir() == Bond::UNKNOWN  // there's a squiggle bond
1102           || (bond->getPropIfPresent<int>(common_properties::_UnknownStereo,
1103                                           explicit_unknown_stereo) &&
1104               explicit_unknown_stereo)) {
1105         hasExplicitUnknownStereo = true;
1106       }
1107     }
1108 
1109     Bond::BondDir dir = bond->getBondDir();
1110     if (bond->getIdx() != refBond->getIdx()) {
1111       if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
1112         seenDir = true;
1113         // If we're considering the bond "backwards", (i.e. from end
1114         // to beginning, reverse the effective direction:
1115         if (atom != bond->getBeginAtom()) {
1116           if (dir == Bond::ENDDOWNRIGHT) {
1117             dir = Bond::ENDUPRIGHT;
1118           } else {
1119             dir = Bond::ENDDOWNRIGHT;
1120           }
1121         }
1122       }
1123       Atom *nbrAtom = bond->getOtherAtom(atom);
1124       neighbors.push_back(std::make_pair(nbrAtom->getIdx(), dir));
1125     }
1126     ++beg;
1127   }
1128   if (!seenDir) {
1129     neighbors.clear();
1130   } else {
1131     if (neighbors.size() == 2 &&
1132         ranks[neighbors[0].first] == ranks[neighbors[1].first]) {
1133       // the two substituents are identical, no stereochemistry here:
1134       neighbors.clear();
1135     } else {
1136       // it's possible that direction was set only one of the bonds, set the
1137       // other
1138       // bond's direction to be reversed:
1139       if (neighbors[0].second != Bond::ENDDOWNRIGHT &&
1140           neighbors[0].second != Bond::ENDUPRIGHT) {
1141         CHECK_INVARIANT(neighbors.size() > 1, "too few neighbors");
1142         neighbors[0].second = neighbors[1].second == Bond::ENDDOWNRIGHT
1143                                   ? Bond::ENDUPRIGHT
1144                                   : Bond::ENDDOWNRIGHT;
1145       } else if (neighbors.size() > 1 &&
1146                  neighbors[1].second != Bond::ENDDOWNRIGHT &&
1147                  neighbors[1].second != Bond::ENDUPRIGHT) {
1148         neighbors[1].second = neighbors[0].second == Bond::ENDDOWNRIGHT
1149                                   ? Bond::ENDUPRIGHT
1150                                   : Bond::ENDDOWNRIGHT;
1151       }
1152     }
1153   }
1154 }
1155 
1156 // find the neighbors for an atoms that are not connected by single bond that is
1157 // not refBond
1158 // if checkDir is true only neighbor atoms with bonds marked with a direction
1159 // will be returned
findAtomNeighborsHelper(const ROMol & mol,const Atom * atom,const Bond * refBond,UINT_VECT & neighbors,bool checkDir=false,bool includeAromatic=false)1160 void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom,
1161                              const Bond *refBond, UINT_VECT &neighbors,
1162                              bool checkDir = false,
1163                              bool includeAromatic = false) {
1164   PRECONDITION(atom, "bad atom");
1165   PRECONDITION(refBond, "bad bond");
1166   neighbors.clear();
1167   ROMol::OEDGE_ITER beg, end;
1168   boost::tie(beg, end) = mol.getAtomBonds(atom);
1169   while (beg != end) {
1170     const Bond *bond = mol[*beg];
1171     Bond::BondDir dir = bond->getBondDir();
1172     if ((bond->getBondType() == Bond::SINGLE ||
1173          (includeAromatic && bond->getBondType() == Bond::AROMATIC)) &&
1174         bond->getIdx() != refBond->getIdx()) {
1175       if (checkDir) {
1176         if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) {
1177           ++beg;
1178           continue;
1179         }
1180       }
1181       Atom *nbrAtom = bond->getOtherAtom(atom);
1182       neighbors.push_back(nbrAtom->getIdx());
1183     }
1184     ++beg;
1185   }
1186 }
1187 
1188 // conditions for an atom to be a candidate for ring stereochem:
1189 //   1) two non-ring neighbors that have different ranks
1190 //   2) one non-ring neighbor and two ring neighbors (the ring neighbors will
1191 //      have the same rank)
1192 //   3) four ring neighbors with three different ranks
1193 //   4) three ring neighbors with two different ranks
1194 //     example for this last one: C[C@H]1CC2CCCC3CCCC(C1)[C@@H]23
1195 // Note that N atoms are only candidates if they are in a 3-ring
atomIsCandidateForRingStereochem(const ROMol & mol,const Atom * atom)1196 bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) {
1197   PRECONDITION(atom, "bad atom");
1198   bool res = false;
1199   std::set<unsigned int> nbrRanks;
1200   if (!atom->getPropIfPresent(common_properties::_ringStereochemCand, res)) {
1201     const RingInfo *ringInfo = mol.getRingInfo();
1202     if (ringInfo->isInitialized() && ringInfo->numAtomRings(atom->getIdx())) {
1203       // three-coordinate N additional requirements:
1204       //   in a ring of size 3  (from InChI)
1205       // OR
1206       //   a bridgehead (RDKit extension)
1207       if (atom->getAtomicNum() == 7 && atom->getDegree() == 3 &&
1208           !ringInfo->isAtomInRingOfSize(atom->getIdx(), 3) &&
1209           !queryIsAtomBridgehead(atom)) {
1210         return false;
1211       }
1212       ROMol::OEDGE_ITER beg, end;
1213       boost::tie(beg, end) = mol.getAtomBonds(atom);
1214       std::vector<const Atom *> nonRingNbrs;
1215       std::vector<const Atom *> ringNbrs;
1216       while (beg != end) {
1217         const Bond *bond = mol[*beg];
1218         if (!ringInfo->numBondRings(bond->getIdx())) {
1219           nonRingNbrs.push_back(bond->getOtherAtom(atom));
1220         } else {
1221           const Atom *nbr = bond->getOtherAtom(atom);
1222           ringNbrs.push_back(nbr);
1223           unsigned int rnk = 0;
1224           nbr->getPropIfPresent(common_properties::_CIPRank, rnk);
1225           nbrRanks.insert(rnk);
1226         }
1227         ++beg;
1228       }
1229       unsigned int rank1 = 0, rank2 = 0;
1230       switch (nonRingNbrs.size()) {
1231         case 2:
1232           if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRank,
1233                                                rank1) &&
1234               nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRank,
1235                                                rank2)) {
1236             if (rank1 == rank2) {
1237               res = false;
1238             } else {
1239               res = true;
1240             }
1241           }
1242           break;
1243         case 1:
1244           if (ringNbrs.size() >= 2) {
1245             res = true;
1246           }
1247           break;
1248         case 0:
1249           if (ringNbrs.size() == 4 && nbrRanks.size() == 3) {
1250             res = true;
1251           } else if (ringNbrs.size() == 3 && nbrRanks.size() == 2) {
1252             res = true;
1253           } else {
1254             res = false;
1255           }
1256           break;
1257         default:
1258           res = false;
1259       }
1260     }
1261     atom->setProp(common_properties::_ringStereochemCand, res, 1);
1262   }
1263   return res;
1264 }
1265 
1266 // finds all possible chiral special cases.
1267 // at the moment this is just candidates for ring stereochemistry
findChiralAtomSpecialCases(ROMol & mol,boost::dynamic_bitset<> & possibleSpecialCases)1268 void findChiralAtomSpecialCases(ROMol &mol,
1269                                 boost::dynamic_bitset<> &possibleSpecialCases) {
1270   PRECONDITION(possibleSpecialCases.size() >= mol.getNumAtoms(),
1271                "bit vector too small");
1272   possibleSpecialCases.reset();
1273   if (!mol.getRingInfo()->isInitialized()) {
1274     VECT_INT_VECT sssrs;
1275     MolOps::symmetrizeSSSR(mol, sssrs);
1276   }
1277   boost::dynamic_bitset<> atomsSeen(mol.getNumAtoms());
1278   boost::dynamic_bitset<> atomsUsed(mol.getNumAtoms());
1279   boost::dynamic_bitset<> bondsSeen(mol.getNumBonds());
1280 
1281   for (ROMol::AtomIterator ait = mol.beginAtoms(); ait != mol.endAtoms();
1282        ++ait) {
1283     const Atom *atom = *ait;
1284     if (atomsSeen[atom->getIdx()]) {
1285       continue;
1286     }
1287     if (atom->getChiralTag() == Atom::CHI_UNSPECIFIED ||
1288         atom->hasProp(common_properties::_CIPCode) ||
1289         !mol.getRingInfo()->numAtomRings(atom->getIdx()) ||
1290         !atomIsCandidateForRingStereochem(mol, atom)) {
1291       continue;
1292     }
1293     // do a BFS from this ring atom along ring bonds and find other
1294     // stereochemistry candidates.
1295     std::list<const Atom *> nextAtoms;
1296     // start with finding viable neighbors
1297     ROMol::OEDGE_ITER beg, end;
1298     boost::tie(beg, end) = mol.getAtomBonds(atom);
1299     while (beg != end) {
1300       unsigned int bidx = mol[*beg]->getIdx();
1301       if (!bondsSeen[bidx]) {
1302         bondsSeen.set(bidx);
1303         if (mol.getRingInfo()->numBondRings(bidx)) {
1304           const Atom *oatom = mol[*beg]->getOtherAtom(atom);
1305           if (!atomsSeen[oatom->getIdx()]) {
1306             nextAtoms.push_back(oatom);
1307             atomsUsed.set(oatom->getIdx());
1308           }
1309         }
1310       }
1311       ++beg;
1312     }
1313     INT_VECT ringStereoAtoms(0);
1314     if (!nextAtoms.empty()) {
1315       atom->getPropIfPresent(common_properties::_ringStereoAtoms,
1316                              ringStereoAtoms);
1317     }
1318 
1319     while (!nextAtoms.empty()) {
1320       const Atom *ratom = nextAtoms.front();
1321       nextAtoms.pop_front();
1322       atomsSeen.set(ratom->getIdx());
1323       if (ratom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1324           !ratom->hasProp(common_properties::_CIPCode) &&
1325           atomIsCandidateForRingStereochem(mol, ratom)) {
1326         int same = (ratom->getChiralTag() == atom->getChiralTag()) ? 1 : -1;
1327         ringStereoAtoms.push_back(same * (ratom->getIdx() + 1));
1328         INT_VECT oringatoms(0);
1329         ratom->getPropIfPresent(common_properties::_ringStereoAtoms,
1330                                 oringatoms);
1331         oringatoms.push_back(same * (atom->getIdx() + 1));
1332         ratom->setProp(common_properties::_ringStereoAtoms, oringatoms, true);
1333         possibleSpecialCases.set(ratom->getIdx());
1334         possibleSpecialCases.set(atom->getIdx());
1335       }
1336       // now push this atom's neighbors
1337       boost::tie(beg, end) = mol.getAtomBonds(ratom);
1338       while (beg != end) {
1339         unsigned int bidx = mol[*beg]->getIdx();
1340         if (!bondsSeen[bidx]) {
1341           bondsSeen.set(bidx);
1342           if (mol.getRingInfo()->numBondRings(bidx)) {
1343             const Atom *oatom = mol[*beg]->getOtherAtom(ratom);
1344             if (!atomsSeen[oatom->getIdx()] && !atomsUsed[oatom->getIdx()]) {
1345               nextAtoms.push_back(oatom);
1346               atomsUsed.set(oatom->getIdx());
1347             }
1348           }
1349         }
1350         ++beg;
1351       }
1352     }  // end of BFS
1353     if (ringStereoAtoms.size() != 0) {
1354       atom->setProp(common_properties::_ringStereoAtoms, ringStereoAtoms, true);
1355       // because we're only going to hit each ring atom once, the first atom we
1356       // encounter in a ring is going to end up with all the other atoms set as
1357       // stereoAtoms, but each of them will only have the first atom present. We
1358       // need to fix that. because the traverse from the first atom only
1359       // followed ring bonds, these things are all by definition in one ring
1360       // system. (Q: is this true if there's a spiro center in there?)
1361       INT_VECT same(mol.getNumAtoms(), 0);
1362       for (auto ringAtomEntry : ringStereoAtoms) {
1363         int ringAtomIdx =
1364             ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1;
1365         same[ringAtomIdx] = ringAtomEntry;
1366       }
1367       for (INT_VECT_CI rae = ringStereoAtoms.begin();
1368            rae != ringStereoAtoms.end(); ++rae) {
1369         int ringAtomEntry = *rae;
1370         int ringAtomIdx =
1371             ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1;
1372         INT_VECT lringatoms(0);
1373         mol.getAtomWithIdx(ringAtomIdx)
1374             ->getPropIfPresent(common_properties::_ringStereoAtoms, lringatoms);
1375         CHECK_INVARIANT(lringatoms.size() > 0, "no other ring atoms found.");
1376         for (auto orae = rae + 1; orae != ringStereoAtoms.end(); ++orae) {
1377           int oringAtomEntry = *orae;
1378           int oringAtomIdx =
1379               oringAtomEntry < 0 ? -oringAtomEntry - 1 : oringAtomEntry - 1;
1380           int theseDifferent = (ringAtomEntry < 0) ^ (oringAtomEntry < 0);
1381           lringatoms.push_back(theseDifferent ? -(oringAtomIdx + 1)
1382                                               : (oringAtomIdx + 1));
1383           INT_VECT olringatoms(0);
1384           mol.getAtomWithIdx(oringAtomIdx)
1385               ->getPropIfPresent(common_properties::_ringStereoAtoms,
1386                                  olringatoms);
1387           CHECK_INVARIANT(olringatoms.size() > 0, "no other ring atoms found.");
1388           olringatoms.push_back(theseDifferent ? -(ringAtomIdx + 1)
1389                                                : (ringAtomIdx + 1));
1390           mol.getAtomWithIdx(oringAtomIdx)
1391               ->setProp(common_properties::_ringStereoAtoms, olringatoms);
1392         }
1393         mol.getAtomWithIdx(ringAtomIdx)
1394             ->setProp(common_properties::_ringStereoAtoms, lringatoms);
1395       }
1396 
1397     } else {
1398       possibleSpecialCases.reset(atom->getIdx());
1399     }
1400     atomsSeen.set(atom->getIdx());
1401   }
1402 }
1403 
isAtomPotentialChiralCenter(const Atom * atom,const ROMol & mol,const UINT_VECT & ranks,Chirality::INT_PAIR_VECT & nbrs)1404 std::pair<bool, bool> isAtomPotentialChiralCenter(
1405     const Atom *atom, const ROMol &mol, const UINT_VECT &ranks,
1406     Chirality::INT_PAIR_VECT &nbrs) {
1407   // loop over all neighbors and form a decorated list of their
1408   // ranks:
1409   bool legalCenter = true;
1410   bool hasDupes = false;
1411 
1412   if (atom->getTotalDegree() > 4) {
1413     // we only know tetrahedral chirality
1414     legalCenter = false;
1415   } else {
1416     // cases we can exclude immediately without having to look at neighbors
1417     // ranks:
1418     if (atom->getTotalDegree() < 3) {
1419       legalCenter = false;
1420     } else if (atom->getDegree() < 3 &&
1421                (atom->getAtomicNum() != 15 && atom->getAtomicNum() != 33)) {
1422       // less than three neighbors is never stereogenic
1423       // unless it is a phosphine/arsine with implicit H (this is from InChI)
1424       legalCenter = false;
1425     } else if (atom->getDegree() == 3 && atom->getTotalNumHs() != 1) {
1426       // assume something that's really three coordinate isn't potentially
1427       // chiral, then look for exceptions
1428       legalCenter = false;
1429       if (atom->getAtomicNum() == 7) {
1430         // three-coordinate N additional requirements:
1431         //   in a ring of size 3  (from InChI)
1432         // OR
1433         /// is a bridgehead atom (RDKit extension)
1434         if (mol.getRingInfo()->isAtomInRingOfSize(atom->getIdx(), 3) ||
1435             queryIsAtomBridgehead(atom)) {
1436           legalCenter = true;
1437         }
1438       } else if (atom->getAtomicNum() == 15 || atom->getAtomicNum() == 33) {
1439         // three-coordinate phosphines and arsines
1440         // are always treated as stereogenic even with H atom neighbors.
1441         // (this is from InChI)
1442         legalCenter = true;
1443       } else if (atom->getAtomicNum() == 16 || atom->getAtomicNum() == 34) {
1444         if (atom->getExplicitValence() == 4 ||
1445             (atom->getExplicitValence() == 3 && atom->getFormalCharge() == 1)) {
1446           // we also accept sulfur or selenium with either a positive charge
1447           // or a double bond:
1448           legalCenter = true;
1449         }
1450       }
1451     }
1452 
1453     if (legalCenter) {
1454       boost::dynamic_bitset<> codesSeen(mol.getNumAtoms());
1455       ROMol::OEDGE_ITER beg, end;
1456       boost::tie(beg, end) = mol.getAtomBonds(atom);
1457       while (beg != end) {
1458         unsigned int otherIdx = mol[*beg]->getOtherAtom(atom)->getIdx();
1459         CHECK_INVARIANT(ranks[otherIdx] < mol.getNumAtoms(),
1460                         "CIP rank higher than the number of atoms.");
1461         // watch for neighbors with duplicate ranks, which would mean
1462         // that we cannot be chiral:
1463         if (codesSeen[ranks[otherIdx]]) {
1464           // we've already seen this code, it's a dupe
1465           hasDupes = true;
1466           break;
1467         }
1468         codesSeen[ranks[otherIdx]] = 1;
1469         nbrs.push_back(std::make_pair(ranks[otherIdx], mol[*beg]->getIdx()));
1470         ++beg;
1471       }
1472     }
1473   }
1474   return std::make_pair(legalCenter, hasDupes);
1475 }
1476 
1477 // returns a pair:
1478 //   1) are there unassigned stereoatoms
1479 //   2) did we assign any?
assignAtomChiralCodes(ROMol & mol,UINT_VECT & ranks,bool flagPossibleStereoCenters)1480 std::pair<bool, bool> assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks,
1481                                             bool flagPossibleStereoCenters) {
1482   PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()),
1483                "bad rank vector size");
1484   bool atomChanged = false;
1485   unsigned int unassignedAtoms = 0;
1486 
1487   // ------------------
1488   // now loop over each atom and, if it's marked as chiral,
1489   //  figure out the appropriate CIP label:
1490   for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1491        ++atIt) {
1492     Atom *atom = *atIt;
1493     Atom::ChiralType tag = atom->getChiralTag();
1494 
1495     // only worry about this atom if it has a marked chirality
1496     // we understand:
1497     if (flagPossibleStereoCenters ||
1498         (tag != Atom::CHI_UNSPECIFIED && tag != Atom::CHI_OTHER)) {
1499       if (atom->hasProp(common_properties::_CIPCode)) {
1500         continue;
1501       }
1502 
1503       if (!ranks.size()) {
1504         //  if we need to, get the "CIP" ranking of each atom:
1505         assignAtomCIPRanks(mol, ranks);
1506       }
1507       Chirality::INT_PAIR_VECT nbrs;
1508       bool legalCenter, hasDupes;
1509       // note that hasDupes is only evaluated if legalCenter==true
1510       boost::tie(legalCenter, hasDupes) =
1511           isAtomPotentialChiralCenter(atom, mol, ranks, nbrs);
1512       if (legalCenter) {
1513         ++unassignedAtoms;
1514       }
1515       if (legalCenter && !hasDupes && flagPossibleStereoCenters) {
1516         atom->setProp(common_properties::_ChiralityPossible, 1);
1517       }
1518 
1519       if (legalCenter && !hasDupes && tag != Atom::CHI_UNSPECIFIED &&
1520           tag != Atom::CHI_OTHER) {
1521         // stereochem is possible and we have no duplicate neighbors, assign
1522         // a CIP code:
1523         atomChanged = true;
1524         --unassignedAtoms;
1525 
1526         // sort the list of neighbors by their CIP ranks:
1527         std::sort(nbrs.begin(), nbrs.end(), Rankers::pairLess<int, int>());
1528 
1529         // collect the list of neighbor indices:
1530         std::list<int> nbrIndices;
1531         for (Chirality::INT_PAIR_VECT_CI nbrIt = nbrs.begin();
1532              nbrIt != nbrs.end(); ++nbrIt) {
1533           nbrIndices.push_back((*nbrIt).second);
1534         }
1535         // ask the atom how many swaps we have to make:
1536         int nSwaps = atom->getPerturbationOrder(nbrIndices);
1537 
1538         // if the atom has 3 neighbors and a hydrogen, add a swap:
1539         if (nbrIndices.size() == 3 && atom->getTotalNumHs() == 1) {
1540           ++nSwaps;
1541         }
1542 
1543         // if that number is odd, we'll change our chirality:
1544         if (nSwaps % 2) {
1545           if (tag == Atom::CHI_TETRAHEDRAL_CCW) {
1546             tag = Atom::CHI_TETRAHEDRAL_CW;
1547           } else {
1548             tag = Atom::CHI_TETRAHEDRAL_CCW;
1549           }
1550         }
1551         // now assign the CIP code:
1552         std::string cipCode;
1553         if (tag == Atom::CHI_TETRAHEDRAL_CCW) {
1554           cipCode = "S";
1555         } else {
1556           cipCode = "R";
1557         }
1558         atom->setProp(common_properties::_CIPCode, cipCode);
1559       }
1560     }
1561   }
1562   return std::make_pair((unassignedAtoms > 0), atomChanged);
1563 }
1564 
1565 // returns a pair:
1566 //   1) are there unassigned stereo bonds?
1567 //   2) did we assign any?
assignBondStereoCodes(ROMol & mol,UINT_VECT & ranks)1568 std::pair<bool, bool> assignBondStereoCodes(ROMol &mol, UINT_VECT &ranks) {
1569   PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()),
1570                "bad rank vector size");
1571   bool assignedABond = false;
1572   unsigned int unassignedBonds = 0;
1573   boost::dynamic_bitset<> bondsToClear(mol.getNumBonds());
1574   // find the double bonds:
1575   for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
1576        ++bondIt) {
1577     if ((*bondIt)->getBondType() == Bond::DOUBLE) {
1578       Bond *dblBond = *bondIt;
1579       if (dblBond->getStereo() != Bond::STEREONONE) {
1580         continue;
1581       }
1582       if (!ranks.size()) {
1583         assignAtomCIPRanks(mol, ranks);
1584       }
1585       dblBond->getStereoAtoms().clear();
1586 
1587       // at the moment we are ignoring stereochem on ring bonds with less than
1588       // 8
1589       // members.
1590       if (shouldDetectDoubleBondStereo(dblBond)) {
1591         const Atom *begAtom = dblBond->getBeginAtom();
1592         const Atom *endAtom = dblBond->getEndAtom();
1593         // we're only going to handle 2 or three coordinate atoms:
1594         if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) &&
1595             (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) {
1596           ++unassignedBonds;
1597 
1598           // look around each atom and see if it has at least one bond with
1599           // direction marked:
1600 
1601           // the pairs here are: atomrank,bonddir
1602           Chirality::INT_PAIR_VECT begAtomNeighbors, endAtomNeighbors;
1603           bool hasExplicitUnknownStereo = false;
1604           int bgn_stereo = false, end_stereo = false;
1605           if ((dblBond->getBeginAtom()->getPropIfPresent(
1606                    common_properties::_UnknownStereo, bgn_stereo) &&
1607                bgn_stereo) ||
1608               (dblBond->getEndAtom()->getPropIfPresent(
1609                    common_properties::_UnknownStereo, end_stereo) &&
1610                end_stereo)) {
1611             hasExplicitUnknownStereo = true;
1612           }
1613           Chirality::findAtomNeighborDirHelper(mol, begAtom, dblBond, ranks,
1614                                                begAtomNeighbors,
1615                                                hasExplicitUnknownStereo);
1616           Chirality::findAtomNeighborDirHelper(mol, endAtom, dblBond, ranks,
1617                                                endAtomNeighbors,
1618                                                hasExplicitUnknownStereo);
1619 
1620           if (begAtomNeighbors.size() && endAtomNeighbors.size()) {
1621             // Each atom has at least one neighboring bond with marked
1622             // directionality.  Find the highest-ranked directionality
1623             // on each side:
1624 
1625             int begDir, endDir, endNbrAid, begNbrAid;
1626             if (begAtomNeighbors.size() == 1 ||
1627                 ranks[begAtomNeighbors[0].first] >
1628                     ranks[begAtomNeighbors[1].first]) {
1629               begDir = begAtomNeighbors[0].second;
1630               begNbrAid = begAtomNeighbors[0].first;
1631             } else {
1632               begDir = begAtomNeighbors[1].second;
1633               begNbrAid = begAtomNeighbors[1].first;
1634             }
1635             if (endAtomNeighbors.size() == 1 ||
1636                 ranks[endAtomNeighbors[0].first] >
1637                     ranks[endAtomNeighbors[1].first]) {
1638               endDir = endAtomNeighbors[0].second;
1639               endNbrAid = endAtomNeighbors[0].first;
1640             } else {
1641               endDir = endAtomNeighbors[1].second;
1642               endNbrAid = endAtomNeighbors[1].first;
1643             }
1644 
1645             bool conflictingBegin =
1646                 (begAtomNeighbors.size() == 2 &&
1647                  begAtomNeighbors[0].second == begAtomNeighbors[1].second);
1648             bool conflictingEnd =
1649                 (endAtomNeighbors.size() == 2 &&
1650                  endAtomNeighbors[0].second == endAtomNeighbors[1].second);
1651             if (conflictingBegin || conflictingEnd) {
1652               dblBond->setStereo(Bond::STEREONONE);
1653               BOOST_LOG(rdWarningLog) << "Conflicting single bond directions "
1654                                          "around double bond at index "
1655                                       << dblBond->getIdx() << "." << std::endl;
1656               BOOST_LOG(rdWarningLog) << "  BondStereo set to STEREONONE and "
1657                                          "single bond directions set to NONE."
1658                                       << std::endl;
1659               assignedABond = true;
1660               if (conflictingBegin) {
1661                 bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[0].first,
1662                                                      begAtom->getIdx())
1663                                  ->getIdx()] = 1;
1664                 bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[1].first,
1665                                                      begAtom->getIdx())
1666                                  ->getIdx()] = 1;
1667               }
1668               if (conflictingEnd) {
1669                 bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[0].first,
1670                                                      endAtom->getIdx())
1671                                  ->getIdx()] = 1;
1672                 bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[1].first,
1673                                                      endAtom->getIdx())
1674                                  ->getIdx()] = 1;
1675               }
1676             } else {
1677               dblBond->getStereoAtoms().push_back(begNbrAid);
1678               dblBond->getStereoAtoms().push_back(endNbrAid);
1679               if (hasExplicitUnknownStereo) {
1680                 dblBond->setStereo(Bond::STEREOANY);
1681                 assignedABond = true;
1682               } else if (begDir == endDir) {
1683                 // In findAtomNeighborDirHelper, we've set up the
1684                 // bond directions here so that they correspond to
1685                 // having both single bonds START at the double bond.
1686                 // This means that if the single bonds point in the same
1687                 // direction, the bond is cis, "Z"
1688                 dblBond->setStereo(Bond::STEREOZ);
1689                 assignedABond = true;
1690               } else {
1691                 dblBond->setStereo(Bond::STEREOE);
1692                 assignedABond = true;
1693               }
1694             }
1695             --unassignedBonds;
1696           }
1697         }
1698       }
1699     }
1700   }
1701 
1702   for (unsigned int i = 0; i < mol.getNumBonds(); ++i) {
1703     if (bondsToClear[i]) {
1704       mol.getBondWithIdx(i)->setBondDir(Bond::NONE);
1705     }
1706   }
1707 
1708   return std::make_pair(unassignedBonds > 0, assignedABond);
1709 }
1710 
1711 // reassign atom ranks by supplementing the current ranks
1712 // with information about known chirality
rerankAtoms(const ROMol & mol,UINT_VECT & ranks)1713 void rerankAtoms(const ROMol &mol, UINT_VECT &ranks) {
1714   PRECONDITION(ranks.size() == mol.getNumAtoms(), "bad rank vector size");
1715   unsigned int factor = 100;
1716   while (factor < mol.getNumAtoms()) {
1717     factor *= 10;
1718   }
1719 
1720 #ifdef VERBOSE_CANON
1721   BOOST_LOG(rdDebugLog) << "rerank PRE: " << std::endl;
1722   for (int i = 0; i < mol.getNumAtoms(); i++) {
1723     BOOST_LOG(rdDebugLog) << "  " << i << ": " << ranks[i] << std::endl;
1724   }
1725 #endif
1726 
1727   DOUBLE_VECT invars(mol.getNumAtoms());
1728   // and now supplement them:
1729   for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
1730     invars[i] = ranks[i] * factor;
1731     const Atom *atom = mol.getAtomWithIdx(i);
1732     // Priority order: R > S > nothing
1733     std::string cipCode;
1734     if (atom->getPropIfPresent(common_properties::_CIPCode, cipCode)) {
1735       if (cipCode == "S") {
1736         invars[i] += 10;
1737       } else if (cipCode == "R") {
1738         invars[i] += 20;
1739       }
1740     }
1741     ROMol::OEDGE_ITER beg, end;
1742     boost::tie(beg, end) = mol.getAtomBonds(atom);
1743     while (beg != end) {
1744       const Bond *oBond = mol[*beg];
1745       if (oBond->getBondType() == Bond::DOUBLE) {
1746         if (oBond->getStereo() == Bond::STEREOE) {
1747           invars[i] += 1;
1748         } else if (oBond->getStereo() == Bond::STEREOZ) {
1749           invars[i] += 2;
1750         }
1751       }
1752       ++beg;
1753     }
1754   }
1755   iterateCIPRanks(mol, invars, ranks, true);
1756   // copy the ranks onto the atoms:
1757   for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
1758     mol.getAtomWithIdx(i)->setProp(common_properties::_CIPRank, ranks[i]);
1759   }
1760 
1761 #ifdef VERBOSE_CANON
1762   BOOST_LOG(rdDebugLog) << "   post: " << std::endl;
1763   for (int i = 0; i < mol.getNumAtoms(); i++) {
1764     BOOST_LOG(rdDebugLog) << "  " << i << ": " << ranks[i] << std::endl;
1765   }
1766 #endif
1767 }
1768 
hasStereoBondDir(const Bond * bond)1769 bool hasStereoBondDir(const Bond *bond) {
1770   PRECONDITION(bond, "no bond");
1771   return bond->getBondDir() == Bond::BondDir::ENDDOWNRIGHT ||
1772          bond->getBondDir() == Bond::BondDir::ENDUPRIGHT;
1773 }
1774 
getNeighboringDirectedBond(const ROMol & mol,const Atom * atom)1775 const Bond *getNeighboringDirectedBond(const ROMol &mol, const Atom *atom) {
1776   PRECONDITION(atom, "no atom");
1777   for (const auto &bondIdx :
1778        boost::make_iterator_range(mol.getAtomBonds(atom))) {
1779     const Bond *bond = mol[bondIdx];
1780 
1781     if (bond->getBondType() != Bond::BondType::DOUBLE &&
1782         hasStereoBondDir(bond)) {
1783       return bond;
1784     }
1785   }
1786   return nullptr;
1787 }
1788 
translateEZLabelToCisTrans(Bond::BondStereo label)1789 Bond::BondStereo translateEZLabelToCisTrans(Bond::BondStereo label) {
1790   switch (label) {
1791     case Bond::STEREOE:
1792       return Bond::STEREOTRANS;
1793     case Bond::STEREOZ:
1794       return Bond::STEREOCIS;
1795     default:
1796       return label;
1797   }
1798 }
1799 
findStereoAtoms(const Bond * bond)1800 INT_VECT findStereoAtoms(const Bond *bond) {
1801   PRECONDITION(bond, "bad bond");
1802   PRECONDITION(bond->hasOwningMol(), "no mol");
1803   PRECONDITION(bond->getBondType() == Bond::DOUBLE, "not double bond");
1804   PRECONDITION(bond->getStereo() > Bond::BondStereo::STEREOANY,
1805                "no defined stereo");
1806 
1807   if (!bond->getStereoAtoms().empty()) {
1808     return bond->getStereoAtoms();
1809   }
1810   if (bond->getStereo() == Bond::BondStereo::STEREOE ||
1811       bond->getStereo() == Bond::BondStereo::STEREOZ) {
1812     const Atom *startStereoAtom =
1813         findHighestCIPNeighbor(bond->getBeginAtom(), bond->getEndAtom());
1814     const Atom *endStereoAtom =
1815         findHighestCIPNeighbor(bond->getEndAtom(), bond->getBeginAtom());
1816 
1817     if (startStereoAtom == nullptr || endStereoAtom == nullptr) {
1818       return {};
1819     }
1820 
1821     int startStereoAtomIdx = static_cast<int>(startStereoAtom->getIdx());
1822     int endStereoAtomIdx = static_cast<int>(endStereoAtom->getIdx());
1823 
1824     return {startStereoAtomIdx, endStereoAtomIdx};
1825   } else {
1826     BOOST_LOG(rdWarningLog) << "Unable to assign stereo atoms for bond "
1827                             << bond->getIdx() << std::endl;
1828     return {};
1829   }
1830 }
1831 
1832 }  // namespace Chirality
1833 
1834 namespace MolOps {
1835 
1836 /*
1837     We're going to do this iteratively:
1838       1) assign atom stereochemistry
1839       2) assign bond stereochemistry
1840       3) if there are still unresolved atoms or bonds
1841          repeat the above steps as necessary
1842  */
assignStereochemistry(ROMol & mol,bool cleanIt,bool force,bool flagPossibleStereoCenters)1843 void assignStereochemistry(ROMol &mol, bool cleanIt, bool force,
1844                            bool flagPossibleStereoCenters) {
1845   if (!force && mol.hasProp(common_properties::_StereochemDone)) {
1846     return;
1847   }
1848 
1849   // later we're going to need ring information, get it now if we don't
1850   // have it already:
1851   if (!mol.getRingInfo()->isInitialized()) {
1852     MolOps::fastFindRings(mol);
1853   }
1854 
1855 #if 0
1856   std::cerr << ">>>>>>>>>>>>>\n";
1857   std::cerr << "assign stereochem\n";
1858   mol.debugMol(std::cerr);
1859 #endif
1860 
1861   // as part of the preparation, we'll loop over the atoms and
1862   // bonds to see if anything has stereochemistry
1863   // indicated. There's no point in doing the work here if there
1864   // are neither stereocenters nor bonds that we need to consider.
1865   // The exception to this is when flagPossibleStereoCenters is
1866   // true; then we always need to do the work
1867   bool hasStereoAtoms = flagPossibleStereoCenters;
1868   for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1869        ++atIt) {
1870     if (cleanIt) {
1871       if ((*atIt)->hasProp(common_properties::_CIPCode)) {
1872         (*atIt)->clearProp(common_properties::_CIPCode);
1873       }
1874       if ((*atIt)->hasProp(common_properties::_ChiralityPossible)) {
1875         (*atIt)->clearProp(common_properties::_ChiralityPossible);
1876       }
1877     }
1878     if (!hasStereoAtoms && (*atIt)->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1879         (*atIt)->getChiralTag() != Atom::CHI_OTHER) {
1880       hasStereoAtoms = true;
1881     }
1882   }
1883   bool hasStereoBonds = false;
1884   for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
1885        ++bondIt) {
1886     if (cleanIt) {
1887       if ((*bondIt)->getBondType() == Bond::DOUBLE) {
1888         if ((*bondIt)->getBondDir() == Bond::EITHERDOUBLE) {
1889           (*bondIt)->setStereo(Bond::STEREOANY);
1890         } else if ((*bondIt)->getStereo() != Bond::STEREOANY) {
1891           (*bondIt)->setStereo(Bond::STEREONONE);
1892           (*bondIt)->getStereoAtoms().clear();
1893         }
1894       }
1895     }
1896     if (!hasStereoBonds && (*bondIt)->getBondType() == Bond::DOUBLE) {
1897       ROMol::OEDGE_ITER beg, end;
1898       boost::tie(beg, end) = mol.getAtomBonds((*bondIt)->getBeginAtom());
1899       while (!hasStereoBonds && beg != end) {
1900         const Bond *nbond = mol[*beg];
1901         ++beg;
1902         if (nbond->getBondDir() == Bond::ENDDOWNRIGHT ||
1903             nbond->getBondDir() == Bond::ENDUPRIGHT) {
1904           hasStereoBonds = true;
1905         }
1906       }
1907       boost::tie(beg, end) = mol.getAtomBonds((*bondIt)->getEndAtom());
1908       while (!hasStereoBonds && beg != end) {
1909         const Bond *nbond = mol[*beg];
1910         ++beg;
1911         if (nbond->getBondDir() == Bond::ENDDOWNRIGHT ||
1912             nbond->getBondDir() == Bond::ENDUPRIGHT) {
1913           hasStereoBonds = true;
1914         }
1915       }
1916     }
1917     if (!cleanIt && hasStereoBonds) {
1918       break;  // no reason to keep iterating if we've already
1919               // determined there are stereo bonds to consider
1920     }
1921   }
1922   UINT_VECT atomRanks;
1923   bool keepGoing = hasStereoAtoms | hasStereoBonds;
1924   bool changedStereoAtoms, changedStereoBonds;
1925   while (keepGoing) {
1926     if (hasStereoAtoms) {
1927       boost::tie(hasStereoAtoms, changedStereoAtoms) =
1928           Chirality::assignAtomChiralCodes(mol, atomRanks,
1929                                            flagPossibleStereoCenters);
1930     } else {
1931       changedStereoAtoms = false;
1932     }
1933     if (hasStereoBonds) {
1934       boost::tie(hasStereoBonds, changedStereoBonds) =
1935           Chirality::assignBondStereoCodes(mol, atomRanks);
1936     } else {
1937       changedStereoBonds = false;
1938     }
1939     keepGoing = (hasStereoAtoms || hasStereoBonds) &&
1940                 (changedStereoAtoms || changedStereoBonds);
1941 
1942     if (keepGoing) {
1943       // update the atom ranks based on the new information we have:
1944       Chirality::rerankAtoms(mol, atomRanks);
1945     }
1946 #if 0
1947     std::cout << "*************** done iteration " << keepGoing
1948               << " ***********" << std::endl;
1949     mol.debugMol(std::cout);
1950     std::cout << "*************** done iteration " << keepGoing
1951               << " ***********" << std::endl;
1952 #endif
1953   }
1954 
1955   if (cleanIt) {
1956     // if the ranks are needed again, this will force them to be
1957     // re-calculated based on the stereo calculated above.
1958     // atomRanks.clear();
1959 
1960     for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1961          ++atIt) {
1962       if ((*atIt)->hasProp(common_properties::_ringStereochemCand)) {
1963         (*atIt)->clearProp(common_properties::_ringStereochemCand);
1964       }
1965       if ((*atIt)->hasProp(common_properties::_ringStereoAtoms)) {
1966         (*atIt)->clearProp(common_properties::_ringStereoAtoms);
1967       }
1968     }
1969     boost::dynamic_bitset<> possibleSpecialCases(mol.getNumAtoms());
1970     Chirality::findChiralAtomSpecialCases(mol, possibleSpecialCases);
1971 
1972     for (auto atom : mol.atoms()) {
1973       if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1974           !atom->hasProp(common_properties::_CIPCode) &&
1975           (!possibleSpecialCases[atom->getIdx()] ||
1976            !atom->hasProp(common_properties::_ringStereoAtoms))) {
1977         atom->setChiralTag(Atom::CHI_UNSPECIFIED);
1978 
1979         // If the atom has an explicit hydrogen and no charge, that H
1980         // was probably put there solely because of the chirality.
1981         // So we'll go ahead and remove it.
1982         // This was Issue 194
1983         if (atom->getNumExplicitHs() == 1 && atom->getFormalCharge() == 0 &&
1984             !atom->getIsAromatic()) {
1985           atom->setNumExplicitHs(0);
1986           atom->setNoImplicit(false);
1987           atom->calcExplicitValence(false);
1988           atom->calcImplicitValence(false);
1989         }
1990       }
1991     }
1992     for (auto bond : mol.bonds()) {
1993       // wedged bonds to atoms that have no stereochem
1994       // should be removed. (github issue 87)
1995       if ((bond->getBondDir() == Bond::BEGINWEDGE ||
1996            bond->getBondDir() == Bond::BEGINDASH) &&
1997           bond->getBeginAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED &&
1998           bond->getEndAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED) {
1999         bond->setBondDir(Bond::NONE);
2000       }
2001 
2002       // check for directionality on single bonds around
2003       // double bonds without stereo. This was github #2422
2004       if (bond->getBondType() == Bond::DOUBLE &&
2005           (bond->getStereo() == Bond::STEREOANY ||
2006            bond->getStereo() == Bond::STEREONONE)) {
2007         std::vector<Atom *> batoms = {bond->getBeginAtom(), bond->getEndAtom()};
2008         for (auto batom : batoms) {
2009           for (const auto &nbri :
2010                boost::make_iterator_range(mol.getAtomBonds(batom))) {
2011             auto nbrBndI = mol[nbri];
2012             if ((nbrBndI->getBondDir() == Bond::ENDDOWNRIGHT ||
2013                  nbrBndI->getBondDir() == Bond::ENDUPRIGHT) &&
2014                 (nbrBndI->getBondType() == Bond::SINGLE ||
2015                  nbrBndI->getBondType() == Bond::AROMATIC)) {
2016               // direction is set, and we know it's not because of our
2017               // bond. What about other neighbors?
2018               bool okToClear = true;
2019               for (const auto &nbrj : boost::make_iterator_range(
2020                        mol.getAtomBonds(nbrBndI->getOtherAtom(batom)))) {
2021                 auto nbrBndJ = mol[nbrj];
2022                 if (nbrBndJ->getBondType() == Bond::DOUBLE &&
2023                     nbrBndJ->getStereo() != Bond::STEREOANY &&
2024                     nbrBndJ->getStereo() != Bond::STEREONONE) {
2025                   okToClear = false;
2026                   break;
2027                 }
2028               }
2029               if (okToClear) {
2030                 nbrBndI->setBondDir(Bond::NONE);
2031               }
2032             }
2033           }
2034         }
2035       }
2036 #if 0
2037       // make sure CIS/TRANS assignments are actually stereo bonds
2038       if ((*bondIt)->getBondType() == Bond::DOUBLE) {
2039         if ((*bondIt)->getStereo() == Bond::STEREOCIS ||
2040             (*bondIt)->getStereo() == Bond::STEREOTRANS) {
2041           if (!atomRanks.size()) {
2042             Chirality::assignAtomCIPRanks(mol, atomRanks);
2043           }
2044 
2045           const Atom *begAtom = (*bondIt)->getBeginAtom(),
2046                      *endAtom = (*bondIt)->getEndAtom();
2047           UINT_VECT begAtomNeighbors, endAtomNeighbors;
2048           Chirality::findAtomNeighborsHelper(mol, begAtom, *bondIt,
2049                                              begAtomNeighbors);
2050           Chirality::findAtomNeighborsHelper(mol, endAtom, *bondIt,
2051                                              endAtomNeighbors);
2052 
2053           // Note, this relies on this being a hydrogen-suppressed
2054           // graph as the 'Note' in the doc string of this function
2055           // indicates is a pre-condition.
2056           if ((begAtomNeighbors.size() == 2 &&
2057                atomRanks[begAtomNeighbors[0]] ==
2058                    atomRanks[begAtomNeighbors[1]]) ||
2059               (endAtomNeighbors.size() == 2 &&
2060                atomRanks[endAtomNeighbors[0]] ==
2061                    atomRanks[endAtomNeighbors[1]])) {
2062             (*bondIt)->setStereo(Bond::STEREONONE);
2063             (*bondIt)->getStereoAtoms().clear();
2064           }
2065         }
2066       }
2067 #endif
2068     }
2069   }
2070   mol.setProp(common_properties::_StereochemDone, 1, true);
2071 
2072 #if 0
2073   std::cerr << "---\n";
2074   mol.debugMol(std::cerr);
2075   std::cerr << "<<<<<<<<<<<<<<<<\n";
2076 #endif
2077 }
2078 
2079 // Find bonds than can be cis/trans in a molecule and mark them as
2080 // Bond::STEREOANY.
findPotentialStereoBonds(ROMol & mol,bool cleanIt)2081 void findPotentialStereoBonds(ROMol &mol, bool cleanIt) {
2082   // FIX: The earlier thought was to provide an optional argument to ignore or
2083   // consider
2084   //  double bonds in a ring. But I am removing this optional argument and
2085   //  ignoring ring bonds
2086   //  completely for now. This is because finding a potential stereo bond in a
2087   //  ring involves
2088   //  more than just checking the CIPranks for the neighbors - SP 05/04/04
2089 
2090   // make this function callable multiple times
2091   if ((mol.hasProp(common_properties::_BondsPotentialStereo)) && (!cleanIt)) {
2092     return;
2093   } else {
2094     UINT_VECT ranks;
2095     ranks.resize(mol.getNumAtoms());
2096     bool cipDone = false;
2097 
2098     ROMol::BondIterator bondIt;
2099     for (bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) {
2100       if ((*bondIt)->getBondType() == Bond::DOUBLE &&
2101           !(mol.getRingInfo()->numBondRings((*bondIt)->getIdx()))) {
2102         // we are ignoring ring bonds here - read the FIX above
2103         Bond *dblBond = *bondIt;
2104         // proceed only if we either want to clean the stereocode on this bond,
2105         // if none is set on it yet, or it is STEREOANY and we need to find
2106         // stereoatoms
2107         if (cleanIt || dblBond->getStereo() == Bond::STEREONONE ||
2108             (dblBond->getStereo() == Bond::STEREOANY &&
2109              dblBond->getStereoAtoms().size() != 2)) {
2110           dblBond->setStereo(Bond::STEREONONE);
2111           const Atom *begAtom = dblBond->getBeginAtom(),
2112                      *endAtom = dblBond->getEndAtom();
2113           // we're only going to handle 2 or three coordinate atoms:
2114           if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) &&
2115               (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) {
2116             // ------------------
2117             // get the CIP ranking of each atom if we need it:
2118             if (!cipDone) {
2119               if (!begAtom->hasProp(common_properties::_CIPRank)) {
2120                 Chirality::assignAtomCIPRanks(mol, ranks);
2121               } else {
2122                 // no need to recompute if we don't need to recompute. :-)
2123                 for (unsigned int ai = 0; ai < mol.getNumAtoms(); ++ai) {
2124                   ranks[ai] = mol.getAtomWithIdx(ai)->getProp<unsigned int>(
2125                       common_properties::_CIPRank);
2126                 }
2127               }
2128               cipDone = true;
2129             }
2130             // find the neighbors for the begin atom and the endAtom
2131             UINT_VECT begAtomNeighbors, endAtomNeighbors;
2132             bool checkDir = false;
2133             bool includeAromatic = true;
2134             Chirality::findAtomNeighborsHelper(mol, begAtom, dblBond,
2135                                                begAtomNeighbors, checkDir,
2136                                                includeAromatic);
2137             Chirality::findAtomNeighborsHelper(mol, endAtom, dblBond,
2138                                                endAtomNeighbors, checkDir,
2139                                                includeAromatic);
2140             if (begAtomNeighbors.size() > 0 && endAtomNeighbors.size() > 0) {
2141               if ((begAtomNeighbors.size() == 2) &&
2142                   (endAtomNeighbors.size() == 2)) {
2143 // if both of the atoms have 2 neighbors (other than the one
2144 // connected
2145 // by the double bond) and ....
2146 #if 0
2147                 std::cerr << "Bond: " << dblBond->getIdx() << " "
2148                           << begAtom->getIdx() << "=" << endAtom->getIdx()
2149                           << std::endl;
2150                 std::cerr << "   " << begAtomNeighbors[0] << "="
2151                           << ranks[begAtomNeighbors[0]] << ":";
2152                 std::cerr << "   " << begAtomNeighbors[1] << "="
2153                           << ranks[begAtomNeighbors[1]] << std::endl;
2154                 std::cerr << "   " << endAtomNeighbors[0] << "="
2155                           << ranks[endAtomNeighbors[0]] << ":";
2156                 std::cerr << "   " << endAtomNeighbors[1] << "="
2157                           << ranks[endAtomNeighbors[1]] << std::endl;
2158 #endif
2159                 if ((ranks[begAtomNeighbors[0]] !=
2160                      ranks[begAtomNeighbors[1]]) &&
2161                     (ranks[endAtomNeighbors[0]] !=
2162                      ranks[endAtomNeighbors[1]])) {
2163                   // the neighbors ranks are different at both the ends,
2164                   // this bond can be part of a cis/trans system
2165                   if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) {
2166                     dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2167                   } else {
2168                     dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]);
2169                   }
2170                   if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) {
2171                     dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2172                   } else {
2173                     dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]);
2174                   }
2175                 }
2176               } else if (begAtomNeighbors.size() == 2) {
2177                 // if the begAtom has two neighbors and ....
2178                 if (ranks[begAtomNeighbors[0]] != ranks[begAtomNeighbors[1]]) {
2179                   // their ranks are different
2180                   if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) {
2181                     dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2182                   } else {
2183                     dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]);
2184                   }
2185                   dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2186                 }
2187               } else if (endAtomNeighbors.size() == 2) {
2188                 // if the endAtom has two neighbors and ...
2189                 if (ranks[endAtomNeighbors[0]] != ranks[endAtomNeighbors[1]]) {
2190                   // their ranks are different
2191                   dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2192                   if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) {
2193                     dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2194                   } else {
2195                     dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]);
2196                   }
2197                 }
2198               } else {
2199                 // end and beg atoms has only one neighbor each, it doesn't
2200                 // matter what the ranks are:
2201                 dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2202                 dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2203               }  // end of different number of neighbors on beg and end atoms
2204 
2205               // mark this double bond as a potential stereo bond
2206               if (!dblBond->getStereoAtoms().empty()) {
2207                 dblBond->setStereo(Bond::STEREOANY);
2208               }
2209             }  // end of check that beg and end atoms have at least 1
2210                // neighbor:
2211           }    // end of 2 and 3 coordinated atoms only
2212         }      // end of we want it or CIP code is not set
2213       }        // end of double bond
2214     }          // end of for loop over all bonds
2215     mol.setProp(common_properties::_BondsPotentialStereo, 1, true);
2216   }
2217 }
2218 
2219 // removes chirality markers from sp and sp2 hybridized centers:
cleanupChirality(RWMol & mol)2220 void cleanupChirality(RWMol &mol) {
2221   for (ROMol::AtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms();
2222        ++atomIt) {
2223     if ((*atomIt)->getChiralTag() != Atom::CHI_UNSPECIFIED &&
2224         (*atomIt)->getHybridization() < Atom::SP3) {
2225       (*atomIt)->setChiralTag(Atom::CHI_UNSPECIFIED);
2226     }
2227   }
2228 }
2229 
assignChiralTypesFrom3D(ROMol & mol,int confId,bool replaceExistingTags)2230 void assignChiralTypesFrom3D(ROMol &mol, int confId, bool replaceExistingTags) {
2231   const double ZERO_VOLUME_TOL = 0.1;
2232   if (!mol.getNumConformers()) {
2233     return;
2234   }
2235   const Conformer &conf = mol.getConformer(confId);
2236   if (!conf.is3D()) {
2237     return;
2238   }
2239 
2240   // if the molecule already has stereochemistry
2241   // perceived, remove the flags that indicate
2242   // this... what we're about to do will require
2243   // that we go again.
2244   if (mol.hasProp(common_properties::_StereochemDone)) {
2245     mol.clearProp(common_properties::_StereochemDone);
2246   }
2247 
2248   for (ROMol::AtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms();
2249        ++atomIt) {
2250     Atom *atom = *atomIt;
2251     // if we aren't replacing existing tags and the atom is already tagged,
2252     // punt:
2253     if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
2254       continue;
2255     }
2256     atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2257     // additional reasons to skip the atom:
2258     if (atom->getDegree() < 3 || atom->getTotalDegree() > 4) {
2259       // not enough explicit neighbors or too many total neighbors
2260       continue;
2261     } else {
2262       int anum = atom->getAtomicNum();
2263       if (anum != 16 && anum != 34 &&  // S or Se are special
2264                                        // (just using the InChI list for now)
2265           (atom->getTotalDegree() != 4 ||  // not enough total neighbors
2266            atom->getTotalNumHs(true) > 1)) {
2267         continue;
2268       }
2269     }
2270     const RDGeom::Point3D &p0 = conf.getAtomPos(atom->getIdx());
2271     ROMol::ADJ_ITER nbrIdx, endNbrs;
2272     boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2273     const RDGeom::Point3D &p1 = conf.getAtomPos(*nbrIdx);
2274     ++nbrIdx;
2275     const RDGeom::Point3D &p2 = conf.getAtomPos(*nbrIdx);
2276     ++nbrIdx;
2277     const RDGeom::Point3D &p3 = conf.getAtomPos(*nbrIdx);
2278 
2279     RDGeom::Point3D v1 = p1 - p0;
2280     RDGeom::Point3D v2 = p2 - p0;
2281     RDGeom::Point3D v3 = p3 - p0;
2282 
2283     double chiralVol = v1.dotProduct(v2.crossProduct(v3));
2284     if (chiralVol < -ZERO_VOLUME_TOL) {
2285       atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
2286     } else if (chiralVol > ZERO_VOLUME_TOL) {
2287       atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
2288     } else {
2289       atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2290     }
2291   }
2292 }
2293 
assignChiralTypesFromMolParity(ROMol & mol,bool replaceExistingTags)2294 void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags) {
2295   static const std::vector<Atom::ChiralType> chiralTypeVect{
2296       Atom::CHI_TETRAHEDRAL_CW, Atom::CHI_TETRAHEDRAL_CCW};
2297   // if the molecule already has stereochemistry
2298   // perceived, remove the flags that indicate
2299   // this... what we're about to do will require
2300   // that we go again.
2301   if (mol.hasProp(common_properties::_StereochemDone)) {
2302     mol.clearProp(common_properties::_StereochemDone);
2303   }
2304   // Atom-based parity
2305   // Number the atoms surrounding the stereo center with 1, 2, 3, and 4
2306   // in order of increasing atom number (position in the atom block)
2307   // (an implicit hydrogen should be considered the highest numbered atom).
2308   // View the center from a position such that the bond connecting the
2309   // highest-numbered atom (4) projects behind the plane formed by
2310   // atoms 1, 2, and 3.
2311   //
2312   // Parity 1 (CW)        Parity 2 (CCW)
2313   //     3   1                3   2
2314   //      \ /                  \ /
2315   //       |                    |
2316   //       2                    1
2317   //
2318   for (auto atom : mol.atoms()) {
2319     // if we aren't replacing existing tags and the atom is already tagged,
2320     // punt:
2321     if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
2322       continue;
2323     }
2324     int parity = 0;
2325     atom->getPropIfPresent(common_properties::molParity, parity);
2326     if (parity <= 0 || parity > 2 || atom->getDegree() < 3) {
2327       atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2328       continue;
2329     }
2330     // if we are here, parity was 1 (CW) or 2 (CCW)
2331     // now we set parity 0 to be CW and 1 to be CCW
2332     --parity;
2333     RDKit::ROMol::OBOND_ITER_PAIR nbrBonds = mol.getAtomBonds(atom);
2334     INT_LIST nbrBondIdxList;
2335     std::transform(
2336         nbrBonds.first, nbrBonds.second, std::back_inserter(nbrBondIdxList),
2337         [mol](const ROMol::edge_descriptor &e) { return mol[e]->getIdx(); });
2338     unsigned int atomIdx = atom->getIdx();
2339     nbrBondIdxList.sort([mol, atomIdx](const int ai, const int bi) {
2340       return (mol.getBondWithIdx(ai)->getOtherAtomIdx(atomIdx) <
2341               mol.getBondWithIdx(bi)->getOtherAtomIdx(atomIdx));
2342     });
2343     int nSwaps = atom->getPerturbationOrder(nbrBondIdxList);
2344     if (nSwaps % 2) {
2345       parity = 1 - parity;
2346     }
2347     atom->setChiralTag(chiralTypeVect[parity]);
2348     if (atom->getImplicitValence() == -1) {
2349       atom->calcExplicitValence(false);
2350       atom->calcImplicitValence(false);
2351     }
2352     // within the RD representation, if a three-coordinate atom
2353     // is chiral and has an implicit H, that H needs to be made explicit:
2354     if (atom->getDegree() == 3 && !atom->getNumExplicitHs() &&
2355         atom->getNumImplicitHs() == 1) {
2356       atom->setNumExplicitHs(1);
2357       // recalculated number of implicit Hs:
2358       atom->updatePropertyCache();
2359     }
2360   }
2361 }
2362 
setDoubleBondNeighborDirections(ROMol & mol,const Conformer * conf)2363 void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf) {
2364   // used to store the number of single bonds a given
2365   // single bond is adjacent to
2366   std::vector<unsigned int> singleBondCounts(mol.getNumBonds(), 0);
2367   std::vector<Bond *> bondsInPlay;
2368   // keeps track of which single bonds are adjacent to each double bond:
2369   VECT_INT_VECT dblBondNbrs(mol.getNumBonds());
2370   // keeps track of which double bonds are adjacent to each single bond:
2371   VECT_INT_VECT singleBondNbrs(mol.getNumBonds());
2372   // keeps track of which single bonds need a dir set and which double bonds
2373   // need to have their neighbors' dirs set
2374   boost::dynamic_bitset<> needsDir(mol.getNumBonds());
2375 
2376   // find double bonds that should be considered for
2377   // stereochemistry
2378   // NOTE that we are explicitly excluding double bonds in rings
2379   // with this test.
2380   bool resetRings = false;
2381   if (!mol.getRingInfo()->isInitialized()) {
2382     resetRings = true;
2383     MolOps::fastFindRings(mol);
2384   }
2385 
2386   for (RWMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
2387        ++bondIt) {
2388     if (isBondCandidateForStereo(*bondIt)) {
2389       const Atom *a1 = (*bondIt)->getBeginAtom();
2390       const Atom *a2 = (*bondIt)->getEndAtom();
2391 
2392       ROMol::OEDGE_ITER beg, end;
2393       boost::tie(beg, end) = mol.getAtomBonds(a1);
2394       while (beg != end) {
2395         const Bond *nbrBond = mol[*beg];
2396         if (nbrBond->getBondType() == Bond::SINGLE ||
2397             nbrBond->getBondType() == Bond::AROMATIC) {
2398           singleBondCounts[nbrBond->getIdx()] += 1;
2399           auto nbrDir = nbrBond->getBondDir();
2400           if (nbrDir == Bond::BondDir::NONE ||
2401               nbrDir == Bond::BondDir::ENDDOWNRIGHT ||
2402               nbrDir == Bond::BondDir::ENDUPRIGHT) {
2403             needsDir[nbrBond->getIdx()] = 1;
2404           }
2405           needsDir[(*bondIt)->getIdx()] = 1;
2406           dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx());
2407           // the search may seem inefficient, but these vectors are going to
2408           // be at most 2 long (with very few exceptions). It's just not worth
2409           // using a different data structure
2410           if (std::find(singleBondNbrs[nbrBond->getIdx()].begin(),
2411                         singleBondNbrs[nbrBond->getIdx()].end(),
2412                         (*bondIt)->getIdx()) ==
2413               singleBondNbrs[nbrBond->getIdx()].end()) {
2414             singleBondNbrs[nbrBond->getIdx()].push_back((*bondIt)->getIdx());
2415           }
2416         }
2417         ++beg;
2418       }
2419       boost::tie(beg, end) = mol.getAtomBonds(a2);
2420       while (beg != end) {
2421         const Bond *nbrBond = mol[*beg];
2422         if (nbrBond->getBondType() == Bond::SINGLE ||
2423             nbrBond->getBondType() == Bond::AROMATIC) {
2424           singleBondCounts[nbrBond->getIdx()] += 1;
2425           auto nbrDir = nbrBond->getBondDir();
2426           if (nbrDir == Bond::BondDir::NONE ||
2427               nbrDir == Bond::BondDir::ENDDOWNRIGHT ||
2428               nbrDir == Bond::BondDir::ENDUPRIGHT) {
2429             needsDir[nbrBond->getIdx()] = 1;
2430           }
2431           needsDir[(*bondIt)->getIdx()] = 1;
2432           dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx());
2433 
2434           // the search may seem inefficient, but these vectors are going to
2435           // be at most 2 long (with very few exceptions). It's just not worth
2436           // using a different data structure
2437           if (std::find(singleBondNbrs[nbrBond->getIdx()].begin(),
2438                         singleBondNbrs[nbrBond->getIdx()].end(),
2439                         (*bondIt)->getIdx()) ==
2440               singleBondNbrs[nbrBond->getIdx()].end()) {
2441             singleBondNbrs[nbrBond->getIdx()].push_back((*bondIt)->getIdx());
2442           }
2443         }
2444         ++beg;
2445       }
2446       bondsInPlay.push_back(*bondIt);
2447     }
2448   }
2449 
2450   if (!bondsInPlay.size()) {
2451     if (resetRings) {
2452       mol.getRingInfo()->reset();
2453     }
2454     return;
2455   }
2456 
2457   // order the double bonds based on the singleBondCounts of their neighbors:
2458   std::vector<std::pair<unsigned int, Bond *>> orderedBondsInPlay;
2459   for (auto dblBond : bondsInPlay) {
2460     unsigned int countHere =
2461         std::accumulate(dblBondNbrs[dblBond->getIdx()].begin(),
2462                         dblBondNbrs[dblBond->getIdx()].end(), 0);
2463     // and favor double bonds that are *not* in rings. The combination of
2464     // using the sum above (instead of the max) and this ring-membershipt test
2465     // seem to fix sf.net issue 3009836
2466     if (!(mol.getRingInfo()->numBondRings(dblBond->getIdx()))) {
2467       countHere *= 10;
2468     }
2469     orderedBondsInPlay.push_back(std::make_pair(countHere, dblBond));
2470   }
2471   std::sort(orderedBondsInPlay.begin(), orderedBondsInPlay.end());
2472 
2473   // oof, now loop over the double bonds in that order and
2474   // update their neighbor directionalities:
2475   std::vector<std::pair<unsigned int, Bond *>>::reverse_iterator pairIter;
2476   for (pairIter = orderedBondsInPlay.rbegin();
2477        pairIter != orderedBondsInPlay.rend(); ++pairIter) {
2478     // std::cerr << "RESET?: " << pairIter->second->getIdx() << " "
2479     //           << pairIter->second->getStereo() << std::endl;
2480     updateDoubleBondNeighbors(mol, pairIter->second, conf, needsDir,
2481                               singleBondCounts, singleBondNbrs);
2482   }
2483   if (resetRings) {
2484     mol.getRingInfo()->reset();
2485   }
2486 }
2487 
detectBondStereochemistry(ROMol & mol,int confId)2488 void detectBondStereochemistry(ROMol &mol, int confId) {
2489   if (!mol.getNumConformers()) {
2490     return;
2491   }
2492   const Conformer &conf = mol.getConformer(confId);
2493   setDoubleBondNeighborDirections(mol, &conf);
2494 }
2495 
setBondStereoFromDirections(ROMol & mol)2496 void setBondStereoFromDirections(ROMol &mol) {
2497   for (Bond *bond : mol.bonds()) {
2498     if (bond->getBondType() == Bond::DOUBLE) {
2499       const Atom *stereoBondBeginAtom = bond->getBeginAtom();
2500       const Atom *stereoBondEndAtom = bond->getEndAtom();
2501 
2502       const Bond *directedBondAtBegin =
2503           Chirality::getNeighboringDirectedBond(mol, stereoBondBeginAtom);
2504       const Bond *directedBondAtEnd =
2505           Chirality::getNeighboringDirectedBond(mol, stereoBondEndAtom);
2506 
2507       if (directedBondAtBegin != nullptr && directedBondAtEnd != nullptr) {
2508         unsigned beginSideStereoAtom =
2509             directedBondAtBegin->getOtherAtomIdx(stereoBondBeginAtom->getIdx());
2510         unsigned endSideStereoAtom =
2511             directedBondAtEnd->getOtherAtomIdx(stereoBondEndAtom->getIdx());
2512 
2513         bond->setStereoAtoms(beginSideStereoAtom, endSideStereoAtom);
2514 
2515         auto beginSideBondDirection = directedBondAtBegin->getBondDir();
2516         if (directedBondAtBegin->getBeginAtom() == stereoBondBeginAtom) {
2517           beginSideBondDirection = getOppositeBondDir(beginSideBondDirection);
2518         }
2519 
2520         auto endSideBondDirection = directedBondAtEnd->getBondDir();
2521         if (directedBondAtEnd->getEndAtom() == stereoBondEndAtom) {
2522           endSideBondDirection = getOppositeBondDir(endSideBondDirection);
2523         }
2524 
2525         if (beginSideBondDirection == endSideBondDirection) {
2526           bond->setStereo(Bond::STEREOTRANS);
2527         } else {
2528           bond->setStereo(Bond::STEREOCIS);
2529         }
2530       }
2531     }
2532   }
2533 }
2534 
assignStereochemistryFrom3D(ROMol & mol,int confId,bool replaceExistingTags)2535 void assignStereochemistryFrom3D(ROMol &mol, int confId,
2536                                  bool replaceExistingTags) {
2537   if (!mol.getNumConformers() || !mol.getConformer(confId).is3D()) {
2538     return;
2539   }
2540 
2541   detectBondStereochemistry(mol, confId);
2542   assignChiralTypesFrom3D(mol, confId, replaceExistingTags);
2543   bool force = true;
2544   bool flagPossibleStereoCenters = true;
2545   assignStereochemistry(mol, replaceExistingTags, force,
2546                         flagPossibleStereoCenters);
2547 }
2548 
assignChiralTypesFromBondDirs(ROMol & mol,const int confId,const bool replaceExistingTags)2549 void assignChiralTypesFromBondDirs(ROMol &mol, const int confId,
2550                                    const bool replaceExistingTags) {
2551   if (!mol.getNumConformers()) {
2552     return;
2553   }
2554   auto conf = mol.getConformer(confId);
2555   boost::dynamic_bitset<> atomsSet(mol.getNumAtoms(), 0);
2556   for (auto &bond : mol.bonds()) {
2557     const Bond::BondDir dir = bond->getBondDir();
2558     if (dir != Bond::UNKNOWN) {
2559       // the bond is marked as chiral:
2560       if (dir == Bond::BEGINWEDGE || dir == Bond::BEGINDASH) {
2561         Atom *atom = bond->getBeginAtom();
2562         if (atomsSet[atom->getIdx()] ||
2563             (!replaceExistingTags &&
2564              atom->getChiralTag() != Atom::CHI_UNSPECIFIED)) {
2565           continue;
2566         }
2567         if (atom->getImplicitValence() == -1) {
2568           atom->calcExplicitValence(false);
2569           atom->calcImplicitValence(false);
2570         }
2571         Atom::ChiralType code = atomChiralTypeFromBondDir(mol, bond, &conf);
2572         if (code != Atom::ChiralType::CHI_UNSPECIFIED) {
2573           atomsSet.set(atom->getIdx());
2574           //   std::cerr << "atom " << atom->getIdx() << " code " << code
2575           //             << " from bond " << bond->getIdx() << std::endl;
2576         }
2577         atom->setChiralTag(code);
2578 
2579         // within the RD representation, if a three-coordinate atom
2580         // is chiral and has an implicit H, that H needs to be made explicit:
2581         if (atom->getDegree() == 3 && !atom->getNumExplicitHs() &&
2582             atom->getNumImplicitHs() == 1) {
2583           atom->setNumExplicitHs(1);
2584           // recalculated number of implicit Hs:
2585           atom->updatePropertyCache();
2586         }
2587       }
2588     }
2589   }
2590 }
2591 
removeStereochemistry(ROMol & mol)2592 void removeStereochemistry(ROMol &mol) {
2593   if (mol.hasProp(common_properties::_StereochemDone)) {
2594     mol.clearProp(common_properties::_StereochemDone);
2595   }
2596   for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
2597        ++atIt) {
2598     (*atIt)->setChiralTag(Atom::CHI_UNSPECIFIED);
2599     if ((*atIt)->hasProp(common_properties::_CIPCode)) {
2600       (*atIt)->clearProp(common_properties::_CIPCode);
2601     }
2602     if ((*atIt)->hasProp(common_properties::_CIPRank)) {
2603       (*atIt)->clearProp(common_properties::_CIPRank);
2604     }
2605   }
2606   for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
2607        ++bondIt) {
2608     if ((*bondIt)->getBondType() == Bond::DOUBLE) {
2609       (*bondIt)->setStereo(Bond::STEREONONE);
2610       (*bondIt)->getStereoAtoms().clear();
2611     } else if ((*bondIt)->getBondType() == Bond::SINGLE) {
2612       (*bondIt)->setBondDir(Bond::NONE);
2613     }
2614   }
2615   std::vector<StereoGroup> sgs;
2616   static_cast<RWMol &>(mol).setStereoGroups(std::move(sgs));
2617 }
2618 
2619 }  // end of namespace MolOps
2620 }  // end of namespace RDKit
2621