1 //
2 // Copyright (C) 2004-2021 Greg Landrum and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <GraphMol/RDKitBase.h>
11 #include <RDGeneral/Ranking.h>
12 #include <GraphMol/new_canon.h>
13 #include <GraphMol/QueryOps.h>
14 #include <RDGeneral/types.h>
15 #include <sstream>
16 #include <set>
17 #include <algorithm>
18 #include <RDGeneral/utils.h>
19 #include <RDGeneral/Invariant.h>
20 #include <RDGeneral/RDLog.h>
21
22 #include <boost/dynamic_bitset.hpp>
23 #include <Geometry/point.h>
24 #include "Chirality.h"
25
26 // #define VERBOSE_CANON 1
27
28 namespace RDKit {
29
30 namespace {
shouldDetectDoubleBondStereo(const Bond * bond)31 bool shouldDetectDoubleBondStereo(const Bond *bond) {
32 const RingInfo *ri = bond->getOwningMol().getRingInfo();
33 return (!ri->numBondRings(bond->getIdx()) ||
34 ri->minBondRingSize(bond->getIdx()) > 7);
35 }
36
37 // ----------------------------------- -----------------------------------
38 // This algorithm is identical to that used in the CombiCode Mol file
39 // parser (also developed by RD).
40 //
41 //
42 // SUMMARY:
43 // Derive a chiral code for an atom that has a wedged (or dashed) bond
44 // drawn to it.
45 //
46 // RETURNS:
47 // The chiral type
48 //
49 // CAVEATS:
50 // This is careful to ensure that the central atom has 4 neighbors and
51 // only single bonds to it, but that's about it.
52 //
53 // NOTE: this isn't careful at all about checking to make sure that
54 // things actually *should* be chiral. e.g. if the file has a
55 // 3-coordinate N with a wedged bond, it will make some erroneous
56 // assumptions about the chirality.
57 //
58 // ----------------------------------- -----------------------------------
59
atomChiralTypeFromBondDir(const ROMol & mol,const Bond * bond,const Conformer * conf)60 Atom::ChiralType atomChiralTypeFromBondDir(const ROMol &mol, const Bond *bond,
61 const Conformer *conf) {
62 PRECONDITION(bond, "no bond");
63 PRECONDITION(conf, "no conformer");
64 Bond::BondDir bondDir = bond->getBondDir();
65 PRECONDITION(bondDir == Bond::BEGINWEDGE || bondDir == Bond::BEGINDASH,
66 "bad bond direction");
67
68 // NOTE that according to the CT file spec, wedging assigns chirality
69 // to the atom at the point of the wedge, (atom 1 in the bond).
70 const Atom *atom = bond->getBeginAtom();
71 PRECONDITION(atom, "no atom");
72
73 // we can't do anything with atoms that have more than 4 neighbors:
74 if (atom->getDegree() > 4) {
75 return Atom::CHI_UNSPECIFIED;
76 }
77 const Atom *bondAtom = bond->getEndAtom();
78
79 Atom::ChiralType res = Atom::CHI_UNSPECIFIED;
80
81 INT_LIST neighborBondIndices;
82 RDGeom::Point3D centerLoc, tmpPt;
83 centerLoc = conf->getAtomPos(atom->getIdx());
84 tmpPt = conf->getAtomPos(bondAtom->getIdx());
85 centerLoc.z = 0.0;
86 tmpPt.z = 0.0;
87
88 RDGeom::Point3D refVect = centerLoc.directionVector(tmpPt);
89
90 //----------------------------------------------------------
91 //
92 // start by ensuring that all the bonds to neighboring atoms
93 // are single bonds and collecting a list of neighbor indices:
94 //
95 //----------------------------------------------------------
96 bool hSeen = false;
97
98 neighborBondIndices.push_back(bond->getIdx());
99 if (bondAtom->getAtomicNum() == 1 && bondAtom->getIsotope() == 0) {
100 hSeen = true;
101 }
102
103 bool allSingle = true;
104 ROMol::OEDGE_ITER beg, end;
105 boost::tie(beg, end) = mol.getAtomBonds(atom);
106 while (beg != end) {
107 const Bond *nbrBond = mol[*beg];
108 if (nbrBond->getBondType() != Bond::SINGLE) {
109 allSingle = false;
110 // break;
111 }
112 if (nbrBond != bond) {
113 if ((nbrBond->getOtherAtom(atom)->getAtomicNum() == 1 &&
114 nbrBond->getOtherAtom(atom)->getIsotope() == 0)) {
115 hSeen = true;
116 }
117 neighborBondIndices.push_back(nbrBond->getIdx());
118 }
119 ++beg;
120 }
121 size_t nNbrs = neighborBondIndices.size();
122
123 //----------------------------------------------------------
124 //
125 // Return now if there aren't at least 3 non-H bonds to the atom.
126 // (we can implicitly add a single H to 3 coordinate atoms, but
127 // we're horked otherwise).
128 //
129 //----------------------------------------------------------
130 if (nNbrs < 3 || (hSeen && nNbrs < 4)) {
131 return Atom::CHI_UNSPECIFIED;
132 }
133
134 //----------------------------------------------------------
135 //
136 // Continue if there are all single bonds or if we're considering
137 // 4-coordinate P or S
138 //
139 //----------------------------------------------------------
140 if (allSingle || atom->getAtomicNum() == 15 || atom->getAtomicNum() == 16) {
141 //------------------------------------------------------------
142 //
143 // Here we need to figure out the rotation direction between
144 // the neighbor bonds and the wedged bond:
145 //
146 //------------------------------------------------------------
147 bool isCCW = true;
148 INT_LIST::const_iterator bondIter = neighborBondIndices.begin();
149 ++bondIter;
150 auto bond1 = mol.getBondWithIdx(*bondIter);
151 int oaid = bond1->getOtherAtom(atom)->getIdx();
152 tmpPt = conf->getAtomPos(oaid);
153 tmpPt.z = 0;
154 auto atomVect0 = centerLoc.directionVector(tmpPt);
155 auto angle01 = refVect.signedAngleTo(atomVect0);
156
157 ++bondIter;
158 auto bond2 = mol.getBondWithIdx(*bondIter);
159 oaid = bond2->getOtherAtom(atom)->getIdx();
160 tmpPt = conf->getAtomPos(oaid);
161 tmpPt.z = 0;
162 auto atomVect1 = centerLoc.directionVector(tmpPt);
163 auto angle02 = refVect.signedAngleTo(atomVect1);
164
165 // order everything so that looking from the top in a CCW direction we
166 // have 0, 1, 2
167 bool swappedIt = false;
168 if (angle01 > angle02) {
169 // std::cerr << " swap because " << angle01 << " " << angle02 << " "
170 // << bond1->getIdx() << "->" << bond2->getIdx() << std::endl;
171 std::swap(angle01, angle02);
172 std::swap(bond1, bond2);
173 std::swap(atomVect0, atomVect1);
174 swappedIt = true;
175 }
176
177 double firstAngle, secondAngle;
178 // We proceed differently for 3 and 4 coordinate atoms:
179 double angle12;
180 if (nNbrs == 4) {
181 bool flipIt = false;
182 // grab the angle to the last neighbor:
183 ++bondIter;
184 auto bond3 = mol.getBondWithIdx(*bondIter);
185 oaid = bond3->getOtherAtom(atom)->getIdx();
186 tmpPt = conf->getAtomPos(oaid);
187 tmpPt.z = 0;
188 auto atomVect2 = centerLoc.directionVector(tmpPt);
189 angle12 = refVect.signedAngleTo(atomVect2);
190
191 // find the lowest and second-lowest angle and keep track of
192 // whether or not we have to do a non-cyclic permutation to
193 // get there:
194 if (angle01 < angle02) {
195 if (angle02 < angle12) {
196 // order is angle01 -> angle02 -> angle12
197 firstAngle = angle01;
198 secondAngle = angle02;
199 } else if (angle01 < angle12) {
200 // order is angle01 -> angle12 -> angle02
201 firstAngle = angle01;
202 secondAngle = angle12;
203 flipIt = true;
204 } else {
205 // order is angle12 -> angle01 -> angle02
206 firstAngle = angle12;
207 secondAngle = angle01;
208 }
209 } else if (angle01 < angle12) {
210 // order is angle02 -> angle01 -> angle12
211 firstAngle = angle02;
212 secondAngle = angle01;
213 flipIt = true;
214 } else {
215 if (angle02 < angle12) {
216 // order is angle02 -> angle12 -> angle01
217 firstAngle = angle02;
218 secondAngle = angle12;
219 } else {
220 // order is angle12 -> angle02 -> angle01
221 firstAngle = angle12;
222 secondAngle = angle02;
223 flipIt = true;
224 }
225 }
226 if (flipIt) {
227 isCCW = !isCCW;
228 }
229 } else {
230 // it's three coordinate. Things are a bit different here
231 // because we have to at least kind of figure out where the
232 // hydrogen might be.
233
234 // before getting started with that, use some of the inchi rules
235 // for contradictory stereochemistry
236 // (Table 10 in the InChi v1 technical manual)
237
238 angle12 = atomVect0.signedAngleTo(atomVect1);
239 double angle20 = atomVect1.signedAngleTo(refVect);
240
241 // to simplify the code below, pick out the directions of the bonds
242 // if and only if they start at our atom:
243 auto dir0 = bondDir;
244 auto dir1 = (bond1->getBeginAtomIdx() == bond->getBeginAtomIdx())
245 ? bond1->getBondDir()
246 : Bond::NONE;
247 auto dir2 = (bond2->getBeginAtomIdx() == bond->getBeginAtomIdx())
248 ? bond2->getBondDir()
249 : Bond::NONE;
250
251 // we know bond 0 has the direction set
252
253 // this one is never allowed with different directions:
254 // 0 2
255 // \ /
256 // C
257 // *
258 // 1
259 if (angle01 < (M_PI - 1e-3) && angle12 < (M_PI - 1e-3) &&
260 angle20 < (M_PI - 1e-3)) {
261 if ((dir1 != Bond::NONE && dir1 != dir0) ||
262 (dir2 != Bond::NONE && dir2 != dir0)) {
263 BOOST_LOG(rdWarningLog)
264 << "Warning: conflicting stereochemistry at atom "
265 << bond->getBeginAtomIdx() << " ignored."
266 << " by rule 1a." << std::endl;
267 return Atom::CHI_UNSPECIFIED;
268 }
269 } else {
270 // otherwise they cannot all be the same
271 if (dir1 == dir0 && dir1 == dir2) {
272 BOOST_LOG(rdWarningLog)
273 << "Warning: conflicting stereochemistry at atom "
274 << bond->getBeginAtomIdx() << " ignored."
275 << " by rule 1b." << std::endl;
276 return Atom::CHI_UNSPECIFIED;
277 }
278
279 // the remaining cases where stereo is allowed are:
280 // 0 1 0 2
281 // * \*/
282 // 1 - C - 2 C for these two bond1 and bond2 must match
283 // and
284 // 1 2 1 0
285 // * \*/
286 // 2 - C - 0 C for these two bond0 and bond2 must match
287 // and
288 // 2 0 2 1
289 // * \*/
290 // 0 - C - 1 C for these two bond0 and bond1 must match
291 //
292
293 if (dir1 != Bond::NONE && dir1 != dir0) {
294 if ((angle01 >= M_PI) || // last two examples
295 (angle02 > M_PI && dir2 != Bond::NONE &&
296 dir1 != dir2) || // top two examples
297 (angle02 <= M_PI && dir2 != Bond::NONE &&
298 dir0 != dir2)) { // middle two examples
299 BOOST_LOG(rdWarningLog)
300 << "Warning: conflicting stereochemistry at atom "
301 << bond->getBeginAtomIdx() << " ignored."
302 << " by rule 2a." << std::endl;
303 return Atom::CHI_UNSPECIFIED;
304 }
305 } else if (dir0 == dir2) {
306 // all bonds are the same and we already removed the "all
307 // angles less than 180" case above
308 BOOST_LOG(rdWarningLog)
309 << "Warning: conflicting stereochemistry at atom "
310 << bond->getBeginAtomIdx() << " ignored."
311 << " by rule 2b." << std::endl;
312 return Atom::CHI_UNSPECIFIED;
313 }
314
315 if (angle01 < angle02) {
316 firstAngle = angle01;
317 secondAngle = angle02;
318 isCCW = true;
319 } else {
320 firstAngle = angle02;
321 secondAngle = angle01;
322 isCCW = false;
323 }
324 if (secondAngle - firstAngle >= (M_PI - 1e-4)) {
325 // it's a situation like one of these:
326 //
327 // 0 1 0 2
328 // * \*/
329 // 1 - C - 2 C
330 //
331 // In each of these cases, the implicit H is between atoms 1
332 // and 2, so we need to flip the rotation direction (go
333 // around the back).
334 isCCW = !isCCW;
335 }
336 }
337 }
338 // reverse the rotation direction if the reference is wedged down:
339 if (bondDir == Bond::BEGINDASH) {
340 isCCW = !isCCW;
341 }
342 if (swappedIt) {
343 // we swapped the order of the bonds to simplify the analysis above:
344 isCCW = !isCCW;
345 }
346
347 // ----------------
348 //
349 // We now have the rotation direction using mol-file order.
350 // We need to convert that into the appropriate label for the
351 // central atom
352 //
353 // ----------------
354 int nSwaps = atom->getPerturbationOrder(neighborBondIndices);
355 if (nSwaps % 2) {
356 isCCW = !isCCW;
357 }
358 if (isCCW) {
359 res = Atom::CHI_TETRAHEDRAL_CCW;
360 } else {
361 res = Atom::CHI_TETRAHEDRAL_CW;
362 }
363 }
364
365 return res;
366 } // namespace RDKit
367
getOppositeBondDir(Bond::BondDir dir)368 Bond::BondDir getOppositeBondDir(Bond::BondDir dir) {
369 PRECONDITION(dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT,
370 "bad bond direction");
371 switch (dir) {
372 case Bond::ENDDOWNRIGHT:
373 return Bond::ENDUPRIGHT;
374 case Bond::ENDUPRIGHT:
375 return Bond::ENDDOWNRIGHT;
376 default:
377 return Bond::NONE;
378 }
379 }
380
setBondDirRelativeToAtom(Bond * bond,Atom * atom,Bond::BondDir dir,bool reverse,boost::dynamic_bitset<> & needsDir)381 void setBondDirRelativeToAtom(Bond *bond, Atom *atom, Bond::BondDir dir,
382 bool reverse, boost::dynamic_bitset<> &needsDir) {
383 PRECONDITION(bond, "bad bond");
384 PRECONDITION(atom, "bad atom");
385 PRECONDITION(dir == Bond::ENDUPRIGHT || dir == Bond::ENDDOWNRIGHT, "bad dir");
386 PRECONDITION(atom == bond->getBeginAtom() || atom == bond->getEndAtom(),
387 "atom doesn't belong to bond");
388 RDUNUSED_PARAM(needsDir);
389
390 if (bond->getBeginAtom() != atom) {
391 reverse = !reverse;
392 }
393
394 if (reverse) {
395 dir = (dir == Bond::ENDUPRIGHT ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT);
396 }
397 // to ensure maximum compatibility, even when a bond has unknown stereo (set
398 // explicitly and recorded in _UnknownStereo property), I will still let a
399 // direction to be computed. You must check the _UnknownStereo property to
400 // make sure whether this bond is explicitly set to have no direction info.
401 // This makes sense because the direction info are all derived from
402 // coordinates, the _UnknownStereo property is like extra metadata to be
403 // used with the direction info.
404 bond->setBondDir(dir);
405 }
406
isLinearArrangement(const RDGeom::Point3D & v1,const RDGeom::Point3D & v2)407 bool isLinearArrangement(const RDGeom::Point3D &v1, const RDGeom::Point3D &v2) {
408 double lsq = v1.lengthSq() * v2.lengthSq();
409
410 // treat zero length vectors as linear
411 if (lsq < 1.0e-6) return true;
412
413 double dotProd = v1.dotProduct(v2);
414
415 double cos178 =
416 -0.999388; // == cos(M_PI-0.035), corresponds to a tolerance of 2 degrees
417 return dotProd < cos178 * sqrt(lsq);
418 }
419
updateDoubleBondNeighbors(ROMol & mol,Bond * dblBond,const Conformer * conf,boost::dynamic_bitset<> & needsDir,std::vector<unsigned int> & singleBondCounts,const VECT_INT_VECT & singleBondNbrs)420 void updateDoubleBondNeighbors(ROMol &mol, Bond *dblBond, const Conformer *conf,
421 boost::dynamic_bitset<> &needsDir,
422 std::vector<unsigned int> &singleBondCounts,
423 const VECT_INT_VECT &singleBondNbrs) {
424 // we want to deal only with double bonds:
425 PRECONDITION(dblBond, "bad bond");
426 PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "not a double bond");
427 if (!needsDir[dblBond->getIdx()]) {
428 return;
429 }
430 needsDir.set(dblBond->getIdx(), 0);
431 #if 0
432 std::cerr << "**********************\n";
433 std::cerr << "**********************\n";
434 std::cerr << "**********************\n";
435 std::cerr << "UDBN: " << dblBond->getIdx() << " "
436 << dblBond->getBeginAtomIdx() << "=" << dblBond->getEndAtomIdx()
437 << "\n";
438 #endif
439
440 ROMol::OEDGE_ITER beg, end;
441 std::vector<Bond *> followupBonds;
442
443 Bond *bond1 = nullptr, *obond1 = nullptr;
444 bool squiggleBondSeen = false;
445 bool doubleBondSeen = false;
446 boost::tie(beg, end) = mol.getAtomBonds(dblBond->getBeginAtom());
447 while (beg != end) {
448 Bond *tBond = mol[*beg];
449 if (tBond == dblBond) {
450 ++beg;
451 continue;
452 }
453 if (tBond->getBondType() == Bond::SINGLE ||
454 tBond->getBondType() == Bond::AROMATIC) {
455 // prefer bonds that already have their directionality set
456 // or that are adjacent to more double bonds:
457 if (!bond1) {
458 bond1 = tBond;
459 } else if (needsDir[tBond->getIdx()]) {
460 if (singleBondCounts[tBond->getIdx()] >
461 singleBondCounts[bond1->getIdx()]) {
462 obond1 = bond1;
463 bond1 = tBond;
464 } else {
465 obond1 = tBond;
466 }
467 } else {
468 obond1 = bond1;
469 bond1 = tBond;
470 }
471 } else if (tBond->getBondType() == Bond::DOUBLE) {
472 doubleBondSeen = true;
473 }
474 int explicit_unknown_stereo;
475 if ((tBond->getBondType() == Bond::SINGLE ||
476 tBond->getBondType() == Bond::AROMATIC) &&
477 (tBond->getBondDir() == Bond::UNKNOWN ||
478 ((tBond->getPropIfPresent<int>(common_properties::_UnknownStereo,
479 explicit_unknown_stereo) &&
480 explicit_unknown_stereo)))) {
481 squiggleBondSeen = true;
482 break;
483 }
484
485 ++beg;
486 }
487 // Don't do any direction setting if we've seen a squiggle bond, but do mark
488 // the double bond as a crossed bond and return
489 if (!bond1 || squiggleBondSeen || doubleBondSeen) {
490 if (!doubleBondSeen) {
491 // FIX: This is the fix for #2649, but it will need to be modified once we
492 // decide to properly handle allenese
493 dblBond->setBondDir(Bond::EITHERDOUBLE);
494 }
495 return;
496 }
497
498 Bond *bond2 = nullptr, *obond2 = nullptr;
499 boost::tie(beg, end) = mol.getAtomBonds(dblBond->getEndAtom());
500 while (beg != end) {
501 Bond *tBond = mol[*beg];
502 if (tBond == dblBond) {
503 ++beg;
504 continue;
505 }
506 if (tBond->getBondType() == Bond::SINGLE ||
507 tBond->getBondType() == Bond::AROMATIC) {
508 if (!bond2) {
509 bond2 = tBond;
510 } else if (needsDir[tBond->getIdx()]) {
511 if (singleBondCounts[tBond->getIdx()] >
512 singleBondCounts[bond2->getIdx()]) {
513 obond2 = bond2;
514 bond2 = tBond;
515 } else {
516 obond2 = tBond;
517 }
518 } else {
519 // we already had a bond2 and we don't need to set the direction
520 // on the new one, so swap.
521 obond2 = bond2;
522 bond2 = tBond;
523 }
524 } else if (tBond->getBondType() == Bond::DOUBLE) {
525 doubleBondSeen = true;
526 }
527 int explicit_unknown_stereo;
528 if (tBond->getBondType() == Bond::SINGLE &&
529 (tBond->getBondDir() == Bond::UNKNOWN ||
530 ((tBond->getPropIfPresent<int>(common_properties::_UnknownStereo,
531 explicit_unknown_stereo) &&
532 explicit_unknown_stereo)))) {
533 squiggleBondSeen = true;
534 break;
535 }
536 ++beg;
537 }
538 // Don't do any direction setting if we've seen a squiggle bond, but do mark
539 // the double bond as a crossed bond and return
540 if (!bond2 || squiggleBondSeen || doubleBondSeen) {
541 if (!doubleBondSeen) {
542 // FIX: This is the fix for #2649, but it will need to be modified once we
543 // decide to properly handle allenese
544 dblBond->setBondDir(Bond::EITHERDOUBLE);
545 }
546 return;
547 }
548
549 CHECK_INVARIANT(bond1 && bond2, "no bonds found");
550
551 bool sameTorsionDir = false;
552 if (conf) {
553 RDGeom::Point3D beginP = conf->getAtomPos(dblBond->getBeginAtomIdx());
554 RDGeom::Point3D endP = conf->getAtomPos(dblBond->getEndAtomIdx());
555 RDGeom::Point3D bond1P =
556 conf->getAtomPos(bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx()));
557 RDGeom::Point3D bond2P =
558 conf->getAtomPos(bond2->getOtherAtomIdx(dblBond->getEndAtomIdx()));
559 // check for a linear arrangement of atoms on either end:
560 bool linear = false;
561 RDGeom::Point3D p1;
562 RDGeom::Point3D p2;
563 p1 = bond1P - beginP;
564 p2 = endP - beginP;
565 if (isLinearArrangement(p1, p2)) {
566 if (!obond1) {
567 linear = true;
568 } else {
569 // one of the bonds was linear; what about the other one?
570 Bond *tBond = bond1;
571 bond1 = obond1;
572 obond1 = tBond;
573 bond1P = conf->getAtomPos(
574 bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx()));
575 p1 = bond1P - beginP;
576 if (isLinearArrangement(p1, p2)) {
577 linear = true;
578 }
579 }
580 }
581 if (!linear) {
582 p1 = bond2P - endP;
583 p2 = beginP - endP;
584 if (isLinearArrangement(p1, p2)) {
585 if (!obond2) {
586 linear = true;
587 } else {
588 Bond *tBond = bond2;
589 bond2 = obond2;
590 obond2 = tBond;
591 bond2P = conf->getAtomPos(
592 bond2->getOtherAtomIdx(dblBond->getEndAtomIdx()));
593 p1 = bond2P - beginP;
594 if (isLinearArrangement(p1, p2)) {
595 linear = true;
596 }
597 }
598 }
599 }
600 if (linear) {
601 dblBond->setBondDir(Bond::EITHERDOUBLE);
602 return;
603 }
604
605 double ang = RDGeom::computeDihedralAngle(bond1P, beginP, endP, bond2P);
606 if (ang < M_PI / 2) {
607 sameTorsionDir = false;
608 } else {
609 sameTorsionDir = true;
610 }
611 // std::cerr << " angle: " << ang << " sameTorsionDir: " << sameTorsionDir
612 // << "\n";
613 } else {
614 if (dblBond->getStereo() == Bond::STEREOCIS ||
615 dblBond->getStereo() == Bond::STEREOZ) {
616 sameTorsionDir = false;
617 } else if (dblBond->getStereo() == Bond::STEREOTRANS ||
618 dblBond->getStereo() == Bond::STEREOE) {
619 sameTorsionDir = true;
620 } else {
621 return;
622 }
623 // if bond1 or bond2 are not to the stereo-controlling atoms, flip
624 // our expections of the torsion dir
625 int bond1AtomIdx = bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx());
626 if (bond1AtomIdx != dblBond->getStereoAtoms()[0] &&
627 bond1AtomIdx != dblBond->getStereoAtoms()[1]) {
628 sameTorsionDir = !sameTorsionDir;
629 }
630 int bond2AtomIdx = bond2->getOtherAtomIdx(dblBond->getEndAtomIdx());
631 if (bond2AtomIdx != dblBond->getStereoAtoms()[0] &&
632 bond2AtomIdx != dblBond->getStereoAtoms()[1]) {
633 sameTorsionDir = !sameTorsionDir;
634 }
635 }
636
637 /*
638 Time for some clarificatory text, because this gets really
639 confusing really fast.
640
641 The dihedral angle analysis above is based on viewing things
642 with an atom order as follows:
643
644 1
645 \
646 2 = 3
647 \
648 4
649
650 so dihedrals > 90 correspond to sameDir=true
651
652 however, the stereochemistry representation is
653 based on something more like this:
654
655 2
656 \
657 1 = 3
658 \
659 4
660 (i.e. we consider the direction-setting single bonds to be
661 starting at the double-bonded atom)
662
663 */
664 bool reverseBondDir = sameTorsionDir;
665
666 Atom *atom1 = dblBond->getBeginAtom(), *atom2 = dblBond->getEndAtom();
667 if (needsDir[bond1->getIdx()]) {
668 for (auto bidx : singleBondNbrs[bond1->getIdx()]) {
669 // std::cerr << " neighbor from: " << bond1->getIdx() << " " << bidx
670 // << ": " << needsDir[bidx] << std::endl;
671 if (needsDir[bidx]) {
672 followupBonds.push_back(mol.getBondWithIdx(bidx));
673 }
674 }
675 }
676 if (needsDir[bond2->getIdx()]) {
677 for (auto bidx : singleBondNbrs[bond2->getIdx()]) {
678 // std::cerr << " neighbor from: " << bond2->getIdx() << " " << bidx
679 // << ": " << needsDir[bidx] << std::endl;
680 if (needsDir[bidx]) {
681 followupBonds.push_back(mol.getBondWithIdx(bidx));
682 }
683 }
684 }
685 if (!needsDir[bond1->getIdx()]) {
686 if (!needsDir[bond2->getIdx()]) {
687 // check that we agree
688 } else {
689 if (bond1->getBeginAtom() != atom1) {
690 reverseBondDir = !reverseBondDir;
691 }
692 setBondDirRelativeToAtom(bond2, atom2, bond1->getBondDir(),
693 reverseBondDir, needsDir);
694 }
695 } else if (!needsDir[bond2->getIdx()]) {
696 if (bond2->getBeginAtom() != atom2) {
697 reverseBondDir = !reverseBondDir;
698 }
699 setBondDirRelativeToAtom(bond1, atom1, bond2->getBondDir(), reverseBondDir,
700 needsDir);
701 } else {
702 setBondDirRelativeToAtom(bond1, atom1, Bond::ENDDOWNRIGHT, false, needsDir);
703 setBondDirRelativeToAtom(bond2, atom2, Bond::ENDDOWNRIGHT, reverseBondDir,
704 needsDir);
705 }
706 needsDir[bond1->getIdx()] = 0;
707 needsDir[bond2->getIdx()] = 0;
708 if (obond1 && needsDir[obond1->getIdx()]) {
709 setBondDirRelativeToAtom(obond1, atom1, bond1->getBondDir(),
710 bond1->getBeginAtom() == atom1, needsDir);
711 needsDir[obond1->getIdx()] = 0;
712 }
713 if (obond2 && needsDir[obond2->getIdx()]) {
714 setBondDirRelativeToAtom(obond2, atom2, bond2->getBondDir(),
715 bond2->getBeginAtom() == atom2, needsDir);
716 needsDir[obond2->getIdx()] = 0;
717 }
718 #if 0
719 std::cerr << " 1:" << bond1->getIdx() << " ";
720 if (obond1)
721 std::cerr << obond1->getIdx() << std::endl;
722 else
723 std::cerr << "N/A" << std::endl;
724 std::cerr << " 2:" << bond2->getIdx() << " ";
725 if (obond2)
726 std::cerr << obond2->getIdx() << std::endl;
727 else
728 std::cerr << "N/A" << std::endl;
729 std::cerr << "**********************\n";
730 std::cerr << "**********************\n";
731 std::cerr << "**********************\n";
732 #endif
733 for (Bond *oDblBond : followupBonds) {
734 // std::cerr << "FOLLOWUP: " << oDblBond->getIdx() << " "
735 // << needsDir[oDblBond->getIdx()] << std::endl;
736 updateDoubleBondNeighbors(mol, oDblBond, conf, needsDir, singleBondCounts,
737 singleBondNbrs);
738 }
739 }
740
isBondCandidateForStereo(const Bond * bond)741 bool isBondCandidateForStereo(const Bond *bond) {
742 PRECONDITION(bond, "no bond");
743 if (bond->getBondType() == Bond::DOUBLE &&
744 bond->getStereo() != Bond::STEREOANY &&
745 bond->getBondDir() != Bond::EITHERDOUBLE &&
746 bond->getBeginAtom()->getDegree() > 1 &&
747 bond->getEndAtom()->getDegree() > 1 &&
748 shouldDetectDoubleBondStereo(bond)) {
749 return true;
750 }
751 return false;
752 }
753
findHighestCIPNeighbor(const Atom * atom,const Atom * skipAtom)754 const Atom *findHighestCIPNeighbor(const Atom *atom, const Atom *skipAtom) {
755 PRECONDITION(atom, "bad atom");
756
757 unsigned bestCipRank = 0;
758 const Atom *bestCipRankedAtom = nullptr;
759 const auto &mol = atom->getOwningMol();
760
761 for (const auto &index :
762 boost::make_iterator_range(mol.getAtomNeighbors(atom))) {
763 const auto neighbor = mol[index];
764 if (neighbor == skipAtom) {
765 continue;
766 }
767 unsigned cip = 0;
768 if (!neighbor->getPropIfPresent(common_properties::_CIPRank, cip)) {
769 // If at least one of the atoms doesn't have a CIP rank, the highest rank
770 // does not make sense, so return a nullptr.
771 return nullptr;
772 } else if (cip > bestCipRank || bestCipRankedAtom == nullptr) {
773 bestCipRank = cip;
774 bestCipRankedAtom = neighbor;
775 } else if (cip == bestCipRank) {
776 // This also doesn't make sense if there is a tie (if that's possible).
777 // We still keep the best CIP rank in case something better comes around
778 // (also not sure if that's possible).
779 BOOST_LOG(rdWarningLog)
780 << "Warning: duplicate CIP ranks found in findHighestCIPNeighbor()"
781 << std::endl;
782 bestCipRankedAtom = nullptr;
783 }
784 }
785 return bestCipRankedAtom;
786 }
787
788 } // namespace
789
790 namespace Chirality {
791 typedef std::pair<int, int> INT_PAIR;
792 typedef std::vector<INT_PAIR> INT_PAIR_VECT;
793 typedef std::vector<INT_PAIR>::iterator INT_PAIR_VECT_I;
794 typedef std::vector<INT_PAIR>::const_iterator INT_PAIR_VECT_CI;
795
796 typedef INT_VECT CIP_ENTRY;
797 typedef std::vector<CIP_ENTRY> CIP_ENTRY_VECT;
798
799 template <typename T>
debugVect(const std::vector<T> arg)800 void debugVect(const std::vector<T> arg) {
801 typename std::vector<T>::const_iterator viIt;
802 std::stringstream outS;
803 for (viIt = arg.begin(); viIt != arg.end(); viIt++) {
804 outS << *viIt << " ";
805 }
806 BOOST_LOG(rdDebugLog) << outS.str() << std::endl;
807 }
808
809 // --------------------------------------------------
810 //
811 // Calculates chiral invariants for the atoms of a molecule
812 // These are based on Labute's proposal in:
813 // "An Efficient Algorithm for the Determination of Topological
814 // RS Chirality" Journal of the CCG (1996)
815 //
816 // --------------------------------------------------
buildCIPInvariants(const ROMol & mol,DOUBLE_VECT & res)817 void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) {
818 PRECONDITION(res.size() >= mol.getNumAtoms(), "res vect too small");
819 int atsSoFar = 0;
820 //
821 // NOTE:
822 // If you make modifications to this, keep in mind that it is
823 // essential that the initial comparison of ranks behave properly.
824 // So, though it seems like it would makes sense to include
825 // information about the number of Hs (or charge, etc) in the CIP
826 // invariants, this will result in bad rankings. For example, in
827 // this molecule: OC[C@H](C)O, including the number of Hs would
828 // cause the methyl group (atom 3) to be ranked higher than the CH2
829 // connected to O (atom 1). This is totally wrong.
830 //
831 // We also don't include any pre-existing stereochemistry information.
832 // Though R and S assignments do factor in to the priorities of atoms,
833 // we're starting here from scratch and we'll let the R and S stuff
834 // be taken into account during the iterations.
835 //
836 for (ROMol::ConstAtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
837 ++atIt) {
838 const unsigned short nMassBits = 10;
839 const unsigned short maxMass = 1 << nMassBits;
840 Atom const *atom = *atIt;
841 unsigned long invariant = 0;
842 int num = atom->getAtomicNum() % 128;
843 // get an int with the deviation in the mass from the default:
844 int mass = 0;
845 if (atom->getIsotope()) {
846 mass =
847 atom->getIsotope() -
848 PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum());
849 if (mass >= 0) {
850 mass += 1;
851 }
852 }
853 mass += maxMass / 2;
854 if (mass < 0) {
855 mass = 0;
856 } else {
857 mass = mass % maxMass;
858 }
859
860 #if 0
861 // NOTE: the inclusion of hybridization in the invariant (as
862 // suggested in the original paper), leads to the situation
863 // that
864 // C[C@@](O)(C=C)C(C)CC
865 // and
866 // C[C@@](O)(C=C)C(C)CO
867 // are assigned S chirality even though the rest of the world
868 // seems to agree that they ought to be R (atom 3, sp2, is ranked
869 // higher than atom 5, sp3, no matter what their environments)
870 int hyb=0;
871 switch(atom->getHybridization()) {
872 case Atom::SP: hyb=6;break;
873 case Atom::SP2: hyb=5;break;
874 case Atom::SP3: hyb=1;break;
875 case Atom::SP3D: hyb=3;break;
876 case Atom::SP3D2: hyb=2;break;
877 default: break;
878 }
879 #endif
880
881 invariant = num; // 7 bits here
882 invariant = (invariant << nMassBits) | mass;
883
884 int mapnum = -1;
885 atom->getPropIfPresent(common_properties::molAtomMapNumber, mapnum);
886 mapnum = (mapnum + 1) % 1024; // increment to allow map numbers of zero
887 // (though that would be stupid)
888 invariant = (invariant << 10) | mapnum;
889
890 res[atsSoFar++] = invariant;
891 }
892 }
893
iterateCIPRanks(const ROMol & mol,const DOUBLE_VECT & invars,UINT_VECT & ranks,bool seedWithInvars)894 void iterateCIPRanks(const ROMol &mol, const DOUBLE_VECT &invars,
895 UINT_VECT &ranks, bool seedWithInvars) {
896 PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size");
897 PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size");
898
899 unsigned int numAtoms = mol.getNumAtoms();
900 CIP_ENTRY_VECT cipEntries(numAtoms);
901 for (auto &vec : cipEntries) {
902 vec.reserve(16);
903 }
904 #ifdef VERBOSE_CANON
905 BOOST_LOG(rdDebugLog) << "invariants:" << std::endl;
906 for (unsigned int i = 0; i < numAtoms; i++) {
907 BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl;
908 }
909 #endif
910
911 // rank those:
912 Rankers::rankVect(invars, ranks);
913 #ifdef VERBOSE_CANON
914 BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl;
915 for (unsigned int i = 0; i < numAtoms; ++i) {
916 BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl;
917 }
918 #endif
919 // Start each atom's rank vector with its atomic number:
920 // Note: in general one should avoid the temptation to
921 // use invariants here, those lead to incorrect answers
922 for (unsigned int i = 0; i < numAtoms; i++) {
923 if (seedWithInvars) {
924 cipEntries[i].push_back(static_cast<int>(invars[i]));
925 } else {
926 cipEntries[i].push_back(mol[i]->getAtomicNum());
927 cipEntries[i].push_back(static_cast<int>(ranks[i]));
928 }
929 }
930
931 // Loop until either:
932 // 1) all classes are uniquified
933 // 2) the number of ranks doesn't change from one iteration to
934 // the next
935 // 3) we've gone through maxIts times
936 // maxIts is calculated by dividing the number of atoms
937 // by 2. That's a pessimal version of the
938 // maximum number of steps required for two atoms to
939 // "feel" each other (each influences one additional
940 // neighbor shell per iteration).
941 unsigned int maxIts = numAtoms / 2 + 1;
942 unsigned int numIts = 0;
943 int lastNumRanks = -1;
944 unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
945 std::vector<unsigned int> counts(ranks.size());
946 std::vector<unsigned int> updatedNbrIdxs;
947 updatedNbrIdxs.reserve(8);
948 while (numRanks < numAtoms && numIts < maxIts &&
949 (lastNumRanks < 0 ||
950 static_cast<unsigned int>(lastNumRanks) < numRanks)) {
951 unsigned int longestEntry = 0;
952 // ----------------------------------------------------
953 //
954 // for each atom, get a sorted list of its neighbors' ranks:
955 //
956 for (unsigned int index = 0; index < numAtoms; ++index) {
957 // Note: counts is cleaned up when we drain into cipEntries.
958 updatedNbrIdxs.clear();
959
960 // start by pushing on our neighbors' ranks:
961 ROMol::OEDGE_ITER beg, end;
962 boost::tie(beg, end) = mol.getAtomBonds(mol[index]);
963 while (beg != end) {
964 const Bond *bond = mol[*beg];
965 ++beg;
966 unsigned int nbrIdx = bond->getOtherAtomIdx(index);
967 updatedNbrIdxs.push_back(nbrIdx);
968
969 // put the neighbor in 2N times where N is the bond order as a double.
970 // this is to treat aromatic linkages on fair footing. i.e. at least in
971 // the first iteration --c(:c):c and --C(=C)-C should look the same.
972 // this was part of issue 3009911
973
974 // a special case for chiral phosphorus compounds
975 // (this was leading to incorrect assignment of R/S labels ):
976 bool isChiralPhosphorusSpecialCase = false;
977 if (bond->getBondType() == Bond::DOUBLE) {
978 const Atom *nbr = mol[nbrIdx];
979 if (nbr->getAtomicNum() == 15) {
980 unsigned int nbrDeg = nbr->getDegree();
981 isChiralPhosphorusSpecialCase = nbrDeg == 3 || nbrDeg == 4;
982 }
983 };
984
985 // general justification of this is:
986 // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
987 // Multiple-Bond Unsaturation and Aromaticity". It contains several
988 // conventions of which convention (b) is the one applying here:
989 // "(b) Contributions by d orbitals to bonds of quadriligant atoms are
990 // neglected."
991 // FIX: this applies to more than just P
992 if (isChiralPhosphorusSpecialCase) {
993 counts[nbrIdx] += 1;
994 } else {
995 counts[nbrIdx] += getTwiceBondType(*bond);
996 }
997 }
998
999 // For each of our neighbors' ranks weighted by bond type, copy it N times
1000 // to our cipEntry in reverse rank order, where N is the weight.
1001 if (updatedNbrIdxs.size() > 1) { // compare vs 1 for performance.
1002 std::sort(std::begin(updatedNbrIdxs), std::end(updatedNbrIdxs),
1003 [&ranks](unsigned int idx1, unsigned int idx2) {
1004 return ranks[idx1] > ranks[idx2];
1005 });
1006 }
1007 auto &cipEntry = cipEntries[index];
1008 for (auto nbrIdx : updatedNbrIdxs) {
1009 unsigned int count = counts[nbrIdx];
1010 cipEntry.insert(cipEntry.end(), count, ranks[nbrIdx] + 1);
1011 counts[nbrIdx] = 0;
1012 }
1013 // add a zero for each coordinated H as long as we're not a query atom
1014 if (!mol[index]->hasQuery()) {
1015 cipEntry.insert(cipEntry.end(), mol[index]->getTotalNumHs(), 0);
1016 }
1017
1018 if (cipEntry.size() > longestEntry) {
1019 longestEntry = rdcast<unsigned int>(cipEntry.size());
1020 }
1021 }
1022 // ----------------------------------------------------
1023 //
1024 // pad the entries so that we compare rounds to themselves:
1025 //
1026 for (unsigned int index = 0; index < numAtoms; ++index) {
1027 auto sz = rdcast<unsigned int>(cipEntries[index].size());
1028 if (sz < longestEntry) {
1029 cipEntries[index].insert(cipEntries[index].end(), longestEntry - sz,
1030 -1);
1031 }
1032 }
1033 // ----------------------------------------------------
1034 //
1035 // sort the new ranks and update the list of active indices:
1036 //
1037 lastNumRanks = numRanks;
1038
1039 Rankers::rankVect(cipEntries, ranks);
1040 numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
1041
1042 // now truncate each vector and stick the rank at the end
1043 for (unsigned int i = 0; i < numAtoms; ++i) {
1044 cipEntries[i][numIts + 1] = ranks[i];
1045 cipEntries[i].erase(cipEntries[i].begin() + numIts + 2,
1046 cipEntries[i].end());
1047 }
1048
1049 ++numIts;
1050 #ifdef VERBOSE_CANON
1051 BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl;
1052 for (unsigned int i = 0; i < numAtoms; i++) {
1053 BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > ";
1054 debugVect(cipEntries[i]);
1055 }
1056 #endif
1057 }
1058 }
1059 // Figure out the CIP ranks for the atoms of a molecule
assignAtomCIPRanks(const ROMol & mol,UINT_VECT & ranks)1060 void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) {
1061 PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()),
1062 "bad ranks size");
1063 if (!ranks.size()) {
1064 ranks.resize(mol.getNumAtoms());
1065 }
1066 unsigned int numAtoms = mol.getNumAtoms();
1067 #ifndef USE_NEW_STEREOCHEMISTRY
1068 // get the initial invariants:
1069 DOUBLE_VECT invars(numAtoms, 0);
1070 buildCIPInvariants(mol, invars);
1071 iterateCIPRanks(mol, invars, ranks, false);
1072 #else
1073 Canon::chiralRankMolAtoms(mol, ranks);
1074 #endif
1075
1076 // copy the ranks onto the atoms:
1077 for (unsigned int i = 0; i < numAtoms; ++i) {
1078 mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1);
1079 }
1080 }
1081
1082 // construct a vector with <atomIdx,direction> pairs for
1083 // neighbors of a given atom. This list will only be
1084 // non-empty if at least one of the bonds has its direction
1085 // set.
findAtomNeighborDirHelper(const ROMol & mol,const Atom * atom,const Bond * refBond,UINT_VECT & ranks,INT_PAIR_VECT & neighbors,bool & hasExplicitUnknownStereo)1086 void findAtomNeighborDirHelper(const ROMol &mol, const Atom *atom,
1087 const Bond *refBond, UINT_VECT &ranks,
1088 INT_PAIR_VECT &neighbors,
1089 bool &hasExplicitUnknownStereo) {
1090 PRECONDITION(atom, "bad atom");
1091 PRECONDITION(refBond, "bad bond");
1092
1093 bool seenDir = false;
1094 ROMol::OEDGE_ITER beg, end;
1095 boost::tie(beg, end) = mol.getAtomBonds(atom);
1096 while (beg != end) {
1097 const Bond *bond = mol[*beg];
1098 // check whether this bond is explicitly set to have unknown stereo
1099 if (!hasExplicitUnknownStereo) {
1100 int explicit_unknown_stereo;
1101 if (bond->getBondDir() == Bond::UNKNOWN // there's a squiggle bond
1102 || (bond->getPropIfPresent<int>(common_properties::_UnknownStereo,
1103 explicit_unknown_stereo) &&
1104 explicit_unknown_stereo)) {
1105 hasExplicitUnknownStereo = true;
1106 }
1107 }
1108
1109 Bond::BondDir dir = bond->getBondDir();
1110 if (bond->getIdx() != refBond->getIdx()) {
1111 if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
1112 seenDir = true;
1113 // If we're considering the bond "backwards", (i.e. from end
1114 // to beginning, reverse the effective direction:
1115 if (atom != bond->getBeginAtom()) {
1116 if (dir == Bond::ENDDOWNRIGHT) {
1117 dir = Bond::ENDUPRIGHT;
1118 } else {
1119 dir = Bond::ENDDOWNRIGHT;
1120 }
1121 }
1122 }
1123 Atom *nbrAtom = bond->getOtherAtom(atom);
1124 neighbors.push_back(std::make_pair(nbrAtom->getIdx(), dir));
1125 }
1126 ++beg;
1127 }
1128 if (!seenDir) {
1129 neighbors.clear();
1130 } else {
1131 if (neighbors.size() == 2 &&
1132 ranks[neighbors[0].first] == ranks[neighbors[1].first]) {
1133 // the two substituents are identical, no stereochemistry here:
1134 neighbors.clear();
1135 } else {
1136 // it's possible that direction was set only one of the bonds, set the
1137 // other
1138 // bond's direction to be reversed:
1139 if (neighbors[0].second != Bond::ENDDOWNRIGHT &&
1140 neighbors[0].second != Bond::ENDUPRIGHT) {
1141 CHECK_INVARIANT(neighbors.size() > 1, "too few neighbors");
1142 neighbors[0].second = neighbors[1].second == Bond::ENDDOWNRIGHT
1143 ? Bond::ENDUPRIGHT
1144 : Bond::ENDDOWNRIGHT;
1145 } else if (neighbors.size() > 1 &&
1146 neighbors[1].second != Bond::ENDDOWNRIGHT &&
1147 neighbors[1].second != Bond::ENDUPRIGHT) {
1148 neighbors[1].second = neighbors[0].second == Bond::ENDDOWNRIGHT
1149 ? Bond::ENDUPRIGHT
1150 : Bond::ENDDOWNRIGHT;
1151 }
1152 }
1153 }
1154 }
1155
1156 // find the neighbors for an atoms that are not connected by single bond that is
1157 // not refBond
1158 // if checkDir is true only neighbor atoms with bonds marked with a direction
1159 // will be returned
findAtomNeighborsHelper(const ROMol & mol,const Atom * atom,const Bond * refBond,UINT_VECT & neighbors,bool checkDir=false,bool includeAromatic=false)1160 void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom,
1161 const Bond *refBond, UINT_VECT &neighbors,
1162 bool checkDir = false,
1163 bool includeAromatic = false) {
1164 PRECONDITION(atom, "bad atom");
1165 PRECONDITION(refBond, "bad bond");
1166 neighbors.clear();
1167 ROMol::OEDGE_ITER beg, end;
1168 boost::tie(beg, end) = mol.getAtomBonds(atom);
1169 while (beg != end) {
1170 const Bond *bond = mol[*beg];
1171 Bond::BondDir dir = bond->getBondDir();
1172 if ((bond->getBondType() == Bond::SINGLE ||
1173 (includeAromatic && bond->getBondType() == Bond::AROMATIC)) &&
1174 bond->getIdx() != refBond->getIdx()) {
1175 if (checkDir) {
1176 if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) {
1177 ++beg;
1178 continue;
1179 }
1180 }
1181 Atom *nbrAtom = bond->getOtherAtom(atom);
1182 neighbors.push_back(nbrAtom->getIdx());
1183 }
1184 ++beg;
1185 }
1186 }
1187
1188 // conditions for an atom to be a candidate for ring stereochem:
1189 // 1) two non-ring neighbors that have different ranks
1190 // 2) one non-ring neighbor and two ring neighbors (the ring neighbors will
1191 // have the same rank)
1192 // 3) four ring neighbors with three different ranks
1193 // 4) three ring neighbors with two different ranks
1194 // example for this last one: C[C@H]1CC2CCCC3CCCC(C1)[C@@H]23
1195 // Note that N atoms are only candidates if they are in a 3-ring
atomIsCandidateForRingStereochem(const ROMol & mol,const Atom * atom)1196 bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) {
1197 PRECONDITION(atom, "bad atom");
1198 bool res = false;
1199 std::set<unsigned int> nbrRanks;
1200 if (!atom->getPropIfPresent(common_properties::_ringStereochemCand, res)) {
1201 const RingInfo *ringInfo = mol.getRingInfo();
1202 if (ringInfo->isInitialized() && ringInfo->numAtomRings(atom->getIdx())) {
1203 // three-coordinate N additional requirements:
1204 // in a ring of size 3 (from InChI)
1205 // OR
1206 // a bridgehead (RDKit extension)
1207 if (atom->getAtomicNum() == 7 && atom->getDegree() == 3 &&
1208 !ringInfo->isAtomInRingOfSize(atom->getIdx(), 3) &&
1209 !queryIsAtomBridgehead(atom)) {
1210 return false;
1211 }
1212 ROMol::OEDGE_ITER beg, end;
1213 boost::tie(beg, end) = mol.getAtomBonds(atom);
1214 std::vector<const Atom *> nonRingNbrs;
1215 std::vector<const Atom *> ringNbrs;
1216 while (beg != end) {
1217 const Bond *bond = mol[*beg];
1218 if (!ringInfo->numBondRings(bond->getIdx())) {
1219 nonRingNbrs.push_back(bond->getOtherAtom(atom));
1220 } else {
1221 const Atom *nbr = bond->getOtherAtom(atom);
1222 ringNbrs.push_back(nbr);
1223 unsigned int rnk = 0;
1224 nbr->getPropIfPresent(common_properties::_CIPRank, rnk);
1225 nbrRanks.insert(rnk);
1226 }
1227 ++beg;
1228 }
1229 unsigned int rank1 = 0, rank2 = 0;
1230 switch (nonRingNbrs.size()) {
1231 case 2:
1232 if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRank,
1233 rank1) &&
1234 nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRank,
1235 rank2)) {
1236 if (rank1 == rank2) {
1237 res = false;
1238 } else {
1239 res = true;
1240 }
1241 }
1242 break;
1243 case 1:
1244 if (ringNbrs.size() >= 2) {
1245 res = true;
1246 }
1247 break;
1248 case 0:
1249 if (ringNbrs.size() == 4 && nbrRanks.size() == 3) {
1250 res = true;
1251 } else if (ringNbrs.size() == 3 && nbrRanks.size() == 2) {
1252 res = true;
1253 } else {
1254 res = false;
1255 }
1256 break;
1257 default:
1258 res = false;
1259 }
1260 }
1261 atom->setProp(common_properties::_ringStereochemCand, res, 1);
1262 }
1263 return res;
1264 }
1265
1266 // finds all possible chiral special cases.
1267 // at the moment this is just candidates for ring stereochemistry
findChiralAtomSpecialCases(ROMol & mol,boost::dynamic_bitset<> & possibleSpecialCases)1268 void findChiralAtomSpecialCases(ROMol &mol,
1269 boost::dynamic_bitset<> &possibleSpecialCases) {
1270 PRECONDITION(possibleSpecialCases.size() >= mol.getNumAtoms(),
1271 "bit vector too small");
1272 possibleSpecialCases.reset();
1273 if (!mol.getRingInfo()->isInitialized()) {
1274 VECT_INT_VECT sssrs;
1275 MolOps::symmetrizeSSSR(mol, sssrs);
1276 }
1277 boost::dynamic_bitset<> atomsSeen(mol.getNumAtoms());
1278 boost::dynamic_bitset<> atomsUsed(mol.getNumAtoms());
1279 boost::dynamic_bitset<> bondsSeen(mol.getNumBonds());
1280
1281 for (ROMol::AtomIterator ait = mol.beginAtoms(); ait != mol.endAtoms();
1282 ++ait) {
1283 const Atom *atom = *ait;
1284 if (atomsSeen[atom->getIdx()]) {
1285 continue;
1286 }
1287 if (atom->getChiralTag() == Atom::CHI_UNSPECIFIED ||
1288 atom->hasProp(common_properties::_CIPCode) ||
1289 !mol.getRingInfo()->numAtomRings(atom->getIdx()) ||
1290 !atomIsCandidateForRingStereochem(mol, atom)) {
1291 continue;
1292 }
1293 // do a BFS from this ring atom along ring bonds and find other
1294 // stereochemistry candidates.
1295 std::list<const Atom *> nextAtoms;
1296 // start with finding viable neighbors
1297 ROMol::OEDGE_ITER beg, end;
1298 boost::tie(beg, end) = mol.getAtomBonds(atom);
1299 while (beg != end) {
1300 unsigned int bidx = mol[*beg]->getIdx();
1301 if (!bondsSeen[bidx]) {
1302 bondsSeen.set(bidx);
1303 if (mol.getRingInfo()->numBondRings(bidx)) {
1304 const Atom *oatom = mol[*beg]->getOtherAtom(atom);
1305 if (!atomsSeen[oatom->getIdx()]) {
1306 nextAtoms.push_back(oatom);
1307 atomsUsed.set(oatom->getIdx());
1308 }
1309 }
1310 }
1311 ++beg;
1312 }
1313 INT_VECT ringStereoAtoms(0);
1314 if (!nextAtoms.empty()) {
1315 atom->getPropIfPresent(common_properties::_ringStereoAtoms,
1316 ringStereoAtoms);
1317 }
1318
1319 while (!nextAtoms.empty()) {
1320 const Atom *ratom = nextAtoms.front();
1321 nextAtoms.pop_front();
1322 atomsSeen.set(ratom->getIdx());
1323 if (ratom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1324 !ratom->hasProp(common_properties::_CIPCode) &&
1325 atomIsCandidateForRingStereochem(mol, ratom)) {
1326 int same = (ratom->getChiralTag() == atom->getChiralTag()) ? 1 : -1;
1327 ringStereoAtoms.push_back(same * (ratom->getIdx() + 1));
1328 INT_VECT oringatoms(0);
1329 ratom->getPropIfPresent(common_properties::_ringStereoAtoms,
1330 oringatoms);
1331 oringatoms.push_back(same * (atom->getIdx() + 1));
1332 ratom->setProp(common_properties::_ringStereoAtoms, oringatoms, true);
1333 possibleSpecialCases.set(ratom->getIdx());
1334 possibleSpecialCases.set(atom->getIdx());
1335 }
1336 // now push this atom's neighbors
1337 boost::tie(beg, end) = mol.getAtomBonds(ratom);
1338 while (beg != end) {
1339 unsigned int bidx = mol[*beg]->getIdx();
1340 if (!bondsSeen[bidx]) {
1341 bondsSeen.set(bidx);
1342 if (mol.getRingInfo()->numBondRings(bidx)) {
1343 const Atom *oatom = mol[*beg]->getOtherAtom(ratom);
1344 if (!atomsSeen[oatom->getIdx()] && !atomsUsed[oatom->getIdx()]) {
1345 nextAtoms.push_back(oatom);
1346 atomsUsed.set(oatom->getIdx());
1347 }
1348 }
1349 }
1350 ++beg;
1351 }
1352 } // end of BFS
1353 if (ringStereoAtoms.size() != 0) {
1354 atom->setProp(common_properties::_ringStereoAtoms, ringStereoAtoms, true);
1355 // because we're only going to hit each ring atom once, the first atom we
1356 // encounter in a ring is going to end up with all the other atoms set as
1357 // stereoAtoms, but each of them will only have the first atom present. We
1358 // need to fix that. because the traverse from the first atom only
1359 // followed ring bonds, these things are all by definition in one ring
1360 // system. (Q: is this true if there's a spiro center in there?)
1361 INT_VECT same(mol.getNumAtoms(), 0);
1362 for (auto ringAtomEntry : ringStereoAtoms) {
1363 int ringAtomIdx =
1364 ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1;
1365 same[ringAtomIdx] = ringAtomEntry;
1366 }
1367 for (INT_VECT_CI rae = ringStereoAtoms.begin();
1368 rae != ringStereoAtoms.end(); ++rae) {
1369 int ringAtomEntry = *rae;
1370 int ringAtomIdx =
1371 ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1;
1372 INT_VECT lringatoms(0);
1373 mol.getAtomWithIdx(ringAtomIdx)
1374 ->getPropIfPresent(common_properties::_ringStereoAtoms, lringatoms);
1375 CHECK_INVARIANT(lringatoms.size() > 0, "no other ring atoms found.");
1376 for (auto orae = rae + 1; orae != ringStereoAtoms.end(); ++orae) {
1377 int oringAtomEntry = *orae;
1378 int oringAtomIdx =
1379 oringAtomEntry < 0 ? -oringAtomEntry - 1 : oringAtomEntry - 1;
1380 int theseDifferent = (ringAtomEntry < 0) ^ (oringAtomEntry < 0);
1381 lringatoms.push_back(theseDifferent ? -(oringAtomIdx + 1)
1382 : (oringAtomIdx + 1));
1383 INT_VECT olringatoms(0);
1384 mol.getAtomWithIdx(oringAtomIdx)
1385 ->getPropIfPresent(common_properties::_ringStereoAtoms,
1386 olringatoms);
1387 CHECK_INVARIANT(olringatoms.size() > 0, "no other ring atoms found.");
1388 olringatoms.push_back(theseDifferent ? -(ringAtomIdx + 1)
1389 : (ringAtomIdx + 1));
1390 mol.getAtomWithIdx(oringAtomIdx)
1391 ->setProp(common_properties::_ringStereoAtoms, olringatoms);
1392 }
1393 mol.getAtomWithIdx(ringAtomIdx)
1394 ->setProp(common_properties::_ringStereoAtoms, lringatoms);
1395 }
1396
1397 } else {
1398 possibleSpecialCases.reset(atom->getIdx());
1399 }
1400 atomsSeen.set(atom->getIdx());
1401 }
1402 }
1403
isAtomPotentialChiralCenter(const Atom * atom,const ROMol & mol,const UINT_VECT & ranks,Chirality::INT_PAIR_VECT & nbrs)1404 std::pair<bool, bool> isAtomPotentialChiralCenter(
1405 const Atom *atom, const ROMol &mol, const UINT_VECT &ranks,
1406 Chirality::INT_PAIR_VECT &nbrs) {
1407 // loop over all neighbors and form a decorated list of their
1408 // ranks:
1409 bool legalCenter = true;
1410 bool hasDupes = false;
1411
1412 if (atom->getTotalDegree() > 4) {
1413 // we only know tetrahedral chirality
1414 legalCenter = false;
1415 } else {
1416 // cases we can exclude immediately without having to look at neighbors
1417 // ranks:
1418 if (atom->getTotalDegree() < 3) {
1419 legalCenter = false;
1420 } else if (atom->getDegree() < 3 &&
1421 (atom->getAtomicNum() != 15 && atom->getAtomicNum() != 33)) {
1422 // less than three neighbors is never stereogenic
1423 // unless it is a phosphine/arsine with implicit H (this is from InChI)
1424 legalCenter = false;
1425 } else if (atom->getDegree() == 3 && atom->getTotalNumHs() != 1) {
1426 // assume something that's really three coordinate isn't potentially
1427 // chiral, then look for exceptions
1428 legalCenter = false;
1429 if (atom->getAtomicNum() == 7) {
1430 // three-coordinate N additional requirements:
1431 // in a ring of size 3 (from InChI)
1432 // OR
1433 /// is a bridgehead atom (RDKit extension)
1434 if (mol.getRingInfo()->isAtomInRingOfSize(atom->getIdx(), 3) ||
1435 queryIsAtomBridgehead(atom)) {
1436 legalCenter = true;
1437 }
1438 } else if (atom->getAtomicNum() == 15 || atom->getAtomicNum() == 33) {
1439 // three-coordinate phosphines and arsines
1440 // are always treated as stereogenic even with H atom neighbors.
1441 // (this is from InChI)
1442 legalCenter = true;
1443 } else if (atom->getAtomicNum() == 16 || atom->getAtomicNum() == 34) {
1444 if (atom->getExplicitValence() == 4 ||
1445 (atom->getExplicitValence() == 3 && atom->getFormalCharge() == 1)) {
1446 // we also accept sulfur or selenium with either a positive charge
1447 // or a double bond:
1448 legalCenter = true;
1449 }
1450 }
1451 }
1452
1453 if (legalCenter) {
1454 boost::dynamic_bitset<> codesSeen(mol.getNumAtoms());
1455 ROMol::OEDGE_ITER beg, end;
1456 boost::tie(beg, end) = mol.getAtomBonds(atom);
1457 while (beg != end) {
1458 unsigned int otherIdx = mol[*beg]->getOtherAtom(atom)->getIdx();
1459 CHECK_INVARIANT(ranks[otherIdx] < mol.getNumAtoms(),
1460 "CIP rank higher than the number of atoms.");
1461 // watch for neighbors with duplicate ranks, which would mean
1462 // that we cannot be chiral:
1463 if (codesSeen[ranks[otherIdx]]) {
1464 // we've already seen this code, it's a dupe
1465 hasDupes = true;
1466 break;
1467 }
1468 codesSeen[ranks[otherIdx]] = 1;
1469 nbrs.push_back(std::make_pair(ranks[otherIdx], mol[*beg]->getIdx()));
1470 ++beg;
1471 }
1472 }
1473 }
1474 return std::make_pair(legalCenter, hasDupes);
1475 }
1476
1477 // returns a pair:
1478 // 1) are there unassigned stereoatoms
1479 // 2) did we assign any?
assignAtomChiralCodes(ROMol & mol,UINT_VECT & ranks,bool flagPossibleStereoCenters)1480 std::pair<bool, bool> assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks,
1481 bool flagPossibleStereoCenters) {
1482 PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()),
1483 "bad rank vector size");
1484 bool atomChanged = false;
1485 unsigned int unassignedAtoms = 0;
1486
1487 // ------------------
1488 // now loop over each atom and, if it's marked as chiral,
1489 // figure out the appropriate CIP label:
1490 for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1491 ++atIt) {
1492 Atom *atom = *atIt;
1493 Atom::ChiralType tag = atom->getChiralTag();
1494
1495 // only worry about this atom if it has a marked chirality
1496 // we understand:
1497 if (flagPossibleStereoCenters ||
1498 (tag != Atom::CHI_UNSPECIFIED && tag != Atom::CHI_OTHER)) {
1499 if (atom->hasProp(common_properties::_CIPCode)) {
1500 continue;
1501 }
1502
1503 if (!ranks.size()) {
1504 // if we need to, get the "CIP" ranking of each atom:
1505 assignAtomCIPRanks(mol, ranks);
1506 }
1507 Chirality::INT_PAIR_VECT nbrs;
1508 bool legalCenter, hasDupes;
1509 // note that hasDupes is only evaluated if legalCenter==true
1510 boost::tie(legalCenter, hasDupes) =
1511 isAtomPotentialChiralCenter(atom, mol, ranks, nbrs);
1512 if (legalCenter) {
1513 ++unassignedAtoms;
1514 }
1515 if (legalCenter && !hasDupes && flagPossibleStereoCenters) {
1516 atom->setProp(common_properties::_ChiralityPossible, 1);
1517 }
1518
1519 if (legalCenter && !hasDupes && tag != Atom::CHI_UNSPECIFIED &&
1520 tag != Atom::CHI_OTHER) {
1521 // stereochem is possible and we have no duplicate neighbors, assign
1522 // a CIP code:
1523 atomChanged = true;
1524 --unassignedAtoms;
1525
1526 // sort the list of neighbors by their CIP ranks:
1527 std::sort(nbrs.begin(), nbrs.end(), Rankers::pairLess<int, int>());
1528
1529 // collect the list of neighbor indices:
1530 std::list<int> nbrIndices;
1531 for (Chirality::INT_PAIR_VECT_CI nbrIt = nbrs.begin();
1532 nbrIt != nbrs.end(); ++nbrIt) {
1533 nbrIndices.push_back((*nbrIt).second);
1534 }
1535 // ask the atom how many swaps we have to make:
1536 int nSwaps = atom->getPerturbationOrder(nbrIndices);
1537
1538 // if the atom has 3 neighbors and a hydrogen, add a swap:
1539 if (nbrIndices.size() == 3 && atom->getTotalNumHs() == 1) {
1540 ++nSwaps;
1541 }
1542
1543 // if that number is odd, we'll change our chirality:
1544 if (nSwaps % 2) {
1545 if (tag == Atom::CHI_TETRAHEDRAL_CCW) {
1546 tag = Atom::CHI_TETRAHEDRAL_CW;
1547 } else {
1548 tag = Atom::CHI_TETRAHEDRAL_CCW;
1549 }
1550 }
1551 // now assign the CIP code:
1552 std::string cipCode;
1553 if (tag == Atom::CHI_TETRAHEDRAL_CCW) {
1554 cipCode = "S";
1555 } else {
1556 cipCode = "R";
1557 }
1558 atom->setProp(common_properties::_CIPCode, cipCode);
1559 }
1560 }
1561 }
1562 return std::make_pair((unassignedAtoms > 0), atomChanged);
1563 }
1564
1565 // returns a pair:
1566 // 1) are there unassigned stereo bonds?
1567 // 2) did we assign any?
assignBondStereoCodes(ROMol & mol,UINT_VECT & ranks)1568 std::pair<bool, bool> assignBondStereoCodes(ROMol &mol, UINT_VECT &ranks) {
1569 PRECONDITION((!ranks.size() || ranks.size() == mol.getNumAtoms()),
1570 "bad rank vector size");
1571 bool assignedABond = false;
1572 unsigned int unassignedBonds = 0;
1573 boost::dynamic_bitset<> bondsToClear(mol.getNumBonds());
1574 // find the double bonds:
1575 for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
1576 ++bondIt) {
1577 if ((*bondIt)->getBondType() == Bond::DOUBLE) {
1578 Bond *dblBond = *bondIt;
1579 if (dblBond->getStereo() != Bond::STEREONONE) {
1580 continue;
1581 }
1582 if (!ranks.size()) {
1583 assignAtomCIPRanks(mol, ranks);
1584 }
1585 dblBond->getStereoAtoms().clear();
1586
1587 // at the moment we are ignoring stereochem on ring bonds with less than
1588 // 8
1589 // members.
1590 if (shouldDetectDoubleBondStereo(dblBond)) {
1591 const Atom *begAtom = dblBond->getBeginAtom();
1592 const Atom *endAtom = dblBond->getEndAtom();
1593 // we're only going to handle 2 or three coordinate atoms:
1594 if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) &&
1595 (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) {
1596 ++unassignedBonds;
1597
1598 // look around each atom and see if it has at least one bond with
1599 // direction marked:
1600
1601 // the pairs here are: atomrank,bonddir
1602 Chirality::INT_PAIR_VECT begAtomNeighbors, endAtomNeighbors;
1603 bool hasExplicitUnknownStereo = false;
1604 int bgn_stereo = false, end_stereo = false;
1605 if ((dblBond->getBeginAtom()->getPropIfPresent(
1606 common_properties::_UnknownStereo, bgn_stereo) &&
1607 bgn_stereo) ||
1608 (dblBond->getEndAtom()->getPropIfPresent(
1609 common_properties::_UnknownStereo, end_stereo) &&
1610 end_stereo)) {
1611 hasExplicitUnknownStereo = true;
1612 }
1613 Chirality::findAtomNeighborDirHelper(mol, begAtom, dblBond, ranks,
1614 begAtomNeighbors,
1615 hasExplicitUnknownStereo);
1616 Chirality::findAtomNeighborDirHelper(mol, endAtom, dblBond, ranks,
1617 endAtomNeighbors,
1618 hasExplicitUnknownStereo);
1619
1620 if (begAtomNeighbors.size() && endAtomNeighbors.size()) {
1621 // Each atom has at least one neighboring bond with marked
1622 // directionality. Find the highest-ranked directionality
1623 // on each side:
1624
1625 int begDir, endDir, endNbrAid, begNbrAid;
1626 if (begAtomNeighbors.size() == 1 ||
1627 ranks[begAtomNeighbors[0].first] >
1628 ranks[begAtomNeighbors[1].first]) {
1629 begDir = begAtomNeighbors[0].second;
1630 begNbrAid = begAtomNeighbors[0].first;
1631 } else {
1632 begDir = begAtomNeighbors[1].second;
1633 begNbrAid = begAtomNeighbors[1].first;
1634 }
1635 if (endAtomNeighbors.size() == 1 ||
1636 ranks[endAtomNeighbors[0].first] >
1637 ranks[endAtomNeighbors[1].first]) {
1638 endDir = endAtomNeighbors[0].second;
1639 endNbrAid = endAtomNeighbors[0].first;
1640 } else {
1641 endDir = endAtomNeighbors[1].second;
1642 endNbrAid = endAtomNeighbors[1].first;
1643 }
1644
1645 bool conflictingBegin =
1646 (begAtomNeighbors.size() == 2 &&
1647 begAtomNeighbors[0].second == begAtomNeighbors[1].second);
1648 bool conflictingEnd =
1649 (endAtomNeighbors.size() == 2 &&
1650 endAtomNeighbors[0].second == endAtomNeighbors[1].second);
1651 if (conflictingBegin || conflictingEnd) {
1652 dblBond->setStereo(Bond::STEREONONE);
1653 BOOST_LOG(rdWarningLog) << "Conflicting single bond directions "
1654 "around double bond at index "
1655 << dblBond->getIdx() << "." << std::endl;
1656 BOOST_LOG(rdWarningLog) << " BondStereo set to STEREONONE and "
1657 "single bond directions set to NONE."
1658 << std::endl;
1659 assignedABond = true;
1660 if (conflictingBegin) {
1661 bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[0].first,
1662 begAtom->getIdx())
1663 ->getIdx()] = 1;
1664 bondsToClear[mol.getBondBetweenAtoms(begAtomNeighbors[1].first,
1665 begAtom->getIdx())
1666 ->getIdx()] = 1;
1667 }
1668 if (conflictingEnd) {
1669 bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[0].first,
1670 endAtom->getIdx())
1671 ->getIdx()] = 1;
1672 bondsToClear[mol.getBondBetweenAtoms(endAtomNeighbors[1].first,
1673 endAtom->getIdx())
1674 ->getIdx()] = 1;
1675 }
1676 } else {
1677 dblBond->getStereoAtoms().push_back(begNbrAid);
1678 dblBond->getStereoAtoms().push_back(endNbrAid);
1679 if (hasExplicitUnknownStereo) {
1680 dblBond->setStereo(Bond::STEREOANY);
1681 assignedABond = true;
1682 } else if (begDir == endDir) {
1683 // In findAtomNeighborDirHelper, we've set up the
1684 // bond directions here so that they correspond to
1685 // having both single bonds START at the double bond.
1686 // This means that if the single bonds point in the same
1687 // direction, the bond is cis, "Z"
1688 dblBond->setStereo(Bond::STEREOZ);
1689 assignedABond = true;
1690 } else {
1691 dblBond->setStereo(Bond::STEREOE);
1692 assignedABond = true;
1693 }
1694 }
1695 --unassignedBonds;
1696 }
1697 }
1698 }
1699 }
1700 }
1701
1702 for (unsigned int i = 0; i < mol.getNumBonds(); ++i) {
1703 if (bondsToClear[i]) {
1704 mol.getBondWithIdx(i)->setBondDir(Bond::NONE);
1705 }
1706 }
1707
1708 return std::make_pair(unassignedBonds > 0, assignedABond);
1709 }
1710
1711 // reassign atom ranks by supplementing the current ranks
1712 // with information about known chirality
rerankAtoms(const ROMol & mol,UINT_VECT & ranks)1713 void rerankAtoms(const ROMol &mol, UINT_VECT &ranks) {
1714 PRECONDITION(ranks.size() == mol.getNumAtoms(), "bad rank vector size");
1715 unsigned int factor = 100;
1716 while (factor < mol.getNumAtoms()) {
1717 factor *= 10;
1718 }
1719
1720 #ifdef VERBOSE_CANON
1721 BOOST_LOG(rdDebugLog) << "rerank PRE: " << std::endl;
1722 for (int i = 0; i < mol.getNumAtoms(); i++) {
1723 BOOST_LOG(rdDebugLog) << " " << i << ": " << ranks[i] << std::endl;
1724 }
1725 #endif
1726
1727 DOUBLE_VECT invars(mol.getNumAtoms());
1728 // and now supplement them:
1729 for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
1730 invars[i] = ranks[i] * factor;
1731 const Atom *atom = mol.getAtomWithIdx(i);
1732 // Priority order: R > S > nothing
1733 std::string cipCode;
1734 if (atom->getPropIfPresent(common_properties::_CIPCode, cipCode)) {
1735 if (cipCode == "S") {
1736 invars[i] += 10;
1737 } else if (cipCode == "R") {
1738 invars[i] += 20;
1739 }
1740 }
1741 ROMol::OEDGE_ITER beg, end;
1742 boost::tie(beg, end) = mol.getAtomBonds(atom);
1743 while (beg != end) {
1744 const Bond *oBond = mol[*beg];
1745 if (oBond->getBondType() == Bond::DOUBLE) {
1746 if (oBond->getStereo() == Bond::STEREOE) {
1747 invars[i] += 1;
1748 } else if (oBond->getStereo() == Bond::STEREOZ) {
1749 invars[i] += 2;
1750 }
1751 }
1752 ++beg;
1753 }
1754 }
1755 iterateCIPRanks(mol, invars, ranks, true);
1756 // copy the ranks onto the atoms:
1757 for (unsigned int i = 0; i < mol.getNumAtoms(); i++) {
1758 mol.getAtomWithIdx(i)->setProp(common_properties::_CIPRank, ranks[i]);
1759 }
1760
1761 #ifdef VERBOSE_CANON
1762 BOOST_LOG(rdDebugLog) << " post: " << std::endl;
1763 for (int i = 0; i < mol.getNumAtoms(); i++) {
1764 BOOST_LOG(rdDebugLog) << " " << i << ": " << ranks[i] << std::endl;
1765 }
1766 #endif
1767 }
1768
hasStereoBondDir(const Bond * bond)1769 bool hasStereoBondDir(const Bond *bond) {
1770 PRECONDITION(bond, "no bond");
1771 return bond->getBondDir() == Bond::BondDir::ENDDOWNRIGHT ||
1772 bond->getBondDir() == Bond::BondDir::ENDUPRIGHT;
1773 }
1774
getNeighboringDirectedBond(const ROMol & mol,const Atom * atom)1775 const Bond *getNeighboringDirectedBond(const ROMol &mol, const Atom *atom) {
1776 PRECONDITION(atom, "no atom");
1777 for (const auto &bondIdx :
1778 boost::make_iterator_range(mol.getAtomBonds(atom))) {
1779 const Bond *bond = mol[bondIdx];
1780
1781 if (bond->getBondType() != Bond::BondType::DOUBLE &&
1782 hasStereoBondDir(bond)) {
1783 return bond;
1784 }
1785 }
1786 return nullptr;
1787 }
1788
translateEZLabelToCisTrans(Bond::BondStereo label)1789 Bond::BondStereo translateEZLabelToCisTrans(Bond::BondStereo label) {
1790 switch (label) {
1791 case Bond::STEREOE:
1792 return Bond::STEREOTRANS;
1793 case Bond::STEREOZ:
1794 return Bond::STEREOCIS;
1795 default:
1796 return label;
1797 }
1798 }
1799
findStereoAtoms(const Bond * bond)1800 INT_VECT findStereoAtoms(const Bond *bond) {
1801 PRECONDITION(bond, "bad bond");
1802 PRECONDITION(bond->hasOwningMol(), "no mol");
1803 PRECONDITION(bond->getBondType() == Bond::DOUBLE, "not double bond");
1804 PRECONDITION(bond->getStereo() > Bond::BondStereo::STEREOANY,
1805 "no defined stereo");
1806
1807 if (!bond->getStereoAtoms().empty()) {
1808 return bond->getStereoAtoms();
1809 }
1810 if (bond->getStereo() == Bond::BondStereo::STEREOE ||
1811 bond->getStereo() == Bond::BondStereo::STEREOZ) {
1812 const Atom *startStereoAtom =
1813 findHighestCIPNeighbor(bond->getBeginAtom(), bond->getEndAtom());
1814 const Atom *endStereoAtom =
1815 findHighestCIPNeighbor(bond->getEndAtom(), bond->getBeginAtom());
1816
1817 if (startStereoAtom == nullptr || endStereoAtom == nullptr) {
1818 return {};
1819 }
1820
1821 int startStereoAtomIdx = static_cast<int>(startStereoAtom->getIdx());
1822 int endStereoAtomIdx = static_cast<int>(endStereoAtom->getIdx());
1823
1824 return {startStereoAtomIdx, endStereoAtomIdx};
1825 } else {
1826 BOOST_LOG(rdWarningLog) << "Unable to assign stereo atoms for bond "
1827 << bond->getIdx() << std::endl;
1828 return {};
1829 }
1830 }
1831
1832 } // namespace Chirality
1833
1834 namespace MolOps {
1835
1836 /*
1837 We're going to do this iteratively:
1838 1) assign atom stereochemistry
1839 2) assign bond stereochemistry
1840 3) if there are still unresolved atoms or bonds
1841 repeat the above steps as necessary
1842 */
assignStereochemistry(ROMol & mol,bool cleanIt,bool force,bool flagPossibleStereoCenters)1843 void assignStereochemistry(ROMol &mol, bool cleanIt, bool force,
1844 bool flagPossibleStereoCenters) {
1845 if (!force && mol.hasProp(common_properties::_StereochemDone)) {
1846 return;
1847 }
1848
1849 // later we're going to need ring information, get it now if we don't
1850 // have it already:
1851 if (!mol.getRingInfo()->isInitialized()) {
1852 MolOps::fastFindRings(mol);
1853 }
1854
1855 #if 0
1856 std::cerr << ">>>>>>>>>>>>>\n";
1857 std::cerr << "assign stereochem\n";
1858 mol.debugMol(std::cerr);
1859 #endif
1860
1861 // as part of the preparation, we'll loop over the atoms and
1862 // bonds to see if anything has stereochemistry
1863 // indicated. There's no point in doing the work here if there
1864 // are neither stereocenters nor bonds that we need to consider.
1865 // The exception to this is when flagPossibleStereoCenters is
1866 // true; then we always need to do the work
1867 bool hasStereoAtoms = flagPossibleStereoCenters;
1868 for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1869 ++atIt) {
1870 if (cleanIt) {
1871 if ((*atIt)->hasProp(common_properties::_CIPCode)) {
1872 (*atIt)->clearProp(common_properties::_CIPCode);
1873 }
1874 if ((*atIt)->hasProp(common_properties::_ChiralityPossible)) {
1875 (*atIt)->clearProp(common_properties::_ChiralityPossible);
1876 }
1877 }
1878 if (!hasStereoAtoms && (*atIt)->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1879 (*atIt)->getChiralTag() != Atom::CHI_OTHER) {
1880 hasStereoAtoms = true;
1881 }
1882 }
1883 bool hasStereoBonds = false;
1884 for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
1885 ++bondIt) {
1886 if (cleanIt) {
1887 if ((*bondIt)->getBondType() == Bond::DOUBLE) {
1888 if ((*bondIt)->getBondDir() == Bond::EITHERDOUBLE) {
1889 (*bondIt)->setStereo(Bond::STEREOANY);
1890 } else if ((*bondIt)->getStereo() != Bond::STEREOANY) {
1891 (*bondIt)->setStereo(Bond::STEREONONE);
1892 (*bondIt)->getStereoAtoms().clear();
1893 }
1894 }
1895 }
1896 if (!hasStereoBonds && (*bondIt)->getBondType() == Bond::DOUBLE) {
1897 ROMol::OEDGE_ITER beg, end;
1898 boost::tie(beg, end) = mol.getAtomBonds((*bondIt)->getBeginAtom());
1899 while (!hasStereoBonds && beg != end) {
1900 const Bond *nbond = mol[*beg];
1901 ++beg;
1902 if (nbond->getBondDir() == Bond::ENDDOWNRIGHT ||
1903 nbond->getBondDir() == Bond::ENDUPRIGHT) {
1904 hasStereoBonds = true;
1905 }
1906 }
1907 boost::tie(beg, end) = mol.getAtomBonds((*bondIt)->getEndAtom());
1908 while (!hasStereoBonds && beg != end) {
1909 const Bond *nbond = mol[*beg];
1910 ++beg;
1911 if (nbond->getBondDir() == Bond::ENDDOWNRIGHT ||
1912 nbond->getBondDir() == Bond::ENDUPRIGHT) {
1913 hasStereoBonds = true;
1914 }
1915 }
1916 }
1917 if (!cleanIt && hasStereoBonds) {
1918 break; // no reason to keep iterating if we've already
1919 // determined there are stereo bonds to consider
1920 }
1921 }
1922 UINT_VECT atomRanks;
1923 bool keepGoing = hasStereoAtoms | hasStereoBonds;
1924 bool changedStereoAtoms, changedStereoBonds;
1925 while (keepGoing) {
1926 if (hasStereoAtoms) {
1927 boost::tie(hasStereoAtoms, changedStereoAtoms) =
1928 Chirality::assignAtomChiralCodes(mol, atomRanks,
1929 flagPossibleStereoCenters);
1930 } else {
1931 changedStereoAtoms = false;
1932 }
1933 if (hasStereoBonds) {
1934 boost::tie(hasStereoBonds, changedStereoBonds) =
1935 Chirality::assignBondStereoCodes(mol, atomRanks);
1936 } else {
1937 changedStereoBonds = false;
1938 }
1939 keepGoing = (hasStereoAtoms || hasStereoBonds) &&
1940 (changedStereoAtoms || changedStereoBonds);
1941
1942 if (keepGoing) {
1943 // update the atom ranks based on the new information we have:
1944 Chirality::rerankAtoms(mol, atomRanks);
1945 }
1946 #if 0
1947 std::cout << "*************** done iteration " << keepGoing
1948 << " ***********" << std::endl;
1949 mol.debugMol(std::cout);
1950 std::cout << "*************** done iteration " << keepGoing
1951 << " ***********" << std::endl;
1952 #endif
1953 }
1954
1955 if (cleanIt) {
1956 // if the ranks are needed again, this will force them to be
1957 // re-calculated based on the stereo calculated above.
1958 // atomRanks.clear();
1959
1960 for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
1961 ++atIt) {
1962 if ((*atIt)->hasProp(common_properties::_ringStereochemCand)) {
1963 (*atIt)->clearProp(common_properties::_ringStereochemCand);
1964 }
1965 if ((*atIt)->hasProp(common_properties::_ringStereoAtoms)) {
1966 (*atIt)->clearProp(common_properties::_ringStereoAtoms);
1967 }
1968 }
1969 boost::dynamic_bitset<> possibleSpecialCases(mol.getNumAtoms());
1970 Chirality::findChiralAtomSpecialCases(mol, possibleSpecialCases);
1971
1972 for (auto atom : mol.atoms()) {
1973 if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1974 !atom->hasProp(common_properties::_CIPCode) &&
1975 (!possibleSpecialCases[atom->getIdx()] ||
1976 !atom->hasProp(common_properties::_ringStereoAtoms))) {
1977 atom->setChiralTag(Atom::CHI_UNSPECIFIED);
1978
1979 // If the atom has an explicit hydrogen and no charge, that H
1980 // was probably put there solely because of the chirality.
1981 // So we'll go ahead and remove it.
1982 // This was Issue 194
1983 if (atom->getNumExplicitHs() == 1 && atom->getFormalCharge() == 0 &&
1984 !atom->getIsAromatic()) {
1985 atom->setNumExplicitHs(0);
1986 atom->setNoImplicit(false);
1987 atom->calcExplicitValence(false);
1988 atom->calcImplicitValence(false);
1989 }
1990 }
1991 }
1992 for (auto bond : mol.bonds()) {
1993 // wedged bonds to atoms that have no stereochem
1994 // should be removed. (github issue 87)
1995 if ((bond->getBondDir() == Bond::BEGINWEDGE ||
1996 bond->getBondDir() == Bond::BEGINDASH) &&
1997 bond->getBeginAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED &&
1998 bond->getEndAtom()->getChiralTag() == Atom::CHI_UNSPECIFIED) {
1999 bond->setBondDir(Bond::NONE);
2000 }
2001
2002 // check for directionality on single bonds around
2003 // double bonds without stereo. This was github #2422
2004 if (bond->getBondType() == Bond::DOUBLE &&
2005 (bond->getStereo() == Bond::STEREOANY ||
2006 bond->getStereo() == Bond::STEREONONE)) {
2007 std::vector<Atom *> batoms = {bond->getBeginAtom(), bond->getEndAtom()};
2008 for (auto batom : batoms) {
2009 for (const auto &nbri :
2010 boost::make_iterator_range(mol.getAtomBonds(batom))) {
2011 auto nbrBndI = mol[nbri];
2012 if ((nbrBndI->getBondDir() == Bond::ENDDOWNRIGHT ||
2013 nbrBndI->getBondDir() == Bond::ENDUPRIGHT) &&
2014 (nbrBndI->getBondType() == Bond::SINGLE ||
2015 nbrBndI->getBondType() == Bond::AROMATIC)) {
2016 // direction is set, and we know it's not because of our
2017 // bond. What about other neighbors?
2018 bool okToClear = true;
2019 for (const auto &nbrj : boost::make_iterator_range(
2020 mol.getAtomBonds(nbrBndI->getOtherAtom(batom)))) {
2021 auto nbrBndJ = mol[nbrj];
2022 if (nbrBndJ->getBondType() == Bond::DOUBLE &&
2023 nbrBndJ->getStereo() != Bond::STEREOANY &&
2024 nbrBndJ->getStereo() != Bond::STEREONONE) {
2025 okToClear = false;
2026 break;
2027 }
2028 }
2029 if (okToClear) {
2030 nbrBndI->setBondDir(Bond::NONE);
2031 }
2032 }
2033 }
2034 }
2035 }
2036 #if 0
2037 // make sure CIS/TRANS assignments are actually stereo bonds
2038 if ((*bondIt)->getBondType() == Bond::DOUBLE) {
2039 if ((*bondIt)->getStereo() == Bond::STEREOCIS ||
2040 (*bondIt)->getStereo() == Bond::STEREOTRANS) {
2041 if (!atomRanks.size()) {
2042 Chirality::assignAtomCIPRanks(mol, atomRanks);
2043 }
2044
2045 const Atom *begAtom = (*bondIt)->getBeginAtom(),
2046 *endAtom = (*bondIt)->getEndAtom();
2047 UINT_VECT begAtomNeighbors, endAtomNeighbors;
2048 Chirality::findAtomNeighborsHelper(mol, begAtom, *bondIt,
2049 begAtomNeighbors);
2050 Chirality::findAtomNeighborsHelper(mol, endAtom, *bondIt,
2051 endAtomNeighbors);
2052
2053 // Note, this relies on this being a hydrogen-suppressed
2054 // graph as the 'Note' in the doc string of this function
2055 // indicates is a pre-condition.
2056 if ((begAtomNeighbors.size() == 2 &&
2057 atomRanks[begAtomNeighbors[0]] ==
2058 atomRanks[begAtomNeighbors[1]]) ||
2059 (endAtomNeighbors.size() == 2 &&
2060 atomRanks[endAtomNeighbors[0]] ==
2061 atomRanks[endAtomNeighbors[1]])) {
2062 (*bondIt)->setStereo(Bond::STEREONONE);
2063 (*bondIt)->getStereoAtoms().clear();
2064 }
2065 }
2066 }
2067 #endif
2068 }
2069 }
2070 mol.setProp(common_properties::_StereochemDone, 1, true);
2071
2072 #if 0
2073 std::cerr << "---\n";
2074 mol.debugMol(std::cerr);
2075 std::cerr << "<<<<<<<<<<<<<<<<\n";
2076 #endif
2077 }
2078
2079 // Find bonds than can be cis/trans in a molecule and mark them as
2080 // Bond::STEREOANY.
findPotentialStereoBonds(ROMol & mol,bool cleanIt)2081 void findPotentialStereoBonds(ROMol &mol, bool cleanIt) {
2082 // FIX: The earlier thought was to provide an optional argument to ignore or
2083 // consider
2084 // double bonds in a ring. But I am removing this optional argument and
2085 // ignoring ring bonds
2086 // completely for now. This is because finding a potential stereo bond in a
2087 // ring involves
2088 // more than just checking the CIPranks for the neighbors - SP 05/04/04
2089
2090 // make this function callable multiple times
2091 if ((mol.hasProp(common_properties::_BondsPotentialStereo)) && (!cleanIt)) {
2092 return;
2093 } else {
2094 UINT_VECT ranks;
2095 ranks.resize(mol.getNumAtoms());
2096 bool cipDone = false;
2097
2098 ROMol::BondIterator bondIt;
2099 for (bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) {
2100 if ((*bondIt)->getBondType() == Bond::DOUBLE &&
2101 !(mol.getRingInfo()->numBondRings((*bondIt)->getIdx()))) {
2102 // we are ignoring ring bonds here - read the FIX above
2103 Bond *dblBond = *bondIt;
2104 // proceed only if we either want to clean the stereocode on this bond,
2105 // if none is set on it yet, or it is STEREOANY and we need to find
2106 // stereoatoms
2107 if (cleanIt || dblBond->getStereo() == Bond::STEREONONE ||
2108 (dblBond->getStereo() == Bond::STEREOANY &&
2109 dblBond->getStereoAtoms().size() != 2)) {
2110 dblBond->setStereo(Bond::STEREONONE);
2111 const Atom *begAtom = dblBond->getBeginAtom(),
2112 *endAtom = dblBond->getEndAtom();
2113 // we're only going to handle 2 or three coordinate atoms:
2114 if ((begAtom->getDegree() == 2 || begAtom->getDegree() == 3) &&
2115 (endAtom->getDegree() == 2 || endAtom->getDegree() == 3)) {
2116 // ------------------
2117 // get the CIP ranking of each atom if we need it:
2118 if (!cipDone) {
2119 if (!begAtom->hasProp(common_properties::_CIPRank)) {
2120 Chirality::assignAtomCIPRanks(mol, ranks);
2121 } else {
2122 // no need to recompute if we don't need to recompute. :-)
2123 for (unsigned int ai = 0; ai < mol.getNumAtoms(); ++ai) {
2124 ranks[ai] = mol.getAtomWithIdx(ai)->getProp<unsigned int>(
2125 common_properties::_CIPRank);
2126 }
2127 }
2128 cipDone = true;
2129 }
2130 // find the neighbors for the begin atom and the endAtom
2131 UINT_VECT begAtomNeighbors, endAtomNeighbors;
2132 bool checkDir = false;
2133 bool includeAromatic = true;
2134 Chirality::findAtomNeighborsHelper(mol, begAtom, dblBond,
2135 begAtomNeighbors, checkDir,
2136 includeAromatic);
2137 Chirality::findAtomNeighborsHelper(mol, endAtom, dblBond,
2138 endAtomNeighbors, checkDir,
2139 includeAromatic);
2140 if (begAtomNeighbors.size() > 0 && endAtomNeighbors.size() > 0) {
2141 if ((begAtomNeighbors.size() == 2) &&
2142 (endAtomNeighbors.size() == 2)) {
2143 // if both of the atoms have 2 neighbors (other than the one
2144 // connected
2145 // by the double bond) and ....
2146 #if 0
2147 std::cerr << "Bond: " << dblBond->getIdx() << " "
2148 << begAtom->getIdx() << "=" << endAtom->getIdx()
2149 << std::endl;
2150 std::cerr << " " << begAtomNeighbors[0] << "="
2151 << ranks[begAtomNeighbors[0]] << ":";
2152 std::cerr << " " << begAtomNeighbors[1] << "="
2153 << ranks[begAtomNeighbors[1]] << std::endl;
2154 std::cerr << " " << endAtomNeighbors[0] << "="
2155 << ranks[endAtomNeighbors[0]] << ":";
2156 std::cerr << " " << endAtomNeighbors[1] << "="
2157 << ranks[endAtomNeighbors[1]] << std::endl;
2158 #endif
2159 if ((ranks[begAtomNeighbors[0]] !=
2160 ranks[begAtomNeighbors[1]]) &&
2161 (ranks[endAtomNeighbors[0]] !=
2162 ranks[endAtomNeighbors[1]])) {
2163 // the neighbors ranks are different at both the ends,
2164 // this bond can be part of a cis/trans system
2165 if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) {
2166 dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2167 } else {
2168 dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]);
2169 }
2170 if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) {
2171 dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2172 } else {
2173 dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]);
2174 }
2175 }
2176 } else if (begAtomNeighbors.size() == 2) {
2177 // if the begAtom has two neighbors and ....
2178 if (ranks[begAtomNeighbors[0]] != ranks[begAtomNeighbors[1]]) {
2179 // their ranks are different
2180 if (ranks[begAtomNeighbors[0]] > ranks[begAtomNeighbors[1]]) {
2181 dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2182 } else {
2183 dblBond->getStereoAtoms().push_back(begAtomNeighbors[1]);
2184 }
2185 dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2186 }
2187 } else if (endAtomNeighbors.size() == 2) {
2188 // if the endAtom has two neighbors and ...
2189 if (ranks[endAtomNeighbors[0]] != ranks[endAtomNeighbors[1]]) {
2190 // their ranks are different
2191 dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2192 if (ranks[endAtomNeighbors[0]] > ranks[endAtomNeighbors[1]]) {
2193 dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2194 } else {
2195 dblBond->getStereoAtoms().push_back(endAtomNeighbors[1]);
2196 }
2197 }
2198 } else {
2199 // end and beg atoms has only one neighbor each, it doesn't
2200 // matter what the ranks are:
2201 dblBond->getStereoAtoms().push_back(begAtomNeighbors[0]);
2202 dblBond->getStereoAtoms().push_back(endAtomNeighbors[0]);
2203 } // end of different number of neighbors on beg and end atoms
2204
2205 // mark this double bond as a potential stereo bond
2206 if (!dblBond->getStereoAtoms().empty()) {
2207 dblBond->setStereo(Bond::STEREOANY);
2208 }
2209 } // end of check that beg and end atoms have at least 1
2210 // neighbor:
2211 } // end of 2 and 3 coordinated atoms only
2212 } // end of we want it or CIP code is not set
2213 } // end of double bond
2214 } // end of for loop over all bonds
2215 mol.setProp(common_properties::_BondsPotentialStereo, 1, true);
2216 }
2217 }
2218
2219 // removes chirality markers from sp and sp2 hybridized centers:
cleanupChirality(RWMol & mol)2220 void cleanupChirality(RWMol &mol) {
2221 for (ROMol::AtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms();
2222 ++atomIt) {
2223 if ((*atomIt)->getChiralTag() != Atom::CHI_UNSPECIFIED &&
2224 (*atomIt)->getHybridization() < Atom::SP3) {
2225 (*atomIt)->setChiralTag(Atom::CHI_UNSPECIFIED);
2226 }
2227 }
2228 }
2229
assignChiralTypesFrom3D(ROMol & mol,int confId,bool replaceExistingTags)2230 void assignChiralTypesFrom3D(ROMol &mol, int confId, bool replaceExistingTags) {
2231 const double ZERO_VOLUME_TOL = 0.1;
2232 if (!mol.getNumConformers()) {
2233 return;
2234 }
2235 const Conformer &conf = mol.getConformer(confId);
2236 if (!conf.is3D()) {
2237 return;
2238 }
2239
2240 // if the molecule already has stereochemistry
2241 // perceived, remove the flags that indicate
2242 // this... what we're about to do will require
2243 // that we go again.
2244 if (mol.hasProp(common_properties::_StereochemDone)) {
2245 mol.clearProp(common_properties::_StereochemDone);
2246 }
2247
2248 for (ROMol::AtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms();
2249 ++atomIt) {
2250 Atom *atom = *atomIt;
2251 // if we aren't replacing existing tags and the atom is already tagged,
2252 // punt:
2253 if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
2254 continue;
2255 }
2256 atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2257 // additional reasons to skip the atom:
2258 if (atom->getDegree() < 3 || atom->getTotalDegree() > 4) {
2259 // not enough explicit neighbors or too many total neighbors
2260 continue;
2261 } else {
2262 int anum = atom->getAtomicNum();
2263 if (anum != 16 && anum != 34 && // S or Se are special
2264 // (just using the InChI list for now)
2265 (atom->getTotalDegree() != 4 || // not enough total neighbors
2266 atom->getTotalNumHs(true) > 1)) {
2267 continue;
2268 }
2269 }
2270 const RDGeom::Point3D &p0 = conf.getAtomPos(atom->getIdx());
2271 ROMol::ADJ_ITER nbrIdx, endNbrs;
2272 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2273 const RDGeom::Point3D &p1 = conf.getAtomPos(*nbrIdx);
2274 ++nbrIdx;
2275 const RDGeom::Point3D &p2 = conf.getAtomPos(*nbrIdx);
2276 ++nbrIdx;
2277 const RDGeom::Point3D &p3 = conf.getAtomPos(*nbrIdx);
2278
2279 RDGeom::Point3D v1 = p1 - p0;
2280 RDGeom::Point3D v2 = p2 - p0;
2281 RDGeom::Point3D v3 = p3 - p0;
2282
2283 double chiralVol = v1.dotProduct(v2.crossProduct(v3));
2284 if (chiralVol < -ZERO_VOLUME_TOL) {
2285 atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CW);
2286 } else if (chiralVol > ZERO_VOLUME_TOL) {
2287 atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
2288 } else {
2289 atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2290 }
2291 }
2292 }
2293
assignChiralTypesFromMolParity(ROMol & mol,bool replaceExistingTags)2294 void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags) {
2295 static const std::vector<Atom::ChiralType> chiralTypeVect{
2296 Atom::CHI_TETRAHEDRAL_CW, Atom::CHI_TETRAHEDRAL_CCW};
2297 // if the molecule already has stereochemistry
2298 // perceived, remove the flags that indicate
2299 // this... what we're about to do will require
2300 // that we go again.
2301 if (mol.hasProp(common_properties::_StereochemDone)) {
2302 mol.clearProp(common_properties::_StereochemDone);
2303 }
2304 // Atom-based parity
2305 // Number the atoms surrounding the stereo center with 1, 2, 3, and 4
2306 // in order of increasing atom number (position in the atom block)
2307 // (an implicit hydrogen should be considered the highest numbered atom).
2308 // View the center from a position such that the bond connecting the
2309 // highest-numbered atom (4) projects behind the plane formed by
2310 // atoms 1, 2, and 3.
2311 //
2312 // Parity 1 (CW) Parity 2 (CCW)
2313 // 3 1 3 2
2314 // \ / \ /
2315 // | |
2316 // 2 1
2317 //
2318 for (auto atom : mol.atoms()) {
2319 // if we aren't replacing existing tags and the atom is already tagged,
2320 // punt:
2321 if (!replaceExistingTags && atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
2322 continue;
2323 }
2324 int parity = 0;
2325 atom->getPropIfPresent(common_properties::molParity, parity);
2326 if (parity <= 0 || parity > 2 || atom->getDegree() < 3) {
2327 atom->setChiralTag(Atom::CHI_UNSPECIFIED);
2328 continue;
2329 }
2330 // if we are here, parity was 1 (CW) or 2 (CCW)
2331 // now we set parity 0 to be CW and 1 to be CCW
2332 --parity;
2333 RDKit::ROMol::OBOND_ITER_PAIR nbrBonds = mol.getAtomBonds(atom);
2334 INT_LIST nbrBondIdxList;
2335 std::transform(
2336 nbrBonds.first, nbrBonds.second, std::back_inserter(nbrBondIdxList),
2337 [mol](const ROMol::edge_descriptor &e) { return mol[e]->getIdx(); });
2338 unsigned int atomIdx = atom->getIdx();
2339 nbrBondIdxList.sort([mol, atomIdx](const int ai, const int bi) {
2340 return (mol.getBondWithIdx(ai)->getOtherAtomIdx(atomIdx) <
2341 mol.getBondWithIdx(bi)->getOtherAtomIdx(atomIdx));
2342 });
2343 int nSwaps = atom->getPerturbationOrder(nbrBondIdxList);
2344 if (nSwaps % 2) {
2345 parity = 1 - parity;
2346 }
2347 atom->setChiralTag(chiralTypeVect[parity]);
2348 if (atom->getImplicitValence() == -1) {
2349 atom->calcExplicitValence(false);
2350 atom->calcImplicitValence(false);
2351 }
2352 // within the RD representation, if a three-coordinate atom
2353 // is chiral and has an implicit H, that H needs to be made explicit:
2354 if (atom->getDegree() == 3 && !atom->getNumExplicitHs() &&
2355 atom->getNumImplicitHs() == 1) {
2356 atom->setNumExplicitHs(1);
2357 // recalculated number of implicit Hs:
2358 atom->updatePropertyCache();
2359 }
2360 }
2361 }
2362
setDoubleBondNeighborDirections(ROMol & mol,const Conformer * conf)2363 void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf) {
2364 // used to store the number of single bonds a given
2365 // single bond is adjacent to
2366 std::vector<unsigned int> singleBondCounts(mol.getNumBonds(), 0);
2367 std::vector<Bond *> bondsInPlay;
2368 // keeps track of which single bonds are adjacent to each double bond:
2369 VECT_INT_VECT dblBondNbrs(mol.getNumBonds());
2370 // keeps track of which double bonds are adjacent to each single bond:
2371 VECT_INT_VECT singleBondNbrs(mol.getNumBonds());
2372 // keeps track of which single bonds need a dir set and which double bonds
2373 // need to have their neighbors' dirs set
2374 boost::dynamic_bitset<> needsDir(mol.getNumBonds());
2375
2376 // find double bonds that should be considered for
2377 // stereochemistry
2378 // NOTE that we are explicitly excluding double bonds in rings
2379 // with this test.
2380 bool resetRings = false;
2381 if (!mol.getRingInfo()->isInitialized()) {
2382 resetRings = true;
2383 MolOps::fastFindRings(mol);
2384 }
2385
2386 for (RWMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
2387 ++bondIt) {
2388 if (isBondCandidateForStereo(*bondIt)) {
2389 const Atom *a1 = (*bondIt)->getBeginAtom();
2390 const Atom *a2 = (*bondIt)->getEndAtom();
2391
2392 ROMol::OEDGE_ITER beg, end;
2393 boost::tie(beg, end) = mol.getAtomBonds(a1);
2394 while (beg != end) {
2395 const Bond *nbrBond = mol[*beg];
2396 if (nbrBond->getBondType() == Bond::SINGLE ||
2397 nbrBond->getBondType() == Bond::AROMATIC) {
2398 singleBondCounts[nbrBond->getIdx()] += 1;
2399 auto nbrDir = nbrBond->getBondDir();
2400 if (nbrDir == Bond::BondDir::NONE ||
2401 nbrDir == Bond::BondDir::ENDDOWNRIGHT ||
2402 nbrDir == Bond::BondDir::ENDUPRIGHT) {
2403 needsDir[nbrBond->getIdx()] = 1;
2404 }
2405 needsDir[(*bondIt)->getIdx()] = 1;
2406 dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx());
2407 // the search may seem inefficient, but these vectors are going to
2408 // be at most 2 long (with very few exceptions). It's just not worth
2409 // using a different data structure
2410 if (std::find(singleBondNbrs[nbrBond->getIdx()].begin(),
2411 singleBondNbrs[nbrBond->getIdx()].end(),
2412 (*bondIt)->getIdx()) ==
2413 singleBondNbrs[nbrBond->getIdx()].end()) {
2414 singleBondNbrs[nbrBond->getIdx()].push_back((*bondIt)->getIdx());
2415 }
2416 }
2417 ++beg;
2418 }
2419 boost::tie(beg, end) = mol.getAtomBonds(a2);
2420 while (beg != end) {
2421 const Bond *nbrBond = mol[*beg];
2422 if (nbrBond->getBondType() == Bond::SINGLE ||
2423 nbrBond->getBondType() == Bond::AROMATIC) {
2424 singleBondCounts[nbrBond->getIdx()] += 1;
2425 auto nbrDir = nbrBond->getBondDir();
2426 if (nbrDir == Bond::BondDir::NONE ||
2427 nbrDir == Bond::BondDir::ENDDOWNRIGHT ||
2428 nbrDir == Bond::BondDir::ENDUPRIGHT) {
2429 needsDir[nbrBond->getIdx()] = 1;
2430 }
2431 needsDir[(*bondIt)->getIdx()] = 1;
2432 dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx());
2433
2434 // the search may seem inefficient, but these vectors are going to
2435 // be at most 2 long (with very few exceptions). It's just not worth
2436 // using a different data structure
2437 if (std::find(singleBondNbrs[nbrBond->getIdx()].begin(),
2438 singleBondNbrs[nbrBond->getIdx()].end(),
2439 (*bondIt)->getIdx()) ==
2440 singleBondNbrs[nbrBond->getIdx()].end()) {
2441 singleBondNbrs[nbrBond->getIdx()].push_back((*bondIt)->getIdx());
2442 }
2443 }
2444 ++beg;
2445 }
2446 bondsInPlay.push_back(*bondIt);
2447 }
2448 }
2449
2450 if (!bondsInPlay.size()) {
2451 if (resetRings) {
2452 mol.getRingInfo()->reset();
2453 }
2454 return;
2455 }
2456
2457 // order the double bonds based on the singleBondCounts of their neighbors:
2458 std::vector<std::pair<unsigned int, Bond *>> orderedBondsInPlay;
2459 for (auto dblBond : bondsInPlay) {
2460 unsigned int countHere =
2461 std::accumulate(dblBondNbrs[dblBond->getIdx()].begin(),
2462 dblBondNbrs[dblBond->getIdx()].end(), 0);
2463 // and favor double bonds that are *not* in rings. The combination of
2464 // using the sum above (instead of the max) and this ring-membershipt test
2465 // seem to fix sf.net issue 3009836
2466 if (!(mol.getRingInfo()->numBondRings(dblBond->getIdx()))) {
2467 countHere *= 10;
2468 }
2469 orderedBondsInPlay.push_back(std::make_pair(countHere, dblBond));
2470 }
2471 std::sort(orderedBondsInPlay.begin(), orderedBondsInPlay.end());
2472
2473 // oof, now loop over the double bonds in that order and
2474 // update their neighbor directionalities:
2475 std::vector<std::pair<unsigned int, Bond *>>::reverse_iterator pairIter;
2476 for (pairIter = orderedBondsInPlay.rbegin();
2477 pairIter != orderedBondsInPlay.rend(); ++pairIter) {
2478 // std::cerr << "RESET?: " << pairIter->second->getIdx() << " "
2479 // << pairIter->second->getStereo() << std::endl;
2480 updateDoubleBondNeighbors(mol, pairIter->second, conf, needsDir,
2481 singleBondCounts, singleBondNbrs);
2482 }
2483 if (resetRings) {
2484 mol.getRingInfo()->reset();
2485 }
2486 }
2487
detectBondStereochemistry(ROMol & mol,int confId)2488 void detectBondStereochemistry(ROMol &mol, int confId) {
2489 if (!mol.getNumConformers()) {
2490 return;
2491 }
2492 const Conformer &conf = mol.getConformer(confId);
2493 setDoubleBondNeighborDirections(mol, &conf);
2494 }
2495
setBondStereoFromDirections(ROMol & mol)2496 void setBondStereoFromDirections(ROMol &mol) {
2497 for (Bond *bond : mol.bonds()) {
2498 if (bond->getBondType() == Bond::DOUBLE) {
2499 const Atom *stereoBondBeginAtom = bond->getBeginAtom();
2500 const Atom *stereoBondEndAtom = bond->getEndAtom();
2501
2502 const Bond *directedBondAtBegin =
2503 Chirality::getNeighboringDirectedBond(mol, stereoBondBeginAtom);
2504 const Bond *directedBondAtEnd =
2505 Chirality::getNeighboringDirectedBond(mol, stereoBondEndAtom);
2506
2507 if (directedBondAtBegin != nullptr && directedBondAtEnd != nullptr) {
2508 unsigned beginSideStereoAtom =
2509 directedBondAtBegin->getOtherAtomIdx(stereoBondBeginAtom->getIdx());
2510 unsigned endSideStereoAtom =
2511 directedBondAtEnd->getOtherAtomIdx(stereoBondEndAtom->getIdx());
2512
2513 bond->setStereoAtoms(beginSideStereoAtom, endSideStereoAtom);
2514
2515 auto beginSideBondDirection = directedBondAtBegin->getBondDir();
2516 if (directedBondAtBegin->getBeginAtom() == stereoBondBeginAtom) {
2517 beginSideBondDirection = getOppositeBondDir(beginSideBondDirection);
2518 }
2519
2520 auto endSideBondDirection = directedBondAtEnd->getBondDir();
2521 if (directedBondAtEnd->getEndAtom() == stereoBondEndAtom) {
2522 endSideBondDirection = getOppositeBondDir(endSideBondDirection);
2523 }
2524
2525 if (beginSideBondDirection == endSideBondDirection) {
2526 bond->setStereo(Bond::STEREOTRANS);
2527 } else {
2528 bond->setStereo(Bond::STEREOCIS);
2529 }
2530 }
2531 }
2532 }
2533 }
2534
assignStereochemistryFrom3D(ROMol & mol,int confId,bool replaceExistingTags)2535 void assignStereochemistryFrom3D(ROMol &mol, int confId,
2536 bool replaceExistingTags) {
2537 if (!mol.getNumConformers() || !mol.getConformer(confId).is3D()) {
2538 return;
2539 }
2540
2541 detectBondStereochemistry(mol, confId);
2542 assignChiralTypesFrom3D(mol, confId, replaceExistingTags);
2543 bool force = true;
2544 bool flagPossibleStereoCenters = true;
2545 assignStereochemistry(mol, replaceExistingTags, force,
2546 flagPossibleStereoCenters);
2547 }
2548
assignChiralTypesFromBondDirs(ROMol & mol,const int confId,const bool replaceExistingTags)2549 void assignChiralTypesFromBondDirs(ROMol &mol, const int confId,
2550 const bool replaceExistingTags) {
2551 if (!mol.getNumConformers()) {
2552 return;
2553 }
2554 auto conf = mol.getConformer(confId);
2555 boost::dynamic_bitset<> atomsSet(mol.getNumAtoms(), 0);
2556 for (auto &bond : mol.bonds()) {
2557 const Bond::BondDir dir = bond->getBondDir();
2558 if (dir != Bond::UNKNOWN) {
2559 // the bond is marked as chiral:
2560 if (dir == Bond::BEGINWEDGE || dir == Bond::BEGINDASH) {
2561 Atom *atom = bond->getBeginAtom();
2562 if (atomsSet[atom->getIdx()] ||
2563 (!replaceExistingTags &&
2564 atom->getChiralTag() != Atom::CHI_UNSPECIFIED)) {
2565 continue;
2566 }
2567 if (atom->getImplicitValence() == -1) {
2568 atom->calcExplicitValence(false);
2569 atom->calcImplicitValence(false);
2570 }
2571 Atom::ChiralType code = atomChiralTypeFromBondDir(mol, bond, &conf);
2572 if (code != Atom::ChiralType::CHI_UNSPECIFIED) {
2573 atomsSet.set(atom->getIdx());
2574 // std::cerr << "atom " << atom->getIdx() << " code " << code
2575 // << " from bond " << bond->getIdx() << std::endl;
2576 }
2577 atom->setChiralTag(code);
2578
2579 // within the RD representation, if a three-coordinate atom
2580 // is chiral and has an implicit H, that H needs to be made explicit:
2581 if (atom->getDegree() == 3 && !atom->getNumExplicitHs() &&
2582 atom->getNumImplicitHs() == 1) {
2583 atom->setNumExplicitHs(1);
2584 // recalculated number of implicit Hs:
2585 atom->updatePropertyCache();
2586 }
2587 }
2588 }
2589 }
2590 }
2591
removeStereochemistry(ROMol & mol)2592 void removeStereochemistry(ROMol &mol) {
2593 if (mol.hasProp(common_properties::_StereochemDone)) {
2594 mol.clearProp(common_properties::_StereochemDone);
2595 }
2596 for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
2597 ++atIt) {
2598 (*atIt)->setChiralTag(Atom::CHI_UNSPECIFIED);
2599 if ((*atIt)->hasProp(common_properties::_CIPCode)) {
2600 (*atIt)->clearProp(common_properties::_CIPCode);
2601 }
2602 if ((*atIt)->hasProp(common_properties::_CIPRank)) {
2603 (*atIt)->clearProp(common_properties::_CIPRank);
2604 }
2605 }
2606 for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds();
2607 ++bondIt) {
2608 if ((*bondIt)->getBondType() == Bond::DOUBLE) {
2609 (*bondIt)->setStereo(Bond::STEREONONE);
2610 (*bondIt)->getStereoAtoms().clear();
2611 } else if ((*bondIt)->getBondType() == Bond::SINGLE) {
2612 (*bondIt)->setBondDir(Bond::NONE);
2613 }
2614 }
2615 std::vector<StereoGroup> sgs;
2616 static_cast<RWMol &>(mol).setStereoGroups(std::move(sgs));
2617 }
2618
2619 } // end of namespace MolOps
2620 } // end of namespace RDKit
2621