1 //
2 //  Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <GraphMol/RDKitBase.h>
11 #include <GraphMol/Canon.h>
12 #include <GraphMol/SmilesParse/SmilesParseOps.h>
13 #include <GraphMol/RDKitQueries.h>
14 #include <RDGeneral/Exceptions.h>
15 #include <RDGeneral/hash/hash.hpp>
16 #include <RDGeneral/utils.h>
17 #include <algorithm>
18 
19 namespace RDKit {
20 namespace Canon {
21 namespace {
isUnsaturated(const Atom * atom,const ROMol & mol)22 bool isUnsaturated(const Atom *atom, const ROMol &mol) {
23   for (const auto &bndItr :
24        boost::make_iterator_range(mol.getAtomBonds(atom))) {
25     // can't just check for single bonds, because dative bonds also have an
26     // order of 1
27     if (mol[bndItr]->getBondTypeAsDouble() > 1) {
28       return true;
29     }
30   }
31   return false;
32 }
33 
hasSingleHQuery(const Atom::QUERYATOM_QUERY * q)34 bool hasSingleHQuery(const Atom::QUERYATOM_QUERY *q) {
35   // list queries are series of nested ors of AtomAtomicNum queries
36   PRECONDITION(q, "bad query");
37   bool res = false;
38   std::string descr = q->getDescription();
39   if (descr == "AtomAnd") {
40     for (auto cIt = q->beginChildren(); cIt != q->endChildren(); ++cIt) {
41       std::string descr = (*cIt)->getDescription();
42       if (descr == "AtomHCount") {
43         if (!(*cIt)->getNegation() &&
44             ((ATOM_EQUALS_QUERY *)(*cIt).get())->getVal() == 1) {
45           return true;
46         }
47         return false;
48       } else if (descr == "AtomAnd") {
49         res = hasSingleHQuery((*cIt).get());
50         if (res) {
51           return true;
52         }
53       }
54     }
55   }
56   return res;
57 }
58 
atomHasFourthValence(const Atom * atom)59 bool atomHasFourthValence(const Atom *atom) {
60   if (atom->getNumExplicitHs() == 1 || atom->getImplicitValence() == 1) {
61     return true;
62   }
63   if (atom->hasQuery()) {
64     // the SMARTS [C@@H] produces an atom with a H query, but we also
65     // need to treat this like an explicit H for chirality purposes
66     // This was Github #1489
67     return hasSingleHQuery(atom->getQuery());
68   }
69   return false;
70 }
71 }  // end of anonymous namespace
72 
chiralAtomNeedsTagInversion(const RDKit::ROMol & mol,const RDKit::Atom * atom,bool isAtomFirst,size_t numClosures)73 bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol,
74                                  const RDKit::Atom *atom, bool isAtomFirst,
75                                  size_t numClosures) {
76   PRECONDITION(atom, "bad atom");
77   return atom->getDegree() == 3 &&
78          ((isAtomFirst && atom->getNumExplicitHs() == 1) ||
79           (!atomHasFourthValence(atom) && numClosures == 1 &&
80            !isUnsaturated(atom, mol)));
81 }
82 
83 struct _possibleCompare
84     : public std::binary_function<PossibleType, PossibleType, bool> {
operator ()RDKit::Canon::_possibleCompare85   bool operator()(const PossibleType &arg1, const PossibleType &arg2) const {
86     return (arg1.get<0>() < arg2.get<0>());
87   }
88 };
89 
checkBondsInSameBranch(MolStack & molStack,Bond * dblBnd,Bond * dirBnd)90 bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) {
91   bool seenDblBond = false;
92   int branchCounter = 0;
93   for (const auto &item : molStack) {
94     switch (item.type) {
95       case MOL_STACK_BOND:
96         if (item.obj.bond == dirBnd || item.obj.bond == dblBnd) {
97           if (seenDblBond) {
98             return branchCounter == 0;
99           } else {
100             seenDblBond = true;
101           }
102         }
103         break;
104       case MOL_STACK_BRANCH_OPEN:
105         if (seenDblBond) {
106           ++branchCounter;
107         }
108         break;
109       case MOL_STACK_BRANCH_CLOSE:
110         if (seenDblBond) {
111           --branchCounter;
112         }
113         break;
114       default:
115         break;
116     }
117   }
118   // We should not ever hit this. But if we do, returning false
119   // causes the same behavior as before this patch.
120   return false;
121 }
122 
switchBondDir(Bond * bond)123 void switchBondDir(Bond *bond) {
124   PRECONDITION(bond, "bad bond");
125   PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic(),
126                "bad bond type");
127   switch (bond->getBondDir()) {
128     case Bond::ENDUPRIGHT:
129       bond->setBondDir(Bond::ENDDOWNRIGHT);
130       break;
131     case Bond::ENDDOWNRIGHT:
132       bond->setBondDir(Bond::ENDUPRIGHT);
133       break;
134     default:
135       break;
136   }
137 }
138 
139 // FIX: this may only be of interest from the SmilesWriter, should we
140 // move it there?
141 //
142 //
canonicalizeDoubleBond(Bond * dblBond,UINT_VECT & bondVisitOrders,UINT_VECT & atomVisitOrders,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,MolStack & molStack)143 void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
144                             UINT_VECT &atomVisitOrders,
145                             UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
146                             MolStack &molStack) {
147   PRECONDITION(dblBond, "bad bond");
148   PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order");
149   PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo");
150   PRECONDITION(dblBond->getStereoAtoms().size() >= 2, "bad bond stereo atoms");
151   PRECONDITION(atomVisitOrders[dblBond->getBeginAtomIdx()] > 0 ||
152                    atomVisitOrders[dblBond->getEndAtomIdx()] > 0,
153                "neither end atom traversed");
154 
155   Atom *atom1 = dblBond->getBeginAtom();
156   Atom *atom2 = dblBond->getEndAtom();
157   // we only worry about double bonds that begin and end at atoms
158   // of degree 2 or 3:
159   if ((atom1->getDegree() != 2 && atom1->getDegree() != 3) ||
160       (atom2->getDegree() != 2 && atom2->getDegree() != 3)) {
161     return;
162   }
163   // ensure that atom1 is the lower numbered atom of the double bond (the one
164   // traversed first)
165   if (atomVisitOrders[dblBond->getBeginAtomIdx()] >=
166       atomVisitOrders[dblBond->getEndAtomIdx()]) {
167     std::swap(atom1, atom2);
168   }
169 
170   Bond *firstFromAtom1 = nullptr, *secondFromAtom1 = nullptr;
171   Bond *firstFromAtom2 = nullptr, *secondFromAtom2 = nullptr;
172 
173   ROMol &mol = dblBond->getOwningMol();
174   auto firstVisitOrder = mol.getNumBonds() + 1;
175 
176   ROMol::OBOND_ITER_PAIR atomBonds;
177   // -------------------------------------------------------
178   // find the lowest visit order bonds from each end and determine
179   // if anything is already constraining our choice of directions:
180   bool dir1Set = false, dir2Set = false;
181   for (const auto &bndItr :
182        boost::make_iterator_range(mol.getAtomBonds(atom1))) {
183     auto bond = mol[bndItr];
184     if (bond != dblBond) {
185       auto bondIdx = bond->getIdx();
186       if (bondDirCounts[bondIdx] > 0) {
187         dir1Set = true;
188       }
189       if (!firstFromAtom1 || bondVisitOrders[bondIdx] < firstVisitOrder) {
190         if (firstFromAtom1) {
191           secondFromAtom1 = firstFromAtom1;
192         }
193         firstFromAtom1 = bond;
194         firstVisitOrder = bondVisitOrders[bondIdx];
195       } else {
196         secondFromAtom1 = bond;
197       }
198     }
199   }
200   firstVisitOrder = mol.getNumBonds() + 1;
201   for (const auto &bndItr :
202        boost::make_iterator_range(mol.getAtomBonds(atom2))) {
203     auto bond = mol[bndItr];
204     if (bond != dblBond) {
205       auto bondIdx = bond->getIdx();
206       if (bondDirCounts[bondIdx] > 0) {
207         dir2Set = true;
208       }
209       if (!firstFromAtom2 || bondVisitOrders[bondIdx] < firstVisitOrder) {
210         if (firstFromAtom2) {
211           secondFromAtom2 = firstFromAtom2;
212         }
213         firstFromAtom2 = bond;
214         firstVisitOrder = bondVisitOrders[bondIdx];
215       } else {
216         secondFromAtom2 = bond;
217       }
218     }
219   }
220 
221   // make sure we found everything we need to find:
222   CHECK_INVARIANT(firstFromAtom1, "could not find atom1");
223   CHECK_INVARIANT(firstFromAtom2, "could not find atom2");
224   CHECK_INVARIANT(atom1->getDegree() == 2 || secondFromAtom1,
225                   "inconsistency at atom1");
226   CHECK_INVARIANT(atom2->getDegree() == 2 || secondFromAtom2,
227                   "inconsistency at atom2");
228 
229   bool setFromBond1 = true;
230   Bond::BondDir atom1Dir = Bond::NONE;
231   Bond::BondDir atom2Dir = Bond::NONE;
232   Bond *atom1ControllingBond = firstFromAtom1;
233   Bond *atom2ControllingBond = firstFromAtom2;
234   if (!dir1Set && !dir2Set) {
235     // ----------------------------------
236     // nothing has touched our bonds so far, so set the
237     // directions to "arbitrary" values:
238 
239     // the bond we came in on becomes ENDUPRIGHT:
240     atom1Dir = Bond::ENDUPRIGHT;
241     firstFromAtom1->setBondDir(atom1Dir);
242     bondDirCounts[firstFromAtom1->getIdx()] += 1;
243     atomDirCounts[atom1->getIdx()] += 1;
244   } else if (!dir2Set) {
245     // at least one of the bonds on atom1 has its directionality set already:
246     if (bondDirCounts[firstFromAtom1->getIdx()] > 0) {
247       // The first bond's direction has been set at some earlier point:
248       atom1Dir = firstFromAtom1->getBondDir();
249       bondDirCounts[firstFromAtom1->getIdx()] += 1;
250       atomDirCounts[atom1->getIdx()] += 1;
251       if (secondFromAtom1) {
252         // both bonds have their directionalities set, make sure
253         // they are compatible:
254         if (firstFromAtom1->getBondDir() == secondFromAtom1->getBondDir() &&
255             bondDirCounts[firstFromAtom2->getIdx()]) {
256           CHECK_INVARIANT(
257               ((firstFromAtom1->getBeginAtomIdx() == atom1->getIdx()) ^
258                (secondFromAtom1->getBeginAtomIdx() == atom1->getIdx())),
259               "inconsistent state");
260         }
261       }
262     } else {
263       // the second bond must be present and setting the direction:
264       CHECK_INVARIANT(secondFromAtom1, "inconsistent state");
265       CHECK_INVARIANT(bondDirCounts[secondFromAtom1->getIdx()] > 0,
266                       "inconsistent state");
267       // It must be the second bond setting the direction.
268       // This happens when the bond dir is set in a branch:
269       //        v- this double bond
270       //   CC(/C=P/N)=N/O
271       //      ^- the second bond sets the direction
272       // or when the first bond is a ring closure from an
273       // earlier traversed atom:
274       //             v- this double bond
275       //   NC1=NOC/C1=N\O
276       //     ^- this closure ends up being the first bond,
277       //        and it does not set the direction.
278       //
279       // This addresses parts of Issue 185 and sf.net Issue 1842174
280       //
281       atom1Dir = secondFromAtom1->getBondDir();
282 
283       firstFromAtom1->setBondDir(atom1Dir);
284       bondDirCounts[firstFromAtom1->getIdx()] += 1;
285       atomDirCounts[atom1->getIdx()] += 2;
286       atom1ControllingBond = secondFromAtom1;
287     }
288   } else {
289     // dir2 has been set, and dir1 hasn't: we're dealing with a stereochem
290     // specification on a ring double bond:
291     setFromBond1 = false;
292     // at least one of the bonds on atom2 has its directionality set already:
293     if (bondDirCounts[firstFromAtom2->getIdx()] > 0) {
294       // The second bond's direction has been set at some earlier point:
295       atom2Dir = firstFromAtom2->getBondDir();
296       bondDirCounts[firstFromAtom2->getIdx()] += 1;
297       atomDirCounts[atom2->getIdx()] += 1;
298       if (secondFromAtom2) {
299         // both bonds have their directionalities set, make sure
300         // they are compatible:
301         if (firstFromAtom2->getBondDir() == secondFromAtom2->getBondDir() &&
302             bondDirCounts[firstFromAtom1->getIdx()]) {
303           CHECK_INVARIANT(
304               ((firstFromAtom2->getBeginAtomIdx() == atom2->getIdx()) ^
305                (secondFromAtom2->getBeginAtomIdx() == atom2->getIdx())),
306               "inconsistent state");
307         }
308       }
309     } else {
310       // the second bond must be present and setting the direction:
311       CHECK_INVARIANT(secondFromAtom2, "inconsistent state");
312       CHECK_INVARIANT(bondDirCounts[secondFromAtom2->getIdx()] > 0,
313                       "inconsistent state");
314       // It must be the second bond setting the direction.
315       // This happens when the bond dir is set in a branch:
316       //        v- this double bond
317       //   CC(/C=P/N)=N/O
318       //      ^- the second bond sets the direction
319       // or when the first bond is a ring closure from an
320       // earlier traversed atom:
321       //             v- this double bond
322       //   NC1=NOC/C1=N\O
323       //     ^- this closure ends up being the first bond,
324       //        and it does not set the direction.
325       //
326       // This addresses parts of Issue 185 and sf.net Issue 1842174
327       //
328       atom2Dir = secondFromAtom2->getBondDir();
329 
330       firstFromAtom2->setBondDir(atom2Dir);
331       bondDirCounts[firstFromAtom2->getIdx()] += 1;
332       atomDirCounts[atom2->getIdx()] += 2;
333       atom2ControllingBond = secondFromAtom2;
334     }
335     // CHECK_INVARIANT(0,"ring stereochemistry not handled");
336   }  // end of the ring stereochemistry if
337 
338   // now set the directionality on the other side:
339   if (setFromBond1) {
340     if (dblBond->getStereo() == Bond::STEREOE ||
341         dblBond->getStereo() == Bond::STEREOTRANS) {
342       atom2Dir = atom1Dir;
343     } else if (dblBond->getStereo() == Bond::STEREOZ ||
344                dblBond->getStereo() == Bond::STEREOCIS) {
345       atom2Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
346                                                 : Bond::ENDUPRIGHT;
347     }
348     CHECK_INVARIANT(atom2Dir != Bond::NONE, "stereo not set");
349 
350     // If we're not looking at the bonds used to determine the
351     // stereochemistry, we need to flip the setting on the other bond:
352     const INT_VECT &stereoAtoms = dblBond->getStereoAtoms();
353 
354     auto isClosingRingBond = [](Bond *bond) {
355       if (bond == nullptr) {
356         return false;
357       }
358       auto beginIdx = bond->getBeginAtomIdx();
359       auto endIdx = bond->getEndAtomIdx();
360       return beginIdx > endIdx &&
361              beginIdx - endIdx > 1 &&
362              bond->hasProp(common_properties::_TraversalRingClosureBond);
363     };
364 
365     auto isFlipped = false;
366 
367     if (atom1->getDegree() == 3 &&
368         std::find(stereoAtoms.begin(), stereoAtoms.end(),
369                   static_cast<int>(atom1ControllingBond->getOtherAtomIdx(
370                       atom1->getIdx()))) == stereoAtoms.end()) {
371       isFlipped = true;
372       atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
373                                                 : Bond::ENDUPRIGHT;
374     }
375     // std::cerr<<" 0 set bond 2: "<<firstFromAtom2->getIdx()<<"
376     // "<<atom2Dir<<std::endl;
377     if (atom2->getDegree() == 3 &&
378         std::find(stereoAtoms.begin(), stereoAtoms.end(),
379                   static_cast<int>(firstFromAtom2->getOtherAtomIdx(
380                       atom2->getIdx()))) == stereoAtoms.end()) {
381       isFlipped = true;
382       atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
383                                                 : Bond::ENDUPRIGHT;
384     }
385 
386     if (!isFlipped &&
387           (isClosingRingBond(dblBond) ||
388           (isClosingRingBond(secondFromAtom1) &&
389             !secondFromAtom1->getIsAromatic() &&
390             secondFromAtom1->getBondDir() != Bond::NONE))
391         ) {
392       atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
393                                                 : Bond::ENDUPRIGHT;
394     }
395     // std::cerr<<" 1 set bond 2: "<<firstFromAtom2->getIdx()<<"
396     // "<<atom2Dir<<std::endl;
397     firstFromAtom2->setBondDir(atom2Dir);
398 
399     bondDirCounts[firstFromAtom2->getIdx()] += 1;
400     atomDirCounts[atom2->getIdx()] += 1;
401   } else {
402     // we come before a ring closure:
403     if (dblBond->getStereo() == Bond::STEREOZ ||
404         dblBond->getStereo() == Bond::STEREOCIS) {
405       atom1Dir = atom2Dir;
406     } else if (dblBond->getStereo() == Bond::STEREOE ||
407                dblBond->getStereo() == Bond::STEREOTRANS) {
408       atom1Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
409                                                 : Bond::ENDUPRIGHT;
410     }
411     CHECK_INVARIANT(atom1Dir != Bond::NONE, "stereo not set");
412     // If we're not looking at the bonds used to determine the
413     // stereochemistry, we need to flip the setting on the other bond:
414     const INT_VECT &stereoAtoms = dblBond->getStereoAtoms();
415     if (atom2->getDegree() == 3 &&
416         std::find(stereoAtoms.begin(), stereoAtoms.end(),
417                   static_cast<int>(atom2ControllingBond->getOtherAtomIdx(
418                       atom2->getIdx()))) == stereoAtoms.end()) {
419       // std::cerr<<"flip 1"<<std::endl;
420       atom1Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
421                                                 : Bond::ENDUPRIGHT;
422     }
423     if (atom1->getDegree() == 3 &&
424         std::find(stereoAtoms.begin(), stereoAtoms.end(),
425                   static_cast<int>(firstFromAtom1->getOtherAtomIdx(
426                       atom1->getIdx()))) == stereoAtoms.end()) {
427       // std::cerr<<"flip 2"<<std::endl;
428       atom1Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
429                                                 : Bond::ENDUPRIGHT;
430     }
431 
432     firstFromAtom1->setBondDir(atom1Dir);
433     switchBondDir(firstFromAtom1);
434     bondDirCounts[firstFromAtom1->getIdx()] += 1;
435     atomDirCounts[atom1->getIdx()] += 1;
436   }
437 
438   // -----------------------------------
439   //
440   // Check if there are other bonds from atoms 1 and 2 that need
441   // to have their directionalities set:
442   ///
443   if (atom1->getDegree() == 3) {
444     if (!bondDirCounts[secondFromAtom1->getIdx()]) {
445       // This bond (the second bond from the starting atom of the double bond)
446       // is a special case.  It's going to appear in a branch in the smiles:
447       //     X\C(\Y)=C/Z
448       //         ^
449       //         |- here
450       // so it actually needs to go down with the *same* direction as the
451       // bond that's already been set (because "pulling the bond out of the
452       // branch" reverses its direction).
453       // A quick example.  This SMILES:
454       //     F/C(\Cl)=C/F
455       // is *wrong*. This is the correct form:
456       //     F/C(/Cl)=C/F
457       // So, since we want this bond to have the opposite direction to the
458       // other one, we put it in with the same direction.
459       // This was Issue 183
460 
461       // UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1
462       // branches off a cycle, and secondFromAtom1 shows up at the end of the
463       // cycle). This was Github Issue #2023, see it for an example.
464       if (checkBondsInSameBranch(molStack, dblBond, secondFromAtom1)) {
465         auto otherDir = (firstFromAtom1->getBondDir() == Bond::ENDUPRIGHT)
466                             ? Bond::ENDDOWNRIGHT
467                             : Bond::ENDUPRIGHT;
468         secondFromAtom1->setBondDir(otherDir);
469       } else {
470         secondFromAtom1->setBondDir(firstFromAtom1->getBondDir());
471       }
472     }
473     bondDirCounts[secondFromAtom1->getIdx()] += 1;
474     atomDirCounts[atom1->getIdx()] += 1;
475   }
476 
477   if (atom2->getDegree() == 3) {
478     if (!bondDirCounts[secondFromAtom2->getIdx()]) {
479       // Here we set the bond direction to be opposite the other one (since
480       // both come after the atom connected to the double bond).
481       Bond::BondDir otherDir;
482       if (!secondFromAtom2->hasProp(
483               common_properties::_TraversalRingClosureBond)) {
484         otherDir = (firstFromAtom2->getBondDir() == Bond::ENDUPRIGHT)
485                        ? Bond::ENDDOWNRIGHT
486                        : Bond::ENDUPRIGHT;
487       } else {
488         // another one those irritating little reversal things due to
489         // ring closures
490         otherDir = firstFromAtom2->getBondDir();
491       }
492       secondFromAtom2->setBondDir(otherDir);
493     }
494     bondDirCounts[secondFromAtom2->getIdx()] += 1;
495     atomDirCounts[atom2->getIdx()] += 1;
496     // std::cerr<<"   other: "<<secondFromAtom2->getIdx()<<"
497     // "<<otherDir<<std::endl;
498   }
499 
500   if (setFromBond1) {
501     // This is an odd case... The bonds off the beginning atom are
502     // after the start atom in the traversal stack.  These need to
503     // have their directions reversed.  An example SMILES (unlikely
504     // to actually traverse this way is:
505     //   C(=C/O)/F    or C(/F)=C/O
506     // That bond is Z, without the reversal, this would come out:
507     //   C(=C/O)\F    or C(\F)=C/O
508     // which is E.
509     //
510     // In the case of three-coordinate atoms, we don't need to flip
511     // the second bond because the Issue 183 fix (above) already got
512     // that one.
513     //
514     // This was Issue 191 and continued into sf.net issue 1842174
515     if (bondVisitOrders[atom1ControllingBond->getIdx()] >
516         atomVisitOrders[atom1->getIdx()]) {
517       if (bondDirCounts[atom1ControllingBond->getIdx()] == 1) {
518         if (!atom1ControllingBond->hasProp(
519                 common_properties::_TraversalRingClosureBond)) {
520           // std::cerr<<"  switcheroo 1"<<std::endl;
521           switchBondDir(atom1ControllingBond);
522         }
523       } else if (bondDirCounts[firstFromAtom2->getIdx()] == 1) {
524         // the controlling bond at atom1 is being set by someone else, flip the
525         // direction
526         // on the atom2 bond instead:
527         // std::cerr<<"  switcheroo 2"<<std::endl;
528         switchBondDir(firstFromAtom2);
529         if (secondFromAtom2 && bondDirCounts[secondFromAtom2->getIdx()] >= 1) {
530           switchBondDir(secondFromAtom2);
531         }
532       }
533     }
534   }
535 
536   // something to watch out for here. For this molecule and traversal order:
537   //   0 1 2 3  4 5 6  7 8  <- atom numbers
538   //   C/C=C/C(/N=C/C)=C/C
539   //        ^  ^
540   //        |--|-- these two bonds must match in direction or the SMILES
541   //               is inconsistent (according to Daylight, Marvin does ok with
542   //               it)
543   // That means that the direction of the bond from atom 3->4 needs to be set
544   // when the bond from 2->3 is set.
545   // Issue2023: But only if 3->4 doesn't have a direction yet?
546   //
547   // I believe we only need to worry about this for the bonds from atom2.
548   const Atom *atom3 = firstFromAtom2->getOtherAtom(atom2);
549   if (atom3->getDegree() == 3) {
550     Bond *otherAtom3Bond = nullptr;
551     bool dblBondPresent = false;
552     atomBonds = mol.getAtomBonds(atom3);
553     while (atomBonds.first != atomBonds.second) {
554       Bond *tbond = mol[*atomBonds.first];
555       if (tbond->getBondType() == Bond::DOUBLE &&
556           tbond->getStereo() > Bond::STEREOANY) {
557         dblBondPresent = true;
558       } else if ((tbond->getBondType() == Bond::SINGLE) &&
559                  (tbond != firstFromAtom2)) {
560         otherAtom3Bond = tbond;
561       }
562       atomBonds.first++;
563     }
564     if (dblBondPresent && otherAtom3Bond &&
565         otherAtom3Bond->getBondDir() == Bond::NONE) {
566       // std::cerr<<"set!"<<std::endl;
567       otherAtom3Bond->setBondDir(firstFromAtom2->getBondDir());
568       bondDirCounts[otherAtom3Bond->getIdx()] += 1;
569       atomDirCounts[atom3->getIdx()] += 1;
570     }
571   }
572 }
573 
574 // finds cycles
dfsFindCycles(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,const UINT_VECT & ranks,UINT_VECT & atomOrders,VECT_INT_VECT & atomRingClosures,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)575 void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
576                    std::vector<AtomColors> &colors, const UINT_VECT &ranks,
577                    UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures,
578                    const boost::dynamic_bitset<> *bondsInPlay,
579                    const std::vector<std::string> *bondSymbols, bool doRandom) {
580   Atom *atom = mol.getAtomWithIdx(atomIdx);
581   atomOrders.push_back(atomIdx);
582 
583   colors[atomIdx] = GREY_NODE;
584 
585   // ---------------------
586   //
587   //  Build the list of possible destinations from here
588   //
589   // ---------------------
590   std::vector<PossibleType> possibles;
591   possibles.resize(0);
592   ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
593   possibles.reserve(bondsPair.second - bondsPair.first);
594 
595   while (bondsPair.first != bondsPair.second) {
596     Bond *theBond = mol[*(bondsPair.first)];
597     bondsPair.first++;
598     if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) {
599       continue;
600     }
601     if (inBondIdx < 0 ||
602         theBond->getIdx() != static_cast<unsigned int>(inBondIdx)) {
603       int otherIdx = theBond->getOtherAtomIdx(atomIdx);
604       long rank = ranks[otherIdx];
605       // ---------------------
606       //
607       // things are a bit more complicated if we are sitting on a
608       // ring atom. we would like to traverse first to the
609       // ring-closure atoms, then to atoms outside the ring first,
610       // then to atoms in the ring that haven't already been visited
611       // (non-ring-closure atoms).
612       //
613       //  Here's how the black magic works:
614       //   - non-ring atom neighbors have their original ranks
615       //   - ring atom neighbors have this added to their ranks:
616       //       (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS
617       //   - ring-closure neighbors lose a factor of:
618       //       (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS
619       //
620       //  This tactic biases us to traverse to non-ring neighbors first,
621       //  original ordering if bond orders are all equal... crafty, neh?
622       //
623       // ---------------------
624       if (!doRandom) {
625         if (colors[otherIdx] == GREY_NODE) {
626           rank -= static_cast<int>(MAX_BONDTYPE + 1) * MAX_NATOMS * MAX_NATOMS;
627           if (!bondSymbols) {
628             rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
629                     MAX_NATOMS;
630           } else {
631             const std::string &symb = (*bondSymbols)[theBond->getIdx()];
632             std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
633             rank += (hsh % MAX_NATOMS) * MAX_NATOMS;
634           }
635         } else if (theBond->getOwningMol().getRingInfo()->numBondRings(
636                        theBond->getIdx())) {
637           if (!bondSymbols) {
638             rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
639                     MAX_NATOMS * MAX_NATOMS;
640           } else {
641             const std::string &symb = (*bondSymbols)[theBond->getIdx()];
642             std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
643             rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS;
644           }
645         }
646       } else {
647         // randomize the rank
648         rank = getRandomGenerator()();
649       }
650       // std::cerr << "            " << atomIdx << ": " << otherIdx << " " <<
651       // rank
652       //           << std::endl;
653       // std::cerr<<"aIdx: "<< atomIdx <<"   p: "<<otherIdx<<" Rank:
654       // "<<ranks[otherIdx] <<" "<<colors[otherIdx]<<"
655       // "<<theBond->getBondType()<<" "<<rank<<std::endl;
656       possibles.emplace_back(rank, otherIdx, theBond);
657     }
658   }
659 
660   // ---------------------
661   //
662   //  Sort on ranks
663   //
664   // ---------------------
665   std::sort(possibles.begin(), possibles.end(), _possibleCompare());
666   // if (possibles.size())
667   //   std::cerr << " aIdx1: " << atomIdx
668   //             << " first: " << possibles.front().get<0>() << " "
669   //             << possibles.front().get<1>() << std::endl;
670   // // ---------------------
671   //
672   //  Now work the children
673   //
674   // ---------------------
675   for (auto &possible : possibles) {
676     int possibleIdx = possible.get<1>();
677     Bond *bond = possible.get<2>();
678     switch (colors[possibleIdx]) {
679       case WHITE_NODE:
680         // -----
681         // we haven't seen this node at all before, traverse
682         // -----
683         dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks,
684                       atomOrders, atomRingClosures, bondsInPlay, bondSymbols,
685                       doRandom);
686         break;
687       case GREY_NODE:
688         // -----
689         // we've seen this, but haven't finished it (we're finishing a ring)
690         // -----
691         atomRingClosures[possibleIdx].push_back(bond->getIdx());
692         atomRingClosures[atomIdx].push_back(bond->getIdx());
693         break;
694       default:
695         // -----
696         // this node has been finished. don't do anything.
697         // -----
698         break;
699     }
700   }
701   colors[atomIdx] = BLACK_NODE;
702 }  // namespace Canon
703 
dfsBuildStack(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,VECT_INT_VECT & cycles,const UINT_VECT & ranks,UINT_VECT & cyclesAvailable,MolStack & molStack,UINT_VECT & atomOrders,UINT_VECT & bondVisitOrders,VECT_INT_VECT & atomRingClosures,std::vector<INT_LIST> & atomTraversalBondOrder,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)704 void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
705                    std::vector<AtomColors> &colors, VECT_INT_VECT &cycles,
706                    const UINT_VECT &ranks, UINT_VECT &cyclesAvailable,
707                    MolStack &molStack, UINT_VECT &atomOrders,
708                    UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures,
709                    std::vector<INT_LIST> &atomTraversalBondOrder,
710                    const boost::dynamic_bitset<> *bondsInPlay,
711                    const std::vector<std::string> *bondSymbols, bool doRandom) {
712 #if 0
713     std::cerr<<"traverse from atom: "<<atomIdx<<" via bond "<<inBondIdx<<" num cycles available: "
714              <<std::count(cyclesAvailable.begin(),cyclesAvailable.end(),1)<<std::endl;
715 #endif
716 
717   Atom *atom = mol.getAtomWithIdx(atomIdx);
718   INT_LIST directTravList, cycleEndList;
719   boost::dynamic_bitset<> seenFromHere(mol.getNumAtoms());
720 
721   seenFromHere.set(atomIdx);
722   molStack.push_back(MolStackElem(atom));
723   atomOrders[atom->getIdx()] = rdcast<int>(molStack.size());
724   colors[atomIdx] = GREY_NODE;
725 
726   INT_LIST travList;
727   if (inBondIdx >= 0) {
728     travList.push_back(inBondIdx);
729   }
730 
731   // ---------------------
732   //
733   //  Add any ring closures
734   //
735   // ---------------------
736   if (atomRingClosures[atomIdx].size()) {
737     std::vector<unsigned int> ringsClosed;
738     for (auto bIdx : atomRingClosures[atomIdx]) {
739       travList.push_back(bIdx);
740       Bond *bond = mol.getBondWithIdx(bIdx);
741       seenFromHere.set(bond->getOtherAtomIdx(atomIdx));
742       unsigned int ringIdx;
743       if (bond->getPropIfPresent(common_properties::_TraversalRingClosureBond,
744                                  ringIdx)) {
745         // this is end of the ring closure
746         // we can just pull the ring index from the bond itself:
747         molStack.push_back(MolStackElem(bond, atomIdx));
748         bondVisitOrders[bIdx] = molStack.size();
749         molStack.push_back(MolStackElem(ringIdx));
750         // don't make the ring digit immediately available again: we don't want
751         // to have the same
752         // ring digit opening and closing rings on an atom.
753         ringsClosed.push_back(ringIdx - 1);
754       } else {
755         // this is the beginning of the ring closure, we need to come up with a
756         // ring index:
757         auto cAIt =
758             std::find(cyclesAvailable.begin(), cyclesAvailable.end(), 1);
759         if (cAIt == cyclesAvailable.end()) {
760           throw ValueErrorException(
761               "Too many rings open at once. SMILES cannot be generated.");
762         }
763         unsigned int lowestRingIdx = cAIt - cyclesAvailable.begin();
764         cyclesAvailable[lowestRingIdx] = 0;
765         ++lowestRingIdx;
766         bond->setProp(common_properties::_TraversalRingClosureBond,
767                       lowestRingIdx);
768         molStack.push_back(MolStackElem(lowestRingIdx));
769       }
770     }
771     for (auto ringIdx : ringsClosed) {
772       cyclesAvailable[ringIdx] = 1;
773     }
774   }
775 
776   // ---------------------
777   //
778   //  Build the list of possible destinations from here
779   //
780   // ---------------------
781   std::vector<PossibleType> possibles;
782   possibles.resize(0);
783   ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
784   possibles.reserve(bondsPair.second - bondsPair.first);
785 
786   while (bondsPair.first != bondsPair.second) {
787     Bond *theBond = mol[*(bondsPair.first)];
788     bondsPair.first++;
789     if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) {
790       continue;
791     }
792     if (inBondIdx < 0 ||
793         theBond->getIdx() != static_cast<unsigned int>(inBondIdx)) {
794       int otherIdx = theBond->getOtherAtomIdx(atomIdx);
795       // ---------------------
796       //
797       // This time we skip the ring-closure atoms (we did them
798       // above); we want to traverse first to atoms outside the ring
799       // then to atoms in the ring that haven't already been visited
800       // (non-ring-closure atoms).
801       //
802       // otherwise it's the same ranking logic as above
803       //
804       // ---------------------
805       if (colors[otherIdx] != WHITE_NODE || seenFromHere[otherIdx]) {
806         // ring closure or finished atom... skip it.
807         continue;
808       }
809       unsigned long rank = ranks[otherIdx];
810       if (!doRandom) {
811         if (theBond->getOwningMol().getRingInfo()->numBondRings(
812                 theBond->getIdx())) {
813           if (!bondSymbols) {
814             rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
815                     MAX_NATOMS * MAX_NATOMS;
816           } else {
817             const std::string &symb = (*bondSymbols)[theBond->getIdx()];
818             std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
819             rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS;
820           }
821         }
822       } else {
823         // randomize the rank
824         rank = getRandomGenerator()();
825       }
826 
827       possibles.emplace_back(rank, otherIdx, theBond);
828     }
829   }
830 
831   // ---------------------
832   //
833   //  Sort on ranks
834   //
835   // ---------------------
836   std::sort(possibles.begin(), possibles.end(), _possibleCompare());
837   // if (possibles.size())
838   //   std::cerr << " aIdx2: " << atomIdx
839   //             << " first: " << possibles.front().get<0>() << " "
840   //             << possibles.front().get<1>() << std::endl;
841 
842   // ---------------------
843   //
844   //  Now work the children
845   //
846   // ---------------------
847   for (auto possiblesIt = possibles.begin(); possiblesIt != possibles.end();
848        possiblesIt++) {
849     int possibleIdx = possiblesIt->get<1>();
850     if (colors[possibleIdx] != WHITE_NODE) {
851       // we're either done or it's a ring-closure, which we already processed...
852       // this test isn't strictly required, because we only added WHITE notes to
853       // the possibles list, but it seems logical to document it
854       continue;
855     }
856     Bond *bond = possiblesIt->get<2>();
857     Atom *otherAtom = mol.getAtomWithIdx(possibleIdx);
858     // ww might have some residual data from earlier calls, clean that up:
859     otherAtom->clearProp(common_properties::_TraversalBondIndexOrder);
860     travList.push_back(bond->getIdx());
861     if (possiblesIt + 1 != possibles.end()) {
862       // we're branching
863       molStack.push_back(
864           MolStackElem("(", rdcast<int>(possiblesIt - possibles.begin())));
865     }
866     molStack.push_back(MolStackElem(bond, atomIdx));
867     bondVisitOrders[bond->getIdx()] = molStack.size();
868     dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks,
869                   cyclesAvailable, molStack, atomOrders, bondVisitOrders,
870                   atomRingClosures, atomTraversalBondOrder, bondsInPlay,
871                   bondSymbols, doRandom);
872     if (possiblesIt + 1 != possibles.end()) {
873       molStack.push_back(
874           MolStackElem(")", rdcast<int>(possiblesIt - possibles.begin())));
875     }
876   }
877 
878   atomTraversalBondOrder[atom->getIdx()] = travList;
879   colors[atomIdx] = BLACK_NODE;
880 }
881 
canonicalDFSTraversal(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,VECT_INT_VECT & cycles,const UINT_VECT & ranks,UINT_VECT & cyclesAvailable,MolStack & molStack,UINT_VECT & atomOrders,UINT_VECT & bondVisitOrders,VECT_INT_VECT & atomRingClosures,std::vector<INT_LIST> & atomTraversalBondOrder,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)882 void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
883                            std::vector<AtomColors> &colors,
884                            VECT_INT_VECT &cycles, const UINT_VECT &ranks,
885                            UINT_VECT &cyclesAvailable, MolStack &molStack,
886                            UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders,
887                            VECT_INT_VECT &atomRingClosures,
888                            std::vector<INT_LIST> &atomTraversalBondOrder,
889                            const boost::dynamic_bitset<> *bondsInPlay,
890                            const std::vector<std::string> *bondSymbols,
891                            bool doRandom) {
892   PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
893   PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
894   PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small");
895   PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small");
896   PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(),
897                "vector too small");
898   PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(),
899                "vector too small");
900   PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(),
901                "bondsInPlay too small");
902   PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(),
903                "bondSymbols too small");
904 
905   std::vector<AtomColors> tcolors;
906   tcolors.resize(colors.size());
907   std::copy(colors.begin(), colors.end(), tcolors.begin());
908   dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders,
909                 atomRingClosures, bondsInPlay, bondSymbols, doRandom);
910   dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable,
911                 molStack, atomOrders, bondVisitOrders, atomRingClosures,
912                 atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
913 }
914 
canHaveDirection(const Bond * bond)915 bool canHaveDirection(const Bond *bond) {
916   PRECONDITION(bond, "bad bond");
917   Bond::BondType bondType = bond->getBondType();
918   return (bondType == Bond::SINGLE || bondType == Bond::AROMATIC);
919 }
920 
clearBondDirs(ROMol & mol,Bond * refBond,const Atom * fromAtom,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,const UINT_VECT & bondVisitOrders)921 void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom,
922                    UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
923                    const UINT_VECT &bondVisitOrders) {
924   RDUNUSED_PARAM(bondVisitOrders);
925   PRECONDITION(bondDirCounts.size() >= mol.getNumBonds(), "bad dirCount size");
926   PRECONDITION(refBond, "bad bond");
927   PRECONDITION(&refBond->getOwningMol() == &mol, "bad bond");
928   PRECONDITION(fromAtom, "bad atom");
929   PRECONDITION(&fromAtom->getOwningMol() == &mol, "bad bond");
930 
931 #if 0
932     std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
933     std::cerr<<"\n";
934     std::copy(atomDirCounts.begin(),atomDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
935     std::cerr<<"\n";
936     std::cerr<<"cBD: bond: "<<refBond->getIdx()<<" atom: "<<fromAtom->getIdx()<<": ";
937 #endif
938   ROMol::OEDGE_ITER beg, end;
939   boost::tie(beg, end) = mol.getAtomBonds(fromAtom);
940   bool nbrPossible = false, adjusted = false;
941   while (beg != end) {
942     Bond *oBond = mol[*beg];
943     // std::cerr<<"  >>"<<oBond->getIdx()<<" "<<canHaveDirection(oBond)<<"
944     // "<<bondDirCounts[oBond->getIdx()]<<"-"<<bondDirCounts[refBond->getIdx()]<<"
945     // "<<atomDirCounts[oBond->getBeginAtomIdx()]<<"-"<<atomDirCounts[oBond->getEndAtomIdx()]<<std::endl;
946     if (oBond != refBond && canHaveDirection(oBond)) {
947       nbrPossible = true;
948       if ((bondDirCounts[oBond->getIdx()] >=
949            bondDirCounts[refBond->getIdx()]) &&
950           atomDirCounts[oBond->getBeginAtomIdx()] != 1 &&
951           atomDirCounts[oBond->getEndAtomIdx()] != 1) {
952         adjusted = true;
953         bondDirCounts[oBond->getIdx()] -= 1;
954         if (!bondDirCounts[oBond->getIdx()]) {
955           // no one is setting the direction here:
956           oBond->setBondDir(Bond::NONE);
957           atomDirCounts[oBond->getBeginAtomIdx()] -= 1;
958           atomDirCounts[oBond->getEndAtomIdx()] -= 1;
959           // std::cerr<<"ob:"<<oBond->getIdx()<<" ";
960         }
961       }
962     }
963     beg++;
964   }
965   if (nbrPossible && !adjusted &&
966       atomDirCounts[refBond->getBeginAtomIdx()] != 1 &&
967       atomDirCounts[refBond->getEndAtomIdx()] != 1) {
968     // we found a neighbor that could have directionality set,
969     // but it had a lower bondDirCount than us, so we must
970     // need to be adjusted:
971     bondDirCounts[refBond->getIdx()] -= 1;
972     if (!bondDirCounts[refBond->getIdx()]) {
973       refBond->setBondDir(Bond::NONE);
974       atomDirCounts[refBond->getBeginAtomIdx()] -= 1;
975       atomDirCounts[refBond->getEndAtomIdx()] -= 1;
976       // std::cerr<<"rb:"<<refBond->getIdx()<<" ";
977     }
978   }
979   // std::cerr<<std::endl;
980 }
981 
removeRedundantBondDirSpecs(ROMol & mol,MolStack & molStack,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,const UINT_VECT & bondVisitOrders)982 void removeRedundantBondDirSpecs(ROMol &mol, MolStack &molStack,
983                                  UINT_VECT &bondDirCounts,
984                                  UINT_VECT &atomDirCounts,
985                                  const UINT_VECT &bondVisitOrders) {
986   PRECONDITION(bondDirCounts.size() >= mol.getNumBonds(), "bad dirCount size");
987 #if 0
988     std::cerr<<"rRBDS: ";
989     mol.debugMol(std::cerr);
990     std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
991     std::cerr<<"\n";
992 #endif
993   // find bonds that have directions indicated that are redundant:
994   for (auto &msI : molStack) {
995     if (msI.type == MOL_STACK_BOND) {
996       Bond *tBond = msI.obj.bond;
997       const Atom *canonBeginAtom = mol.getAtomWithIdx(msI.number);
998       const Atom *canonEndAtom =
999           mol.getAtomWithIdx(tBond->getOtherAtomIdx(msI.number));
1000       if (canHaveDirection(tBond) && bondDirCounts[tBond->getIdx()] >= 1) {
1001         // start by finding the double bond that sets tBond's direction:
1002         const Atom *dblBondAtom = nullptr;
1003         ROMol::OEDGE_ITER beg, end;
1004         boost::tie(beg, end) = mol.getAtomBonds(canonBeginAtom);
1005         while (beg != end) {
1006           if (mol[*beg] != tBond && mol[*beg]->getBondType() == Bond::DOUBLE &&
1007               mol[*beg]->getStereo() > Bond::STEREOANY) {
1008             dblBondAtom =
1009                 canonBeginAtom;  // tBond->getOtherAtom(canonBeginAtom);
1010             break;
1011           }
1012           beg++;
1013         }
1014         if (dblBondAtom != nullptr) {
1015           clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts,
1016                         bondVisitOrders);
1017         }
1018         dblBondAtom = nullptr;
1019         boost::tie(beg, end) = mol.getAtomBonds(canonEndAtom);
1020         while (beg != end) {
1021           if (mol[*beg] != tBond && mol[*beg]->getBondType() == Bond::DOUBLE &&
1022               mol[*beg]->getStereo() > Bond::STEREOANY) {
1023             dblBondAtom = canonEndAtom;  // tBond->getOtherAtom(canonEndAtom);
1024             break;
1025           }
1026           beg++;
1027         }
1028         if (dblBondAtom != nullptr) {
1029           clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts,
1030                         bondVisitOrders);
1031         }
1032       } else if (tBond->getBondDir() != Bond::NONE) {
1033         // we aren't supposed to have a direction set, but we do:
1034         tBond->setBondDir(Bond::NONE);
1035       }
1036     }
1037   }
1038 }
1039 
canonicalizeFragment(ROMol & mol,int atomIdx,std::vector<AtomColors> & colors,const UINT_VECT & ranks,MolStack & molStack,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doIsomericSmiles,bool doRandom)1040 void canonicalizeFragment(ROMol &mol, int atomIdx,
1041                           std::vector<AtomColors> &colors,
1042                           const UINT_VECT &ranks, MolStack &molStack,
1043                           const boost::dynamic_bitset<> *bondsInPlay,
1044                           const std::vector<std::string> *bondSymbols,
1045                           bool doIsomericSmiles, bool doRandom) {
1046   PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
1047   PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
1048   PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(),
1049                "bondsInPlay too small");
1050   PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(),
1051                "bondSymbols too small");
1052   unsigned int nAtoms = mol.getNumAtoms();
1053 
1054   UINT_VECT atomVisitOrders(nAtoms, 0);
1055   UINT_VECT bondVisitOrders(mol.getNumBonds(), 0);
1056   UINT_VECT bondDirCounts(mol.getNumBonds(), 0);
1057   UINT_VECT atomDirCounts(nAtoms, 0);
1058   UINT_VECT cyclesAvailable(MAX_CYCLES, 1);
1059   VECT_INT_VECT cycles(nAtoms);
1060 
1061   boost::dynamic_bitset<> ringStereoChemAdjusted(nAtoms);
1062 
1063   // make sure that we've done the stereo perception:
1064   if (!mol.hasProp(common_properties::_StereochemDone)) {
1065     MolOps::assignStereochemistry(mol, false);
1066   }
1067 
1068   // we need ring information; make sure findSSSR has been called before
1069   // if not call now
1070   if (!mol.getRingInfo()->isInitialized()) {
1071     MolOps::findSSSR(mol);
1072   }
1073   mol.getAtomWithIdx(atomIdx)->setProp(common_properties::_TraversalStartPoint,
1074                                        true);
1075 
1076   VECT_INT_VECT atomRingClosures(nAtoms);
1077   std::vector<INT_LIST> atomTraversalBondOrder(nAtoms);
1078   Canon::canonicalDFSTraversal(
1079       mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack,
1080       atomVisitOrders, bondVisitOrders, atomRingClosures,
1081       atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
1082 
1083   CHECK_INVARIANT(!molStack.empty(), "Empty stack.");
1084   CHECK_INVARIANT(molStack.begin()->type == MOL_STACK_ATOM,
1085                   "Corrupted stack. First element should be an atom.");
1086 
1087   // collect some information about traversal order on chiral atoms
1088   boost::dynamic_bitset<> numSwapsChiralAtoms(nAtoms);
1089   if (doIsomericSmiles) {
1090     for (const auto atom : mol.atoms()) {
1091       if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
1092         // check if all of this atom's bonds are in play
1093         for (const auto &bndItr :
1094              boost::make_iterator_range(mol.getAtomBonds(atom))) {
1095           if (bondsInPlay && !(*bondsInPlay)[mol[bndItr]->getIdx()]) {
1096             atom->setProp(common_properties::_brokenChirality, true);
1097             break;
1098           }
1099         }
1100         if (atom->hasProp(common_properties::_brokenChirality)) {
1101           continue;
1102         }
1103         const INT_LIST &trueOrder = atomTraversalBondOrder[atom->getIdx()];
1104 
1105         // Check if the atom can be chiral, and if chirality needs inversion
1106         if (trueOrder.size() >= 3) {
1107           int nSwaps;
1108           // We have to make sure that trueOrder contains all the
1109           // bonds, even if they won't be written to the SMARTS
1110           if (trueOrder.size() < atom->getDegree()) {
1111             INT_LIST tOrder = trueOrder;
1112             for (const auto &bndItr :
1113                  boost::make_iterator_range(mol.getAtomBonds(atom))) {
1114               int bndIdx = mol[bndItr]->getIdx();
1115               if (std::find(trueOrder.begin(), trueOrder.end(), bndIdx) ==
1116                   trueOrder.end()) {
1117                 tOrder.push_back(bndIdx);
1118                 break;
1119               }
1120             }
1121             nSwaps = atom->getPerturbationOrder(tOrder);
1122           } else {
1123             nSwaps = atom->getPerturbationOrder(trueOrder);
1124           }
1125           if (chiralAtomNeedsTagInversion(
1126                   mol, atom,
1127                   molStack.begin()->obj.atom->getIdx() == atom->getIdx(),
1128                   atomRingClosures[atom->getIdx()].size())) {
1129             // This is a special case. Here's an example:
1130             //   Our internal representation of a chiral center is equivalent
1131             //   to:
1132             //     [C@](F)(O)(C)[H]
1133             //   we'll be dumping it without the H, which entails a
1134             //   reordering:
1135             //     [C@@H](F)(O)C
1136             ++nSwaps;
1137           }
1138           if (nSwaps % 2) {
1139             numSwapsChiralAtoms.set(atom->getIdx());
1140           }
1141         }
1142       }
1143     }
1144   }
1145 
1146   // remove the current directions on single bonds around double bonds:
1147   for (auto bond : mol.bonds()) {
1148     Bond::BondDir dir = bond->getBondDir();
1149     if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
1150       bond->setBondDir(Bond::NONE);
1151     }
1152   }
1153 
1154 #if 0
1155     std::cerr<<"<11111111"<<std::endl;
1156 
1157     std::cerr<<"----------------------------------------->"<<std::endl;
1158     mol.debugMol(std::cerr);
1159 #endif
1160 
1161   // std::cerr<<"----->\ntraversal stack:"<<std::endl;
1162   // traverse the stack and canonicalize double bonds and atoms with (ring)
1163   // stereochemistry
1164   for (auto &msI : molStack) {
1165 #if 0
1166       if(msI->type == MOL_STACK_ATOM) std::cerr<<" atom: "<<msI->obj.atom->getIdx()<<std::endl;
1167       else if(msI->type == MOL_STACK_BOND) std::cerr<<" bond: "<<msI->obj.bond->getIdx()<<" "<<msI->number<<" "<<msI->obj.bond->getBeginAtomIdx()<<"-"<<msI->obj.bond->getEndAtomIdx()<<" order: "<<msI->obj.bond->getBondType()<<std::endl;
1168       else if(msI->type == MOL_STACK_RING) std::cerr<<" ring: "<<msI->number<<std::endl;
1169       else if(msI->type == MOL_STACK_BRANCH_OPEN) std::cerr<<" branch open"<<std::endl;
1170       else if(msI->type == MOL_STACK_BRANCH_CLOSE) std::cerr<<" branch close"<<std::endl;
1171 #endif
1172     if (msI.type == MOL_STACK_BOND &&
1173         msI.obj.bond->getBondType() == Bond::DOUBLE &&
1174         msI.obj.bond->getStereo() > Bond::STEREOANY) {
1175       if (msI.obj.bond->getStereoAtoms().size() >= 2) {
1176         Canon::canonicalizeDoubleBond(msI.obj.bond, bondVisitOrders,
1177                                       atomVisitOrders, bondDirCounts,
1178                                       atomDirCounts, molStack);
1179       } else {
1180         // bad stereo spec:
1181         msI.obj.bond->setStereo(Bond::STEREONONE);
1182       }
1183     }
1184     if (doIsomericSmiles) {
1185       if (msI.type == MOL_STACK_ATOM &&
1186           msI.obj.atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1187           !msI.obj.atom->hasProp(common_properties::_brokenChirality)) {
1188         if (msI.obj.atom->hasProp(common_properties::_ringStereoAtoms)) {
1189           if (!ringStereoChemAdjusted[msI.obj.atom->getIdx()]) {
1190             msI.obj.atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
1191             ringStereoChemAdjusted.set(msI.obj.atom->getIdx());
1192           }
1193           const INT_VECT &ringStereoAtoms = msI.obj.atom->getProp<INT_VECT>(
1194               common_properties::_ringStereoAtoms);
1195           for (auto nbrV : ringStereoAtoms) {
1196             int nbrIdx = abs(nbrV) - 1;
1197             // Adjust the chirality flag of the ring stereo atoms according to
1198             // the first one
1199             if (!ringStereoChemAdjusted[nbrIdx] &&
1200                 atomVisitOrders[nbrIdx] >
1201                     atomVisitOrders[msI.obj.atom->getIdx()]) {
1202               mol.getAtomWithIdx(nbrIdx)->setChiralTag(
1203                   msI.obj.atom->getChiralTag());
1204               if (nbrV < 0) {
1205                 mol.getAtomWithIdx(nbrIdx)->invertChirality();
1206               }
1207               // Odd number of swaps for first chiral ring atom --> needs to be
1208               // swapped but we want to retain chirality
1209               if (numSwapsChiralAtoms[msI.obj.atom->getIdx()]) {
1210                 // Odd number of swaps for chiral ring neighbor --> needs to be
1211                 // swapped but we want to retain chirality
1212                 if (!numSwapsChiralAtoms[nbrIdx]) {
1213                   mol.getAtomWithIdx(nbrIdx)->invertChirality();
1214                 }
1215               }
1216               // Even number of swaps for first chiral ring atom --> don't need
1217               // to be swapped
1218               else {
1219                 // Odd number of swaps for chiral ring neighbor --> needs to be
1220                 // swapped
1221                 if (numSwapsChiralAtoms[nbrIdx]) {
1222                   mol.getAtomWithIdx(nbrIdx)->invertChirality();
1223                 }
1224               }
1225               ringStereoChemAdjusted.set(nbrIdx);
1226             }
1227           }
1228         } else {
1229           if (msI.obj.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW) {
1230             if ((numSwapsChiralAtoms[msI.obj.atom->getIdx()])) {
1231               msI.obj.atom->invertChirality();
1232             }
1233           } else if (msI.obj.atom->getChiralTag() ==
1234                      Atom::CHI_TETRAHEDRAL_CCW) {
1235             if ((numSwapsChiralAtoms[msI.obj.atom->getIdx()])) {
1236               msI.obj.atom->invertChirality();
1237             }
1238           }
1239         }
1240       }
1241     }
1242   }
1243 #if 0
1244     std::cerr<<"<-----"<<std::endl;
1245 
1246     std::cerr<<"----------------------------------------->"<<std::endl;
1247     mol.debugMol(std::cerr);
1248 #endif
1249   Canon::removeRedundantBondDirSpecs(mol, molStack, bondDirCounts,
1250                                      atomDirCounts, bondVisitOrders);
1251 #if 0
1252     std::cerr<<"----------------------------------------->"<<std::endl;
1253     mol.debugMol(std::cerr);
1254     std::cerr<<"----------------------------------------->"<<std::endl;
1255 #endif
1256 }
1257 }  // namespace Canon
1258 }  // namespace RDKit
1259