1 //
2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <GraphMol/RDKitBase.h>
11 #include <GraphMol/Canon.h>
12 #include <GraphMol/SmilesParse/SmilesParseOps.h>
13 #include <GraphMol/RDKitQueries.h>
14 #include <RDGeneral/Exceptions.h>
15 #include <RDGeneral/hash/hash.hpp>
16 #include <RDGeneral/utils.h>
17 #include <algorithm>
18
19 namespace RDKit {
20 namespace Canon {
21 namespace {
isUnsaturated(const Atom * atom,const ROMol & mol)22 bool isUnsaturated(const Atom *atom, const ROMol &mol) {
23 for (const auto &bndItr :
24 boost::make_iterator_range(mol.getAtomBonds(atom))) {
25 // can't just check for single bonds, because dative bonds also have an
26 // order of 1
27 if (mol[bndItr]->getBondTypeAsDouble() > 1) {
28 return true;
29 }
30 }
31 return false;
32 }
33
hasSingleHQuery(const Atom::QUERYATOM_QUERY * q)34 bool hasSingleHQuery(const Atom::QUERYATOM_QUERY *q) {
35 // list queries are series of nested ors of AtomAtomicNum queries
36 PRECONDITION(q, "bad query");
37 bool res = false;
38 std::string descr = q->getDescription();
39 if (descr == "AtomAnd") {
40 for (auto cIt = q->beginChildren(); cIt != q->endChildren(); ++cIt) {
41 std::string descr = (*cIt)->getDescription();
42 if (descr == "AtomHCount") {
43 if (!(*cIt)->getNegation() &&
44 ((ATOM_EQUALS_QUERY *)(*cIt).get())->getVal() == 1) {
45 return true;
46 }
47 return false;
48 } else if (descr == "AtomAnd") {
49 res = hasSingleHQuery((*cIt).get());
50 if (res) {
51 return true;
52 }
53 }
54 }
55 }
56 return res;
57 }
58
atomHasFourthValence(const Atom * atom)59 bool atomHasFourthValence(const Atom *atom) {
60 if (atom->getNumExplicitHs() == 1 || atom->getImplicitValence() == 1) {
61 return true;
62 }
63 if (atom->hasQuery()) {
64 // the SMARTS [C@@H] produces an atom with a H query, but we also
65 // need to treat this like an explicit H for chirality purposes
66 // This was Github #1489
67 return hasSingleHQuery(atom->getQuery());
68 }
69 return false;
70 }
71 } // end of anonymous namespace
72
chiralAtomNeedsTagInversion(const RDKit::ROMol & mol,const RDKit::Atom * atom,bool isAtomFirst,size_t numClosures)73 bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol,
74 const RDKit::Atom *atom, bool isAtomFirst,
75 size_t numClosures) {
76 PRECONDITION(atom, "bad atom");
77 return atom->getDegree() == 3 &&
78 ((isAtomFirst && atom->getNumExplicitHs() == 1) ||
79 (!atomHasFourthValence(atom) && numClosures == 1 &&
80 !isUnsaturated(atom, mol)));
81 }
82
83 struct _possibleCompare
84 : public std::binary_function<PossibleType, PossibleType, bool> {
operator ()RDKit::Canon::_possibleCompare85 bool operator()(const PossibleType &arg1, const PossibleType &arg2) const {
86 return (arg1.get<0>() < arg2.get<0>());
87 }
88 };
89
checkBondsInSameBranch(MolStack & molStack,Bond * dblBnd,Bond * dirBnd)90 bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) {
91 bool seenDblBond = false;
92 int branchCounter = 0;
93 for (const auto &item : molStack) {
94 switch (item.type) {
95 case MOL_STACK_BOND:
96 if (item.obj.bond == dirBnd || item.obj.bond == dblBnd) {
97 if (seenDblBond) {
98 return branchCounter == 0;
99 } else {
100 seenDblBond = true;
101 }
102 }
103 break;
104 case MOL_STACK_BRANCH_OPEN:
105 if (seenDblBond) {
106 ++branchCounter;
107 }
108 break;
109 case MOL_STACK_BRANCH_CLOSE:
110 if (seenDblBond) {
111 --branchCounter;
112 }
113 break;
114 default:
115 break;
116 }
117 }
118 // We should not ever hit this. But if we do, returning false
119 // causes the same behavior as before this patch.
120 return false;
121 }
122
switchBondDir(Bond * bond)123 void switchBondDir(Bond *bond) {
124 PRECONDITION(bond, "bad bond");
125 PRECONDITION(bond->getBondType() == Bond::SINGLE || bond->getIsAromatic(),
126 "bad bond type");
127 switch (bond->getBondDir()) {
128 case Bond::ENDUPRIGHT:
129 bond->setBondDir(Bond::ENDDOWNRIGHT);
130 break;
131 case Bond::ENDDOWNRIGHT:
132 bond->setBondDir(Bond::ENDUPRIGHT);
133 break;
134 default:
135 break;
136 }
137 }
138
139 // FIX: this may only be of interest from the SmilesWriter, should we
140 // move it there?
141 //
142 //
canonicalizeDoubleBond(Bond * dblBond,UINT_VECT & bondVisitOrders,UINT_VECT & atomVisitOrders,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,MolStack & molStack)143 void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders,
144 UINT_VECT &atomVisitOrders,
145 UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
146 MolStack &molStack) {
147 PRECONDITION(dblBond, "bad bond");
148 PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "bad bond order");
149 PRECONDITION(dblBond->getStereo() > Bond::STEREOANY, "bad bond stereo");
150 PRECONDITION(dblBond->getStereoAtoms().size() >= 2, "bad bond stereo atoms");
151 PRECONDITION(atomVisitOrders[dblBond->getBeginAtomIdx()] > 0 ||
152 atomVisitOrders[dblBond->getEndAtomIdx()] > 0,
153 "neither end atom traversed");
154
155 Atom *atom1 = dblBond->getBeginAtom();
156 Atom *atom2 = dblBond->getEndAtom();
157 // we only worry about double bonds that begin and end at atoms
158 // of degree 2 or 3:
159 if ((atom1->getDegree() != 2 && atom1->getDegree() != 3) ||
160 (atom2->getDegree() != 2 && atom2->getDegree() != 3)) {
161 return;
162 }
163 // ensure that atom1 is the lower numbered atom of the double bond (the one
164 // traversed first)
165 if (atomVisitOrders[dblBond->getBeginAtomIdx()] >=
166 atomVisitOrders[dblBond->getEndAtomIdx()]) {
167 std::swap(atom1, atom2);
168 }
169
170 Bond *firstFromAtom1 = nullptr, *secondFromAtom1 = nullptr;
171 Bond *firstFromAtom2 = nullptr, *secondFromAtom2 = nullptr;
172
173 ROMol &mol = dblBond->getOwningMol();
174 auto firstVisitOrder = mol.getNumBonds() + 1;
175
176 ROMol::OBOND_ITER_PAIR atomBonds;
177 // -------------------------------------------------------
178 // find the lowest visit order bonds from each end and determine
179 // if anything is already constraining our choice of directions:
180 bool dir1Set = false, dir2Set = false;
181 for (const auto &bndItr :
182 boost::make_iterator_range(mol.getAtomBonds(atom1))) {
183 auto bond = mol[bndItr];
184 if (bond != dblBond) {
185 auto bondIdx = bond->getIdx();
186 if (bondDirCounts[bondIdx] > 0) {
187 dir1Set = true;
188 }
189 if (!firstFromAtom1 || bondVisitOrders[bondIdx] < firstVisitOrder) {
190 if (firstFromAtom1) {
191 secondFromAtom1 = firstFromAtom1;
192 }
193 firstFromAtom1 = bond;
194 firstVisitOrder = bondVisitOrders[bondIdx];
195 } else {
196 secondFromAtom1 = bond;
197 }
198 }
199 }
200 firstVisitOrder = mol.getNumBonds() + 1;
201 for (const auto &bndItr :
202 boost::make_iterator_range(mol.getAtomBonds(atom2))) {
203 auto bond = mol[bndItr];
204 if (bond != dblBond) {
205 auto bondIdx = bond->getIdx();
206 if (bondDirCounts[bondIdx] > 0) {
207 dir2Set = true;
208 }
209 if (!firstFromAtom2 || bondVisitOrders[bondIdx] < firstVisitOrder) {
210 if (firstFromAtom2) {
211 secondFromAtom2 = firstFromAtom2;
212 }
213 firstFromAtom2 = bond;
214 firstVisitOrder = bondVisitOrders[bondIdx];
215 } else {
216 secondFromAtom2 = bond;
217 }
218 }
219 }
220
221 // make sure we found everything we need to find:
222 CHECK_INVARIANT(firstFromAtom1, "could not find atom1");
223 CHECK_INVARIANT(firstFromAtom2, "could not find atom2");
224 CHECK_INVARIANT(atom1->getDegree() == 2 || secondFromAtom1,
225 "inconsistency at atom1");
226 CHECK_INVARIANT(atom2->getDegree() == 2 || secondFromAtom2,
227 "inconsistency at atom2");
228
229 bool setFromBond1 = true;
230 Bond::BondDir atom1Dir = Bond::NONE;
231 Bond::BondDir atom2Dir = Bond::NONE;
232 Bond *atom1ControllingBond = firstFromAtom1;
233 Bond *atom2ControllingBond = firstFromAtom2;
234 if (!dir1Set && !dir2Set) {
235 // ----------------------------------
236 // nothing has touched our bonds so far, so set the
237 // directions to "arbitrary" values:
238
239 // the bond we came in on becomes ENDUPRIGHT:
240 atom1Dir = Bond::ENDUPRIGHT;
241 firstFromAtom1->setBondDir(atom1Dir);
242 bondDirCounts[firstFromAtom1->getIdx()] += 1;
243 atomDirCounts[atom1->getIdx()] += 1;
244 } else if (!dir2Set) {
245 // at least one of the bonds on atom1 has its directionality set already:
246 if (bondDirCounts[firstFromAtom1->getIdx()] > 0) {
247 // The first bond's direction has been set at some earlier point:
248 atom1Dir = firstFromAtom1->getBondDir();
249 bondDirCounts[firstFromAtom1->getIdx()] += 1;
250 atomDirCounts[atom1->getIdx()] += 1;
251 if (secondFromAtom1) {
252 // both bonds have their directionalities set, make sure
253 // they are compatible:
254 if (firstFromAtom1->getBondDir() == secondFromAtom1->getBondDir() &&
255 bondDirCounts[firstFromAtom2->getIdx()]) {
256 CHECK_INVARIANT(
257 ((firstFromAtom1->getBeginAtomIdx() == atom1->getIdx()) ^
258 (secondFromAtom1->getBeginAtomIdx() == atom1->getIdx())),
259 "inconsistent state");
260 }
261 }
262 } else {
263 // the second bond must be present and setting the direction:
264 CHECK_INVARIANT(secondFromAtom1, "inconsistent state");
265 CHECK_INVARIANT(bondDirCounts[secondFromAtom1->getIdx()] > 0,
266 "inconsistent state");
267 // It must be the second bond setting the direction.
268 // This happens when the bond dir is set in a branch:
269 // v- this double bond
270 // CC(/C=P/N)=N/O
271 // ^- the second bond sets the direction
272 // or when the first bond is a ring closure from an
273 // earlier traversed atom:
274 // v- this double bond
275 // NC1=NOC/C1=N\O
276 // ^- this closure ends up being the first bond,
277 // and it does not set the direction.
278 //
279 // This addresses parts of Issue 185 and sf.net Issue 1842174
280 //
281 atom1Dir = secondFromAtom1->getBondDir();
282
283 firstFromAtom1->setBondDir(atom1Dir);
284 bondDirCounts[firstFromAtom1->getIdx()] += 1;
285 atomDirCounts[atom1->getIdx()] += 2;
286 atom1ControllingBond = secondFromAtom1;
287 }
288 } else {
289 // dir2 has been set, and dir1 hasn't: we're dealing with a stereochem
290 // specification on a ring double bond:
291 setFromBond1 = false;
292 // at least one of the bonds on atom2 has its directionality set already:
293 if (bondDirCounts[firstFromAtom2->getIdx()] > 0) {
294 // The second bond's direction has been set at some earlier point:
295 atom2Dir = firstFromAtom2->getBondDir();
296 bondDirCounts[firstFromAtom2->getIdx()] += 1;
297 atomDirCounts[atom2->getIdx()] += 1;
298 if (secondFromAtom2) {
299 // both bonds have their directionalities set, make sure
300 // they are compatible:
301 if (firstFromAtom2->getBondDir() == secondFromAtom2->getBondDir() &&
302 bondDirCounts[firstFromAtom1->getIdx()]) {
303 CHECK_INVARIANT(
304 ((firstFromAtom2->getBeginAtomIdx() == atom2->getIdx()) ^
305 (secondFromAtom2->getBeginAtomIdx() == atom2->getIdx())),
306 "inconsistent state");
307 }
308 }
309 } else {
310 // the second bond must be present and setting the direction:
311 CHECK_INVARIANT(secondFromAtom2, "inconsistent state");
312 CHECK_INVARIANT(bondDirCounts[secondFromAtom2->getIdx()] > 0,
313 "inconsistent state");
314 // It must be the second bond setting the direction.
315 // This happens when the bond dir is set in a branch:
316 // v- this double bond
317 // CC(/C=P/N)=N/O
318 // ^- the second bond sets the direction
319 // or when the first bond is a ring closure from an
320 // earlier traversed atom:
321 // v- this double bond
322 // NC1=NOC/C1=N\O
323 // ^- this closure ends up being the first bond,
324 // and it does not set the direction.
325 //
326 // This addresses parts of Issue 185 and sf.net Issue 1842174
327 //
328 atom2Dir = secondFromAtom2->getBondDir();
329
330 firstFromAtom2->setBondDir(atom2Dir);
331 bondDirCounts[firstFromAtom2->getIdx()] += 1;
332 atomDirCounts[atom2->getIdx()] += 2;
333 atom2ControllingBond = secondFromAtom2;
334 }
335 // CHECK_INVARIANT(0,"ring stereochemistry not handled");
336 } // end of the ring stereochemistry if
337
338 // now set the directionality on the other side:
339 if (setFromBond1) {
340 if (dblBond->getStereo() == Bond::STEREOE ||
341 dblBond->getStereo() == Bond::STEREOTRANS) {
342 atom2Dir = atom1Dir;
343 } else if (dblBond->getStereo() == Bond::STEREOZ ||
344 dblBond->getStereo() == Bond::STEREOCIS) {
345 atom2Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
346 : Bond::ENDUPRIGHT;
347 }
348 CHECK_INVARIANT(atom2Dir != Bond::NONE, "stereo not set");
349
350 // If we're not looking at the bonds used to determine the
351 // stereochemistry, we need to flip the setting on the other bond:
352 const INT_VECT &stereoAtoms = dblBond->getStereoAtoms();
353
354 auto isClosingRingBond = [](Bond *bond) {
355 if (bond == nullptr) {
356 return false;
357 }
358 auto beginIdx = bond->getBeginAtomIdx();
359 auto endIdx = bond->getEndAtomIdx();
360 return beginIdx > endIdx &&
361 beginIdx - endIdx > 1 &&
362 bond->hasProp(common_properties::_TraversalRingClosureBond);
363 };
364
365 auto isFlipped = false;
366
367 if (atom1->getDegree() == 3 &&
368 std::find(stereoAtoms.begin(), stereoAtoms.end(),
369 static_cast<int>(atom1ControllingBond->getOtherAtomIdx(
370 atom1->getIdx()))) == stereoAtoms.end()) {
371 isFlipped = true;
372 atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
373 : Bond::ENDUPRIGHT;
374 }
375 // std::cerr<<" 0 set bond 2: "<<firstFromAtom2->getIdx()<<"
376 // "<<atom2Dir<<std::endl;
377 if (atom2->getDegree() == 3 &&
378 std::find(stereoAtoms.begin(), stereoAtoms.end(),
379 static_cast<int>(firstFromAtom2->getOtherAtomIdx(
380 atom2->getIdx()))) == stereoAtoms.end()) {
381 isFlipped = true;
382 atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
383 : Bond::ENDUPRIGHT;
384 }
385
386 if (!isFlipped &&
387 (isClosingRingBond(dblBond) ||
388 (isClosingRingBond(secondFromAtom1) &&
389 !secondFromAtom1->getIsAromatic() &&
390 secondFromAtom1->getBondDir() != Bond::NONE))
391 ) {
392 atom2Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
393 : Bond::ENDUPRIGHT;
394 }
395 // std::cerr<<" 1 set bond 2: "<<firstFromAtom2->getIdx()<<"
396 // "<<atom2Dir<<std::endl;
397 firstFromAtom2->setBondDir(atom2Dir);
398
399 bondDirCounts[firstFromAtom2->getIdx()] += 1;
400 atomDirCounts[atom2->getIdx()] += 1;
401 } else {
402 // we come before a ring closure:
403 if (dblBond->getStereo() == Bond::STEREOZ ||
404 dblBond->getStereo() == Bond::STEREOCIS) {
405 atom1Dir = atom2Dir;
406 } else if (dblBond->getStereo() == Bond::STEREOE ||
407 dblBond->getStereo() == Bond::STEREOTRANS) {
408 atom1Dir = (atom2Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
409 : Bond::ENDUPRIGHT;
410 }
411 CHECK_INVARIANT(atom1Dir != Bond::NONE, "stereo not set");
412 // If we're not looking at the bonds used to determine the
413 // stereochemistry, we need to flip the setting on the other bond:
414 const INT_VECT &stereoAtoms = dblBond->getStereoAtoms();
415 if (atom2->getDegree() == 3 &&
416 std::find(stereoAtoms.begin(), stereoAtoms.end(),
417 static_cast<int>(atom2ControllingBond->getOtherAtomIdx(
418 atom2->getIdx()))) == stereoAtoms.end()) {
419 // std::cerr<<"flip 1"<<std::endl;
420 atom1Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
421 : Bond::ENDUPRIGHT;
422 }
423 if (atom1->getDegree() == 3 &&
424 std::find(stereoAtoms.begin(), stereoAtoms.end(),
425 static_cast<int>(firstFromAtom1->getOtherAtomIdx(
426 atom1->getIdx()))) == stereoAtoms.end()) {
427 // std::cerr<<"flip 2"<<std::endl;
428 atom1Dir = (atom1Dir == Bond::ENDUPRIGHT) ? Bond::ENDDOWNRIGHT
429 : Bond::ENDUPRIGHT;
430 }
431
432 firstFromAtom1->setBondDir(atom1Dir);
433 switchBondDir(firstFromAtom1);
434 bondDirCounts[firstFromAtom1->getIdx()] += 1;
435 atomDirCounts[atom1->getIdx()] += 1;
436 }
437
438 // -----------------------------------
439 //
440 // Check if there are other bonds from atoms 1 and 2 that need
441 // to have their directionalities set:
442 ///
443 if (atom1->getDegree() == 3) {
444 if (!bondDirCounts[secondFromAtom1->getIdx()]) {
445 // This bond (the second bond from the starting atom of the double bond)
446 // is a special case. It's going to appear in a branch in the smiles:
447 // X\C(\Y)=C/Z
448 // ^
449 // |- here
450 // so it actually needs to go down with the *same* direction as the
451 // bond that's already been set (because "pulling the bond out of the
452 // branch" reverses its direction).
453 // A quick example. This SMILES:
454 // F/C(\Cl)=C/F
455 // is *wrong*. This is the correct form:
456 // F/C(/Cl)=C/F
457 // So, since we want this bond to have the opposite direction to the
458 // other one, we put it in with the same direction.
459 // This was Issue 183
460
461 // UNLESS the bond is not in a branch (in the smiles) (e.g. firstFromAtom1
462 // branches off a cycle, and secondFromAtom1 shows up at the end of the
463 // cycle). This was Github Issue #2023, see it for an example.
464 if (checkBondsInSameBranch(molStack, dblBond, secondFromAtom1)) {
465 auto otherDir = (firstFromAtom1->getBondDir() == Bond::ENDUPRIGHT)
466 ? Bond::ENDDOWNRIGHT
467 : Bond::ENDUPRIGHT;
468 secondFromAtom1->setBondDir(otherDir);
469 } else {
470 secondFromAtom1->setBondDir(firstFromAtom1->getBondDir());
471 }
472 }
473 bondDirCounts[secondFromAtom1->getIdx()] += 1;
474 atomDirCounts[atom1->getIdx()] += 1;
475 }
476
477 if (atom2->getDegree() == 3) {
478 if (!bondDirCounts[secondFromAtom2->getIdx()]) {
479 // Here we set the bond direction to be opposite the other one (since
480 // both come after the atom connected to the double bond).
481 Bond::BondDir otherDir;
482 if (!secondFromAtom2->hasProp(
483 common_properties::_TraversalRingClosureBond)) {
484 otherDir = (firstFromAtom2->getBondDir() == Bond::ENDUPRIGHT)
485 ? Bond::ENDDOWNRIGHT
486 : Bond::ENDUPRIGHT;
487 } else {
488 // another one those irritating little reversal things due to
489 // ring closures
490 otherDir = firstFromAtom2->getBondDir();
491 }
492 secondFromAtom2->setBondDir(otherDir);
493 }
494 bondDirCounts[secondFromAtom2->getIdx()] += 1;
495 atomDirCounts[atom2->getIdx()] += 1;
496 // std::cerr<<" other: "<<secondFromAtom2->getIdx()<<"
497 // "<<otherDir<<std::endl;
498 }
499
500 if (setFromBond1) {
501 // This is an odd case... The bonds off the beginning atom are
502 // after the start atom in the traversal stack. These need to
503 // have their directions reversed. An example SMILES (unlikely
504 // to actually traverse this way is:
505 // C(=C/O)/F or C(/F)=C/O
506 // That bond is Z, without the reversal, this would come out:
507 // C(=C/O)\F or C(\F)=C/O
508 // which is E.
509 //
510 // In the case of three-coordinate atoms, we don't need to flip
511 // the second bond because the Issue 183 fix (above) already got
512 // that one.
513 //
514 // This was Issue 191 and continued into sf.net issue 1842174
515 if (bondVisitOrders[atom1ControllingBond->getIdx()] >
516 atomVisitOrders[atom1->getIdx()]) {
517 if (bondDirCounts[atom1ControllingBond->getIdx()] == 1) {
518 if (!atom1ControllingBond->hasProp(
519 common_properties::_TraversalRingClosureBond)) {
520 // std::cerr<<" switcheroo 1"<<std::endl;
521 switchBondDir(atom1ControllingBond);
522 }
523 } else if (bondDirCounts[firstFromAtom2->getIdx()] == 1) {
524 // the controlling bond at atom1 is being set by someone else, flip the
525 // direction
526 // on the atom2 bond instead:
527 // std::cerr<<" switcheroo 2"<<std::endl;
528 switchBondDir(firstFromAtom2);
529 if (secondFromAtom2 && bondDirCounts[secondFromAtom2->getIdx()] >= 1) {
530 switchBondDir(secondFromAtom2);
531 }
532 }
533 }
534 }
535
536 // something to watch out for here. For this molecule and traversal order:
537 // 0 1 2 3 4 5 6 7 8 <- atom numbers
538 // C/C=C/C(/N=C/C)=C/C
539 // ^ ^
540 // |--|-- these two bonds must match in direction or the SMILES
541 // is inconsistent (according to Daylight, Marvin does ok with
542 // it)
543 // That means that the direction of the bond from atom 3->4 needs to be set
544 // when the bond from 2->3 is set.
545 // Issue2023: But only if 3->4 doesn't have a direction yet?
546 //
547 // I believe we only need to worry about this for the bonds from atom2.
548 const Atom *atom3 = firstFromAtom2->getOtherAtom(atom2);
549 if (atom3->getDegree() == 3) {
550 Bond *otherAtom3Bond = nullptr;
551 bool dblBondPresent = false;
552 atomBonds = mol.getAtomBonds(atom3);
553 while (atomBonds.first != atomBonds.second) {
554 Bond *tbond = mol[*atomBonds.first];
555 if (tbond->getBondType() == Bond::DOUBLE &&
556 tbond->getStereo() > Bond::STEREOANY) {
557 dblBondPresent = true;
558 } else if ((tbond->getBondType() == Bond::SINGLE) &&
559 (tbond != firstFromAtom2)) {
560 otherAtom3Bond = tbond;
561 }
562 atomBonds.first++;
563 }
564 if (dblBondPresent && otherAtom3Bond &&
565 otherAtom3Bond->getBondDir() == Bond::NONE) {
566 // std::cerr<<"set!"<<std::endl;
567 otherAtom3Bond->setBondDir(firstFromAtom2->getBondDir());
568 bondDirCounts[otherAtom3Bond->getIdx()] += 1;
569 atomDirCounts[atom3->getIdx()] += 1;
570 }
571 }
572 }
573
574 // finds cycles
dfsFindCycles(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,const UINT_VECT & ranks,UINT_VECT & atomOrders,VECT_INT_VECT & atomRingClosures,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)575 void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx,
576 std::vector<AtomColors> &colors, const UINT_VECT &ranks,
577 UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures,
578 const boost::dynamic_bitset<> *bondsInPlay,
579 const std::vector<std::string> *bondSymbols, bool doRandom) {
580 Atom *atom = mol.getAtomWithIdx(atomIdx);
581 atomOrders.push_back(atomIdx);
582
583 colors[atomIdx] = GREY_NODE;
584
585 // ---------------------
586 //
587 // Build the list of possible destinations from here
588 //
589 // ---------------------
590 std::vector<PossibleType> possibles;
591 possibles.resize(0);
592 ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
593 possibles.reserve(bondsPair.second - bondsPair.first);
594
595 while (bondsPair.first != bondsPair.second) {
596 Bond *theBond = mol[*(bondsPair.first)];
597 bondsPair.first++;
598 if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) {
599 continue;
600 }
601 if (inBondIdx < 0 ||
602 theBond->getIdx() != static_cast<unsigned int>(inBondIdx)) {
603 int otherIdx = theBond->getOtherAtomIdx(atomIdx);
604 long rank = ranks[otherIdx];
605 // ---------------------
606 //
607 // things are a bit more complicated if we are sitting on a
608 // ring atom. we would like to traverse first to the
609 // ring-closure atoms, then to atoms outside the ring first,
610 // then to atoms in the ring that haven't already been visited
611 // (non-ring-closure atoms).
612 //
613 // Here's how the black magic works:
614 // - non-ring atom neighbors have their original ranks
615 // - ring atom neighbors have this added to their ranks:
616 // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS
617 // - ring-closure neighbors lose a factor of:
618 // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS
619 //
620 // This tactic biases us to traverse to non-ring neighbors first,
621 // original ordering if bond orders are all equal... crafty, neh?
622 //
623 // ---------------------
624 if (!doRandom) {
625 if (colors[otherIdx] == GREY_NODE) {
626 rank -= static_cast<int>(MAX_BONDTYPE + 1) * MAX_NATOMS * MAX_NATOMS;
627 if (!bondSymbols) {
628 rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
629 MAX_NATOMS;
630 } else {
631 const std::string &symb = (*bondSymbols)[theBond->getIdx()];
632 std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
633 rank += (hsh % MAX_NATOMS) * MAX_NATOMS;
634 }
635 } else if (theBond->getOwningMol().getRingInfo()->numBondRings(
636 theBond->getIdx())) {
637 if (!bondSymbols) {
638 rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
639 MAX_NATOMS * MAX_NATOMS;
640 } else {
641 const std::string &symb = (*bondSymbols)[theBond->getIdx()];
642 std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
643 rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS;
644 }
645 }
646 } else {
647 // randomize the rank
648 rank = getRandomGenerator()();
649 }
650 // std::cerr << " " << atomIdx << ": " << otherIdx << " " <<
651 // rank
652 // << std::endl;
653 // std::cerr<<"aIdx: "<< atomIdx <<" p: "<<otherIdx<<" Rank:
654 // "<<ranks[otherIdx] <<" "<<colors[otherIdx]<<"
655 // "<<theBond->getBondType()<<" "<<rank<<std::endl;
656 possibles.emplace_back(rank, otherIdx, theBond);
657 }
658 }
659
660 // ---------------------
661 //
662 // Sort on ranks
663 //
664 // ---------------------
665 std::sort(possibles.begin(), possibles.end(), _possibleCompare());
666 // if (possibles.size())
667 // std::cerr << " aIdx1: " << atomIdx
668 // << " first: " << possibles.front().get<0>() << " "
669 // << possibles.front().get<1>() << std::endl;
670 // // ---------------------
671 //
672 // Now work the children
673 //
674 // ---------------------
675 for (auto &possible : possibles) {
676 int possibleIdx = possible.get<1>();
677 Bond *bond = possible.get<2>();
678 switch (colors[possibleIdx]) {
679 case WHITE_NODE:
680 // -----
681 // we haven't seen this node at all before, traverse
682 // -----
683 dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks,
684 atomOrders, atomRingClosures, bondsInPlay, bondSymbols,
685 doRandom);
686 break;
687 case GREY_NODE:
688 // -----
689 // we've seen this, but haven't finished it (we're finishing a ring)
690 // -----
691 atomRingClosures[possibleIdx].push_back(bond->getIdx());
692 atomRingClosures[atomIdx].push_back(bond->getIdx());
693 break;
694 default:
695 // -----
696 // this node has been finished. don't do anything.
697 // -----
698 break;
699 }
700 }
701 colors[atomIdx] = BLACK_NODE;
702 } // namespace Canon
703
dfsBuildStack(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,VECT_INT_VECT & cycles,const UINT_VECT & ranks,UINT_VECT & cyclesAvailable,MolStack & molStack,UINT_VECT & atomOrders,UINT_VECT & bondVisitOrders,VECT_INT_VECT & atomRingClosures,std::vector<INT_LIST> & atomTraversalBondOrder,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)704 void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx,
705 std::vector<AtomColors> &colors, VECT_INT_VECT &cycles,
706 const UINT_VECT &ranks, UINT_VECT &cyclesAvailable,
707 MolStack &molStack, UINT_VECT &atomOrders,
708 UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures,
709 std::vector<INT_LIST> &atomTraversalBondOrder,
710 const boost::dynamic_bitset<> *bondsInPlay,
711 const std::vector<std::string> *bondSymbols, bool doRandom) {
712 #if 0
713 std::cerr<<"traverse from atom: "<<atomIdx<<" via bond "<<inBondIdx<<" num cycles available: "
714 <<std::count(cyclesAvailable.begin(),cyclesAvailable.end(),1)<<std::endl;
715 #endif
716
717 Atom *atom = mol.getAtomWithIdx(atomIdx);
718 INT_LIST directTravList, cycleEndList;
719 boost::dynamic_bitset<> seenFromHere(mol.getNumAtoms());
720
721 seenFromHere.set(atomIdx);
722 molStack.push_back(MolStackElem(atom));
723 atomOrders[atom->getIdx()] = rdcast<int>(molStack.size());
724 colors[atomIdx] = GREY_NODE;
725
726 INT_LIST travList;
727 if (inBondIdx >= 0) {
728 travList.push_back(inBondIdx);
729 }
730
731 // ---------------------
732 //
733 // Add any ring closures
734 //
735 // ---------------------
736 if (atomRingClosures[atomIdx].size()) {
737 std::vector<unsigned int> ringsClosed;
738 for (auto bIdx : atomRingClosures[atomIdx]) {
739 travList.push_back(bIdx);
740 Bond *bond = mol.getBondWithIdx(bIdx);
741 seenFromHere.set(bond->getOtherAtomIdx(atomIdx));
742 unsigned int ringIdx;
743 if (bond->getPropIfPresent(common_properties::_TraversalRingClosureBond,
744 ringIdx)) {
745 // this is end of the ring closure
746 // we can just pull the ring index from the bond itself:
747 molStack.push_back(MolStackElem(bond, atomIdx));
748 bondVisitOrders[bIdx] = molStack.size();
749 molStack.push_back(MolStackElem(ringIdx));
750 // don't make the ring digit immediately available again: we don't want
751 // to have the same
752 // ring digit opening and closing rings on an atom.
753 ringsClosed.push_back(ringIdx - 1);
754 } else {
755 // this is the beginning of the ring closure, we need to come up with a
756 // ring index:
757 auto cAIt =
758 std::find(cyclesAvailable.begin(), cyclesAvailable.end(), 1);
759 if (cAIt == cyclesAvailable.end()) {
760 throw ValueErrorException(
761 "Too many rings open at once. SMILES cannot be generated.");
762 }
763 unsigned int lowestRingIdx = cAIt - cyclesAvailable.begin();
764 cyclesAvailable[lowestRingIdx] = 0;
765 ++lowestRingIdx;
766 bond->setProp(common_properties::_TraversalRingClosureBond,
767 lowestRingIdx);
768 molStack.push_back(MolStackElem(lowestRingIdx));
769 }
770 }
771 for (auto ringIdx : ringsClosed) {
772 cyclesAvailable[ringIdx] = 1;
773 }
774 }
775
776 // ---------------------
777 //
778 // Build the list of possible destinations from here
779 //
780 // ---------------------
781 std::vector<PossibleType> possibles;
782 possibles.resize(0);
783 ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
784 possibles.reserve(bondsPair.second - bondsPair.first);
785
786 while (bondsPair.first != bondsPair.second) {
787 Bond *theBond = mol[*(bondsPair.first)];
788 bondsPair.first++;
789 if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) {
790 continue;
791 }
792 if (inBondIdx < 0 ||
793 theBond->getIdx() != static_cast<unsigned int>(inBondIdx)) {
794 int otherIdx = theBond->getOtherAtomIdx(atomIdx);
795 // ---------------------
796 //
797 // This time we skip the ring-closure atoms (we did them
798 // above); we want to traverse first to atoms outside the ring
799 // then to atoms in the ring that haven't already been visited
800 // (non-ring-closure atoms).
801 //
802 // otherwise it's the same ranking logic as above
803 //
804 // ---------------------
805 if (colors[otherIdx] != WHITE_NODE || seenFromHere[otherIdx]) {
806 // ring closure or finished atom... skip it.
807 continue;
808 }
809 unsigned long rank = ranks[otherIdx];
810 if (!doRandom) {
811 if (theBond->getOwningMol().getRingInfo()->numBondRings(
812 theBond->getIdx())) {
813 if (!bondSymbols) {
814 rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
815 MAX_NATOMS * MAX_NATOMS;
816 } else {
817 const std::string &symb = (*bondSymbols)[theBond->getIdx()];
818 std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end());
819 rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS;
820 }
821 }
822 } else {
823 // randomize the rank
824 rank = getRandomGenerator()();
825 }
826
827 possibles.emplace_back(rank, otherIdx, theBond);
828 }
829 }
830
831 // ---------------------
832 //
833 // Sort on ranks
834 //
835 // ---------------------
836 std::sort(possibles.begin(), possibles.end(), _possibleCompare());
837 // if (possibles.size())
838 // std::cerr << " aIdx2: " << atomIdx
839 // << " first: " << possibles.front().get<0>() << " "
840 // << possibles.front().get<1>() << std::endl;
841
842 // ---------------------
843 //
844 // Now work the children
845 //
846 // ---------------------
847 for (auto possiblesIt = possibles.begin(); possiblesIt != possibles.end();
848 possiblesIt++) {
849 int possibleIdx = possiblesIt->get<1>();
850 if (colors[possibleIdx] != WHITE_NODE) {
851 // we're either done or it's a ring-closure, which we already processed...
852 // this test isn't strictly required, because we only added WHITE notes to
853 // the possibles list, but it seems logical to document it
854 continue;
855 }
856 Bond *bond = possiblesIt->get<2>();
857 Atom *otherAtom = mol.getAtomWithIdx(possibleIdx);
858 // ww might have some residual data from earlier calls, clean that up:
859 otherAtom->clearProp(common_properties::_TraversalBondIndexOrder);
860 travList.push_back(bond->getIdx());
861 if (possiblesIt + 1 != possibles.end()) {
862 // we're branching
863 molStack.push_back(
864 MolStackElem("(", rdcast<int>(possiblesIt - possibles.begin())));
865 }
866 molStack.push_back(MolStackElem(bond, atomIdx));
867 bondVisitOrders[bond->getIdx()] = molStack.size();
868 dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks,
869 cyclesAvailable, molStack, atomOrders, bondVisitOrders,
870 atomRingClosures, atomTraversalBondOrder, bondsInPlay,
871 bondSymbols, doRandom);
872 if (possiblesIt + 1 != possibles.end()) {
873 molStack.push_back(
874 MolStackElem(")", rdcast<int>(possiblesIt - possibles.begin())));
875 }
876 }
877
878 atomTraversalBondOrder[atom->getIdx()] = travList;
879 colors[atomIdx] = BLACK_NODE;
880 }
881
canonicalDFSTraversal(ROMol & mol,int atomIdx,int inBondIdx,std::vector<AtomColors> & colors,VECT_INT_VECT & cycles,const UINT_VECT & ranks,UINT_VECT & cyclesAvailable,MolStack & molStack,UINT_VECT & atomOrders,UINT_VECT & bondVisitOrders,VECT_INT_VECT & atomRingClosures,std::vector<INT_LIST> & atomTraversalBondOrder,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doRandom)882 void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx,
883 std::vector<AtomColors> &colors,
884 VECT_INT_VECT &cycles, const UINT_VECT &ranks,
885 UINT_VECT &cyclesAvailable, MolStack &molStack,
886 UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders,
887 VECT_INT_VECT &atomRingClosures,
888 std::vector<INT_LIST> &atomTraversalBondOrder,
889 const boost::dynamic_bitset<> *bondsInPlay,
890 const std::vector<std::string> *bondSymbols,
891 bool doRandom) {
892 PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
893 PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
894 PRECONDITION(atomOrders.size() >= mol.getNumAtoms(), "vector too small");
895 PRECONDITION(bondVisitOrders.size() >= mol.getNumBonds(), "vector too small");
896 PRECONDITION(atomRingClosures.size() >= mol.getNumAtoms(),
897 "vector too small");
898 PRECONDITION(atomTraversalBondOrder.size() >= mol.getNumAtoms(),
899 "vector too small");
900 PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(),
901 "bondsInPlay too small");
902 PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(),
903 "bondSymbols too small");
904
905 std::vector<AtomColors> tcolors;
906 tcolors.resize(colors.size());
907 std::copy(colors.begin(), colors.end(), tcolors.begin());
908 dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders,
909 atomRingClosures, bondsInPlay, bondSymbols, doRandom);
910 dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable,
911 molStack, atomOrders, bondVisitOrders, atomRingClosures,
912 atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
913 }
914
canHaveDirection(const Bond * bond)915 bool canHaveDirection(const Bond *bond) {
916 PRECONDITION(bond, "bad bond");
917 Bond::BondType bondType = bond->getBondType();
918 return (bondType == Bond::SINGLE || bondType == Bond::AROMATIC);
919 }
920
clearBondDirs(ROMol & mol,Bond * refBond,const Atom * fromAtom,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,const UINT_VECT & bondVisitOrders)921 void clearBondDirs(ROMol &mol, Bond *refBond, const Atom *fromAtom,
922 UINT_VECT &bondDirCounts, UINT_VECT &atomDirCounts,
923 const UINT_VECT &bondVisitOrders) {
924 RDUNUSED_PARAM(bondVisitOrders);
925 PRECONDITION(bondDirCounts.size() >= mol.getNumBonds(), "bad dirCount size");
926 PRECONDITION(refBond, "bad bond");
927 PRECONDITION(&refBond->getOwningMol() == &mol, "bad bond");
928 PRECONDITION(fromAtom, "bad atom");
929 PRECONDITION(&fromAtom->getOwningMol() == &mol, "bad bond");
930
931 #if 0
932 std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
933 std::cerr<<"\n";
934 std::copy(atomDirCounts.begin(),atomDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
935 std::cerr<<"\n";
936 std::cerr<<"cBD: bond: "<<refBond->getIdx()<<" atom: "<<fromAtom->getIdx()<<": ";
937 #endif
938 ROMol::OEDGE_ITER beg, end;
939 boost::tie(beg, end) = mol.getAtomBonds(fromAtom);
940 bool nbrPossible = false, adjusted = false;
941 while (beg != end) {
942 Bond *oBond = mol[*beg];
943 // std::cerr<<" >>"<<oBond->getIdx()<<" "<<canHaveDirection(oBond)<<"
944 // "<<bondDirCounts[oBond->getIdx()]<<"-"<<bondDirCounts[refBond->getIdx()]<<"
945 // "<<atomDirCounts[oBond->getBeginAtomIdx()]<<"-"<<atomDirCounts[oBond->getEndAtomIdx()]<<std::endl;
946 if (oBond != refBond && canHaveDirection(oBond)) {
947 nbrPossible = true;
948 if ((bondDirCounts[oBond->getIdx()] >=
949 bondDirCounts[refBond->getIdx()]) &&
950 atomDirCounts[oBond->getBeginAtomIdx()] != 1 &&
951 atomDirCounts[oBond->getEndAtomIdx()] != 1) {
952 adjusted = true;
953 bondDirCounts[oBond->getIdx()] -= 1;
954 if (!bondDirCounts[oBond->getIdx()]) {
955 // no one is setting the direction here:
956 oBond->setBondDir(Bond::NONE);
957 atomDirCounts[oBond->getBeginAtomIdx()] -= 1;
958 atomDirCounts[oBond->getEndAtomIdx()] -= 1;
959 // std::cerr<<"ob:"<<oBond->getIdx()<<" ";
960 }
961 }
962 }
963 beg++;
964 }
965 if (nbrPossible && !adjusted &&
966 atomDirCounts[refBond->getBeginAtomIdx()] != 1 &&
967 atomDirCounts[refBond->getEndAtomIdx()] != 1) {
968 // we found a neighbor that could have directionality set,
969 // but it had a lower bondDirCount than us, so we must
970 // need to be adjusted:
971 bondDirCounts[refBond->getIdx()] -= 1;
972 if (!bondDirCounts[refBond->getIdx()]) {
973 refBond->setBondDir(Bond::NONE);
974 atomDirCounts[refBond->getBeginAtomIdx()] -= 1;
975 atomDirCounts[refBond->getEndAtomIdx()] -= 1;
976 // std::cerr<<"rb:"<<refBond->getIdx()<<" ";
977 }
978 }
979 // std::cerr<<std::endl;
980 }
981
removeRedundantBondDirSpecs(ROMol & mol,MolStack & molStack,UINT_VECT & bondDirCounts,UINT_VECT & atomDirCounts,const UINT_VECT & bondVisitOrders)982 void removeRedundantBondDirSpecs(ROMol &mol, MolStack &molStack,
983 UINT_VECT &bondDirCounts,
984 UINT_VECT &atomDirCounts,
985 const UINT_VECT &bondVisitOrders) {
986 PRECONDITION(bondDirCounts.size() >= mol.getNumBonds(), "bad dirCount size");
987 #if 0
988 std::cerr<<"rRBDS: ";
989 mol.debugMol(std::cerr);
990 std::copy(bondDirCounts.begin(),bondDirCounts.end(),std::ostream_iterator<int>(std::cerr,", "));
991 std::cerr<<"\n";
992 #endif
993 // find bonds that have directions indicated that are redundant:
994 for (auto &msI : molStack) {
995 if (msI.type == MOL_STACK_BOND) {
996 Bond *tBond = msI.obj.bond;
997 const Atom *canonBeginAtom = mol.getAtomWithIdx(msI.number);
998 const Atom *canonEndAtom =
999 mol.getAtomWithIdx(tBond->getOtherAtomIdx(msI.number));
1000 if (canHaveDirection(tBond) && bondDirCounts[tBond->getIdx()] >= 1) {
1001 // start by finding the double bond that sets tBond's direction:
1002 const Atom *dblBondAtom = nullptr;
1003 ROMol::OEDGE_ITER beg, end;
1004 boost::tie(beg, end) = mol.getAtomBonds(canonBeginAtom);
1005 while (beg != end) {
1006 if (mol[*beg] != tBond && mol[*beg]->getBondType() == Bond::DOUBLE &&
1007 mol[*beg]->getStereo() > Bond::STEREOANY) {
1008 dblBondAtom =
1009 canonBeginAtom; // tBond->getOtherAtom(canonBeginAtom);
1010 break;
1011 }
1012 beg++;
1013 }
1014 if (dblBondAtom != nullptr) {
1015 clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts,
1016 bondVisitOrders);
1017 }
1018 dblBondAtom = nullptr;
1019 boost::tie(beg, end) = mol.getAtomBonds(canonEndAtom);
1020 while (beg != end) {
1021 if (mol[*beg] != tBond && mol[*beg]->getBondType() == Bond::DOUBLE &&
1022 mol[*beg]->getStereo() > Bond::STEREOANY) {
1023 dblBondAtom = canonEndAtom; // tBond->getOtherAtom(canonEndAtom);
1024 break;
1025 }
1026 beg++;
1027 }
1028 if (dblBondAtom != nullptr) {
1029 clearBondDirs(mol, tBond, dblBondAtom, bondDirCounts, atomDirCounts,
1030 bondVisitOrders);
1031 }
1032 } else if (tBond->getBondDir() != Bond::NONE) {
1033 // we aren't supposed to have a direction set, but we do:
1034 tBond->setBondDir(Bond::NONE);
1035 }
1036 }
1037 }
1038 }
1039
canonicalizeFragment(ROMol & mol,int atomIdx,std::vector<AtomColors> & colors,const UINT_VECT & ranks,MolStack & molStack,const boost::dynamic_bitset<> * bondsInPlay,const std::vector<std::string> * bondSymbols,bool doIsomericSmiles,bool doRandom)1040 void canonicalizeFragment(ROMol &mol, int atomIdx,
1041 std::vector<AtomColors> &colors,
1042 const UINT_VECT &ranks, MolStack &molStack,
1043 const boost::dynamic_bitset<> *bondsInPlay,
1044 const std::vector<std::string> *bondSymbols,
1045 bool doIsomericSmiles, bool doRandom) {
1046 PRECONDITION(colors.size() >= mol.getNumAtoms(), "vector too small");
1047 PRECONDITION(ranks.size() >= mol.getNumAtoms(), "vector too small");
1048 PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(),
1049 "bondsInPlay too small");
1050 PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(),
1051 "bondSymbols too small");
1052 unsigned int nAtoms = mol.getNumAtoms();
1053
1054 UINT_VECT atomVisitOrders(nAtoms, 0);
1055 UINT_VECT bondVisitOrders(mol.getNumBonds(), 0);
1056 UINT_VECT bondDirCounts(mol.getNumBonds(), 0);
1057 UINT_VECT atomDirCounts(nAtoms, 0);
1058 UINT_VECT cyclesAvailable(MAX_CYCLES, 1);
1059 VECT_INT_VECT cycles(nAtoms);
1060
1061 boost::dynamic_bitset<> ringStereoChemAdjusted(nAtoms);
1062
1063 // make sure that we've done the stereo perception:
1064 if (!mol.hasProp(common_properties::_StereochemDone)) {
1065 MolOps::assignStereochemistry(mol, false);
1066 }
1067
1068 // we need ring information; make sure findSSSR has been called before
1069 // if not call now
1070 if (!mol.getRingInfo()->isInitialized()) {
1071 MolOps::findSSSR(mol);
1072 }
1073 mol.getAtomWithIdx(atomIdx)->setProp(common_properties::_TraversalStartPoint,
1074 true);
1075
1076 VECT_INT_VECT atomRingClosures(nAtoms);
1077 std::vector<INT_LIST> atomTraversalBondOrder(nAtoms);
1078 Canon::canonicalDFSTraversal(
1079 mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack,
1080 atomVisitOrders, bondVisitOrders, atomRingClosures,
1081 atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom);
1082
1083 CHECK_INVARIANT(!molStack.empty(), "Empty stack.");
1084 CHECK_INVARIANT(molStack.begin()->type == MOL_STACK_ATOM,
1085 "Corrupted stack. First element should be an atom.");
1086
1087 // collect some information about traversal order on chiral atoms
1088 boost::dynamic_bitset<> numSwapsChiralAtoms(nAtoms);
1089 if (doIsomericSmiles) {
1090 for (const auto atom : mol.atoms()) {
1091 if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
1092 // check if all of this atom's bonds are in play
1093 for (const auto &bndItr :
1094 boost::make_iterator_range(mol.getAtomBonds(atom))) {
1095 if (bondsInPlay && !(*bondsInPlay)[mol[bndItr]->getIdx()]) {
1096 atom->setProp(common_properties::_brokenChirality, true);
1097 break;
1098 }
1099 }
1100 if (atom->hasProp(common_properties::_brokenChirality)) {
1101 continue;
1102 }
1103 const INT_LIST &trueOrder = atomTraversalBondOrder[atom->getIdx()];
1104
1105 // Check if the atom can be chiral, and if chirality needs inversion
1106 if (trueOrder.size() >= 3) {
1107 int nSwaps;
1108 // We have to make sure that trueOrder contains all the
1109 // bonds, even if they won't be written to the SMARTS
1110 if (trueOrder.size() < atom->getDegree()) {
1111 INT_LIST tOrder = trueOrder;
1112 for (const auto &bndItr :
1113 boost::make_iterator_range(mol.getAtomBonds(atom))) {
1114 int bndIdx = mol[bndItr]->getIdx();
1115 if (std::find(trueOrder.begin(), trueOrder.end(), bndIdx) ==
1116 trueOrder.end()) {
1117 tOrder.push_back(bndIdx);
1118 break;
1119 }
1120 }
1121 nSwaps = atom->getPerturbationOrder(tOrder);
1122 } else {
1123 nSwaps = atom->getPerturbationOrder(trueOrder);
1124 }
1125 if (chiralAtomNeedsTagInversion(
1126 mol, atom,
1127 molStack.begin()->obj.atom->getIdx() == atom->getIdx(),
1128 atomRingClosures[atom->getIdx()].size())) {
1129 // This is a special case. Here's an example:
1130 // Our internal representation of a chiral center is equivalent
1131 // to:
1132 // [C@](F)(O)(C)[H]
1133 // we'll be dumping it without the H, which entails a
1134 // reordering:
1135 // [C@@H](F)(O)C
1136 ++nSwaps;
1137 }
1138 if (nSwaps % 2) {
1139 numSwapsChiralAtoms.set(atom->getIdx());
1140 }
1141 }
1142 }
1143 }
1144 }
1145
1146 // remove the current directions on single bonds around double bonds:
1147 for (auto bond : mol.bonds()) {
1148 Bond::BondDir dir = bond->getBondDir();
1149 if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
1150 bond->setBondDir(Bond::NONE);
1151 }
1152 }
1153
1154 #if 0
1155 std::cerr<<"<11111111"<<std::endl;
1156
1157 std::cerr<<"----------------------------------------->"<<std::endl;
1158 mol.debugMol(std::cerr);
1159 #endif
1160
1161 // std::cerr<<"----->\ntraversal stack:"<<std::endl;
1162 // traverse the stack and canonicalize double bonds and atoms with (ring)
1163 // stereochemistry
1164 for (auto &msI : molStack) {
1165 #if 0
1166 if(msI->type == MOL_STACK_ATOM) std::cerr<<" atom: "<<msI->obj.atom->getIdx()<<std::endl;
1167 else if(msI->type == MOL_STACK_BOND) std::cerr<<" bond: "<<msI->obj.bond->getIdx()<<" "<<msI->number<<" "<<msI->obj.bond->getBeginAtomIdx()<<"-"<<msI->obj.bond->getEndAtomIdx()<<" order: "<<msI->obj.bond->getBondType()<<std::endl;
1168 else if(msI->type == MOL_STACK_RING) std::cerr<<" ring: "<<msI->number<<std::endl;
1169 else if(msI->type == MOL_STACK_BRANCH_OPEN) std::cerr<<" branch open"<<std::endl;
1170 else if(msI->type == MOL_STACK_BRANCH_CLOSE) std::cerr<<" branch close"<<std::endl;
1171 #endif
1172 if (msI.type == MOL_STACK_BOND &&
1173 msI.obj.bond->getBondType() == Bond::DOUBLE &&
1174 msI.obj.bond->getStereo() > Bond::STEREOANY) {
1175 if (msI.obj.bond->getStereoAtoms().size() >= 2) {
1176 Canon::canonicalizeDoubleBond(msI.obj.bond, bondVisitOrders,
1177 atomVisitOrders, bondDirCounts,
1178 atomDirCounts, molStack);
1179 } else {
1180 // bad stereo spec:
1181 msI.obj.bond->setStereo(Bond::STEREONONE);
1182 }
1183 }
1184 if (doIsomericSmiles) {
1185 if (msI.type == MOL_STACK_ATOM &&
1186 msI.obj.atom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
1187 !msI.obj.atom->hasProp(common_properties::_brokenChirality)) {
1188 if (msI.obj.atom->hasProp(common_properties::_ringStereoAtoms)) {
1189 if (!ringStereoChemAdjusted[msI.obj.atom->getIdx()]) {
1190 msI.obj.atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW);
1191 ringStereoChemAdjusted.set(msI.obj.atom->getIdx());
1192 }
1193 const INT_VECT &ringStereoAtoms = msI.obj.atom->getProp<INT_VECT>(
1194 common_properties::_ringStereoAtoms);
1195 for (auto nbrV : ringStereoAtoms) {
1196 int nbrIdx = abs(nbrV) - 1;
1197 // Adjust the chirality flag of the ring stereo atoms according to
1198 // the first one
1199 if (!ringStereoChemAdjusted[nbrIdx] &&
1200 atomVisitOrders[nbrIdx] >
1201 atomVisitOrders[msI.obj.atom->getIdx()]) {
1202 mol.getAtomWithIdx(nbrIdx)->setChiralTag(
1203 msI.obj.atom->getChiralTag());
1204 if (nbrV < 0) {
1205 mol.getAtomWithIdx(nbrIdx)->invertChirality();
1206 }
1207 // Odd number of swaps for first chiral ring atom --> needs to be
1208 // swapped but we want to retain chirality
1209 if (numSwapsChiralAtoms[msI.obj.atom->getIdx()]) {
1210 // Odd number of swaps for chiral ring neighbor --> needs to be
1211 // swapped but we want to retain chirality
1212 if (!numSwapsChiralAtoms[nbrIdx]) {
1213 mol.getAtomWithIdx(nbrIdx)->invertChirality();
1214 }
1215 }
1216 // Even number of swaps for first chiral ring atom --> don't need
1217 // to be swapped
1218 else {
1219 // Odd number of swaps for chiral ring neighbor --> needs to be
1220 // swapped
1221 if (numSwapsChiralAtoms[nbrIdx]) {
1222 mol.getAtomWithIdx(nbrIdx)->invertChirality();
1223 }
1224 }
1225 ringStereoChemAdjusted.set(nbrIdx);
1226 }
1227 }
1228 } else {
1229 if (msI.obj.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW) {
1230 if ((numSwapsChiralAtoms[msI.obj.atom->getIdx()])) {
1231 msI.obj.atom->invertChirality();
1232 }
1233 } else if (msI.obj.atom->getChiralTag() ==
1234 Atom::CHI_TETRAHEDRAL_CCW) {
1235 if ((numSwapsChiralAtoms[msI.obj.atom->getIdx()])) {
1236 msI.obj.atom->invertChirality();
1237 }
1238 }
1239 }
1240 }
1241 }
1242 }
1243 #if 0
1244 std::cerr<<"<-----"<<std::endl;
1245
1246 std::cerr<<"----------------------------------------->"<<std::endl;
1247 mol.debugMol(std::cerr);
1248 #endif
1249 Canon::removeRedundantBondDirSpecs(mol, molStack, bondDirCounts,
1250 atomDirCounts, bondVisitOrders);
1251 #if 0
1252 std::cerr<<"----------------------------------------->"<<std::endl;
1253 mol.debugMol(std::cerr);
1254 std::cerr<<"----------------------------------------->"<<std::endl;
1255 #endif
1256 }
1257 } // namespace Canon
1258 } // namespace RDKit
1259