1 //
2 //  Copyright (C) 2003-2021 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include "RDKitBase.h"
11 #include <list>
12 #include "QueryAtom.h"
13 #include "QueryOps.h"
14 #include "MonomerInfo.h"
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <boost/algorithm/string/classification.hpp>
18 #include <boost/dynamic_bitset.hpp>
19 #include <boost/range/iterator_range.hpp>
20 
21 namespace RDKit {
22 
23 // Local utility functionality:
24 namespace {
getAtomNeighborNot(ROMol * mol,const Atom * atom,const Atom * other)25 Atom *getAtomNeighborNot(ROMol *mol, const Atom *atom, const Atom *other) {
26   PRECONDITION(mol, "bad molecule");
27   PRECONDITION(atom, "bad atom");
28   PRECONDITION(atom->getDegree() > 1, "bad degree");
29   PRECONDITION(other, "bad atom");
30   Atom *res = nullptr;
31 
32   ROMol::ADJ_ITER nbrIdx, endNbrs;
33   boost::tie(nbrIdx, endNbrs) = mol->getAtomNeighbors(atom);
34   while (nbrIdx != endNbrs) {
35     if (*nbrIdx != other->getIdx()) {
36       res = mol->getAtomWithIdx(*nbrIdx);
37       break;
38     }
39     ++nbrIdx;
40   }
41 
42   POSTCONDITION(res, "no neighbor found");
43   return res;
44 }
45 
AssignHsResidueInfo(RWMol & mol)46 void AssignHsResidueInfo(RWMol &mol) {
47   int max_serial = 0;
48   unsigned int stopIdx = mol.getNumAtoms();
49   for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
50     auto *info =
51         (AtomPDBResidueInfo *)(mol.getAtomWithIdx(aidx)->getMonomerInfo());
52     if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE &&
53         info->getSerialNumber() > max_serial) {
54       max_serial = info->getSerialNumber();
55     }
56   }
57 
58   AtomPDBResidueInfo *current_info = nullptr;
59   int current_h_id = 0;
60   for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
61     Atom *newAt = mol.getAtomWithIdx(aidx);
62     auto *info = (AtomPDBResidueInfo *)(newAt->getMonomerInfo());
63     if (info && info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) {
64       ROMol::ADJ_ITER begin, end;
65       boost::tie(begin, end) = mol.getAtomNeighbors(newAt);
66       while (begin != end) {
67         if (mol.getAtomWithIdx(*begin)->getAtomicNum() == 1) {
68           // Make all Hs unique - increment id even for existing
69           ++current_h_id;
70           // skip if hydrogen already has PDB info
71           auto *h_info = (AtomPDBResidueInfo *)mol.getAtomWithIdx(*begin)
72                              ->getMonomerInfo();
73           if (h_info &&
74               h_info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) {
75             continue;
76           }
77           // the hydrogens have unique names on residue basis (H1, H2, ...)
78           if (!current_info ||
79               current_info->getResidueNumber() != info->getResidueNumber() ||
80               current_info->getChainId() != info->getChainId()) {
81             current_h_id = 1;
82             current_info = info;
83           }
84           std::string h_label = std::to_string(current_h_id);
85           if (h_label.length() > 3) {
86             h_label = h_label.substr(h_label.length() - 3, 3);
87           }
88           while (h_label.length() < 3) {
89             h_label = h_label + " ";
90           }
91           h_label = "H" + h_label;
92           // wrap around id to '3H12'
93           h_label = h_label.substr(3, 1) + h_label.substr(0, 3);
94           AtomPDBResidueInfo *newInfo = new AtomPDBResidueInfo(
95               h_label, max_serial, "", info->getResidueName(),
96               info->getResidueNumber(), info->getChainId(), "", 1.0, 0.0,
97               info->getIsHeteroAtom());
98           mol.getAtomWithIdx(*begin)->setMonomerInfo(newInfo);
99 
100           ++max_serial;
101         }
102         ++begin;
103       }
104     }
105   }
106 }
107 
getIsoMap(const ROMol & mol)108 std::map<unsigned int, std::vector<unsigned int>> getIsoMap(const ROMol &mol) {
109   std::map<unsigned int, std::vector<unsigned int>> isoMap;
110   for (auto atom : mol.atoms()) {
111     if (atom->hasProp(common_properties::_isotopicHs)) {
112       atom->clearProp(common_properties::_isotopicHs);
113     }
114   }
115   for (auto bond : mol.bonds()) {
116     auto ba = bond->getBeginAtom();
117     auto ea = bond->getEndAtom();
118     int ha = -1;
119     unsigned int iso;
120     if (ba->getAtomicNum() == 1 && ba->getIsotope() &&
121         ea->getAtomicNum() != 1) {
122       ha = ea->getIdx();
123       iso = ba->getIsotope();
124     } else if (ea->getAtomicNum() == 1 && ea->getIsotope() &&
125                ba->getAtomicNum() != 1) {
126       ha = ba->getIdx();
127       iso = ea->getIsotope();
128     }
129     if (ha == -1) {
130       continue;
131     }
132     auto &v = isoMap[ha];
133     v.push_back(iso);
134   }
135   return isoMap;
136 }
137 
138 }  // end of unnamed namespace
139 
140 namespace MolOps {
141 
142 namespace {
pickBisector(const RDGeom::Point3D & nbr1Vect,const RDGeom::Point3D & nbr2Vect,const RDGeom::Point3D & nbr3Vect)143 RDGeom::Point3D pickBisector(const RDGeom::Point3D &nbr1Vect,
144                              const RDGeom::Point3D &nbr2Vect,
145                              const RDGeom::Point3D &nbr3Vect) {
146   auto dirVect = nbr2Vect + nbr3Vect;
147   if (dirVect.lengthSq() < 1e-4) {
148     // nbr2Vect and nbr3Vect are anti-parallel (was #3854)
149     dirVect = nbr2Vect;
150     std::swap(dirVect.x, dirVect.y);
151     dirVect.x *= -1;
152   }
153   if (dirVect.dotProduct(nbr1Vect) < 0) {
154     dirVect *= -1;
155   }
156 
157   return dirVect;
158 }
159 }  // namespace
160 
setTerminalAtomCoords(ROMol & mol,unsigned int idx,unsigned int otherIdx)161 void setTerminalAtomCoords(ROMol &mol, unsigned int idx,
162                            unsigned int otherIdx) {
163   // we will loop over all the coordinates
164   PRECONDITION(otherIdx != idx, "degenerate atoms");
165   Atom *atom = mol.getAtomWithIdx(idx);
166   PRECONDITION(mol.getAtomDegree(atom) == 1, "bad atom degree");
167   const Bond *bond = mol.getBondBetweenAtoms(otherIdx, idx);
168   PRECONDITION(bond, "no bond between atoms");
169 
170   const Atom *otherAtom = mol.getAtomWithIdx(otherIdx);
171   double bondLength =
172       PeriodicTable::getTable()->getRb0(1) +
173       PeriodicTable::getTable()->getRb0(otherAtom->getAtomicNum());
174 
175   RDGeom::Point3D dirVect(0, 0, 0);
176 
177   RDGeom::Point3D perpVect, rotnAxis, nbrPerp;
178   RDGeom::Point3D nbr1Vect, nbr2Vect, nbr3Vect;
179   RDGeom::Transform3D tform;
180   RDGeom::Point3D otherPos, atomPos;
181 
182   const Atom *nbr1 = nullptr, *nbr2 = nullptr, *nbr3 = nullptr;
183   const Bond *nbrBond;
184   ROMol::ADJ_ITER nbrIdx, endNbrs;
185 
186   switch (otherAtom->getDegree()) {
187     case 1:
188       // --------------------------------------------------------------------------
189       //   No other atoms present:
190       // --------------------------------------------------------------------------
191       // loop over the conformations and set the coordinates
192       for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
193            cfi++) {
194         if ((*cfi)->is3D()) {
195           dirVect.z = 1;
196         } else {
197           dirVect.x = 1;
198         }
199         otherPos = (*cfi)->getAtomPos(otherIdx);
200         atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
201         (*cfi)->setAtomPos(idx, atomPos);
202       }
203       break;
204 
205     case 2:
206       // --------------------------------------------------------------------------
207       //  One other neighbor:
208       // --------------------------------------------------------------------------
209       nbr1 = getAtomNeighborNot(&mol, otherAtom, atom);
210       for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
211            ++cfi) {
212         otherPos = (*cfi)->getAtomPos(otherIdx);
213         RDGeom::Point3D nbr1Pos = (*cfi)->getAtomPos(nbr1->getIdx());
214         // get a normalized vector pointing away from the neighbor:
215         nbr1Vect = nbr1Pos - otherPos;
216         if (fabs(nbr1Vect.lengthSq()) < 1e-4) {
217           // no difference, which likely indicates that we have redundant atoms.
218           // just put it on top of the heavy atom. This was #678
219           (*cfi)->setAtomPos(idx, otherPos);
220           continue;
221         }
222         nbr1Vect.normalize();
223         nbr1Vect *= -1;
224 
225         // ok, nbr1Vect points away from the other atom, figure out where
226         // this H goes:
227         switch (otherAtom->getHybridization()) {
228           case Atom::SP3:
229             // get a perpendicular to nbr1Vect:
230             if ((*cfi)->is3D()) {
231               perpVect = nbr1Vect.getPerpendicular();
232             } else {
233               perpVect.z = 1.0;
234             }
235             // and move off it:
236             tform.SetRotation((180 - 109.471) * M_PI / 180., perpVect);
237             dirVect = tform * nbr1Vect;
238             atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
239             (*cfi)->setAtomPos(idx, atomPos);
240             break;
241           case Atom::SP2:
242             // default 3D position is to just take an arbitrary perpendicular
243             // for 2D we take the normal to the xy plane
244             if ((*cfi)->is3D()) {
245               perpVect = nbr1Vect.getPerpendicular();
246             } else {
247               perpVect.z = 1.0;
248             }
249             if (nbr1->getDegree() > 1) {
250               // can we use the neighboring atom to establish a perpendicular?
251               nbrBond = mol.getBondBetweenAtoms(otherIdx, nbr1->getIdx());
252               if (nbrBond->getIsAromatic() ||
253                   nbrBond->getBondType() == Bond::DOUBLE ||
254                   nbrBond->getIsConjugated()) {
255                 nbr2 = getAtomNeighborNot(&mol, nbr1, otherAtom);
256                 nbr2Vect =
257                     nbr1Pos.directionVector((*cfi)->getAtomPos(nbr2->getIdx()));
258                 perpVect = nbr2Vect.crossProduct(nbr1Vect);
259               }
260             }
261             perpVect.normalize();
262             // rotate the nbr1Vect 60 degrees about perpVect and we're done:
263             tform.SetRotation(60. * M_PI / 180., perpVect);
264             dirVect = tform * nbr1Vect;
265             atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
266             (*cfi)->setAtomPos(idx, atomPos);
267             break;
268           case Atom::SP:
269             // just lay the H along the vector:
270             dirVect = nbr1Vect;
271             atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
272             (*cfi)->setAtomPos(idx, atomPos);
273             break;
274           default:
275             // FIX: handle other hybridizations
276             // for now, just lay the H along the vector:
277             dirVect = nbr1Vect;
278             atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
279             (*cfi)->setAtomPos(idx, atomPos);
280         }
281       }
282       break;
283     case 3:
284       // --------------------------------------------------------------------------
285       // Two other neighbors:
286       // --------------------------------------------------------------------------
287       boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom);
288       while (nbrIdx != endNbrs) {
289         if (*nbrIdx != idx) {
290           if (!nbr1) {
291             nbr1 = mol.getAtomWithIdx(*nbrIdx);
292           } else {
293             nbr2 = mol.getAtomWithIdx(*nbrIdx);
294           }
295         }
296         ++nbrIdx;
297       }
298       TEST_ASSERT(nbr1);
299       TEST_ASSERT(nbr2);
300       for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
301            ++cfi) {
302         // start along the average of the two vectors:
303         otherPos = (*cfi)->getAtomPos(otherIdx);
304         nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx());
305         nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx());
306         if (fabs(nbr1Vect.lengthSq()) < 1e-4 ||
307             fabs(nbr2Vect.lengthSq()) < 1e-4) {
308           // no difference, which likely indicates that we have redundant atoms.
309           // just put it on top of the heavy atom. This was #678
310           (*cfi)->setAtomPos(idx, otherPos);
311           continue;
312         }
313         nbr1Vect.normalize();
314         nbr2Vect.normalize();
315         dirVect = nbr1Vect + nbr2Vect;
316 
317         dirVect.normalize();
318         if ((*cfi)->is3D()) {
319           switch (otherAtom->getHybridization()) {
320             case Atom::SP3:
321               // get the perpendicular to the neighbors:
322               nbrPerp = nbr1Vect.crossProduct(nbr2Vect);
323               // and the perpendicular to that:
324               rotnAxis = nbrPerp.crossProduct(dirVect);
325               // and then rotate about that:
326               rotnAxis.normalize();
327               tform.SetRotation((109.471 / 2) * M_PI / 180., rotnAxis);
328               dirVect = tform * dirVect;
329               atomPos =
330                   otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
331               (*cfi)->setAtomPos(idx, atomPos);
332               break;
333             case Atom::SP2:
334               // don't need to do anything here, the H atom goes right on the
335               // direction vector
336               atomPos =
337                   otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
338               (*cfi)->setAtomPos(idx, atomPos);
339               break;
340             default:
341               // FIX: handle other hybridizations
342               // for now, just lay the H along the neighbor vector;
343               atomPos =
344                   otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
345               (*cfi)->setAtomPos(idx, atomPos);
346               break;
347           }
348         } else {
349           // don't need to do anything here, the H atom goes right on the
350           // direction vector
351           atomPos = otherPos + dirVect;
352           (*cfi)->setAtomPos(idx, atomPos);
353         }
354       }
355       break;
356     case 4:
357       // --------------------------------------------------------------------------
358       // Three other neighbors:
359       // --------------------------------------------------------------------------
360       boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(otherAtom);
361 
362       if (otherAtom->hasProp(common_properties::_CIPCode)) {
363         // if the central atom is chiral, we'll order the neighbors
364         // by CIP rank:
365         std::vector<std::pair<unsigned int, int>> nbrs;
366         while (nbrIdx != endNbrs) {
367           if (*nbrIdx != idx) {
368             const Atom *tAtom = mol.getAtomWithIdx(*nbrIdx);
369             unsigned int cip = 0;
370             tAtom->getPropIfPresent<unsigned int>(common_properties::_CIPRank,
371                                                   cip);
372             nbrs.emplace_back(cip, rdcast<int>(*nbrIdx));
373           }
374           ++nbrIdx;
375         }
376         std::sort(nbrs.begin(), nbrs.end());
377         nbr1 = mol.getAtomWithIdx(nbrs[0].second);
378         nbr2 = mol.getAtomWithIdx(nbrs[1].second);
379         nbr3 = mol.getAtomWithIdx(nbrs[2].second);
380       } else {
381         // central atom isn't chiral, so the neighbor ordering isn't important:
382         while (nbrIdx != endNbrs) {
383           if (*nbrIdx != idx) {
384             if (!nbr1) {
385               nbr1 = mol.getAtomWithIdx(*nbrIdx);
386             } else if (!nbr2) {
387               nbr2 = mol.getAtomWithIdx(*nbrIdx);
388             } else {
389               nbr3 = mol.getAtomWithIdx(*nbrIdx);
390             }
391           }
392           ++nbrIdx;
393         }
394       }
395       TEST_ASSERT(nbr1);
396       TEST_ASSERT(nbr2);
397       TEST_ASSERT(nbr3);
398       for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
399            ++cfi) {
400         otherPos = (*cfi)->getAtomPos(otherIdx);
401         nbr1Vect = otherPos - (*cfi)->getAtomPos(nbr1->getIdx());
402         nbr2Vect = otherPos - (*cfi)->getAtomPos(nbr2->getIdx());
403         nbr3Vect = otherPos - (*cfi)->getAtomPos(nbr3->getIdx());
404         if (fabs(nbr1Vect.lengthSq()) < 1e-4 ||
405             fabs(nbr2Vect.lengthSq()) < 1e-4 ||
406             fabs(nbr3Vect.lengthSq()) < 1e-4) {
407           // no difference, which likely indicates that we have redundant atoms.
408           // just put it on top of the heavy atom. This was #678
409           (*cfi)->setAtomPos(idx, otherPos);
410           continue;
411         }
412         nbr1Vect.normalize();
413         nbr2Vect.normalize();
414         nbr3Vect.normalize();
415 
416         // if three neighboring atoms are more or less planar, this
417         // is going to be in a quasi-random (but almost definitely bad)
418         // direction...
419         // correct for this (issue 2951221):
420         if ((*cfi)->is3D()) {
421           if (fabs(nbr3Vect.dotProduct(nbr1Vect.crossProduct(nbr2Vect))) <
422               0.1) {
423             // compute the normal:
424             dirVect = nbr1Vect.crossProduct(nbr2Vect);
425             std::string cipCode;
426             if (otherAtom->getPropIfPresent(common_properties::_CIPCode,
427                                             cipCode)) {
428               // the heavy atom is a chiral center, make sure
429               // that we went go the right direction to preserve
430               // its chirality. We use the chiral volume for this:
431               RDGeom::Point3D v1 = dirVect - nbr3Vect;
432               RDGeom::Point3D v2 = nbr1Vect - nbr3Vect;
433               RDGeom::Point3D v3 = nbr2Vect - nbr3Vect;
434               double vol = v1.dotProduct(v2.crossProduct(v3));
435               // FIX: this is almost certainly wrong and should use the chiral
436               // tag
437               if ((cipCode == "S" && vol < 0) || (cipCode == "R" && vol > 0)) {
438                 dirVect *= -1;
439               }
440             }
441           } else {
442             dirVect = nbr1Vect + nbr2Vect + nbr3Vect;
443           }
444         } else {
445           // we're in flatland
446 
447           // github #3879 and #908: find the two neighbors with the largest
448           // outer angle between them and then place the H to bisect that angle
449           // This is recommendation ST-1.1.4 from the 2006 IUPAC "Graphical
450           // representation of stereochemical configuration" guideline
451           auto angle12 = nbr1Vect.angleTo(nbr2Vect);
452           auto angle13 = nbr1Vect.angleTo(nbr3Vect);
453           auto angle23 = nbr2Vect.angleTo(nbr3Vect);
454           auto accum1 = angle12 + angle13;
455           auto accum2 = angle12 + angle23;
456           auto accum3 = angle13 + angle23;
457           if (accum1 <= accum2 && accum1 <= accum3) {
458             dirVect = pickBisector(nbr1Vect, nbr2Vect, nbr3Vect);
459           } else if (accum2 <= accum1 && accum2 <= accum3) {
460             dirVect = pickBisector(nbr2Vect, nbr1Vect, nbr3Vect);
461           } else {
462             dirVect = pickBisector(nbr3Vect, nbr1Vect, nbr2Vect);
463           }
464         }
465         dirVect.normalize();
466         atomPos = otherPos + dirVect * ((*cfi)->is3D() ? bondLength : 1.0);
467         (*cfi)->setAtomPos(idx, atomPos);
468       }
469       break;
470     default:
471       // --------------------------------------------------------------------------
472       // FIX: figure out what to do here
473       // --------------------------------------------------------------------------
474       atomPos = otherPos + dirVect * bondLength;
475       for (auto cfi = mol.beginConformers(); cfi != mol.endConformers();
476            ++cfi) {
477         (*cfi)->setAtomPos(idx, atomPos);
478       }
479       break;
480   }
481 }
482 
addHs(RWMol & mol,bool explicitOnly,bool addCoords,const UINT_VECT * onlyOnAtoms,bool addResidueInfo)483 void addHs(RWMol &mol, bool explicitOnly, bool addCoords,
484            const UINT_VECT *onlyOnAtoms, bool addResidueInfo) {
485   // when we hit each atom, clear its computed properties
486   // NOTE: it is essential that we not clear the ring info in the
487   // molecule's computed properties.  We don't want to have to
488   // regenerate that.  This caused Issue210 and Issue212:
489   mol.clearComputedProps(false);
490 
491   // precompute the number of hydrogens we are going to add so that we can
492   // pre-allocate the necessary space on the conformations of the molecule
493   // for their coordinates
494   unsigned int numAddHyds = 0;
495   for (auto at : mol.atoms()) {
496     if (!onlyOnAtoms || std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(),
497                                   at->getIdx()) != onlyOnAtoms->end()) {
498       numAddHyds += at->getNumExplicitHs();
499       if (!explicitOnly) {
500         numAddHyds += at->getNumImplicitHs();
501       }
502     }
503   }
504   unsigned int nSize = mol.getNumAtoms() + numAddHyds;
505 
506   // loop over the conformations of the molecule and allocate new space
507   // for the H locations (need to do this even if we aren't adding coords so
508   // that the conformers have the correct number of atoms).
509   for (auto cfi = mol.beginConformers(); cfi != mol.endConformers(); ++cfi) {
510     (*cfi)->reserve(nSize);
511   }
512 
513   unsigned int stopIdx = mol.getNumAtoms();
514   for (unsigned int aidx = 0; aidx < stopIdx; ++aidx) {
515     if (onlyOnAtoms && std::find(onlyOnAtoms->begin(), onlyOnAtoms->end(),
516                                  aidx) == onlyOnAtoms->end()) {
517       continue;
518     }
519 
520     Atom *newAt = mol.getAtomWithIdx(aidx);
521 
522     std::vector<unsigned int> isoHs;
523     if (newAt->getPropIfPresent(common_properties::_isotopicHs, isoHs)) {
524       newAt->clearProp(common_properties::_isotopicHs);
525     }
526     std::vector<unsigned int>::const_iterator isoH = isoHs.begin();
527     unsigned int newIdx;
528     newAt->clearComputedProps();
529     // always convert explicit Hs
530     unsigned int onumexpl = newAt->getNumExplicitHs();
531     for (unsigned int i = 0; i < onumexpl; i++) {
532       newIdx = mol.addAtom(new Atom(1), false, true);
533       mol.addBond(aidx, newIdx, Bond::SINGLE);
534       auto hAtom = mol.getAtomWithIdx(newIdx);
535       hAtom->updatePropertyCache();
536       if (addCoords) {
537         setTerminalAtomCoords(mol, newIdx, aidx);
538       }
539       if (isoH != isoHs.end()) {
540         hAtom->setIsotope(*isoH);
541         ++isoH;
542       }
543     }
544     // clear the local property
545     newAt->setNumExplicitHs(0);
546 
547     if (!explicitOnly) {
548       // take care of implicits
549       for (unsigned int i = 0; i < mol.getAtomWithIdx(aidx)->getNumImplicitHs();
550            i++) {
551         newIdx = mol.addAtom(new Atom(1), false, true);
552         mol.addBond(aidx, newIdx, Bond::SINGLE);
553         // set the isImplicit label so that we can strip these back
554         // off later if need be.
555         auto hAtom = mol.getAtomWithIdx(newIdx);
556         hAtom->setProp(common_properties::isImplicit, 1);
557         hAtom->updatePropertyCache();
558         if (addCoords) {
559           setTerminalAtomCoords(mol, newIdx, aidx);
560         }
561         if (isoH != isoHs.end()) {
562           hAtom->setIsotope(*isoH);
563           ++isoH;
564         }
565       }
566       // be very clear about implicits not being allowed in this
567       // representation
568       newAt->setProp(common_properties::origNoImplicit, newAt->getNoImplicit(),
569                      true);
570       newAt->setNoImplicit(true);
571     }
572     // update the atom's derived properties (valence count, etc.)
573     // no sense in being strict here (was github #2782)
574     newAt->updatePropertyCache(false);
575     if (isoH != isoHs.end()) {
576       BOOST_LOG(rdWarningLog) << "extra H isotope information found on atom "
577                               << newAt->getIdx() << std::endl;
578     }
579   }
580   // take care of AtomPDBResidueInfo for Hs if root atom has it
581   if (addResidueInfo) {
582     AssignHsResidueInfo(mol);
583   }
584 }
585 
addHs(const ROMol & mol,bool explicitOnly,bool addCoords,const UINT_VECT * onlyOnAtoms,bool addResidueInfo)586 ROMol *addHs(const ROMol &mol, bool explicitOnly, bool addCoords,
587              const UINT_VECT *onlyOnAtoms, bool addResidueInfo) {
588   auto *res = new RWMol(mol);
589   addHs(*res, explicitOnly, addCoords, onlyOnAtoms, addResidueInfo);
590   return static_cast<ROMol *>(res);
591 };
592 
593 namespace {
594 // returns whether or not an adjustment was made, in case we want that info
adjustStereoAtomsIfRequired(RWMol & mol,const Atom * atom,const Atom * heavyAtom)595 bool adjustStereoAtomsIfRequired(RWMol &mol, const Atom *atom,
596                                  const Atom *heavyAtom) {
597   PRECONDITION(atom != nullptr, "bad atom");
598   PRECONDITION(heavyAtom != nullptr, "bad heavy atom");
599   // nothing we can do if the degree is only 2 (and we should have covered
600   // that earlier anyway)
601   if (heavyAtom->getDegree() == 2) {
602     return false;
603   }
604   const auto &cbnd =
605       mol.getBondBetweenAtoms(atom->getIdx(), heavyAtom->getIdx());
606   if (!cbnd) {
607     return false;
608   }
609   for (const auto &nbri :
610        boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
611     Bond *bnd = mol[nbri];
612     if (bnd->getBondType() == Bond::DOUBLE &&
613         bnd->getStereo() > Bond::STEREOANY) {
614       auto sAtomIt = std::find(bnd->getStereoAtoms().begin(),
615                                bnd->getStereoAtoms().end(), atom->getIdx());
616       if (sAtomIt != bnd->getStereoAtoms().end()) {
617         // sAtomIt points to the position of this atom's index in the list.
618         // find the index of another atom attached to the heavy atom and
619         // use it to update sAtomIt
620         unsigned int dblNbrIdx = bnd->getOtherAtomIdx(heavyAtom->getIdx());
621         for (const auto &nbri :
622              boost::make_iterator_range(mol.getAtomNeighbors(heavyAtom))) {
623           const auto &nbr = mol[nbri];
624           if (nbr->getIdx() == dblNbrIdx || nbr->getIdx() == atom->getIdx()) {
625             continue;
626           }
627           *sAtomIt = nbr->getIdx();
628           bool madeAdjustment = true;
629           switch (bnd->getStereo()) {
630             case Bond::STEREOCIS:
631               bnd->setStereo(Bond::STEREOTRANS);
632               break;
633             case Bond::STEREOTRANS:
634               bnd->setStereo(Bond::STEREOCIS);
635               break;
636             default:
637               // I think we shouldn't need to do anything with E and Z...
638               madeAdjustment = false;
639               break;
640           }
641           return madeAdjustment;
642         }
643       }
644     }
645   }
646   return false;
647 }
648 
molRemoveH(RWMol & mol,unsigned int idx,bool updateExplicitCount)649 void molRemoveH(RWMol &mol, unsigned int idx, bool updateExplicitCount) {
650   auto atom = mol.getAtomWithIdx(idx);
651   PRECONDITION(atom->getAtomicNum() == 1, "idx corresponds to a non-Hydrogen");
652   for (const auto &nbri : boost::make_iterator_range(mol.getAtomBonds(atom))) {
653     const Bond *bond = mol[nbri];
654     Atom *heavyAtom = bond->getOtherAtom(atom);
655     int heavyAtomNum = heavyAtom->getAtomicNum();
656 
657     // we'll update the neighbor's explicit H count if we were told to
658     // *or* if the neighbor is chiral, in which case the H is needed
659     // in order to complete the coordination
660     // *or* if the neighbor has the noImplicit flag set:
661     if (updateExplicitCount || heavyAtom->getNoImplicit() ||
662         heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
663       heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1);
664     } else {
665       // this is a special case related to Issue 228 and the
666       // "disappearing Hydrogen" problem discussed in MolOps::adjustHs
667       //
668       // If we remove a hydrogen from an aromatic N or P, or if
669       // the heavy atom it is connected to is not in its default
670       // valence state, we need to be *sure* to increment the
671       // explicit count, even if the H itself isn't marked as explicit
672       const INT_VECT &defaultVs =
673           PeriodicTable::getTable()->getValenceList(heavyAtomNum);
674       if (((heavyAtomNum == 7 || heavyAtomNum == 15) &&
675            heavyAtom->getIsAromatic()) ||
676           (std::find(defaultVs.begin() + 1, defaultVs.end(),
677                      heavyAtom->getTotalValence()) != defaultVs.end())) {
678         heavyAtom->setNumExplicitHs(heavyAtom->getNumExplicitHs() + 1);
679       }
680     }
681 
682     // One other consequence of removing the H from the graph is
683     // that we may change the ordering of the bonds about a
684     // chiral center.  This may change the chiral label at that
685     // atom.  We deal with that by explicitly checking here:
686     if (heavyAtom->getChiralTag() != Atom::CHI_UNSPECIFIED) {
687       INT_LIST neighborIndices;
688       for (const auto &nbri :
689            boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
690         Bond *nbnd = mol[nbri];
691         if (nbnd->getIdx() != bond->getIdx()) {
692           neighborIndices.push_back(nbnd->getIdx());
693         }
694       }
695       neighborIndices.push_back(bond->getIdx());
696 
697       int nSwaps = heavyAtom->getPerturbationOrder(neighborIndices);
698       // std::cerr << "H: "<<atom->getIdx()<<" hvy:
699       // "<<heavyAtom->getIdx()<<" swaps: " << nSwaps<<std::endl;
700       if (nSwaps % 2) {
701         heavyAtom->invertChirality();
702       }
703     }
704 
705     // if it's a wavy bond, then we need to
706     // mark the beginning atom with the _UnknownStereo tag.
707     // so that we know later that something was affecting its
708     // stereochem
709     if (bond->getBondDir() == Bond::UNKNOWN &&
710         bond->getBeginAtomIdx() == heavyAtom->getIdx()) {
711       heavyAtom->setProp(common_properties::_UnknownStereo, 1);
712     } else if (bond->getBondDir() == Bond::ENDDOWNRIGHT ||
713                bond->getBondDir() == Bond::ENDUPRIGHT) {
714       // if the direction is set on this bond and the atom it's connected to
715       // has no other single bonds with directions set, then we need to set
716       // direction on one of the other neighbors in order to avoid double
717       // bond stereochemistry possibly being lost. This was github #754
718       bool foundADir = false;
719       Bond *oBond = nullptr;
720       for (const auto &nbri :
721            boost::make_iterator_range(mol.getAtomBonds(heavyAtom))) {
722         Bond *nbnd = mol[nbri];
723         if (nbnd->getIdx() != bond->getIdx() &&
724             nbnd->getBondType() == Bond::SINGLE) {
725           if (nbnd->getBondDir() == Bond::NONE) {
726             oBond = nbnd;
727           } else {
728             foundADir = true;
729           }
730         }
731       }
732       if (!foundADir && oBond != nullptr) {
733         bool flipIt = (oBond->getBeginAtom() == heavyAtom) &&
734                       (bond->getBeginAtom() == heavyAtom);
735         if (flipIt) {
736           oBond->setBondDir(bond->getBondDir() == Bond::ENDDOWNRIGHT
737                                 ? Bond::ENDUPRIGHT
738                                 : Bond::ENDDOWNRIGHT);
739         } else {
740           oBond->setBondDir(bond->getBondDir());
741         }
742       }
743       // if this atom is one of the stereoatoms for a double bond we need
744       // to switch the stereo atom on this end to be the other neighbor
745       // This was part of github #1810
746       adjustStereoAtomsIfRequired(mol, atom, heavyAtom);
747     } else {
748       // if this atom is one of the stereoatoms for a double bond we need
749       // to switch the stereo atom on this end to be the other neighbor
750       // This was part of github #1810
751       adjustStereoAtomsIfRequired(mol, atom, heavyAtom);
752     }
753   }
754   mol.removeAtom(atom);
755 }
756 
757 }  // end of anonymous namespace
758 
removeHs(RWMol & mol,const RemoveHsParameters & ps,bool sanitize)759 void removeHs(RWMol &mol, const RemoveHsParameters &ps, bool sanitize) {
760   if (ps.removeAndTrackIsotopes) {
761     // if there are any non-isotopic Hs remove them first
762     // to make sure chirality is preserved
763     bool needRemoveHs = false;
764     for (auto atom : mol.atoms()) {
765       if (atom->getAtomicNum() == 1 && atom->getIsotope() == 0) {
766         needRemoveHs = true;
767         break;
768       }
769     }
770     if (needRemoveHs) {
771       RemoveHsParameters psCopy(ps);
772       psCopy.removeAndTrackIsotopes = false;
773       psCopy.removeIsotopes = false;
774       removeHs(mol, psCopy, false);
775     }
776   }
777   for (auto atom : mol.atoms()) {
778     atom->updatePropertyCache(false);
779   }
780   if (ps.removeAndTrackIsotopes) {
781     for (const auto &pair : getIsoMap(mol)) {
782       mol.getAtomWithIdx(pair.first)
783           ->setProp(common_properties::_isotopicHs, pair.second);
784     }
785   }
786   boost::dynamic_bitset<> atomsToRemove{mol.getNumAtoms(), 0};
787   for (auto atom : mol.atoms()) {
788     if (atom->getAtomicNum() != 1) {
789       continue;
790     }
791     if (!ps.removeWithQuery && atom->hasQuery()) {
792       continue;
793     }
794     if (!ps.removeDegreeZero && !atom->getDegree()) {
795       if (ps.showWarnings) {
796         BOOST_LOG(rdWarningLog)
797             << "WARNING: not removing hydrogen atom without neighbors"
798             << std::endl;
799       }
800       continue;
801     }
802     if (!ps.removeHigherDegrees && atom->getDegree() > 1) {
803       continue;
804     }
805     if (!ps.removeIsotopes && !ps.removeAndTrackIsotopes &&
806         atom->getIsotope()) {
807       continue;
808     }
809     if (!ps.removeNonimplicit &&
810         !atom->hasProp(common_properties::isImplicit)) {
811       continue;
812     }
813     if (!ps.removeMapped && atom->getAtomMapNum()) {
814       continue;
815     }
816     if (!ps.removeInSGroups) {
817       bool skipIt = false;
818       for (const auto &sg : getSubstanceGroups(mol)) {
819         if (sg.includesAtom(atom->getIdx())) {
820           skipIt = true;
821           break;
822         }
823       }
824       if (skipIt) {
825         continue;
826       }
827     }
828     if (!ps.removeHydrides && atom->getFormalCharge() == -1) {
829       continue;
830     }
831     bool removeIt = true;
832     if (atom->getDegree() &&
833         (!ps.removeDummyNeighbors || !ps.removeDefiningBondStereo ||
834          !ps.removeOnlyHNeighbors)) {
835       bool onlyHNeighbors = true;
836       ROMol::ADJ_ITER begin, end;
837       boost::tie(begin, end) = mol.getAtomNeighbors(atom);
838       while (begin != end && removeIt) {
839         auto nbr = mol.getAtomWithIdx(*begin);
840         // is it a dummy?
841         if (!ps.removeDummyNeighbors && nbr->getAtomicNum() < 1) {
842           removeIt = false;
843           if (ps.showWarnings) {
844             BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom "
845                                        "with dummy atom neighbors"
846                                     << std::endl;
847           }
848         }
849         if (!ps.removeOnlyHNeighbors && nbr->getAtomicNum() != 1) {
850           onlyHNeighbors = false;
851         }
852         if (!ps.removeWithWedgedBond) {
853           const auto bnd =
854               mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx());
855           if (bnd->getBondDir() == Bond::BEGINDASH ||
856               bnd->getBondDir() == Bond::BEGINWEDGE) {
857             removeIt = false;
858             if (ps.showWarnings) {
859               BOOST_LOG(rdWarningLog) << "WARNING: not removing hydrogen atom "
860                                          "with wedged bond"
861                                       << std::endl;
862             }
863           }
864         }
865         // Check to see if the neighbor has a double bond and we're the only
866         // neighbor at this end.  This was part of github #1810
867         if (!ps.removeDefiningBondStereo && nbr->getDegree() == 2) {
868           for (const auto &nbri :
869                boost::make_iterator_range(mol.getAtomBonds(nbr))) {
870             const Bond *bnd = mol[nbri];
871             if (bnd->getBondType() == Bond::DOUBLE &&
872                 (bnd->getStereo() > Bond::STEREOANY ||
873                  mol.getBondBetweenAtoms(atom->getIdx(), nbr->getIdx())
874                          ->getBondDir() > Bond::NONE)) {
875               removeIt = false;
876               break;
877             }
878           }
879         }
880         ++begin;
881       }
882       if (removeIt && (!ps.removeOnlyHNeighbors && onlyHNeighbors)) {
883         removeIt = false;
884       }
885     }
886     if (removeIt) {
887       atomsToRemove.set(atom->getIdx());
888     }
889   }  // end of the loop over atoms
890   // now that we know which atoms need to be removed, go ahead and remove them
891   // NOTE: there's too much complexity around stereochemistry here
892   // to be able to safely use batch editing.
893   for (int idx = mol.getNumAtoms() - 1; idx >= 0; --idx) {
894     if (atomsToRemove[idx]) {
895       molRemoveH(mol, idx, ps.updateExplicitCount);
896     }
897   }
898   //
899   //  If we didn't only remove implicit Hs, which are guaranteed to
900   //  be the highest numbered atoms, we may have altered atom indices.
901   //  This can screw up derived properties (such as ring members), so
902   //  do some checks:
903   //
904   if (!atomsToRemove.empty() && ps.removeNonimplicit && sanitize) {
905     sanitizeMol(mol);
906   }
907 };
removeHs(const ROMol & mol,const RemoveHsParameters & ps,bool sanitize)908 ROMol *removeHs(const ROMol &mol, const RemoveHsParameters &ps, bool sanitize) {
909   auto *res = new RWMol(mol);
910   try {
911     removeHs(*res, ps, sanitize);
912   } catch (const MolSanitizeException &) {
913     delete res;
914     throw;
915   }
916   return static_cast<ROMol *>(res);
917 }
removeHs(RWMol & mol,bool implicitOnly,bool updateExplicitCount,bool sanitize)918 void removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount,
919               bool sanitize) {
920   RemoveHsParameters ps;
921   ps.removeNonimplicit = !implicitOnly;
922   ps.updateExplicitCount = updateExplicitCount;
923   removeHs(mol, ps, sanitize);
924 };
removeHs(const ROMol & mol,bool implicitOnly,bool updateExplicitCount,bool sanitize)925 ROMol *removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount,
926                 bool sanitize) {
927   auto *res = new RWMol(mol);
928   try {
929     removeHs(*res, implicitOnly, updateExplicitCount, sanitize);
930   } catch (const MolSanitizeException &) {
931     delete res;
932     throw;
933   }
934   return static_cast<ROMol *>(res);
935 }
936 
removeAllHs(RWMol & mol,bool sanitize)937 void removeAllHs(RWMol &mol, bool sanitize) {
938   RemoveHsParameters ps;
939   ps.removeDegreeZero = true;
940   ps.removeHigherDegrees = true;
941   ps.removeOnlyHNeighbors = true;
942   ps.removeIsotopes = true;
943   ps.removeDummyNeighbors = true;
944   ps.removeDefiningBondStereo = true;
945   ps.removeWithWedgedBond = true;
946   ps.removeWithQuery = true;
947   ps.removeNonimplicit = true;
948   ps.removeInSGroups = true;
949   ps.showWarnings = false;
950   ps.removeHydrides = true;
951   removeHs(mol, ps, sanitize);
952 };
removeAllHs(const ROMol & mol,bool sanitize)953 ROMol *removeAllHs(const ROMol &mol, bool sanitize) {
954   auto *res = new RWMol(mol);
955   try {
956     removeAllHs(*res, sanitize);
957   } catch (const MolSanitizeException &) {
958     delete res;
959     throw;
960   }
961   return static_cast<ROMol *>(res);
962 }
963 
964 namespace {
isQueryH(const Atom * atom)965 bool isQueryH(const Atom *atom) {
966   PRECONDITION(atom, "bogus atom");
967   if (atom->getAtomicNum() == 1) {
968     // the simple case: the atom is flagged as being an H and
969     // has no query
970     if (!atom->hasQuery() ||
971         (!atom->getQuery()->getNegation() &&
972          atom->getQuery()->getDescription() == "AtomAtomicNum")) {
973       return true;
974     }
975   }
976 
977   if (atom->getDegree() != 1) {
978     // only degree 1
979     return false;
980   }
981 
982   if (atom->hasQuery() && atom->getQuery()->getNegation()) {
983     // we will not merge negated queries
984     return false;
985   }
986 
987   bool hasHQuery = false, hasOr = false;
988   if (atom->hasQuery()) {
989     if (atom->getQuery()->getDescription() == "AtomOr") {
990       hasOr = true;
991     }
992     std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
993         atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
994     // the logic gets too complicated if there's an OR in the children, so
995     // just punt on those (with a warning)
996     while (!(hasHQuery && hasOr) && childStack.size()) {
997       QueryAtom::QUERYATOM_QUERY::CHILD_TYPE query = childStack.front();
998       childStack.pop_front();
999       if (query->getDescription() == "AtomOr") {
1000         hasOr = true;
1001       } else if (query->getDescription() == "AtomAtomicNum") {
1002         if (static_cast<ATOM_EQUALS_QUERY *>(query.get())->getVal() == 1 &&
1003             !query->getNegation()) {
1004           hasHQuery = true;
1005         }
1006       } else {
1007         QueryAtom::QUERYATOM_QUERY::CHILD_VECT_CI child1;
1008         for (child1 = query->beginChildren(); child1 != query->endChildren();
1009              ++child1) {
1010           childStack.push_back(*child1);
1011         }
1012       }
1013     }
1014     // std::cerr<<"   !!!1 "<<atom->getIdx()<<" "<<hasHQuery<<"
1015     // "<<hasOr<<std::endl;
1016     if (hasHQuery && hasOr) {
1017       BOOST_LOG(rdWarningLog) << "WARNING: merging explicit H queries involved "
1018                                  "in ORs is not supported. This query will not "
1019                                  "be merged"
1020                               << std::endl;
1021       return false;
1022     }
1023   }
1024   return hasHQuery;
1025 }
1026 }  // namespace
1027 
1028 //
1029 //  This routine removes explicit hydrogens (and bonds to them) from
1030 //  the molecular graph and adds them as queries to the heavy atoms
1031 //  to which they are bound.  If the heavy atoms (or atom queries)
1032 //  already have hydrogen-count queries, they will be updated.
1033 //
1034 //  NOTE:
1035 //   - Hydrogens which aren't connected to a heavy atom will not be
1036 //     removed.  This prevents molecules like "[H][H]" from having
1037 //     all atoms removed.
1038 //
1039 //   - By default all hydrogens are removed, however if
1040 //     merge_unmapped_only is true, any hydrogen participating
1041 //     in an atom map will be retained
mergeQueryHs(RWMol & mol,bool mergeUnmappedOnly)1042 void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly) {
1043   std::vector<unsigned int> atomsToRemove;
1044 
1045   boost::dynamic_bitset<> hatoms(mol.getNumAtoms());
1046   for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
1047     hatoms[i] = isQueryH(mol.getAtomWithIdx(i));
1048   }
1049   unsigned int currIdx = 0, stopIdx = mol.getNumAtoms();
1050   while (currIdx < stopIdx) {
1051     Atom *atom = mol.getAtomWithIdx(currIdx);
1052     if (!hatoms[currIdx]) {
1053       unsigned int numHsToRemove = 0;
1054       ROMol::ADJ_ITER begin, end;
1055       boost::tie(begin, end) = mol.getAtomNeighbors(atom);
1056 
1057       while (begin != end) {
1058         if (hatoms[*begin]) {
1059           Atom &bgn = *mol.getAtomWithIdx(*begin);
1060           if (!mergeUnmappedOnly ||
1061               !bgn.hasProp(common_properties::molAtomMapNumber)) {
1062             atomsToRemove.push_back(rdcast<unsigned int>(*begin));
1063             ++numHsToRemove;
1064           }
1065         }
1066         ++begin;
1067       }
1068       if (numHsToRemove) {
1069         //
1070         //  We have H neighbors:
1071         //   Add the appropriate queries to compensate for their removal.
1072         //
1073         //  Examples:
1074         //    C[H] -> [C;!H0]
1075         //    C([H])[H] -> [C;!H0;!H1]
1076         //
1077         //  It would be more efficient to do this using range queries like:
1078         //    C([H])[H] -> [C;H{2-}]
1079         //  but that would produce non-standard SMARTS without the user
1080         //  having started with a non-standard SMARTS.
1081         //
1082         if (!atom->hasQuery()) {
1083           // it wasn't a query atom, we need to replace it so that we can add
1084           // a query:
1085           ATOM_EQUALS_QUERY *tmp = makeAtomNumQuery(atom->getAtomicNum());
1086           auto *newAt = new QueryAtom;
1087           newAt->setQuery(tmp);
1088           newAt->updateProps(*atom);
1089           mol.replaceAtom(atom->getIdx(), newAt);
1090           delete newAt;
1091           atom = mol.getAtomWithIdx(currIdx);
1092         }
1093         for (unsigned int i = 0; i < numHsToRemove; ++i) {
1094           ATOM_EQUALS_QUERY *tmp = makeAtomHCountQuery(i);
1095           tmp->setNegation(true);
1096           atom->expandQuery(tmp);
1097         }
1098       }  // end of numHsToRemove test
1099 
1100       // recurse if needed (was github isusue 544)
1101       if (atom->hasQuery()) {
1102         // std::cerr<<"  q: "<<atom->getQuery()->getDescription()<<std::endl;
1103         if (atom->getQuery()->getDescription() == "RecursiveStructure") {
1104           auto *rqm = static_cast<RWMol *>(const_cast<ROMol *>(
1105               static_cast<RecursiveStructureQuery *>(atom->getQuery())
1106                   ->getQueryMol()));
1107           mergeQueryHs(*rqm, mergeUnmappedOnly);
1108         }
1109 
1110         // FIX: shouldn't be repeating this code here
1111         std::list<QueryAtom::QUERYATOM_QUERY::CHILD_TYPE> childStack(
1112             atom->getQuery()->beginChildren(), atom->getQuery()->endChildren());
1113         while (childStack.size()) {
1114           QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front();
1115           childStack.pop_front();
1116           // std::cerr<<"      child: "<<qry->getDescription()<<std::endl;
1117           if (qry->getDescription() == "RecursiveStructure") {
1118             // std::cerr<<"    recurse"<<std::endl;
1119             auto *rqm = static_cast<RWMol *>(const_cast<ROMol *>(
1120                 static_cast<RecursiveStructureQuery *>(qry.get())
1121                     ->getQueryMol()));
1122             mergeQueryHs(*rqm, mergeUnmappedOnly);
1123             // std::cerr<<"    back"<<std::endl;
1124           } else if (qry->beginChildren() != qry->endChildren()) {
1125             childStack.insert(childStack.end(), qry->beginChildren(),
1126                               qry->endChildren());
1127           }
1128         }
1129       }  // end of recursion loop
1130     }
1131     ++currIdx;
1132   }
1133   mol.beginBatchEdit();
1134   for (auto aidx : atomsToRemove) {
1135     mol.removeAtom(aidx);
1136   }
1137   mol.commitBatchEdit();
1138 };
mergeQueryHs(const ROMol & mol,bool mergeUnmappedOnly)1139 ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly) {
1140   auto *res = new RWMol(mol);
1141   mergeQueryHs(*res, mergeUnmappedOnly);
1142   return static_cast<ROMol *>(res);
1143 };
1144 
needsHs(const ROMol & mol)1145 bool needsHs(const ROMol &mol) {
1146   for (const auto atom : mol.atoms()) {
1147     unsigned int nHNbrs = 0;
1148     for (const auto nbri :
1149          boost::make_iterator_range(mol.getAtomNeighbors(atom))) {
1150       const auto nbr = mol[nbri];
1151       if (nbr->getAtomicNum() == 1) {
1152         ++nHNbrs;
1153       }
1154     }
1155     bool noNeighbors = false;
1156     if (atom->getTotalNumHs(noNeighbors) > nHNbrs) {
1157       return true;
1158     }
1159   }
1160   return false;
1161 }
1162 
1163 }  // namespace MolOps
1164 }  // namespace RDKit
1165