1 //
2 //  Copyright (C) 2013-2021 Paolo Tosco
3 //
4 //  Copyright (C) 2004-2006 Rational Discovery LLC
5 //
6 //   @@ All Rights Reserved @@
7 //  This file is part of the RDKit.
8 //  The contents are covered by the terms of the BSD license
9 //  which is included in the file license.txt, found at the root
10 //  of the RDKit source tree.
11 //
12 #include <GraphMol/RDKitBase.h>
13 #include <GraphMol/MolOps.h>
14 #include <ForceField/MMFF/Nonbonded.h>
15 #include <RDGeneral/Invariant.h>
16 #include <RDGeneral/RDLog.h>
17 #include <boost/dynamic_bitset.hpp>
18 
19 #include <GraphMol/QueryOps.h>
20 #include "AtomTyper.h"
21 #include <cstdarg>
22 
23 namespace RDKit {
24 namespace MMFF {
25 using namespace ForceFields::MMFF;
26 namespace DefaultParameters {
27 
getMMFFProp()28 const MMFFPropCollection *getMMFFProp() {
29   static MMFFPropCollection ds_instance("");
30   return &ds_instance;
31 }
32 
getMMFFArom()33 const MMFFAromCollection *getMMFFArom() {
34   static MMFFAromCollection ds_instance;
35   return &ds_instance;
36 }
37 
getMMFFBndk()38 const MMFFBndkCollection *getMMFFBndk() {
39   static MMFFBndkCollection ds_instance;
40   return &ds_instance;
41 }
42 
getMMFFBond()43 const MMFFBondCollection *getMMFFBond() {
44   static MMFFBondCollection ds_instance;
45   return &ds_instance;
46 }
47 
getMMFFChg()48 const MMFFChgCollection *getMMFFChg() {
49   static MMFFChgCollection ds_instance;
50   return &ds_instance;
51 }
52 
getMMFFDef()53 const MMFFDefCollection *getMMFFDef() {
54   static MMFFDefCollection ds_instance;
55   return &ds_instance;
56 }
57 
getMMFFHerschbachLaurie()58 const MMFFHerschbachLaurieCollection *getMMFFHerschbachLaurie() {
59   static MMFFHerschbachLaurieCollection ds_instance;
60   return &ds_instance;
61 }
62 
getMMFFPBCI()63 const MMFFPBCICollection *getMMFFPBCI() {
64   static MMFFPBCICollection ds_instance;
65   return &ds_instance;
66 }
67 
getMMFFAngle()68 const MMFFAngleCollection *getMMFFAngle() {
69   static MMFFAngleCollection ds_instance;
70   return &ds_instance;
71 }
72 
getMMFFStbn()73 const MMFFStbnCollection *getMMFFStbn() {
74   static MMFFStbnCollection ds_instance;
75   return &ds_instance;
76 }
77 
getMMFFDfsb()78 const MMFFDfsbCollection *getMMFFDfsb() {
79   static MMFFDfsbCollection ds_instance;
80   return &ds_instance;
81 }
82 
getMMFFCovRadPauEle()83 const MMFFCovRadPauEleCollection *getMMFFCovRadPauEle() {
84   static MMFFCovRadPauEleCollection ds_instance;
85   return &ds_instance;
86 }
87 
getMMFFTor(const bool isMMFFs)88 const MMFFTorCollection *getMMFFTor(const bool isMMFFs) {
89   static MMFFTorCollection MMFF94(false, "");
90   static MMFFTorCollection MMFF94s(true, "");
91   return (isMMFFs) ? &MMFF94s : &MMFF94;
92 }
93 
getMMFFOop(const bool isMMFFs)94 const MMFFOopCollection *getMMFFOop(const bool isMMFFs) {
95   static MMFFOopCollection MMFF94(false, "");
96   static MMFFOopCollection MMFF94s(true, "");
97   return (isMMFFs) ? &MMFF94s : &MMFF94;
98 }
99 
getMMFFVdW()100 const MMFFVdWCollection *getMMFFVdW() {
101   static MMFFVdWCollection ds_instance;
102   return &ds_instance;
103 }
104 
105 }  // namespace DefaultParameters
106 class RingMembership {
107  public:
RingMembership()108   RingMembership(){};
getIsInAromaticRing() const109   bool getIsInAromaticRing() const { return d_isInAromaticRing; }
setIsInAromaticRing(bool isInAromaticRing)110   void setIsInAromaticRing(bool isInAromaticRing) {
111     d_isInAromaticRing = isInAromaticRing;
112   }
getRingIdxSet() const113   const std::set<std::uint32_t> &getRingIdxSet() const { return d_ringIdxSet; }
getRingIdxSet()114   std::set<std::uint32_t> &getRingIdxSet() { return d_ringIdxSet; }
115 
116  private:
117   bool d_isInAromaticRing{false};
118   std::set<std::uint32_t> d_ringIdxSet;
119 };
120 
121 class RingMembershipSize {
122   typedef std::map<unsigned int, RingMembership> RingMembershipMap;
123   typedef std::map<unsigned int, RingMembershipMap> RingSizeMembershipMap;
124 
125  public:
126   static const std::uint32_t IS_AROMATIC_BIT;
127   RingMembershipSize(const ROMol &mol);
128   bool isAtomInAromaticRingOfSize(const Atom *atom,
129                                   const unsigned int ringSize) const;
130   bool areAtomsInSameAromaticRing(const Atom *atom1, const Atom *atom2) const;
131   bool areAtomsInSameRingOfSize(const unsigned int ringSize,
132                                 const unsigned int numAtoms, ...) const;
133 
134  private:
135   RingSizeMembershipMap d_ringSizeMembershipMap;
136 };
137 
138 const std::uint32_t RingMembershipSize::IS_AROMATIC_BIT = (1 << 31);
139 
RingMembershipSize(const ROMol & mol)140 RingMembershipSize::RingMembershipSize(const ROMol &mol) {
141   static const unsigned int MAX_NUM_RINGS = (0xFFFFFFFF >> 1);
142   const RingInfo *ringInfo = mol.getRingInfo();
143   const VECT_INT_VECT &atomRings = ringInfo->atomRings();
144   PRECONDITION(atomRings.size() < MAX_NUM_RINGS, "Too many rings");
145   for (std::uint32_t ringIdx = 0; ringIdx < atomRings.size(); ++ringIdx) {
146     unsigned int ringSize = atomRings[ringIdx].size();
147     std::uint32_t ringIdxWithAromaticFlag = ringIdx;
148     bool ringIsAromatic = isRingAromatic(mol, atomRings[ringIdx]);
149     if (ringIsAromatic) {
150       ringIdxWithAromaticFlag |= IS_AROMATIC_BIT;
151     }
152     auto it = d_ringSizeMembershipMap.find(ringSize);
153     if (it == d_ringSizeMembershipMap.end()) {
154       it = d_ringSizeMembershipMap
155                .insert(std::make_pair(ringSize, RingMembershipMap()))
156                .first;
157     }
158     for (int atomIdxIt : atomRings[ringIdx]) {
159       auto it2 = it->second.find(atomIdxIt);
160       if (it2 == it->second.end()) {
161         it2 = it->second.insert(std::make_pair(atomIdxIt, RingMembership()))
162                   .first;
163       }
164       it2->second.getRingIdxSet().insert(ringIdxWithAromaticFlag);
165       if (ringIsAromatic) {
166         it2->second.setIsInAromaticRing(true);
167       }
168     }
169   }
170 }
171 
172 // returns true if the atom is in an aromatic ring of size ringSize
isAtomInAromaticRingOfSize(const Atom * atom,const unsigned int ringSize) const173 bool RingMembershipSize::isAtomInAromaticRingOfSize(
174     const Atom *atom, const unsigned int ringSize) const {
175   auto it = d_ringSizeMembershipMap.find(ringSize);
176   bool isAromatic = (it != d_ringSizeMembershipMap.end());
177   if (isAromatic) {
178     auto it2 = it->second.find(atom->getIdx());
179     isAromatic = (it2 != it->second.end());
180     if (isAromatic) {
181       isAromatic = it2->second.getIsInAromaticRing();
182     }
183   }
184 
185   return isAromatic;
186 }
187 
areAtomsInSameAromaticRing(const Atom * atom1,const Atom * atom2) const188 bool RingMembershipSize::areAtomsInSameAromaticRing(const Atom *atom1,
189                                                     const Atom *atom2) const {
190   bool areInSameAromaticRing = false;
191 
192   for (auto it = d_ringSizeMembershipMap.begin();
193        !areInSameAromaticRing && (it != d_ringSizeMembershipMap.end()); ++it) {
194     std::vector<std::uint32_t> intersectVect;
195     auto it1 = it->second.find(atom1->getIdx());
196     auto it2 = it->second.find(atom2->getIdx());
197     if ((it1 != it->second.end()) && (it2 != it->second.end())) {
198       std::set_intersection(it1->second.getRingIdxSet().begin(),
199                             it1->second.getRingIdxSet().end(),
200                             it2->second.getRingIdxSet().begin(),
201                             it2->second.getRingIdxSet().end(),
202                             std::back_inserter(intersectVect));
203       for (std::vector<std::uint32_t>::const_iterator ivIt =
204                intersectVect.begin();
205            !areInSameAromaticRing && (ivIt != intersectVect.end()); ++ivIt) {
206         areInSameAromaticRing = *ivIt & IS_AROMATIC_BIT;
207       }
208     }
209   }
210 
211   return areInSameAromaticRing;
212 }
213 
areAtomsInSameRingOfSize(const unsigned int ringSize,const unsigned int numAtoms,...) const214 bool RingMembershipSize::areAtomsInSameRingOfSize(const unsigned int ringSize,
215                                                   const unsigned int numAtoms,
216                                                   ...) const {
217   va_list atoms;
218   bool areInSameRingOfSize = false;
219 
220   auto it = d_ringSizeMembershipMap.find(ringSize);
221   if (it != d_ringSizeMembershipMap.end()) {
222     va_start(atoms, numAtoms);
223     unsigned int idx1 = va_arg(atoms, const Atom *)->getIdx();
224     auto it1 = it->second.find(idx1);
225     if (it1 != it->second.end()) {
226       std::set<std::uint32_t> commonSet = it1->second.getRingIdxSet();
227       for (unsigned int i = 1; !commonSet.empty() && (i < numAtoms); ++i) {
228         areInSameRingOfSize = false;
229         unsigned int idx2 = va_arg(atoms, const Atom *)->getIdx();
230         auto it2 = it->second.find(idx2);
231         if (it2 == it->second.end()) {
232           break;
233         }
234         std::set<std::uint32_t> intersect;
235         std::set_intersection(commonSet.begin(), commonSet.end(),
236                               it2->second.getRingIdxSet().begin(),
237                               it2->second.getRingIdxSet().end(),
238                               std::inserter(intersect, intersect.end()));
239         commonSet = intersect;
240         areInSameRingOfSize = !commonSet.empty();
241       }
242     }
243     va_end(atoms);
244   }
245 
246   return areInSameRingOfSize;
247 }
248 
249 // given the atomic num, this function returns the periodic
250 // table row number, starting from 0 for hydrogen
getPeriodicTableRow(const int atomicNum)251 unsigned int getPeriodicTableRow(const int atomicNum) {
252   unsigned int periodicTableRow = 0;
253 
254   if ((atomicNum >= 3) && (atomicNum <= 10)) {
255     periodicTableRow = 1;
256   } else if ((atomicNum >= 11) && (atomicNum <= 18)) {
257     periodicTableRow = 2;
258   } else if ((atomicNum >= 19) && (atomicNum <= 36)) {
259     periodicTableRow = 3;
260   } else if ((atomicNum >= 37) && (atomicNum <= 54)) {
261     periodicTableRow = 4;
262   }
263 
264   return periodicTableRow;
265 }
266 
267 // given the atomic num, this function returns the periodic
268 // table row number, starting from 1 for helium
269 // Hydrogen has a special row number (0), while transition
270 // metals have the row number multiplied by 10
getPeriodicTableRowHL(const int atomicNum)271 unsigned int getPeriodicTableRowHL(const int atomicNum) {
272   unsigned int periodicTableRow = 0;
273 
274   if (atomicNum == 2) {
275     periodicTableRow = 1;
276   } else if ((atomicNum >= 3) && (atomicNum <= 10)) {
277     periodicTableRow = 2;
278   } else if ((atomicNum >= 11) && (atomicNum <= 18)) {
279     periodicTableRow = 3;
280   } else if ((atomicNum >= 19) && (atomicNum <= 36)) {
281     periodicTableRow = 4;
282   } else if ((atomicNum >= 37) && (atomicNum <= 54)) {
283     periodicTableRow = 5;
284   }
285   if (((atomicNum >= 21) && (atomicNum <= 30)) ||
286       ((atomicNum >= 39) && (atomicNum <= 48))) {
287     periodicTableRow *= 10;
288   }
289 
290   return periodicTableRow;
291 }
292 
293 // given the MMFF atom type, this function returns true
294 // if it is aromatic
isAromaticAtomType(const unsigned int atomType)295 bool isAromaticAtomType(const unsigned int atomType) {
296   static const unsigned int aromatic_array[] = {
297       37, 38, 39, 44, 58, 59, 63, 64, 65, 66, 69, 76, 78, 79, 80, 81, 82};
298   const std::set<unsigned int> aromaticTypes(
299       aromatic_array,
300       aromatic_array + sizeof(aromatic_array) / sizeof(aromatic_array[0]));
301 
302   return (aromaticTypes.find(atomType) != aromaticTypes.end());
303 }
304 
isRingAromatic(const ROMol & mol,const INT_VECT & ringIndxVect)305 bool isRingAromatic(const ROMol &mol, const INT_VECT &ringIndxVect) {
306   bool isAromatic = true;
307   for (unsigned int i = 0; isAromatic && (i < ringIndxVect.size() - 1); ++i) {
308     isAromatic = (mol.getBondBetweenAtoms(ringIndxVect[i], ringIndxVect[i + 1])
309                       ->getBondType() == Bond::AROMATIC);
310   }
311   return isAromatic;
312 }
313 
314 // returns true if the atom is in a ring of size ringSize
isAtomInAromaticRingOfSize(const Atom * atom,const unsigned int ringSize)315 bool isAtomInAromaticRingOfSize(const Atom *atom, const unsigned int ringSize) {
316   bool isAromatic = false;
317   const ROMol &mol = atom->getOwningMol();
318   const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
319 
320   if (atom->getIsAromatic()) {
321     for (unsigned int i = 0; (!isAromatic) && (i < atomRings.size()); ++i) {
322       if ((atomRings[i].size() != ringSize) ||
323           (std::find(atomRings[i].begin(), atomRings[i].end(),
324                      atom->getIdx()) == atomRings[i].end())) {
325         continue;
326       }
327       isAromatic = isRingAromatic(mol, atomRings[i]);
328     }
329   }
330 
331   return isAromatic;
332 }
333 
334 // returns true if the atom is an N-oxide
isAtomNOxide(const Atom * atom)335 bool isAtomNOxide(const Atom *atom) {
336   bool isNOxide = false;
337   const ROMol &mol = atom->getOwningMol();
338   ROMol::ADJ_ITER nbrIdx;
339   ROMol::ADJ_ITER endNbrs;
340 
341   if ((atom->getAtomicNum() == 7) && (atom->getTotalDegree() >= 3)) {
342     // loop over neighbors
343     boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
344     for (; (!isNOxide) && (nbrIdx != endNbrs); ++nbrIdx) {
345       const Atom *nbrAtom = mol[*nbrIdx];
346       isNOxide =
347           ((nbrAtom->getAtomicNum() == 8) && (nbrAtom->getTotalDegree() == 1));
348     }
349   }
350 
351   return isNOxide;
352 }
353 
354 // if the angle formed by atoms with indexes idx1, idx2, idx3
355 // is in a ring of {3,4} atoms returns 3 or 4, respectively;
356 // otherwise it returns 0
isAngleInRingOfSize3or4(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3)357 unsigned int isAngleInRingOfSize3or4(const ROMol &mol, const unsigned int idx1,
358                                      const unsigned int idx2,
359                                      const unsigned int idx3) {
360   unsigned int ringSize = 0;
361 
362   if (mol.getBondBetweenAtoms(idx1, idx2) &&
363       mol.getBondBetweenAtoms(idx2, idx3)) {
364     if (mol.getBondBetweenAtoms(idx3, idx1)) {
365       ringSize = 3;
366     } else {
367       std::set<unsigned int> s1;
368       std::set<unsigned int> s2;
369       std::vector<int> intersect;
370       ROMol::ADJ_ITER nbrIdx;
371       ROMol::ADJ_ITER endNbrs;
372       unsigned int newIdx;
373       boost::tie(nbrIdx, endNbrs) =
374           mol.getAtomNeighbors(mol.getAtomWithIdx(idx1));
375       for (; nbrIdx != endNbrs; ++nbrIdx) {
376         newIdx = mol[*nbrIdx]->getIdx();
377         if (newIdx != idx2) {
378           s1.insert(newIdx);
379         }
380       }
381       boost::tie(nbrIdx, endNbrs) =
382           mol.getAtomNeighbors(mol.getAtomWithIdx(idx3));
383       for (; nbrIdx != endNbrs; ++nbrIdx) {
384         newIdx = mol[*nbrIdx]->getIdx();
385         if (newIdx != idx2) {
386           s2.insert(newIdx);
387         }
388       }
389       std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
390                             std::back_inserter(intersect));
391       if (intersect.size()) {
392         ringSize = 4;
393       }
394     }
395   }
396 
397   return ringSize;
398 }
399 
400 // if the dihedral angle formed by atoms with indexes idx1,
401 // idx2, idx3, idx4 is in a ring of {4,5} atoms returns 4 or 5,
402 // respectively; otherwise it returns 0
isTorsionInRingOfSize4or5(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4)403 unsigned int isTorsionInRingOfSize4or5(const ROMol &mol,
404                                        const unsigned int idx1,
405                                        const unsigned int idx2,
406                                        const unsigned int idx3,
407                                        const unsigned int idx4) {
408   unsigned int ringSize = 0;
409 
410   if (mol.getBondBetweenAtoms(idx1, idx2) &&
411       mol.getBondBetweenAtoms(idx2, idx3) &&
412       mol.getBondBetweenAtoms(idx3, idx4)) {
413     if (mol.getBondBetweenAtoms(idx4, idx1)) {
414       ringSize = 4;
415     } else {
416       std::set<unsigned int> s1;
417       std::set<unsigned int> s2;
418       std::vector<int> intersect;
419       ROMol::ADJ_ITER nbrIdx;
420       ROMol::ADJ_ITER endNbrs;
421       unsigned int newIdx;
422       boost::tie(nbrIdx, endNbrs) =
423           mol.getAtomNeighbors(mol.getAtomWithIdx(idx1));
424       for (; nbrIdx != endNbrs; ++nbrIdx) {
425         newIdx = mol[*nbrIdx]->getIdx();
426         if (newIdx != idx2) {
427           s1.insert(newIdx);
428         }
429       }
430       boost::tie(nbrIdx, endNbrs) =
431           mol.getAtomNeighbors(mol.getAtomWithIdx(idx4));
432       for (; nbrIdx != endNbrs; ++nbrIdx) {
433         newIdx = mol[*nbrIdx]->getIdx();
434         if (newIdx != idx3) {
435           s2.insert(newIdx);
436         }
437       }
438       std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
439                             std::back_inserter(intersect));
440       if (intersect.size()) {
441         ringSize = 5;
442       }
443     }
444   }
445 
446   return ringSize;
447 }
448 
449 // return true if atoms are in the same ring of size ringSize
areAtomsInSameRingOfSize(const ROMol & mol,const unsigned int ringSize,const unsigned int numAtoms,...)450 bool areAtomsInSameRingOfSize(const ROMol &mol, const unsigned int ringSize,
451                               const unsigned int numAtoms, ...) {
452   unsigned int i;
453   bool areInSameRingOfSize = false;
454   const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
455   unsigned int idx;
456   va_list atomIdxs;
457 
458   for (i = 0; (!areInSameRingOfSize) && (i < atomRings.size()); ++i) {
459     if (atomRings[i].size() != ringSize) {
460       continue;
461     }
462     areInSameRingOfSize = true;
463     va_start(atomIdxs, numAtoms);
464     for (unsigned int j = 0; areInSameRingOfSize && (j < numAtoms); ++j) {
465       idx = va_arg(atomIdxs, unsigned int);
466       areInSameRingOfSize = (std::find(atomRings[i].begin(), atomRings[i].end(),
467                                        idx) != atomRings[i].end());
468     }
469     va_end(atomIdxs);
470   }
471 
472   return areInSameRingOfSize;
473 }
474 
475 // return true if atoms are in the same aromatic ring
areAtomsInSameAromaticRing(const ROMol & mol,const unsigned int idx1,const unsigned int idx2)476 bool areAtomsInSameAromaticRing(const ROMol &mol, const unsigned int idx1,
477                                 const unsigned int idx2) {
478   unsigned int i;
479   unsigned int j;
480   bool areInSameAromatic = false;
481   const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
482 
483   if (mol.getAtomWithIdx(idx1)->getIsAromatic() &&
484       mol.getAtomWithIdx(idx2)->getIsAromatic()) {
485     for (i = 0; (!areInSameAromatic) && (i < atomRings.size()); ++i) {
486       if ((std::find(atomRings[i].begin(), atomRings[i].end(), idx1) !=
487            atomRings[i].end()) &&
488           (std::find(atomRings[i].begin(), atomRings[i].end(), idx2) !=
489            atomRings[i].end())) {
490         areInSameAromatic = true;
491         for (j = 0; areInSameAromatic && (j < (atomRings[i].size() - 1)); ++j) {
492           areInSameAromatic =
493               (mol.getBondBetweenAtoms(atomRings[i][j], atomRings[i][j + 1])
494                    ->getBondType() == Bond::AROMATIC);
495         }
496       }
497     }
498   }
499 
500   return areInSameAromatic;
501 }
502 
503 // sets the aromaticity flags according to MMFF
setMMFFAromaticity(RWMol & mol)504 void setMMFFAromaticity(RWMol &mol) {
505   bool moveToNextRing = false;
506   bool isNOSinRing = false;
507   bool aromRingsAllSet = false;
508   bool exoDoubleBond = false;
509   bool canBeAromatic = false;
510   unsigned int i;
511   unsigned int j;
512   unsigned int nextInRing;
513   unsigned int pi_e = 0;
514   int nAromSet = 0;
515   int old_nAromSet = -1;
516   RingInfo *ringInfo = mol.getRingInfo();
517   Atom *atom;
518   Bond *bond;
519   const VECT_INT_VECT &atomRings = ringInfo->atomRings();
520   ROMol::ADJ_ITER nbrIdx;
521   ROMol::ADJ_ITER endNbrs;
522   boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms());
523   boost::dynamic_bitset<> aromRingBitVect(atomRings.size());
524 
525   while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) {
526     // loop over all rings
527     for (i = 0; i < atomRings.size(); ++i) {
528       // add 2 pi electrons for each double bond in the ring
529       for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false,
530           exoDoubleBond = false;
531            (!moveToNextRing) && (j < atomRings[i].size()); ++j) {
532         atom = mol.getAtomWithIdx(atomRings[i][j]);
533         // remember if this atom is nitrogen, oxygen or divalent sulfur
534         if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) ||
535             ((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) {
536           isNOSinRing = true;
537         }
538         // check whether this atom is double-bonded to next one in the ring
539         nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
540                                                       : atomRings[i][j + 1];
541         if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing)
542                 ->getBondType() == Bond::DOUBLE) {
543           pi_e += 2;
544         }
545         // if this is not a double bond, check whether this is carbon
546         // or nitrogen with total bond order = 4
547         else {
548           atom = mol.getAtomWithIdx(atomRings[i][j]);
549           // if not, move on
550           if ((atom->getAtomicNum() != 6) &&
551               (!((atom->getAtomicNum() == 7) &&
552                  ((atom->getExplicitValence() + atom->getNumImplicitHs()) ==
553                   4)))) {
554             continue;
555           }
556           // loop over neighbors
557           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
558           for (; nbrIdx != endNbrs; ++nbrIdx) {
559             const Atom *nbrAtom = mol[*nbrIdx];
560             // if the neighbor is one of the ring atoms, skip it
561             // since we are looking for exocyclic neighbors
562             if (std::find(atomRings[i].begin(), atomRings[i].end(),
563                           nbrAtom->getIdx()) != atomRings[i].end()) {
564               continue;
565             }
566             // it the neighbor is single-bonded, skip it
567             if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
568                     ->getBondType() == Bond::SINGLE) {
569               continue;
570             }
571             // if the neighbor is in a ring and its aromaticity
572             // bit has not yet been set, then move to the next ring
573             // we'll take care of this later
574             if (queryIsAtomInRing(nbrAtom) &&
575                 (!(aromBitVect[nbrAtom->getIdx()]))) {
576               moveToNextRing = true;
577               break;
578             }
579             // if the neighbor is in an aromatic ring and is
580             // double-bonded to the current atom, add 1 pi electron
581             if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
582                     ->getBondType() == Bond::DOUBLE) {
583               if (nbrAtom->getIsAromatic()) {
584                 ++pi_e;
585               } else {
586                 exoDoubleBond = true;
587               }
588             }
589           }
590         }
591       }
592       // if we quit the loop at an early stage because aromaticity
593       // had not yet been set, then move to the next ring
594       if (moveToNextRing) {
595         continue;
596       }
597       // loop again over all ring atoms
598       for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) {
599         // set aromaticity as perceived
600         aromBitVect[atomRings[i][j]] = 1;
601         atom = mol.getAtomWithIdx(atomRings[i][j]);
602         // if this is is a non-sp2 carbon or nitrogen
603         // then this ring can't be aromatic
604         if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) &&
605             (atom->getHybridization() != Atom::SP2)) {
606           canBeAromatic = false;
607         }
608       }
609       // if this ring can't be aromatic, move to the next one
610       if (!canBeAromatic) {
611         continue;
612       }
613       // if there is N, O, S; no exocyclic double bonds;
614       // the ring has an odd number of terms: add 2 pi electrons
615       if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) {
616         pi_e += 2;
617       }
618       // if this ring satisfies the 4n+2 rule,
619       // then mark its atoms as aromatic
620       if ((pi_e > 2) && (!((pi_e - 2) % 4))) {
621         aromRingBitVect[i] = 1;
622         for (j = 0; j < atomRings[i].size(); ++j) {
623           atom = mol.getAtomWithIdx(atomRings[i][j]);
624           atom->setIsAromatic(true);
625         }
626       }
627     }
628     // termination criterion: if we did not manage to set any more
629     // aromatic atoms compared to the previous iteration, then
630     // stop looping
631     old_nAromSet = nAromSet;
632     nAromSet = 0;
633     aromRingsAllSet = true;
634     for (i = 0; i < atomRings.size(); ++i) {
635       for (j = 0; j < atomRings[i].size(); ++j) {
636         if (aromBitVect[atomRings[i][j]]) {
637           ++nAromSet;
638         } else {
639           aromRingsAllSet = false;
640         }
641       }
642     }
643   }
644   for (i = 0; i < atomRings.size(); ++i) {
645     // if the ring is not aromatic, move to the next one
646     if (!aromRingBitVect[i]) {
647       continue;
648     }
649     for (j = 0; j < atomRings[i].size(); ++j) {
650       // mark all ring bonds as aromatic
651       nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
652                                                     : atomRings[i][j + 1];
653       bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing);
654       bond->setBondType(Bond::AROMATIC);
655       bond->setIsAromatic(true);
656     }
657   }
658   for (i = 0; i < atomRings.size(); ++i) {
659     // if the ring is not aromatic, move to the next one
660     if (!aromRingBitVect[i]) {
661       continue;
662     }
663     for (j = 0; j < atomRings[i].size(); ++j) {
664       atom = mol.getAtomWithIdx(atomRings[i][j]);
665       if (atom->getAtomicNum() != 6) {
666         int iv = atom->calcImplicitValence(false);
667         atom->calcExplicitValence(false);
668         if (iv) {
669           atom->setNumExplicitHs(iv);
670           atom->calcImplicitValence(false);
671         }
672       }
673     }
674   }
675 }
676 
677 // sets the MMFF atomType for a heavy atom
setMMFFHeavyAtomType(const RingMembershipSize & rmSize,const Atom * atom)678 void MMFFMolProperties::setMMFFHeavyAtomType(const RingMembershipSize &rmSize,
679                                              const Atom *atom) {
680   unsigned int atomType = 0;
681   unsigned int i;
682   unsigned int j;
683   unsigned int nTermObondedToN = 0;
684   bool alphaOrBetaInSameRing = false;
685   bool isAlphaOS = false;
686   bool isBetaOS = false;
687   bool isNSO2orNSO3orNCN = false;
688   const ROMol &mol = atom->getOwningMol();
689   RingInfo *ringInfo = mol.getRingInfo();
690   ROMol::ADJ_ITER nbrIdx;
691   ROMol::ADJ_ITER endNbrs;
692   ROMol::ADJ_ITER nbr2Idx;
693   ROMol::ADJ_ITER end2Nbrs;
694   ROMol::ADJ_ITER nbr3Idx;
695   ROMol::ADJ_ITER end3Nbrs;
696   std::vector<const Atom *> alphaHet;
697   std::vector<const Atom *> betaHet;
698 
699   if (atom->getIsAromatic()) {
700     if (rmSize.isAtomInAromaticRingOfSize(atom, 5)) {
701       // 5-membered aromatic rings
702       // if ipso is carbon or nitrogen, find eventual alpha and beta heteroatoms
703       if ((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) {
704         // loop over alpha neighbors
705         boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
706         for (; nbrIdx != endNbrs; ++nbrIdx) {
707           const Atom *nbrAtom = mol[*nbrIdx];
708           // if the alpha neighbor is not in a 5-membered aromatic
709           // ring, skip to the next neighbor
710           if (!rmSize.isAtomInAromaticRingOfSize(nbrAtom, 5)) {
711             continue;
712           }
713           // if the alpha neighbor belongs to the same ring of ipso atom
714           // and it is either oxygen, sulfur, or non-N-oxide trivalent nitrogen,
715           // add it to the alpha atom vector
716           if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbrAtom) &&
717               ((nbrAtom->getAtomicNum() == 8) ||
718                (nbrAtom->getAtomicNum() == 16) ||
719                ((nbrAtom->getAtomicNum() == 7) &&
720                 (nbrAtom->getTotalDegree() == 3) &&
721                 (!isAtomNOxide(nbrAtom))))) {
722             alphaHet.push_back(nbrAtom);
723           }
724           // loop over beta neighbors
725           boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
726           for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
727             const Atom *nbr2Atom = mol[*nbr2Idx];
728             // if we have gone back to the ipso atom, move on
729             if (nbr2Atom->getIdx() == atom->getIdx()) {
730               continue;
731             }
732             // if the beta neighbor is not in a 5-membered aromatic
733             // ring, skip to the next neighbor
734             if (!rmSize.isAtomInAromaticRingOfSize(nbr2Atom, 5)) {
735               continue;
736             }
737             // if the beta neighbor belongs to the same ring of ipso atom
738             // and it is either oxygen, sulfur, or non-N-oxide trivalent
739             // nitrogen,
740             // add it to the beta atom vector
741             if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbr2Atom) &&
742                 ((nbr2Atom->getAtomicNum() == 8) ||
743                  (nbr2Atom->getAtomicNum() == 16) ||
744                  ((nbr2Atom->getAtomicNum() == 7) &&
745                   (nbr2Atom->getTotalDegree() == 3) &&
746                   (!isAtomNOxide(nbr2Atom))))) {
747               betaHet.push_back(nbr2Atom);
748             }
749           }
750         }
751         isAlphaOS = false;
752         for (i = 0; (!isAlphaOS) && (i < alphaHet.size()); ++i) {
753           isAlphaOS = ((alphaHet[i]->getAtomicNum() == 8) ||
754                        (alphaHet[i]->getAtomicNum() == 16));
755         }
756         isBetaOS = false;
757         for (i = 0; (!isBetaOS) && (i < betaHet.size()); ++i) {
758           isBetaOS = ((betaHet[i]->getAtomicNum() == 8) ||
759                       (betaHet[i]->getAtomicNum() == 16));
760         }
761         if (alphaHet.size() && betaHet.size()) {
762           // do alpha and beta heteroatoms belong to the same ring?
763           for (i = 0; (!alphaOrBetaInSameRing) && (i < alphaHet.size()); ++i) {
764             for (j = 0; (!alphaOrBetaInSameRing) && (j < betaHet.size()); ++j) {
765               alphaOrBetaInSameRing = rmSize.areAtomsInSameRingOfSize(
766                   5, 2, alphaHet[i], betaHet[j]);
767             }
768           }
769         }
770       }
771 
772       switch (atom->getAtomicNum()) {
773         // Carbon
774         case 6:
775           // if there are no beta heteroatoms
776           if (!(betaHet.size())) {
777             // count how many 3-neighbor nitrogens we have
778             // to be CIM+, there must be at least two such nitrogens,
779             // one of which in a 5-membered aromatic ring and none
780             // in a 6-membered aromatic ring; additionally, one
781             // one of the hydrogens must be protonated
782             unsigned int nN = 0;
783             unsigned int nFormalCharge = 0;
784             unsigned int nInAromatic5Ring = 0;
785             unsigned int nInAromatic6Ring = 0;
786             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
787             for (; nbrIdx != endNbrs; ++nbrIdx) {
788               const Atom *nbrAtom = mol[*nbrIdx];
789               if ((nbrAtom->getAtomicNum() == 7) &&
790                   (nbrAtom->getTotalDegree() == 3)) {
791                 ++nN;
792                 if ((nbrAtom->getFormalCharge() > 0) &&
793                     (!isAtomNOxide(nbrAtom))) {
794                   ++nFormalCharge;
795                 }
796                 if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 5)) {
797                   ++nInAromatic5Ring;
798                 }
799                 if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 6)) {
800                   ++nInAromatic6Ring;
801                 }
802               }
803             }
804             if ((((nN == 2) && nInAromatic5Ring) ||
805                  ((nN == 3) && (nInAromatic5Ring == 2))) &&
806                 nFormalCharge && (!nInAromatic6Ring)) {
807               // CIM+
808               // Aromatic carbon between N's in imidazolium
809               atomType = 80;
810               break;
811             }
812           }
813           // if there are neither alpha nor beta heteroatoms
814           // or if there are both, but they belong to different rings
815           if (alphaHet.size() == betaHet.size()) {
816             bool surroundedByBenzeneC = true;
817             bool surroundedByArom = true;
818             // loop over neighbors
819             // are all neighbors aromatic?
820             // are all neighbors benzene carbons?
821             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
822             for (; nbrIdx != endNbrs; ++nbrIdx) {
823               const Atom *nbrAtom = mol[*nbrIdx];
824 
825               if ((nbrAtom->getAtomicNum() != 6) ||
826                   (!(ringInfo->isAtomInRingOfSize(nbrAtom->getIdx(), 6)))) {
827                 surroundedByBenzeneC = false;
828               }
829               if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbrAtom) &&
830                   (!(nbrAtom->getIsAromatic()))) {
831                 surroundedByArom = false;
832               }
833             }
834             // if there are no alpha and beta heteroatoms and
835             // all neighbors are aromatic but not all of them
836             // benzene carbons, or if there are alpha and beta
837             // atoms but they belong to different rings, or if
838             // there are alpha and beta heteroatoms but no alpha
839             // oxygen or sulfur, then it's C5
840             if (((!(alphaHet.size())) && (!(betaHet.size())) &&
841                  (!surroundedByBenzeneC) && surroundedByArom) ||
842                 (alphaHet.size() && betaHet.size() &&
843                  ((!alphaOrBetaInSameRing) || ((!isAlphaOS) && (!isBetaOS))))) {
844               // C5
845               // General carbon in 5-membered heteroaromatic ring
846               atomType = 78;
847               break;
848             }
849           }
850           if (alphaHet.size() && ((!(betaHet.size())) || isAlphaOS)) {
851             // C5A
852             // Aromatic 5-ring C, alpha to N:, O: or S:
853             atomType = 63;
854             break;
855           }
856           if (betaHet.size() && ((!(alphaHet.size())) || isBetaOS)) {
857             // C5B
858             // Aromatic 5-ring C, alpha to N:, O: or S:
859             atomType = 64;
860             break;
861           }
862           break;
863 
864         // Nitrogen
865         case 7:
866           if (isAtomNOxide(atom)) {
867             // N5AX
868             // N-oxide nitrogen in 5-ring alpha position
869             // N5BX
870             // N-oxide nitrogen in 5-ring beta position
871             // N5OX
872             // N-oxide nitrogen in other 5-ring position
873             atomType = 82;
874             break;
875           }
876           // if there are neither alpha nor beta heteroatoms
877           if ((!(alphaHet.size())) && (!(betaHet.size()))) {
878             // if it is nitrogen
879             // if valence is 3, it's pyrrole nitrogen
880             if (atom->getTotalDegree() == 3) {
881               // NPYL
882               // Aromatic 5-ring nitrogen with pi lone pair
883               atomType = 39;
884               break;
885             }
886             // otherwise it is anionic
887             // N5M
888             // Nitrogen in 5-ring aromatic anion
889             atomType = 76;
890             break;
891           }
892           if ((atom->getTotalDegree() == 3) &&
893               alphaHet.size() != betaHet.size()) {
894             // NIM+
895             // Aromatic nitrogen in imidazolium
896             // N5A+
897             // Positive nitrogen in 5-ring alpha position
898             // N5B+
899             // Positive nitrogen in 5-ring beta position
900             // N5+
901             // Positive nitrogen in other 5-ring position
902             atomType = 81;
903             break;
904           }
905           // if there are alpha heteroatoms and either no beta heteroatoms
906           // or no alpha oxygen/sulfur
907           if (alphaHet.size() && ((!(betaHet.size())) || isAlphaOS)) {
908             // N5A
909             // Aromatic 5-ring N, alpha to N:, O: or S:
910             atomType = 65;
911             break;
912           }
913           // if there are beta heteroatoms and either no alpha heteroatoms
914           // or no beta oxygen/sulfur
915           if (betaHet.size() && ((!(alphaHet.size())) || isBetaOS)) {
916             // N5B
917             // Aromatic 5-ring N, beta to N:, O: or S:
918             atomType = 66;
919             break;
920           }
921           // if there are both alpha and beta heteroatoms
922           if (alphaHet.size() && betaHet.size()) {
923             // N5
924             // General nitrogen in 5-memebered heteroaromatic ring
925             atomType = 79;
926             break;
927           }
928           break;
929 
930         // Oxygen
931         case 8:
932           // OFUR
933           // Aromatic 5-ring oxygen with pi lone pair
934           atomType = 59;
935           break;
936 
937         // Sulfur
938         case 16:
939           // STHI
940           // Aromatic 5-ring sulfur with pi lone pair
941           atomType = 44;
942           break;
943       }
944     }
945 
946     if (!atomType && (rmSize.isAtomInAromaticRingOfSize(atom, 6))) {
947       // 6-membered aromatic rings
948       switch (atom->getAtomicNum()) {
949         // Carbon
950         case 6:
951           // CB
952           // Aromatic carbon, e.g., in benzene
953           atomType = 37;
954           break;
955 
956         // Nitrogen
957         case 7:
958           if (isAtomNOxide(atom)) {
959             // NPOX
960             // Pyridinium N-oxide nitrogen
961             atomType = 69;
962             break;
963           }
964           if (atom->getTotalDegree() == 3) {
965             // NPD+
966             // Aromatic nitrogen in pyridinium
967             atomType = 58;
968             break;
969           }
970           // NPYD
971           // Aromatic nitrogen with sigma lone pair
972           atomType = 38;
973           break;
974       }
975     }
976   }
977 
978   if (!atomType) {
979     // Aliphatic heavy atom types
980 
981     switch (atom->getAtomicNum()) {
982       // Lithium
983       case 3:
984         if (atom->getDegree() == 0) {
985           // LI+
986           // Lithium cation
987           atomType = 92;
988           break;
989         }
990         break;
991 
992       // Carbon
993       case 6:
994         // 4 neighbors
995         if (atom->getTotalDegree() == 4) {
996           if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 3)) {
997             // CR3R
998             // Aliphatic carbon in 3-membered ring
999             atomType = 22;
1000             break;
1001           }
1002           if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 4)) {
1003             // CR4R
1004             // Aliphatic carbon in 4-membered ring
1005             atomType = 20;
1006             break;
1007           }
1008           // CR
1009           // Alkyl carbon
1010           atomType = 1;
1011           break;
1012         }
1013         // 3 neighbors
1014         if (atom->getTotalDegree() == 3) {
1015           unsigned int nN2 = 0;
1016           unsigned int nN3 = 0;
1017           unsigned int nO = 0;
1018           unsigned int nS = 0;
1019           unsigned int doubleBondedElement = 0;
1020           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1021           for (; nbrIdx != endNbrs; ++nbrIdx) {
1022             const Atom *nbrAtom = mol[*nbrIdx];
1023             // find if there is a double-bonded element
1024             if ((mol.getBondBetweenAtoms(nbrAtom->getIdx(), atom->getIdx()))
1025                     ->getBondType() == Bond::DOUBLE) {
1026               doubleBondedElement = nbrAtom->getAtomicNum();
1027             }
1028             // count how many terminal oxygen/sulfur atoms
1029             // are bonded to ipso
1030             if (nbrAtom->getTotalDegree() == 1) {
1031               if (nbrAtom->getAtomicNum() == 8) {
1032                 ++nO;
1033               } else if (nbrAtom->getAtomicNum() == 16) {
1034                 ++nS;
1035               }
1036             } else if (nbrAtom->getAtomicNum() == 7) {
1037               // count how many nitrogens with 3 neighbors
1038               // are bonded to ipso
1039               if (nbrAtom->getTotalDegree() == 3) {
1040                 ++nN3;
1041               }
1042               // count how many nitrogens with 2 neighbors
1043               // are double-bonded to ipso
1044               else if ((nbrAtom->getTotalDegree() == 2) &&
1045                        ((mol.getBondBetweenAtoms(nbrAtom->getIdx(),
1046                                                  atom->getIdx()))
1047                             ->getBondType() == Bond::DOUBLE)) {
1048                 ++nN2;
1049               }
1050             }
1051           }
1052           // if there are two or more nitrogens with 3 neighbors each,
1053           // and there are no nitrogens with two neighbors only,
1054           // and carbon is double-bonded to nitrogen
1055           if ((nN3 >= 2) && (!nN2) && (doubleBondedElement == 7)) {
1056             // CNN+
1057             // Carbon in +N=C-N: resonance structures
1058             // CGD+
1059             // Guanidinium carbon
1060             atomType = 57;
1061             break;
1062           }
1063           // if there are two terminal oxygen/sulfur atoms
1064           if ((nO == 2) || (nS == 2)) {
1065             // CO2M
1066             // Carbon in carboxylate anion
1067             // CS2M
1068             // Carbon in thiocarboxylate anion
1069             atomType = 41;
1070             break;
1071           }
1072           // if this carbon is in a 4-membered ring and
1073           // is double-bonded to another carbon
1074           if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 4) &&
1075               (doubleBondedElement == 6)) {
1076             // CR4E
1077             // Olefinic carbon in 4-membered ring
1078             atomType = 30;
1079             break;
1080           }
1081           // if this carbon is is double-bonded to nitrogen,
1082           // oxygen, phosphorus or sulfur
1083           if ((doubleBondedElement == 7) || (doubleBondedElement == 8) ||
1084               (doubleBondedElement == 15) || (doubleBondedElement == 16)) {
1085             // C=N
1086             // Imine-atomType carbon
1087             // CGD
1088             // Guanidine carbon
1089             // C=O
1090             // Generic carbonyl carbon
1091             // C=OR
1092             // Ketone or aldehyde carbonyl carbon
1093             // C=ON
1094             // Amide carbonyl carbon
1095             // COO
1096             // Carboxylic acid or ester carbonyl carbon
1097             // COON
1098             // Carbamate carbonyl carbon
1099             // COOO
1100             // Carbonic acid or ester carbonyl function
1101             // C=OS
1102             // Thioester carbonyl carbon, double bonded to O
1103             // C=P
1104             // Carbon doubly bonded to P
1105             // C=S
1106             // Thioester carbon, double bonded to S
1107             // C=SN
1108             // Thioamide carbon, double bonded to S
1109             // CSO2
1110             // Carbon in >C=SO2
1111             // CS=O
1112             // Sulfinyl carbon in >C=S=O
1113             // CSS
1114             // Thiocarboxylic acid or ester carbon
1115             atomType = 3;
1116             break;
1117           }
1118           // otherwise it must be generic sp2 carbon
1119           // C=C
1120           // Vinylic carbon
1121           // CSP2
1122           // Generic sp2 carbon
1123           atomType = 2;
1124           break;
1125         }
1126         // 2 neighbors
1127         if (atom->getTotalDegree() == 2) {
1128           // CSP
1129           // Acetylenic carbon
1130           // =C=
1131           // Allenic carbon
1132           atomType = 4;
1133           break;
1134         }
1135         // 1 neighbor
1136         if (atom->getTotalDegree() == 1) {
1137           // C%-
1138           // Isonitrile carbon
1139           atomType = 60;
1140           break;
1141         }
1142         break;
1143 
1144       // Nitrogen
1145       case 7:
1146         // if the neighbor is phosphorus or sulfur
1147         // count the number of terminal oxygens bonded
1148         // to that phosphorus or sulfur atom
1149         boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1150         for (; nbrIdx != endNbrs; ++nbrIdx) {
1151           const Atom *nbrAtom = mol[*nbrIdx];
1152           // count how many terminal oxygen atoms
1153           // are bonded to ipso
1154           if ((nbrAtom->getAtomicNum() == 8) &&
1155               (nbrAtom->getTotalDegree() == 1)) {
1156             ++nTermObondedToN;
1157           }
1158           if (((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 3) &&
1159               ((nbrAtom->getAtomicNum() == 15) ||
1160                (nbrAtom->getAtomicNum() == 16))) {
1161             unsigned int nObondedToSP = 0;
1162             boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1163             for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1164               const Atom *nbr2Atom = mol[*nbr2Idx];
1165               if ((nbr2Atom->getAtomicNum() == 8) &&
1166                   (nbr2Atom->getTotalDegree() == 1)) {
1167                 ++nObondedToSP;
1168               }
1169             }
1170             // if there are two or more oxygens, ipso is a sulfonamide nitrogen
1171             if (!isNSO2orNSO3orNCN) {
1172               isNSO2orNSO3orNCN = (nObondedToSP >= 2);
1173             }
1174           }
1175         }
1176         // 4 neighbors
1177         if (atom->getTotalDegree() == 4) {
1178           if (isAtomNOxide(atom)) {
1179             // N3OX
1180             // sp3-hybridized N-oxide nitrogen
1181             atomType = 68;
1182             break;
1183           }
1184           // NR+
1185           // Quaternary nitrogen
1186           atomType = 34;
1187           break;
1188         }
1189         // 3 neighbors
1190         if (atom->getTotalDegree() == 3) {
1191           // total bond order >= 4
1192           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 4) {
1193             bool doubleBondedCN = false;
1194             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1195             for (; nbrIdx != endNbrs; ++nbrIdx) {
1196               const Atom *nbrAtom = mol[*nbrIdx];
1197               // find if there is a double-bonded nitrogen,
1198               // or a carbon which is not bonded to other
1199               // nitrogen atoms with 3 neighbors
1200               if ((mol.getBondBetweenAtoms(nbrAtom->getIdx(), atom->getIdx()))
1201                       ->getBondType() == Bond::DOUBLE) {
1202                 doubleBondedCN = ((nbrAtom->getAtomicNum() == 7) ||
1203                                   (nbrAtom->getAtomicNum() == 6));
1204                 if (nbrAtom->getAtomicNum() == 6) {
1205                   boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1206                   for (; doubleBondedCN && (nbr2Idx != end2Nbrs); ++nbr2Idx) {
1207                     const Atom *nbr2Atom = mol[*nbr2Idx];
1208                     if (nbr2Atom->getIdx() == atom->getIdx()) {
1209                       continue;
1210                     }
1211                     doubleBondedCN = (!((nbr2Atom->getAtomicNum() == 7) &&
1212                                         (nbr2Atom->getTotalDegree() == 3)));
1213                   }
1214                 }
1215               }
1216             }
1217             // if there is a single terminal oxygen
1218             if (nTermObondedToN == 1) {
1219               // N2OX
1220               // sp2-hybridized N-oxide nitrogen
1221               atomType = 67;
1222               break;
1223             }
1224             // if there are two or more terminal oxygens
1225             if (nTermObondedToN >= 2) {
1226               // NO2
1227               // Nitrogen in nitro group
1228               // NO3
1229               // Nitrogen in nitrate group
1230               atomType = 45;
1231               break;
1232             }
1233             // if the carbon bonded to ipso is bonded to 1 nitrogen
1234             // with 3 neighbors, that nitrogen is ipso (>N+=C)
1235             // alternatively, if there is no carbon but ipso is
1236             // double bonded to nitrogen, we have >N+=N
1237             if (doubleBondedCN) {
1238               // N+=C
1239               // Iminium nitrogen
1240               // N+=N
1241               // Positively charged nitrogen doubly bonded to N
1242               atomType = 54;
1243               break;
1244             }
1245           }
1246           // total bond order >= 3
1247           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 3) {
1248             bool isNCOorNCS = false;
1249             bool isNCNplus = false;
1250             bool isNGDplus = false;
1251             bool isNNNorNNC = false;
1252             bool isNbrC = false;
1253             bool isNbrBenzeneC = false;
1254             unsigned int elementDoubleBondedToC = 0;
1255             unsigned int elementTripleBondedToC = 0;
1256             unsigned int nN2bondedToC = 0;
1257             unsigned int nN3bondedToC = 0;
1258             unsigned int nObondedToC = 0;
1259             unsigned int nSbondedToC = 0;
1260             // loop over neighbors
1261             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1262             for (; nbrIdx != endNbrs; ++nbrIdx) {
1263               const Atom *nbrAtom = mol[*nbrIdx];
1264               // if the neighbor is carbon
1265               if (nbrAtom->getAtomicNum() == 6) {
1266                 isNbrC = true;
1267                 // check if we have a benzene carbon close to ipso
1268                 if (nbrAtom->getIsAromatic() &&
1269                     ringInfo->isAtomInRingOfSize(nbrAtom->getIdx(), 6)) {
1270                   isNbrBenzeneC = true;
1271                 }
1272                 nN2bondedToC = 0;
1273                 nN3bondedToC = 0;
1274                 nObondedToC = 0;
1275                 nSbondedToC = 0;
1276                 unsigned int nFormalCharge = 0;
1277                 unsigned int nInAromatic6Ring = 0;
1278                 // loop over carbon neighbors
1279                 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1280                 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1281                   const Atom *nbr2Atom = mol[*nbr2Idx];
1282                   const Bond *bond = mol.getBondBetweenAtoms(
1283                       nbrAtom->getIdx(), nbr2Atom->getIdx());
1284                   // check if we have oxygen or sulfur double-bonded to this
1285                   // carbon
1286                   if ((bond->getBondType() == Bond::DOUBLE) &&
1287                       ((nbr2Atom->getAtomicNum() == 8) ||
1288                        (nbr2Atom->getAtomicNum() == 16))) {
1289                     isNCOorNCS = true;
1290                   }
1291                   // check if there is an atom double-bonded to this carbon,
1292                   // and if so find which element; if it is carbon or
1293                   // nitrogen (provided that the latter does not belong to
1294                   // multiple rings), also an aromatic bond is acceptable
1295                   if ((bond->getBondType() == Bond::DOUBLE) ||
1296                       ((bond->getBondType() == Bond::AROMATIC) &&
1297                        ((nbr2Atom->getAtomicNum() == 6) ||
1298                         ((nbr2Atom->getAtomicNum() == 7) &&
1299                          (queryIsAtomInNRings(nbr2Atom) == 1))))) {
1300                     elementDoubleBondedToC = nbr2Atom->getAtomicNum();
1301                   }
1302                   // check there is an atom triple-bonded to this carbon,
1303                   // and if so find which element
1304                   if (bond->getBondType() == Bond::TRIPLE) {
1305                     elementTripleBondedToC = nbr2Atom->getAtomicNum();
1306                   }
1307                   // if this carbon is bonded to a nitrogen with 3 neighbors
1308                   if ((nbr2Atom->getAtomicNum() == 7) &&
1309                       (nbr2Atom->getTotalDegree() == 3)) {
1310                     // count the number of +1 formal charges that we have
1311                     if (nbr2Atom->getFormalCharge() == 1) {
1312                       ++nFormalCharge;
1313                     }
1314                     if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 6)) {
1315                       ++nInAromatic6Ring;
1316                     }
1317                     // count how many oxygens are bonded to this nitrogen
1318                     // with 3 neighbors
1319                     unsigned int nObondedToN3 = 0;
1320                     boost::tie(nbr3Idx, end3Nbrs) =
1321                         mol.getAtomNeighbors(nbr2Atom);
1322                     for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
1323                       const Atom *nbr3Atom = mol[*nbr3Idx];
1324                       if (nbr3Atom->getAtomicNum() == 8) {
1325                         ++nObondedToN3;
1326                       }
1327                     }
1328                     // if there are less than 2 oxygens, this is neither
1329                     // a nitro group nor a nitrate, so increment the counter
1330                     // of nitrogens with 3 neighbors bonded to this carbon
1331                     // (C-N<)
1332                     if (nObondedToN3 < 2) {
1333                       ++nN3bondedToC;
1334                     }
1335                   }
1336                   // if this carbon is bonded to a nitrogen with 2 neighbors
1337                   // via a double or aromatic bond, increment the counter
1338                   // of nitrogens with 2 neighbors bonded to this carbon
1339                   // via a double or aromatic bond (C=N-)
1340                   if ((nbr2Atom->getAtomicNum() == 7) &&
1341                       (nbr2Atom->getTotalDegree() == 2) &&
1342                       ((bond->getBondType() == Bond::DOUBLE) ||
1343                        (bond->getBondType() == Bond::AROMATIC))) {
1344                     ++nN2bondedToC;
1345                   }
1346                   // if this carbon is bonded to an aromatic atom
1347                   if (nbr2Atom->getIsAromatic()) {
1348                     // if it is oxygen, increment the counter of
1349                     // aromatic oxygen atoms bonded to this carbon
1350                     if (nbr2Atom->getAtomicNum() == 8) {
1351                       ++nObondedToC;
1352                     }
1353                     // if it is sulfur, increment the counter of
1354                     // aromatic sulfur atoms bonded to this carbon
1355                     if (nbr2Atom->getAtomicNum() == 16) {
1356                       ++nSbondedToC;
1357                     }
1358                   }
1359                 }
1360                 // if nitrogen is bonded to this carbon via a double or aromatic
1361                 // bond
1362                 if (elementDoubleBondedToC == 7) {
1363                   // if 2 nitrogens with 3 neighbors and no nitrogens with 2
1364                   // neighbors
1365                   // are bonded to this carbon, and we have a formal charge,
1366                   // but not a 6-membered aromatic ring, and the carbon atom
1367                   // is not sp3, then this is an amidinium nitrogen (>N-C=N+<)
1368                   if ((nN3bondedToC == 2) && (!nN2bondedToC) && nFormalCharge &&
1369                       (!nInAromatic6Ring) && (nbrAtom->getTotalDegree() < 4)) {
1370                     isNCNplus = true;
1371                   }
1372                   // if 3 nitrogens with 3 neighbors are bonded
1373                   // to this carbon, then this is a guanidinium nitrogen
1374                   // ((>N-)2-C=N+<)
1375                   if (nN3bondedToC == 3) {
1376                     isNGDplus = true;
1377                   }
1378                 }
1379               }
1380               // if the neighbor is nitrogen
1381               if (nbrAtom->getAtomicNum() == 7) {
1382                 unsigned int nNbondedToN = 0;
1383                 unsigned int nObondedToN = 0;
1384                 unsigned int nSbondedToN = 0;
1385                 // loop over nitrogen neighbors
1386                 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1387                 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1388                   const Atom *nbr2Atom = mol[*nbr2Idx];
1389                   const Bond *bond = mol.getBondBetweenAtoms(
1390                       nbrAtom->getIdx(), nbr2Atom->getIdx());
1391                   // if the bond to nitrogen is double
1392                   if (bond->getBondType() == Bond::DOUBLE) {
1393                     // if the neighbor is carbon (N=N-C)
1394                     if (nbr2Atom->getAtomicNum() == 6) {
1395                       // loop over carbon neighbors
1396                       boost::tie(nbr3Idx, end3Nbrs) =
1397                           mol.getAtomNeighbors(nbr2Atom);
1398                       for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
1399                         const Atom *nbr3Atom = mol[*nbr3Idx];
1400                         // if the nitrogen neighbor to ipso is met, move on
1401                         if (nbr3Atom->getIdx() == nbrAtom->getIdx()) {
1402                           continue;
1403                         }
1404                         // count how many nitrogen, oxygen, sulfur atoms
1405                         // are bonded to this carbon
1406                         switch (nbr3Atom->getAtomicNum()) {
1407                           case 7:
1408                             ++nNbondedToN;
1409                             break;
1410                           case 8:
1411                             ++nObondedToN;
1412                             break;
1413                           case 16:
1414                             ++nSbondedToN;
1415                             break;
1416                         }
1417                       }
1418                       // if there are no more nitrogens, no oxygen, no sulfur
1419                       // bonded
1420                       // to carbon, and the latter is not a benzene carbon
1421                       // then it is N=N-C
1422                       if ((!nObondedToN) && (!nSbondedToN) && (!nNbondedToN) &&
1423                           (!isNbrBenzeneC)) {
1424                         isNNNorNNC = true;
1425                       }
1426                     }
1427                     // if the neighbor is nitrogen (N=N-N) and ipso is not
1428                     // bonded
1429                     // to benzene carbon then it is N=N-N
1430                     if ((nbr2Atom->getAtomicNum() == 7) && (!isNbrBenzeneC)) {
1431                       isNNNorNNC = true;
1432                     }
1433                   }
1434                 }
1435               }
1436             }
1437             // if ipso nitrogen is bonded to carbon
1438             if (isNbrC) {
1439               // if neighbor carbon is triple-bonded to N, then ipso is N-C%N
1440               if (elementTripleBondedToC == 7) {
1441                 isNSO2orNSO3orNCN = true;
1442               }
1443               // if neighbor carbon is amidinium
1444               if (isNCNplus) {
1445                 // NCN+
1446                 // Either nitrogen in N+=C-N
1447                 atomType = 55;
1448                 break;
1449               }
1450               // if neighbor carbon is guanidinium
1451               if (isNGDplus) {
1452                 // NGD+
1453                 // Guanidinium nitrogen
1454                 atomType = 56;
1455                 break;
1456               }
1457               // if neighbor carbon is not bonded to oxygen or sulfur
1458               // and is not cyano, there two possibilities:
1459               // 1) ipso nitrogen is bonded to benzene carbon while no oxygen
1460               //    or sulfur are bonded to the latter: ipso is aniline nitrogen
1461               // 2) ipso nitrogen is bonded to a carbon which is double-bonded
1462               // to
1463               //    carbon, nitrogen or phosphorus, or triple-bonded to carbon
1464               if (((!isNCOorNCS) && (!isNSO2orNSO3orNCN)) &&
1465                   (((!nObondedToC) && (!nSbondedToC) && isNbrBenzeneC) ||
1466                    ((elementDoubleBondedToC == 6) ||
1467                     (elementDoubleBondedToC == 7) ||
1468                     (elementDoubleBondedToC == 15) ||
1469                     (elementTripleBondedToC == 6)))) {
1470                 // NC=C
1471                 // Enamine or aniline nitrogen, deloc. lp
1472                 // NC=N
1473                 // Nitrogen in N-C=N with deloc. lp
1474                 // NC=P
1475                 // Nitrogen in N-C=P with deloc. lp
1476                 // NC%C
1477                 // Nitrogen attached to C-C triple bond
1478                 atomType = 40;
1479                 break;
1480               }
1481             }
1482             // if ipso is not sulfonamide while it is either amide/thioamide
1483             // or >N-N=N-/>N-N=C<
1484             if ((!isNSO2orNSO3orNCN) && (isNCOorNCS || isNNNorNNC)) {
1485               // NC=O
1486               // Amide nitrogen
1487               // NC=S
1488               // Thioamide nitrogen
1489               // NN=C
1490               // Nitrogen in N-N=C moiety with deloc. lp
1491               // NN=N
1492               // Nitrogen in N-N=N moiety with deloc. lp
1493               atomType = 10;
1494               break;
1495             }
1496           }
1497         }
1498         // 2 neighbors
1499         if (atom->getTotalDegree() == 2) {
1500           // total bond order = 4
1501           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 4) {
1502             // loop over neighbors
1503             bool isIsonitrile = false;
1504             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1505             for (; (!isIsonitrile) && (nbrIdx != endNbrs); ++nbrIdx) {
1506               const Atom *nbrAtom = mol[*nbrIdx];
1507               // if neighbor is triple-bonded
1508               isIsonitrile =
1509                   ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1510                        ->getBondType() == Bond::TRIPLE);
1511             }
1512             if (isIsonitrile) {
1513               // NR%
1514               // Isonitrile nitrogen
1515               atomType = 61;
1516               break;
1517             }
1518             // =N=
1519             // Central nitrogen in C=N=N or N=N=N
1520             atomType = 53;
1521             break;
1522           }
1523           // total bond order = 3
1524           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 3) {
1525             // loop over neighbors
1526             bool isNitroso = false;
1527             bool isImineOrAzo = false;
1528             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1529             for (; nbrIdx != endNbrs; ++nbrIdx) {
1530               const Atom *nbrAtom = mol[*nbrIdx];
1531               // if the neighbor is double bonded (-N=)
1532               if ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1533                       ->getBondType() == Bond::DOUBLE) {
1534                 // if it is terminal oxygen (-N=O)
1535                 isNitroso =
1536                     ((nbrAtom->getAtomicNum() == 8) && (nTermObondedToN == 1));
1537                 // if it is carbon or nitrogen (-N=N-, -N=C<),
1538                 // ipso is imine or azo
1539                 isImineOrAzo = ((nbrAtom->getAtomicNum() == 6) ||
1540                                 (nbrAtom->getAtomicNum() == 7));
1541               }
1542             }
1543             if (isNitroso && (!isImineOrAzo)) {
1544               // N=O
1545               // Nitrogen in nitroso group
1546               atomType = 46;
1547               break;
1548             }
1549             if (isImineOrAzo) {
1550               // N=C
1551               // Imine nitrogen
1552               // N=N
1553               // Azo-group nitrogen
1554               atomType = 9;
1555               break;
1556             }
1557           }
1558           // total bond order >= 2
1559           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 2) {
1560             // loop over neighbors
1561             bool isNSO = false;
1562             boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1563             for (; (!isNSO) && (nbrIdx != endNbrs); ++nbrIdx) {
1564               const Atom *nbrAtom = mol[*nbrIdx];
1565               // if the neighbor is sulfur bonded to a single terminal oxygen
1566               if (nbrAtom->getAtomicNum() == 16) {
1567                 // loop over neighbors and count how many
1568                 // terminal oxygens are bonded to sulfur
1569                 unsigned int nTermObondedToS = 0;
1570                 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1571                 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1572                   const Atom *nbr2Atom = mol[*nbr2Idx];
1573                   if ((nbr2Atom->getAtomicNum() == 8) &&
1574                       (nbr2Atom->getTotalDegree() == 1)) {
1575                     ++nTermObondedToS;
1576                   }
1577                 }
1578                 isNSO = (nTermObondedToS == 1);
1579               }
1580             }
1581             if (isNSO) {
1582               // NSO
1583               // Divalent nitrogen replacing monovalent O in SO2 group
1584               atomType = 48;
1585               break;
1586             }
1587             if (!isNSO2orNSO3orNCN) {
1588               // If it is not sulfonamide deprotonated nitrogen,
1589               // it is anionic nitrogen (>N::-)
1590               // NM
1591               // Anionic divalent nitrogen
1592               atomType = 62;
1593               break;
1594             }
1595           }
1596         }
1597         // if it is sulfonamide (3 neighbors) or cyano (2 neighbors)
1598         if (isNSO2orNSO3orNCN) {
1599           // NSO2
1600           // Sulfonamide nitrogen
1601           // NSO3
1602           // Sulfonamide nitrogen
1603           // NC%N
1604           // Nitrogen attached to cyano group
1605           atomType = 43;
1606           break;
1607         }
1608         // 1 neighbor
1609         if (atom->getTotalDegree() == 1) {
1610           bool isNSP = false;
1611           bool isNAZT = false;
1612           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1613           for (; (!isNSP) && (!isNAZT) && (nbrIdx != endNbrs); ++nbrIdx) {
1614             const Atom *nbrAtom = mol[*nbrIdx];
1615             // if ipso is triple-bonded to its only neighbor
1616             isNSP =
1617                 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1618                      ->getBondType() == Bond::TRIPLE);
1619             // ipso is bonded to a nitrogen atom with 2 neighbors
1620             if ((nbrAtom->getAtomicNum() == 7) &&
1621                 (nbrAtom->getTotalDegree() == 2)) {
1622               // loop over nitrogen neighbors
1623               boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1624               for (; (!isNAZT) && (nbr2Idx != end2Nbrs); ++nbr2Idx) {
1625                 const Atom *nbr2Atom = mol[*nbr2Idx];
1626                 // if another nitrogen with 2 neighbors, or a carbon
1627                 // with 3 neighbors is found, ipso is NAZT
1628                 isNAZT = (((nbr2Atom->getAtomicNum() == 7) &&
1629                            (nbr2Atom->getTotalDegree() == 2)) ||
1630                           ((nbr2Atom->getAtomicNum() == 6) &&
1631                            (nbr2Atom->getTotalDegree() == 3)));
1632               }
1633             }
1634           }
1635           if (isNSP) {
1636             // NSP
1637             // Triply bonded nitrogen
1638             atomType = 42;
1639             break;
1640           }
1641           if (isNAZT) {
1642             // NAZT
1643             // Terminal nitrogen in azido or diazo group
1644             atomType = 47;
1645             break;
1646           }
1647         }
1648         // if nothing else was found
1649         // NR
1650         // Amine nitrogen
1651         atomType = 8;
1652         break;
1653 
1654       // Oxygen
1655       case 8:
1656         // 3 neighbors
1657         if (atom->getTotalDegree() == 3) {
1658           // O+
1659           // Oxonium oxygen
1660           atomType = 49;
1661           break;
1662         }
1663         // 2 neighbors
1664         if (atom->getTotalDegree() == 2) {
1665           if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 3) {
1666             // O=+
1667             // Oxenium oxygen
1668             atomType = 51;
1669             break;
1670           }
1671           // count how many hydrogens are bound to ipso
1672           unsigned int nHbondedToO = 0;
1673           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1674           for (; nbrIdx != endNbrs; ++nbrIdx) {
1675             const Atom *nbrAtom = mol[*nbrIdx];
1676             if (nbrAtom->getAtomicNum() == 1) {
1677               ++nHbondedToO;
1678             }
1679           }
1680           if ((nHbondedToO + atom->getNumImplicitHs()) == 2) {
1681             // OH2
1682             // Oxygen in water
1683             atomType = 70;
1684             break;
1685           }
1686           // otherwise, ipso must be one of the following
1687           // OC=O
1688           // Carboxylic acid or ester oxygen
1689           // OC=C
1690           // Enolic or phenolic oxygen
1691           // OC=N
1692           // Oxygen in -O-C=N- moiety
1693           // OC=S
1694           // Divalent oxygen in thioacid or ester
1695           // ONO2
1696           // Divalent nitrate "ether" oxygen
1697           // ON=O
1698           // Divalent nitrate "ether" oxygen
1699           // OSO3
1700           // Divalent oxygen in sulfate group
1701           // OSO2
1702           // Divalent oxygen in sulfite group
1703           // OSO
1704           // One of two divalent oxygens attached to sulfur
1705           // OS=O
1706           // Divalent oxygen in R(RO)S=O
1707           // -OS
1708           // Other divalent oxygen attached to sulfur
1709           // OPO3
1710           // Divalent oxygen in phosphate group
1711           // OPO2
1712           // Divalent oxygen in phosphite group
1713           // OPO
1714           // Divalent oxygen, one of two oxygens attached to P
1715           // -OP
1716           // Other divalent oxygen attached to phosphorus
1717           atomType = 6;
1718           break;
1719         }
1720         // 1 neighbor
1721         if (atom->getDegree() <= 1) {
1722           unsigned int nNbondedToCorNorS = 0;
1723           unsigned int nObondedToCorNorS = 0;
1724           unsigned int nSbondedToCorNorS = 0;
1725           bool isOxideOBondedToH =
1726               atom->getNumExplicitHs() + atom->getNumImplicitHs() > 0;
1727           bool isCarboxylateO = false;
1728           bool isCarbonylO = false;
1729           bool isOxideOBondedToC = false;
1730           bool isNitrosoO = false;
1731           bool isOxideOBondedToN = false;
1732           bool isNOxideO = false;
1733           bool isNitroO = false;
1734           bool isThioSulfinateO = false;
1735           bool isSulfateO = false;
1736           bool isSulfoxideO = false;
1737           bool isPhosphateOrPerchlorateO = false;
1738           // loop over neighbors
1739           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1740           for (; (nbrIdx != endNbrs) && (!isOxideOBondedToC) &&
1741                  (!isOxideOBondedToN) && (!isOxideOBondedToH) &&
1742                  (!isCarboxylateO) && (!isNitroO) && (!isNOxideO) &&
1743                  (!isThioSulfinateO) && (!isSulfateO) &&
1744                  (!isPhosphateOrPerchlorateO) && (!isCarbonylO) &&
1745                  (!isNitrosoO) && (!isSulfoxideO);
1746                ++nbrIdx) {
1747             const Atom *nbrAtom = mol[*nbrIdx];
1748             const Bond *bond =
1749                 mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx());
1750             // if the neighbor is carbon, nitrogen or sulfur
1751             if ((nbrAtom->getAtomicNum() == 6) ||
1752                 (nbrAtom->getAtomicNum() == 7) ||
1753                 (nbrAtom->getAtomicNum() == 16)) {
1754               // count how many terminal oxygen/sulfur atoms
1755               // or secondary nitrogens
1756               // are bonded to the carbon or nitrogen neighbor of ipso
1757               boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1758               for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1759                 const Atom *nbr2Atom = mol[*nbr2Idx];
1760                 if ((nbr2Atom->getAtomicNum() == 7) &&
1761                     (nbr2Atom->getTotalDegree() == 2)) {
1762                   ++nNbondedToCorNorS;
1763                 }
1764                 if ((nbr2Atom->getAtomicNum() == 8) &&
1765                     (nbr2Atom->getTotalDegree() == 1)) {
1766                   ++nObondedToCorNorS;
1767                 }
1768                 if ((nbr2Atom->getAtomicNum() == 16) &&
1769                     (nbr2Atom->getTotalDegree() == 1)) {
1770                   ++nSbondedToCorNorS;
1771                 }
1772               }
1773             }
1774             // if ipso neighbor is hydrogen
1775             isOxideOBondedToH = (nbrAtom->getAtomicNum() == 1);
1776 
1777             // if ipso neighbor is carbon
1778             if (nbrAtom->getAtomicNum() == 6) {
1779               // if carbon neighbor is bonded to 2 oxygens,
1780               // ipso is carboxylate oxygen
1781               isCarboxylateO = (nObondedToCorNorS == 2);
1782               // if ipso oxygen is bonded to carbon
1783               // via a double bond, ipso is carbonyl oxygen
1784               isCarbonylO = (bond->getBondType() == Bond::DOUBLE);
1785               // if ipso oxygen is bonded to carbon via a
1786               // single bond, and there are no other bonded oxygens,
1787               // ipso is oxide oxygen
1788               isOxideOBondedToC = ((bond->getBondType() == Bond::SINGLE) &&
1789                                    (nObondedToCorNorS == 1));
1790             }
1791 
1792             // if ipso neighbor is nitrogen
1793             if (nbrAtom->getAtomicNum() == 7) {
1794               // if ipso oxygen is bonded to nitrogen
1795               // via a double bond, ipso is nitroso oxygen
1796               isNitrosoO = (bond->getBondType() == Bond::DOUBLE);
1797               // if ipso oxygen is bonded to nitrogen via a single bond
1798               // and there are no other bonded oxygens
1799               if ((bond->getBondType() == Bond::SINGLE) &&
1800                   (nObondedToCorNorS == 1)) {
1801                 // if nitrogen has 2 neighbors or, if the neighbors are 3,
1802                 // the total bond order on nitrogen is 3, ipso is oxide oxygen
1803                 isOxideOBondedToN = ((nbrAtom->getTotalDegree() == 2) ||
1804                                      ((nbrAtom->getExplicitValence() +
1805                                        nbrAtom->getNumImplicitHs()) == 3));
1806                 // if the total bond order on nitrogen is 4, ipso is N-oxide
1807                 // oxygen
1808                 isNOxideO = ((nbrAtom->getExplicitValence() +
1809                               nbrAtom->getNumImplicitHs()) == 4);
1810               }
1811               // if ipso oxygen is bonded to nitrogen which is bonded
1812               // to multiple oxygens, ipso is nitro/nitrate oxygen
1813               isNitroO = (nObondedToCorNorS >= 2);
1814             }
1815 
1816             // if ipso neighbor is sulfur
1817             if (nbrAtom->getAtomicNum() == 16) {
1818               // if ipso oxygen is bonded to sulfur and
1819               // the latter is bonded to another sulfur,
1820               // ipso is thiosulfinate oxygen
1821               isThioSulfinateO = (nSbondedToCorNorS == 1);
1822               // if ipso oxygen is bonded to sulfur via a single
1823               // bond or, if the bond is double, there are multiple
1824               // oxygen/nitrogen atoms bonded to that sulfur,
1825               // ipso is sulfate oxygen
1826               isSulfateO = ((bond->getBondType() == Bond::SINGLE) ||
1827                             ((bond->getBondType() == Bond::DOUBLE) &&
1828                              ((nObondedToCorNorS + nNbondedToCorNorS) > 1)));
1829               // if ipso oxygen is bonded to sulfur via a double
1830               // bond and the sum of oxygen/nitrogen atoms bonded
1831               // to that sulfur is 1, ipso is sulfoxide oxygen
1832               isSulfoxideO = ((bond->getBondType() == Bond::DOUBLE) &&
1833                               ((nObondedToCorNorS + nNbondedToCorNorS) == 1));
1834             }
1835 
1836             // if ipso neighbor is phosphorus or chlorine
1837             isPhosphateOrPerchlorateO = ((nbrAtom->getAtomicNum() == 15) ||
1838                                          (nbrAtom->getAtomicNum() == 17));
1839           }
1840           if (isOxideOBondedToC || isOxideOBondedToN || isOxideOBondedToH) {
1841             // OM
1842             // Oxide oxygen on sp3 carbon
1843             // OM2
1844             // Oxide oxygen on sp2 carbon
1845             // OM
1846             // Oxide oxygen on sp3 nitrogen (not in original MMFF.I Table III)
1847             // OM2
1848             // Oxide oxygen on sp2 nitrogen (not in original MMFF.I Table III)
1849             atomType = 35;
1850             break;
1851           }
1852           if (isCarboxylateO || isNitroO || isNOxideO || isThioSulfinateO ||
1853               isSulfateO || isPhosphateOrPerchlorateO) {
1854             // O2CM
1855             // Oxygen in carboxylate group
1856             // ONX
1857             // Oxygen in N-oxides
1858             // O2N
1859             // Oxygen in nitro group
1860             // O2NO
1861             // Nitro-group oxygen in nitrate
1862             // O3N
1863             // Nitrate anion oxygen
1864             // OSMS
1865             // Terminal oxygen in thiosulfinate anion
1866             // O-S
1867             // Single terminal O on tetracoordinate sulfur
1868             // O2S
1869             // One of 2 terminal O's on sulfur
1870             // O3S
1871             // One of 3 terminal O's on sulfur
1872             // O4S
1873             // Terminal O in sulfate anion
1874             // OP
1875             // Oxygen in phosphine oxide
1876             // O2P
1877             // One of 2 terminal O's on P
1878             // O3P
1879             // One of 3 terminal O's on P
1880             // O4P
1881             // One of 4 terminal O's on P
1882             // O4Cl
1883             // Oxygen in perchlorate anion
1884             atomType = 32;
1885             break;
1886           }
1887           if (isCarbonylO || isNitrosoO || isSulfoxideO) {
1888             // O=C
1889             // Generic carbonyl oxygen
1890             // O=CN
1891             // Carbonyl oxygen in amides
1892             // O=CR
1893             // Carbonyl oxygen in aldehydes and ketones
1894             // O=CO
1895             // Carbonyl oxygen in acids and esters
1896             // O=N
1897             // Nitroso oxygen
1898             // O=S
1899             // Doubly bonded sulfoxide oxygen
1900             atomType = 7;
1901             break;
1902           }
1903         }
1904         break;
1905 
1906       // Fluorine
1907       case 9:
1908         // 1 neighbor
1909         if (atom->getDegree() == 1) {
1910           // F
1911           // Fluorine
1912           atomType = 11;
1913           break;
1914         }
1915         if (atom->getDegree() == 0) {
1916           // F-
1917           // Fluoride anion
1918           atomType = 89;
1919           break;
1920         }
1921         break;
1922 
1923       // Sodium
1924       case 11:
1925         if (atom->getDegree() == 0) {
1926           // NA+
1927           // Sodium cation
1928           atomType = 93;
1929           break;
1930         }
1931         break;
1932 
1933       // Magnesium
1934       case 12:
1935         if (atom->getDegree() == 0) {
1936           // MG+2
1937           // Dipositive magnesium cation
1938           atomType = 99;
1939           break;
1940         }
1941         break;
1942 
1943       // Silicon
1944       case 14:
1945         // SI
1946         // Silicon
1947         atomType = 19;
1948         break;
1949 
1950       // Phosphorus
1951       case 15:
1952         if (atom->getTotalDegree() == 4) {
1953           // PO4
1954           // Phosphate group phosphorus
1955           // PO3
1956           // Phosphorus with 3 attached oxygens
1957           // PO2
1958           // Phosphorus with 2 attached oxygens
1959           // PO
1960           // Phosphine oxide phosphorus
1961           // PTET
1962           // General tetracoordinate phosphorus
1963           atomType = 25;
1964           break;
1965         }
1966         if (atom->getTotalDegree() == 3) {
1967           // P
1968           // Phosphorus in phosphines
1969           atomType = 26;
1970           break;
1971         }
1972         if (atom->getTotalDegree() == 2) {
1973           // -P=C
1974           // Phosphorus doubly bonded to C
1975           atomType = 75;
1976           break;
1977         }
1978         break;
1979 
1980       // Sulfur
1981       case 16:
1982         // 3  or 4 neighbors
1983         if ((atom->getTotalDegree() == 3) || (atom->getTotalDegree() == 4)) {
1984           unsigned int nOorNbondedToS = 0;
1985           unsigned int nSbondedToS = 0;
1986           bool isCDoubleBondedToS = false;
1987           // loop over neighbors
1988           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1989           for (; nbrIdx != endNbrs; ++nbrIdx) {
1990             const Atom *nbrAtom = mol[*nbrIdx];
1991             // check if ipso sulfur is double-bonded to carbon
1992             if ((nbrAtom->getAtomicNum() == 6) &&
1993                 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1994                      ->getBondType() == Bond::DOUBLE)) {
1995               isCDoubleBondedToS = true;
1996             }
1997             // if the neighbor is terminal oxygen/sulfur
1998             // or secondary nitrogen, increment the respective counter
1999             if (((nbrAtom->getDegree() == 1) &&
2000                  (nbrAtom->getAtomicNum() == 8)) ||
2001                 ((nbrAtom->getTotalDegree() == 2) &&
2002                  (nbrAtom->getAtomicNum() == 7))) {
2003               ++nOorNbondedToS;
2004             }
2005             if ((nbrAtom->getDegree() == 1) &&
2006                 (nbrAtom->getAtomicNum() == 16)) {
2007               ++nSbondedToS;
2008             }
2009           }
2010           // if ipso sulfur has 3 neighbors and is bonded to
2011           // two atoms of oxygen/nitrogen and double-bonded
2012           // to carbon, or if it has 4 neighbors
2013           if (((atom->getTotalDegree() == 3) && (nOorNbondedToS == 2) &&
2014                (isCDoubleBondedToS)) ||
2015               (atom->getTotalDegree() == 4)) {
2016             // =SO2
2017             // Sulfone sulfur, doubly bonded to carbon
2018             atomType = 18;
2019             break;
2020           }
2021           // if ipso sulfur is bonded to both oxygen/nitrogen and sulfur
2022           if ((nOorNbondedToS && nSbondedToS) ||
2023               ((nOorNbondedToS == 2) && (!isCDoubleBondedToS))) {
2024             // SSOM
2025             // Tricoordinate sulfur in anionic thiosulfinate group
2026             atomType = 73;
2027             break;
2028           }
2029           // otherwise ipso sulfur is double bonded to oxygen or nitrogen
2030           // S=O
2031           // Sulfoxide sulfur
2032           // >S=N
2033           // Tricoordinate sulfur doubly bonded to N
2034           atomType = 17;
2035           break;
2036         }
2037         // 2 neighbors
2038         if (atom->getTotalDegree() == 2) {
2039           // loop over neighbors
2040           bool isODoubleBondedToS = false;
2041           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2042           for (; nbrIdx != endNbrs; ++nbrIdx) {
2043             const Atom *nbrAtom = mol[*nbrIdx];
2044             // check if ipso sulfur is double-bonded to oxygen
2045             if ((nbrAtom->getAtomicNum() == 8) &&
2046                 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
2047                      ->getBondType() == Bond::DOUBLE)) {
2048               isODoubleBondedToS = true;
2049             }
2050           }
2051           // if ipso sulfur is double-bonded to oxygen
2052           if (isODoubleBondedToS) {
2053             // =S=O
2054             // Sulfinyl sulfur, e.g., in C=S=O
2055             atomType = 74;
2056             break;
2057           }
2058           // otherwise it is a thiol, sulfide or disulfide
2059           // S
2060           // Thiol, sulfide, or disulfide sulfur
2061           atomType = 15;
2062           break;
2063         }
2064         // 1 neighbor
2065         if (atom->getDegree() == 1) {
2066           unsigned int nTermSbondedToNbr = 0;
2067           bool isCDoubleBondedToS = false;
2068           // find the neighbor and count how many terminal sulfur
2069           // atoms are there, including ipso
2070           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2071           for (; nbrIdx != endNbrs; ++nbrIdx) {
2072             const Atom *nbrAtom = mol[*nbrIdx];
2073             boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
2074             for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
2075               const Atom *nbr2Atom = mol[*nbr2Idx];
2076               if ((nbr2Atom->getAtomicNum() == 16) &&
2077                   (nbr2Atom->getTotalDegree() == 1)) {
2078                 ++nTermSbondedToNbr;
2079               }
2080             }
2081             // check if ipso sulfur is double-bonded to carbon
2082             if ((nbrAtom->getAtomicNum() == 6) &&
2083                 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
2084                      ->getBondType() == Bond::DOUBLE)) {
2085               isCDoubleBondedToS = true;
2086             }
2087           }
2088           // if ipso sulfur is double bonded to carbon and the latter
2089           // is not bonded to other terminal sulfur atoms, then it is
2090           // not a dithiocarboxylate, but a thioketone, etc.
2091           if (isCDoubleBondedToS && (nTermSbondedToNbr != 2)) {
2092             // S=C
2093             // Sulfur doubly bonded to carbon
2094             atomType = 16;
2095             break;
2096           }
2097           // otherwise ipso must be one of these
2098           // S-P
2099           // Terminal sulfur bonded to P
2100           // SM
2101           // Anionic terminal sulfur
2102           // SSMO
2103           // Terminal sulfur in thiosulfinate group
2104           atomType = 72;
2105           break;
2106         }
2107         break;
2108 
2109       // Chlorine
2110       case 17:
2111         // 4 neighbors
2112         if (atom->getTotalDegree() == 4) {
2113           // loop over neighbors and count the number
2114           // of bonded oxygens
2115           unsigned int nObondedToCl = 0;
2116           boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2117           for (; nbrIdx != endNbrs; ++nbrIdx) {
2118             const Atom *nbrAtom = mol[*nbrIdx];
2119             if (nbrAtom->getAtomicNum() == 8) {
2120               ++nObondedToCl;
2121             }
2122           }
2123           // if there are 4 oxygens
2124           if (nObondedToCl == 4) {
2125             // CLO4
2126             // Perchlorate anione chlorine
2127             atomType = 77;
2128             break;
2129           }
2130         }
2131         // 1 neighbor
2132         if (atom->getTotalDegree() == 1) {
2133           // Cl
2134           // Chlorine
2135           atomType = 12;
2136           break;
2137         }
2138         // 0 neighbors
2139         if (atom->getDegree() == 0) {
2140           // Cl-
2141           // Chloride anion
2142           atomType = 90;
2143           break;
2144         }
2145         break;
2146 
2147       // Potassium
2148       case 19:
2149         if (atom->getDegree() == 0) {
2150           // K+
2151           // Potassium cation
2152           atomType = 94;
2153           break;
2154         }
2155         break;
2156 
2157       // Calcium
2158       case 20:
2159         if (atom->getDegree() == 0) {
2160           // CA+2
2161           // Dipositive calcium cation
2162           atomType = 96;
2163           break;
2164         }
2165         break;
2166 
2167       // Iron
2168       case 26:
2169         if (atom->getDegree() == 0) {
2170           if (atom->getFormalCharge() == 2) {
2171             // FE+2
2172             // Dipositive iron cation
2173             atomType = 87;
2174             break;
2175           }
2176           if (atom->getFormalCharge() == 3) {
2177             // FE+3
2178             // Tripositive iron cation
2179             atomType = 88;
2180             break;
2181           }
2182         }
2183         break;
2184 
2185       // Copper
2186       case 29:
2187         if (atom->getDegree() == 0) {
2188           if (atom->getFormalCharge() == 1) {
2189             // CU+1
2190             // Monopositive copper cation
2191             atomType = 97;
2192             break;
2193           }
2194           if (atom->getFormalCharge() == 2) {
2195             // CU+2
2196             // Dipositive copper cation
2197             atomType = 98;
2198             break;
2199           }
2200         }
2201         break;
2202 
2203       // Zinc
2204       case 30:
2205         if (atom->getDegree() == 0) {
2206           // ZN+2
2207           // Dipositive zinc cation
2208           atomType = 95;
2209           break;
2210         }
2211         break;
2212 
2213       // Bromine
2214       case 35:
2215         if (atom->getDegree() == 1) {
2216           // Br
2217           // Bromine
2218           atomType = 13;
2219           break;
2220         }
2221         if (atom->getDegree() == 0) {
2222           // BR-
2223           // Bromide anion
2224           atomType = 91;
2225           break;
2226         }
2227         break;
2228 
2229       // Iodine
2230       case 53:
2231         if (atom->getDegree() == 1) {
2232           // I
2233           // Iodine
2234           atomType = 14;
2235           break;
2236         }
2237         break;
2238     }
2239   }
2240   d_MMFFAtomPropertiesPtrVect[atom->getIdx()]->mmffAtomType = atomType;
2241   if (!atomType) {
2242     d_valid = false;
2243   }
2244 }
2245 
2246 // finds the MMFF atomType for a hydrogen atom
setMMFFHydrogenType(const Atom * atom)2247 void MMFFMolProperties::setMMFFHydrogenType(const Atom *atom) {
2248   unsigned int atomType = 0;
2249   bool isHOCCorHOCN = false;
2250   bool isHOCO = false;
2251   bool isHOP = false;
2252   bool isHOS = false;
2253   const ROMol &mol = atom->getOwningMol();
2254   ROMol::ADJ_ITER nbrIdx;
2255   ROMol::ADJ_ITER endNbrs;
2256   ROMol::ADJ_ITER nbr2Idx;
2257   ROMol::ADJ_ITER end2Nbrs;
2258   ROMol::ADJ_ITER nbr3Idx;
2259   ROMol::ADJ_ITER end3Nbrs;
2260 
2261   // loop over neighbors (actually there can be only one)
2262   boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2263   for (; nbrIdx != endNbrs; ++nbrIdx) {
2264     const Atom *nbrAtom = mol[*nbrIdx];
2265     switch (nbrAtom->getAtomicNum()) {
2266       // carbon, silicon
2267       case 6:
2268       case 14:
2269         // HC
2270         // Hydrogen attached to carbon
2271         // HSI
2272         // Hydrogen attached to silicon
2273         atomType = 5;
2274         break;
2275 
2276       // nitrogen
2277       case 7:
2278         switch (this->getMMFFAtomType(nbrAtom->getIdx())) {
2279           case 8:
2280           // HNR
2281           // Generic hydrogen on sp3 nitrogen, e.g. in amines
2282           // H3N
2283           // Hydrogen in ammonia
2284           case 39:
2285           // HPYL
2286           // Hydrogen on nitrogen in pyrrole
2287           case 62:
2288           // HNR
2289           // Generic hydrogen on sp3 nitrogen, e.g. in amines
2290           case 67:
2291           case 68:
2292             // HNOX
2293             // Hydrogen on N in a N-oxide
2294             atomType = 23;
2295             break;
2296 
2297           case 34:
2298           // NR+
2299           // Quaternary nitrogen
2300           case 54:
2301           // N+=C
2302           // Iminium nitrogen
2303           // N+=N
2304           // Positively charged nitrogen doubly bonded to N
2305           case 55:
2306           // HNN+
2307           // Hydrogen on amidinium nitrogen
2308           case 56:
2309           // HGD+
2310           // Hydrogen on guanidinium nitrogen
2311           case 58:
2312           // NPD+
2313           // Aromatic nitrogen in pyridinium
2314           case 81:
2315             // HIM+
2316             // Hydrogen on imidazolium nitrogen
2317             atomType = 36;
2318             break;
2319 
2320           case 9:
2321             // HN=N
2322             // Hydrogen on azo nitrogen
2323             // HN=C
2324             // Hydrogen on imine nitrogen
2325             atomType = 27;
2326             break;
2327 
2328           default:
2329             // HNCC
2330             // Hydrogen on enamine nitrogen
2331             // HNCN
2332             // Hydrogen in H-N-C=N moiety
2333             // HNCO
2334             // Hydrogen on amide nitrogen
2335             // HNCS
2336             // Hydrogen on thioamide nitrogen
2337             // HNNC
2338             // Hydrogen in H-N-N=C moiety
2339             // HNNN
2340             // Hydrogen in H-N-N=N moiety
2341             // HNSO
2342             // Hydrogen on NSO, NSO2, or NSO3 nitrogen
2343             // HNC%
2344             // Hydrogen on N triply bonded to C
2345             // HSP2
2346             // Generic hydrogen on sp2 nitrogen
2347             atomType = 28;
2348             break;
2349         }
2350         break;
2351 
2352       // oxygen
2353       case 8:
2354         switch (this->getMMFFAtomType(nbrAtom->getIdx())) {
2355           case 49:
2356             // HO+
2357             // Hydrogen on oxonium oxygen
2358             atomType = 50;
2359             break;
2360 
2361           case 51:
2362             // HO=+
2363             // Hydrogen on oxenium oxygen
2364             atomType = 52;
2365             break;
2366 
2367           case 70:
2368             // HOH
2369             // Hydroxyl hydrogen in water
2370             atomType = 31;
2371             break;
2372 
2373           case 6:
2374             // for hydrogen bonded to atomType 6 oxygen we need to distinguish
2375             // among acidic hydrogens belonging to carboxylic/phospho acids,
2376             // enolic/phenolic/hydroxamic hydrogens and hydrogens whose oxygen
2377             // partner is bonded to sulfur. If none of these is found
2378             // it is either an alcohol or a generic hydroxyl hydrogen
2379             // loop over oxygen neighbors
2380             boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
2381             for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
2382               const Atom *nbr2Atom = mol[*nbr2Idx];
2383               // if the neighbor of oxygen is carbon, loop over the carbon
2384               // neighbors
2385               if (nbr2Atom->getAtomicNum() == 6) {
2386                 boost::tie(nbr3Idx, end3Nbrs) = mol.getAtomNeighbors(nbr2Atom);
2387                 for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
2388                   const Atom *nbr3Atom = mol[*nbr3Idx];
2389                   const Bond *bond = mol.getBondBetweenAtoms(
2390                       nbr2Atom->getIdx(), nbr3Atom->getIdx());
2391                   // if the starting oxygen is met, move on
2392                   if (nbr3Atom->getIdx() == nbrAtom->getIdx()) {
2393                     continue;
2394                   }
2395                   // if the carbon neighbor is another carbon or nitrogen
2396                   // bonded via a double or aromatic bond, ipso is HOCC/HOCN
2397                   if (((nbr3Atom->getAtomicNum() == 6) ||
2398                        (nbr3Atom->getAtomicNum() == 7)) &&
2399                       ((bond->getBondType() == Bond::DOUBLE) ||
2400                        (bond->getBondType() == Bond::AROMATIC))) {
2401                     isHOCCorHOCN = true;
2402                   }
2403                   // if the carbon neighbor is an oxygen bonded
2404                   // via a double bond, ipso is HOCO
2405                   if ((nbr3Atom->getAtomicNum() == 8) &&
2406                       (bond->getBondType() == Bond::DOUBLE)) {
2407                     isHOCO = true;
2408                   }
2409                 }
2410               }
2411               // if the neighbor of oxygen is phosphorus, ipso is HOCO
2412               if (nbr2Atom->getAtomicNum() == 15) {
2413                 isHOP = true;
2414               }
2415               // if the neighbor of oxygen is sulfur, ipso is HOS
2416               if (nbr2Atom->getAtomicNum() == 16) {
2417                 isHOS = true;
2418               }
2419             }
2420             if (isHOCO || isHOP) {
2421               // HOCO
2422               // Hydroxyl hydrogen in carboxylic acids
2423               atomType = 24;
2424               break;
2425             }
2426             if (isHOCCorHOCN) {
2427               // HOCC
2428               // Enolic or phenolic hydroxyl hydrogen
2429               // HOCN
2430               // Hydroxyl hydrogen in HO-C=N moiety
2431               atomType = 29;
2432               break;
2433             }
2434             if (isHOS) {
2435               // HOS
2436               // Hydrogen on oxygen attached to sulfur
2437               atomType = 33;
2438               break;
2439             }
2440             /* FALLTHRU */
2441           default:
2442             // HO
2443             // Generic hydroxyl hydrogen
2444             // HOR
2445             // Hydroxyl hydrogen in alcohols
2446             atomType = 21;
2447             break;
2448         }
2449         break;
2450 
2451       // phosphorus and sulfur
2452       case 15:
2453       case 16:
2454         // HP
2455         // Hydrogen attached to phosphorus
2456         // HS
2457         // Hydrogen attached to sulfur
2458         // HS=N
2459         // Hydrogen attached to >S= sulfur doubly bonded to N
2460         atomType = 71;
2461         break;
2462     }
2463   }
2464   d_MMFFAtomPropertiesPtrVect[atom->getIdx()]->mmffAtomType = atomType;
2465   if (!atomType) {
2466     d_valid = false;
2467   }
2468 }
2469 
2470 // sanitizes molecule according to MMFF requirements
2471 // returns MolOps::SANITIZE_NONE on success, the flag
2472 // which caused trouble in case of failure
sanitizeMMFFMol(RWMol & mol)2473 unsigned int sanitizeMMFFMol(RWMol &mol) {
2474   unsigned int error = 0;
2475 
2476   try {
2477     MolOps::sanitizeMol(
2478         mol, error,
2479         (unsigned int)(MolOps::SANITIZE_CLEANUP | MolOps::SANITIZE_PROPERTIES |
2480                        MolOps::SANITIZE_SYMMRINGS | MolOps::SANITIZE_KEKULIZE |
2481                        MolOps::SANITIZE_FINDRADICALS |
2482                        MolOps::SANITIZE_SETCONJUGATION |
2483                        MolOps::SANITIZE_SETHYBRIDIZATION |
2484                        MolOps::SANITIZE_CLEANUPCHIRALITY |
2485                        MolOps::SANITIZE_ADJUSTHS));
2486     if (!(mol.hasProp(common_properties::_MMFFSanitized))) {
2487       mol.setProp(common_properties::_MMFFSanitized, 1, true);
2488     }
2489   } catch (MolSanitizeException &) {
2490   }
2491 
2492   return error;
2493 }
2494 
2495 // constructs a MMFFMolProperties object for ROMol mol filled
2496 // with MMFF atom types, formal and partial charges
2497 // in case atom types are missing, d_valid is set to false,
2498 // charges are set to 0.0 and the force-field is unusable
MMFFMolProperties(ROMol & mol,const std::string & mmffVariant,std::uint8_t verbosity,std::ostream & oStream)2499 MMFFMolProperties::MMFFMolProperties(ROMol &mol, const std::string &mmffVariant,
2500                                      std::uint8_t verbosity,
2501                                      std::ostream &oStream)
2502     : d_valid(true),
2503       d_mmffs(mmffVariant == "MMFF94s" ? true : false),
2504       d_bondTerm(true),
2505       d_angleTerm(true),
2506       d_stretchBendTerm(true),
2507       d_oopTerm(true),
2508       d_torsionTerm(true),
2509       d_vdWTerm(true),
2510       d_eleTerm(true),
2511       d_dielConst(1.0),
2512       d_dielModel(CONSTANT),
2513       d_verbosity(verbosity),
2514       d_oStream(&oStream),
2515       d_MMFFAtomPropertiesPtrVect(mol.getNumAtoms()) {
2516   if (MolOps::needsHs(mol)) {
2517     BOOST_LOG(rdWarningLog)
2518         << "Molecule does not have explicit Hs. Consider calling AddHs()"
2519         << std::endl;
2520   }
2521   if (!mol.hasProp(common_properties::_MMFFSanitized)) {
2522     bool isAromaticSet = false;
2523     for (const auto atom : mol.atoms()) {
2524       if (atom->getIsAromatic()) {
2525         isAromaticSet = true;
2526         break;
2527       }
2528     }
2529     if (isAromaticSet) {
2530       MolOps::Kekulize((RWMol &)mol, true);
2531     }
2532     mol.setProp(common_properties::_MMFFSanitized, 1, true);
2533   }
2534   for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
2535     d_MMFFAtomPropertiesPtrVect[i] =
2536         MMFFAtomPropertiesPtr(new MMFFAtomProperties());
2537   }
2538   setMMFFAromaticity((RWMol &)mol);
2539   RingMembershipSize rmSize(mol);
2540   for (const auto atom : mol.atoms()) {
2541     if (atom->getAtomicNum() != 1) {
2542       this->setMMFFHeavyAtomType(rmSize, atom);
2543     }
2544   }
2545   for (const auto atom : mol.atoms()) {
2546     if (atom->getAtomicNum() == 1) {
2547       this->setMMFFHydrogenType(atom);
2548     }
2549   }
2550   if (this->isValid()) {
2551     this->computeMMFFCharges(mol);
2552   }
2553   if (verbosity == MMFF_VERBOSITY_HIGH) {
2554     oStream << "\n"
2555                "A T O M   T Y P E S   A N D   C H A R G E S\n\n"
2556                "          ATOM    FORMAL   PARTIAL\n"
2557                " ATOM     TYPE    CHARGE    CHARGE\n"
2558                "-----------------------------------"
2559             << std::endl;
2560     for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
2561       oStream << std::left << std::setw(2)
2562               << mol.getAtomWithIdx(idx)->getSymbol() << std::left << " #"
2563               << std::setw(5) << idx + 1 << std::right << std::setw(5)
2564               << (unsigned int)(this->getMMFFAtomType(idx)) << std::right
2565               << std::setw(10) << std::fixed << std::setprecision(3)
2566               << this->getMMFFFormalCharge(idx) << std::right << std::setw(10)
2567               << this->getMMFFPartialCharge(idx) << std::endl;
2568     }
2569     if (!(this->isValid())) {
2570       oStream << "\nMissing atom types - charges were not computed"
2571               << std::endl;
2572     }
2573   }
2574 }
2575 
2576 // returns the MMFF angle type of the angle formed
2577 // by atoms with indexes idx1, idx2, idx3
getMMFFAngleType(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3)2578 unsigned int MMFFMolProperties::getMMFFAngleType(const ROMol &mol,
2579                                                  const unsigned int idx1,
2580                                                  const unsigned int idx2,
2581                                                  const unsigned int idx3) {
2582   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2583 
2584   // ftp://ftp.wiley.com/public/journals/jcc/suppmat/17/553/MMFF-III_AppendixA.html
2585   //
2586   // AT[IJK]    Structural significance
2587   //--------------------------------------------------------------------------
2588   //  0		      The angle i-j-k is a "normal" bond angle
2589   //  1 		    Either bond i-j or bond j-k has a bond type of 1
2590   //  2		      Bonds i-j and j-k each have bond types of 1; the
2591   //  sum is 2. 3		      The angle occurs in a three-membered ring
2592   //  4		      The angle occurs in a four-membered ring
2593   //  5		      Is in a three-membered ring and the sum of the
2594   //  bond types is
2595   //  1
2596   //  6		      Is in a three-membered ring and the sum of the
2597   //  bond types is
2598   //  2
2599   //  7		      Is in a four-membered ring and the sum of the bond
2600   //  types is
2601   //  1
2602   //  8		      Is in a four-membered ring and the sum of the bond
2603   //  types is
2604   //  2
2605 
2606   unsigned int bondTypeSum =
2607       this->getMMFFBondType(mol.getBondBetweenAtoms(idx1, idx2)) +
2608       this->getMMFFBondType(mol.getBondBetweenAtoms(idx2, idx3));
2609   unsigned int angleType = bondTypeSum;
2610 
2611   unsigned int size = isAngleInRingOfSize3or4(mol, idx1, idx2, idx3);
2612   if (size) {
2613     angleType = size;
2614     if (bondTypeSum) {
2615       angleType += (bondTypeSum + size - 2);
2616     }
2617   }
2618 
2619   return angleType;
2620 }
2621 
2622 // returns the MMFF bond type of the bond
getMMFFBondType(const Bond * bond)2623 unsigned int MMFFMolProperties::getMMFFBondType(const Bond *bond) {
2624   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2625   PRECONDITION(bond, "invalid bond");
2626 
2627   const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
2628   const ForceFields::MMFF::MMFFProp *mmffPropAtom1 =
2629       (*mmffProp)(this->getMMFFAtomType(bond->getBeginAtomIdx()));
2630   const ForceFields::MMFF::MMFFProp *mmffPropAtom2 =
2631       (*mmffProp)(this->getMMFFAtomType(bond->getEndAtomIdx()));
2632 
2633   // return 1 if the bond is single and the properties for this
2634   // single bond match either those of sbmb or aromatic bonds
2635   // for this atom pair, 0 if they don't
2636   return (unsigned int)(((bond->getBondType() == Bond::SINGLE) &&
2637                          ((mmffPropAtom1->sbmb && mmffPropAtom2->sbmb) ||
2638                           (mmffPropAtom1->arom && mmffPropAtom2->arom)))
2639                             ? 1
2640                             : 0);
2641 }
2642 
2643 // given the angle type and the two bond types of the bond
2644 // which compose the angle, it returns the MMFF stretch-bend
2645 // type of the angle
getMMFFStretchBendType(const unsigned int angleType,const unsigned int bondType1,const unsigned int bondType2)2646 unsigned int getMMFFStretchBendType(const unsigned int angleType,
2647                                     const unsigned int bondType1,
2648                                     const unsigned int bondType2) {
2649   unsigned int stretchBendType = 0;
2650 
2651   switch (angleType) {
2652     case 1:
2653       stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 1 : 2);
2654       break;
2655 
2656     case 2:
2657       stretchBendType = 3;
2658       break;
2659 
2660     case 4:
2661       stretchBendType = 4;
2662       break;
2663 
2664     case 3:
2665       stretchBendType = 5;
2666       break;
2667 
2668     case 5:
2669       stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 6 : 7);
2670       break;
2671 
2672     case 6:
2673       stretchBendType = 8;
2674       break;
2675 
2676     case 7:
2677       stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 9 : 10);
2678       break;
2679 
2680     case 8:
2681       stretchBendType = 11;
2682       break;
2683   }
2684 
2685   return stretchBendType;
2686 }
2687 
2688 // given a dihedral angle formed by 4 atoms with indexes
2689 // idx1, idx2, idx3, idx4, it returns a std::pair whose first element
2690 // is the principal torsion type, and the second is the secondary
2691 // torsion type, to be used only if parameters could not be found
2692 // (empirically found - this is not mentioned either in MMFF.IV
2693 // nor in MMFF.V)
2694 const std::pair<unsigned int, unsigned int>
getMMFFTorsionType(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4)2695 MMFFMolProperties::getMMFFTorsionType(const ROMol &mol, const unsigned int idx1,
2696                                       const unsigned int idx2,
2697                                       const unsigned int idx3,
2698                                       const unsigned int idx4) {
2699   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2700 
2701   const Bond *bondJK = mol.getBondBetweenAtoms(idx2, idx3);
2702   unsigned int bondTypeIJ =
2703       this->getMMFFBondType(mol.getBondBetweenAtoms(idx1, idx2));
2704   unsigned int bondTypeJK = this->getMMFFBondType(bondJK);
2705   unsigned int bondTypeKL =
2706       this->getMMFFBondType(mol.getBondBetweenAtoms(idx3, idx4));
2707   unsigned int torsionType = bondTypeJK;
2708   unsigned int secondTorsionType = 0;
2709 
2710   // according to MMFF.IV page 609 the condition should be as simple as
2711   // if ((bondTypeJK == 0) && ((bondTypeIJ == 1) || (bondTypeKL == 1))) {
2712   // but CYGUAN01 fails the test, so the following condition was
2713   // empirically determined to be the correct one
2714   if ((bondTypeJK == 0) && (bondJK->getBondType() == Bond::SINGLE) &&
2715       ((bondTypeIJ == 1) || (bondTypeKL == 1))) {
2716     torsionType = 2;
2717   }
2718   unsigned int size = isTorsionInRingOfSize4or5(mol, idx1, idx2, idx3, idx4);
2719   // the additional check on the existence of a bond between I and K or J and
2720   // L is to avoid assigning torsionType 4 to those torsions in a 4-membered
2721   // ring constituted by the fusion of two 3-membered rings, even though it
2722   // would be harmless for the energy calculation since parameters for
2723   // 4,22,22,22,22 and 0,22,22,22,22 are identical
2724   if ((size == 4) && (!(mol.getBondBetweenAtoms(idx1, idx3) ||
2725                         mol.getBondBetweenAtoms(idx2, idx4)))) {
2726     secondTorsionType = torsionType;
2727     torsionType = 4;
2728   } else if ((size == 5) && ((this->getMMFFAtomType(idx1) == 1) ||
2729                              (this->getMMFFAtomType(idx2) == 1) ||
2730                              (this->getMMFFAtomType(idx3) == 1) ||
2731                              (this->getMMFFAtomType(idx4) == 1))) {
2732     secondTorsionType = torsionType;
2733     torsionType = 5;
2734   }
2735 
2736   return std::make_pair(torsionType, secondTorsionType);
2737 }
2738 
2739 // empirical rule to compute bond stretching parameters if
2740 // tabulated parameters could not be found. The returned
2741 // pointer to a MMFFBond object must be freed by the caller
2742 const ForceFields::MMFF::MMFFBond *
getMMFFBondStretchEmpiricalRuleParams(const ROMol & mol,const Bond * bond)2743 MMFFMolProperties::getMMFFBondStretchEmpiricalRuleParams(const ROMol &mol,
2744                                                          const Bond *bond) {
2745   RDUNUSED_PARAM(mol);
2746   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2747 
2748   const MMFFBond *mmffBndkParams;
2749   const MMFFHerschbachLaurie *mmffHerschbachLaurieParams;
2750   const MMFFProp *mmffAtomPropParams[2];
2751   const MMFFCovRadPauEle *mmffAtomCovRadPauEleParams[2];
2752   const MMFFBndkCollection *mmffBndk = DefaultParameters::getMMFFBndk();
2753   const MMFFHerschbachLaurieCollection *mmffHerschbachLaurie =
2754       DefaultParameters::getMMFFHerschbachLaurie();
2755   const MMFFCovRadPauEleCollection *mmffCovRadPauEle =
2756       DefaultParameters::getMMFFCovRadPauEle();
2757   const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
2758 
2759   unsigned int atomicNum1 = bond->getBeginAtom()->getAtomicNum();
2760   unsigned int atomicNum2 = bond->getEndAtom()->getAtomicNum();
2761   mmffBndkParams = (*mmffBndk)(atomicNum1, atomicNum2);
2762   mmffAtomCovRadPauEleParams[0] = (*mmffCovRadPauEle)(atomicNum1);
2763   mmffAtomCovRadPauEleParams[1] = (*mmffCovRadPauEle)(atomicNum2);
2764   mmffAtomPropParams[0] =
2765       (*mmffProp)(this->getMMFFAtomType(bond->getBeginAtomIdx()));
2766   mmffAtomPropParams[1] =
2767       (*mmffProp)(this->getMMFFAtomType(bond->getEndAtomIdx()));
2768 
2769   PRECONDITION(mmffAtomCovRadPauEleParams[0],
2770                "covalent radius/Pauling electronegativity parameters for atom "
2771                "1 not found");
2772   PRECONDITION(mmffAtomCovRadPauEleParams[1],
2773                "covalent radius/Pauling electronegativity parameters for atom "
2774                "2 not found");
2775   PRECONDITION(mmffAtomPropParams[0],
2776                "property parameters for atom 1 not found");
2777   PRECONDITION(mmffAtomPropParams[1],
2778                "property parameters for atom 2 not found");
2779 
2780   auto *mmffBondParams = new ForceFields::MMFF::MMFFBond();
2781   const double c = (((atomicNum1 == 1) || (atomicNum2 == 1)) ? 0.050 : 0.085);
2782   const double n = 1.4;
2783 #if 0
2784       const double delta = 0.008;
2785 #endif
2786 #if 1
2787   const double delta = 0.0;
2788 #endif
2789   double r0_i[2];
2790 
2791   // MMFF.V, page 625
2792   for (unsigned int i = 0; i < 2; ++i) {
2793     r0_i[i] = mmffAtomCovRadPauEleParams[i]->r0;
2794 // the part of the empirical rule concerning H
2795 // parameters appears not to be used - tests are
2796 // passed only in its absence, hence it is
2797 // currently excluded
2798 #if 0
2799         switch (mmffAtomPropParams[i]->mltb) {
2800           case 1:
2801           case 2:
2802             H_i[i] = 2;
2803           break;
2804 
2805           case 3:
2806             H_i[i] = 1;
2807 
2808           default:
2809             H_i[i] = 3;
2810         }
2811 #endif
2812   }
2813 // also the part of the empirical rule concerning BO
2814 // parameters appears not to be used - tests are
2815 // passed only in its absence, hence it is
2816 // currently excluded
2817 #if 0
2818       unsigned int BO_ij = (unsigned int)(bond->getBondTypeAsDouble());
2819       if ((mmffAtomPropParams[0]->mltb == 1)
2820         && (mmffAtomPropParams[1]->mltb == 1)) {
2821         BO_ij = 4;
2822       }
2823       if (((mmffAtomPropParams[0]->mltb == 1)
2824         && (mmffAtomPropParams[1]->mltb == 2))
2825         || ((mmffAtomPropParams[0]->mltb == 2)
2826         && (mmffAtomPropParams[1]->mltb == 1))) {
2827         BO_ij = 5;
2828       }
2829       if (areAtomsInSameAromaticRing(mol,
2830         bond->getBeginAtomIdx(), bond->getEndAtomIdx())) {
2831         BO_ij = (((mmffAtomPropParams[0]->pilp == 0)
2832           && (mmffAtomPropParams[1]->pilp == 0)) ? 4 : 5);
2833       }
2834       if (BO_ij == 1) {
2835         for (unsigned int i = 0; i < 2; ++i) {
2836           std::cout << "H" << i << "=" << H_i[i] << std::endl;
2837           switch (H_i[i]) {
2838             case 1:
2839               r0_i[i] -= 0.08;
2840             break;
2841 
2842             case 2:
2843               r0_i[i] -= 0.03;
2844             break;
2845           }
2846         }
2847       }
2848       else {
2849         double dec = 0.0;
2850         switch (BO_ij) {
2851           case 5:
2852             dec = 0.04;
2853           break;
2854 
2855           case 4:
2856             dec = 0.075;
2857           break;
2858 
2859           case 3:
2860             dec = 0.17;
2861           break;
2862 
2863           case 2:
2864             dec = 0.10;
2865           break;
2866         }
2867         r0_i[0] -= dec;
2868         r0_i[1] -= dec;
2869       }
2870 #endif
2871   // equation (18) - MMFF.V, page 625
2872   mmffBondParams->r0 = (r0_i[0] + r0_i[1] -
2873                         c * pow(fabs(mmffAtomCovRadPauEleParams[0]->chi -
2874                                      mmffAtomCovRadPauEleParams[1]->chi),
2875                                 n) -
2876                         delta);
2877   if (mmffBndkParams) {
2878     // equation (19) - MMFF.V, page 625
2879     double coeff = mmffBndkParams->r0 / mmffBondParams->r0;
2880     double coeff2 = coeff * coeff;
2881     double coeff6 = coeff2 * coeff2 * coeff2;
2882     mmffBondParams->kb = mmffBndkParams->kb * coeff6;
2883   } else {
2884     // MMFF.V, page 627
2885     // Herschbach-Laurie version of Badger's rule
2886     // J. Chem. Phys. 35, 458 (1961); https://doi.org/10.1063/1.1731952
2887     // equation (8), page 5
2888     mmffHerschbachLaurieParams = (*mmffHerschbachLaurie)(
2889         getPeriodicTableRowHL(atomicNum1), getPeriodicTableRowHL(atomicNum2));
2890     mmffBondParams->kb =
2891         pow(10.0, -(mmffBondParams->r0 - mmffHerschbachLaurieParams->a_ij) /
2892                       mmffHerschbachLaurieParams->d_ij);
2893   }
2894 
2895   return (const ForceFields::MMFF::MMFFBond *)mmffBondParams;
2896 }
2897 
2898 // empirical rule to compute angle bending parameters if
2899 // tabulated parameters could not be found. The returned
2900 // pointer to a MMFFAngle object must be freed by the caller
getMMFFAngleBendEmpiricalRuleParams(const ROMol & mol,const ForceFields::MMFF::MMFFAngle * oldMMFFAngleParams,const ForceFields::MMFF::MMFFProp * mmffPropParamsCentralAtom,const ForceFields::MMFF::MMFFBond * mmffBondParams1,const ForceFields::MMFF::MMFFBond * mmffBondParams2,unsigned int idx1,unsigned int idx2,unsigned int idx3)2901 const ForceFields::MMFF::MMFFAngle *getMMFFAngleBendEmpiricalRuleParams(
2902     const ROMol &mol, const ForceFields::MMFF::MMFFAngle *oldMMFFAngleParams,
2903     const ForceFields::MMFF::MMFFProp *mmffPropParamsCentralAtom,
2904     const ForceFields::MMFF::MMFFBond *mmffBondParams1,
2905     const ForceFields::MMFF::MMFFBond *mmffBondParams2, unsigned int idx1,
2906     unsigned int idx2, unsigned int idx3) {
2907   int atomicNum[3];
2908   atomicNum[0] = mol.getAtomWithIdx(idx1)->getAtomicNum();
2909   atomicNum[1] = mol.getAtomWithIdx(idx2)->getAtomicNum();
2910   atomicNum[2] = mol.getAtomWithIdx(idx3)->getAtomicNum();
2911   auto *mmffAngleParams = new ForceFields::MMFF::MMFFAngle();
2912   unsigned int ringSize = isAngleInRingOfSize3or4(mol, idx1, idx2, idx3);
2913   if (!oldMMFFAngleParams) {
2914     // angle rest value empirical rule
2915     mmffAngleParams->theta0 = 120.0;
2916     switch (mmffPropParamsCentralAtom->crd) {
2917       case 4:
2918         // if the central atom has crd = 4
2919         mmffAngleParams->theta0 = 109.45;
2920         break;
2921 
2922       case 2:
2923         // if the central atom is oxygen
2924         if (atomicNum[1] == 8) {
2925           mmffAngleParams->theta0 = 105.0;
2926         }
2927         // if the central atom is linear
2928         else if (mmffPropParamsCentralAtom->linh == 1) {
2929           mmffAngleParams->theta0 = 180.0;
2930         }
2931         break;
2932 
2933       case 3:
2934         if ((mmffPropParamsCentralAtom->val == 3) &&
2935             (mmffPropParamsCentralAtom->mltb == 0)) {
2936           // if the central atom is nitrogen
2937           if (atomicNum[1] == 7) {
2938             mmffAngleParams->theta0 = 107.0;
2939           } else {
2940             mmffAngleParams->theta0 = 92.0;
2941           }
2942         }
2943         break;
2944     }
2945     if (ringSize == 3) {
2946       mmffAngleParams->theta0 = 60.0;
2947     } else if (ringSize == 4) {
2948       mmffAngleParams->theta0 = 90.0;
2949     }
2950   } else {
2951     mmffAngleParams->theta0 = oldMMFFAngleParams->theta0;
2952   }
2953   // angle force constant empirical rule
2954   double Z[3] = {0.0, 0.0, 0.0};
2955   double C[3] = {0.0, 0.0, 0.0};
2956   double beta = 1.75;
2957   for (unsigned int i = 0; i < 3; ++i) {
2958     // Table VI - MMFF.V, page 628
2959     switch (atomicNum[i]) {
2960       // Hydrogen
2961       case 1:
2962         Z[i] = 1.395;
2963         break;
2964 
2965       // Carbon
2966       case 6:
2967         Z[i] = 2.494;
2968         C[i] = 1.016;
2969         break;
2970 
2971       // Nitrogen
2972       case 7:
2973         Z[i] = 2.711;
2974         C[i] = 1.113;
2975         break;
2976 
2977       // Oxygen
2978       case 8:
2979         Z[i] = 3.045;
2980         C[i] = 1.337;
2981         break;
2982 
2983       // Fluorine
2984       case 9:
2985         Z[i] = 2.847;
2986         break;
2987 
2988       // Silicon
2989       case 14:
2990         Z[i] = 2.350;
2991         C[i] = 0.811;
2992         break;
2993 
2994       // Phosphorus
2995       case 15:
2996         Z[i] = 2.350;
2997         C[i] = 1.068;
2998         break;
2999 
3000       // Sulfur
3001       case 16:
3002         Z[i] = 2.980;
3003         C[i] = 1.249;
3004         break;
3005 
3006       // Chlorine
3007       case 17:
3008         Z[i] = 2.909;
3009         C[i] = 1.078;
3010         break;
3011 
3012       // Bromine
3013       case 35:
3014         Z[i] = 3.017;
3015         break;
3016 
3017       // Iodine
3018       case 53:
3019         Z[i] = 3.086;
3020         break;
3021     }
3022   }
3023   double r0_ij = mmffBondParams1->r0;
3024   double r0_jk = mmffBondParams2->r0;
3025   double D =
3026       (r0_ij - r0_jk) * (r0_ij - r0_jk) / ((r0_ij + r0_jk) * (r0_ij + r0_jk));
3027   double theta0_rad = DEG2RAD * mmffAngleParams->theta0;
3028   if (ringSize == 4) {
3029     beta *= 0.85;
3030   } else if (ringSize == 3) {
3031     beta *= 0.05;
3032   }
3033   // equation (20) - MMFF.V, page 628
3034   mmffAngleParams->ka =
3035       beta * Z[0] * C[1] * Z[2] /
3036       ((r0_ij + r0_jk) * theta0_rad * theta0_rad * exp(2.0 * D));
3037 
3038   return (const ForceFields::MMFF::MMFFAngle *)mmffAngleParams;
3039 }
3040 
3041 // empirical rule to compute torsional parameters if
3042 // tabulated parameters could not be found
3043 // the indexes of the two central atoms J and K
3044 // idx2 and idx3 must be supplied. The returned pointer
3045 // to a MMFFTor object must be freed by the caller
3046 const ForceFields::MMFF::MMFFTor *
getMMFFTorsionEmpiricalRuleParams(const ROMol & mol,unsigned int idx2,unsigned int idx3)3047 MMFFMolProperties::getMMFFTorsionEmpiricalRuleParams(const ROMol &mol,
3048                                                      unsigned int idx2,
3049                                                      unsigned int idx3) {
3050   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
3051 
3052   const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3053   const MMFFAromCollection *mmffArom = DefaultParameters::getMMFFArom();
3054   auto *mmffTorParams = new ForceFields::MMFF::MMFFTor();
3055   unsigned int jAtomType = this->getMMFFAtomType(idx2);
3056   unsigned int kAtomType = this->getMMFFAtomType(idx3);
3057   const MMFFProp *jMMFFProp = (*mmffProp)(jAtomType);
3058   const MMFFProp *kMMFFProp = (*mmffProp)(kAtomType);
3059   const Bond *bond = mol.getBondBetweenAtoms(idx2, idx3);
3060   double U[2] = {0.0, 0.0};
3061   double V[2] = {0.0, 0.0};
3062   double W[2] = {0.0, 0.0};
3063   double beta = 0.0;
3064   double pi_jk = 0.0;
3065   const auto N_jk = (double)((jMMFFProp->crd - 1) * (kMMFFProp->crd - 1));
3066   int atomicNum[2] = {mol.getAtomWithIdx(idx2)->getAtomicNum(),
3067                       mol.getAtomWithIdx(idx3)->getAtomicNum()};
3068 
3069   for (unsigned int i = 0; i < 2; ++i) {
3070     switch (atomicNum[i]) {
3071       // carbon
3072       case 6:
3073         U[i] = 2.0;
3074         V[i] = 2.12;
3075         break;
3076 
3077       // nitrogen
3078       case 7:
3079         U[i] = 2.0;
3080         V[i] = 1.5;
3081         break;
3082 
3083       // oxygen
3084       case 8:
3085         U[i] = 2.0;
3086         V[i] = 0.2;
3087         W[i] = 2.0;
3088         break;
3089 
3090       // silicon
3091       case 14:
3092         U[i] = 1.25;
3093         V[i] = 1.22;
3094         break;
3095 
3096       // phosphorus
3097       case 15:
3098         U[i] = 1.25;
3099         V[i] = 2.40;
3100         break;
3101 
3102       // sulfur
3103       case 16:
3104         U[i] = 1.25;
3105         V[i] = 0.49;
3106         W[i] = 8.0;
3107         break;
3108     }
3109   }
3110 
3111   // rule (a)
3112   if (jMMFFProp->linh || kMMFFProp->linh) {
3113     mmffTorParams->V1 = 0.0;
3114     mmffTorParams->V2 = 0.0;
3115     mmffTorParams->V3 = 0.0;
3116   }
3117 
3118   // rule (b)
3119   else if (mmffArom->isMMFFAromatic(jAtomType) &&
3120            mmffArom->isMMFFAromatic(kAtomType) && bond->getIsAromatic()) {
3121     beta = ((((jMMFFProp->val == 3) && (kMMFFProp->val == 4)) ||
3122              ((jMMFFProp->val == 4) && (kMMFFProp->val == 3)))
3123                 ? 3.0
3124                 : 6.0);
3125     pi_jk = (((jMMFFProp->pilp == 0) && (kMMFFProp->pilp == 0)) ? 0.5 : 0.3);
3126     mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3127   }
3128 
3129   // rule (c)
3130   else if (bond->getBondType() == Bond::DOUBLE) {
3131     beta = 6.0;
3132     pi_jk = (((jMMFFProp->mltb == 2) && (kMMFFProp->mltb == 2)) ? 1.0 : 0.4);
3133     mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3134   }
3135 
3136   // rule (d)
3137   else if ((jMMFFProp->crd == 4) && (kMMFFProp->crd == 4)) {
3138     mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3139   }
3140 
3141   // rule (e)
3142   else if ((jMMFFProp->crd == 4) && (kMMFFProp->crd != 4)) {
3143     if (((kMMFFProp->crd == 3) &&
3144          (((kMMFFProp->val == 4) || (kMMFFProp->val == 34)) ||
3145           kMMFFProp->mltb)) ||
3146         ((kMMFFProp->crd == 2) && ((kMMFFProp->val == 3) || kMMFFProp->mltb))) {
3147       mmffTorParams->V1 = 0.0;
3148       mmffTorParams->V2 = 0.0;
3149       mmffTorParams->V3 = 0.0;
3150     } else {
3151       mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3152     }
3153   }
3154 
3155   // rule (f)
3156   else if ((kMMFFProp->crd == 4) && (jMMFFProp->crd != 4)) {
3157     if (((jMMFFProp->crd == 3) &&
3158          (((jMMFFProp->val == 4) || (jMMFFProp->val == 34)) ||
3159           jMMFFProp->mltb)) ||
3160         ((jMMFFProp->crd == 2) && ((jMMFFProp->val == 3) || jMMFFProp->mltb))) {
3161       mmffTorParams->V1 = 0.0;
3162       mmffTorParams->V2 = 0.0;
3163       mmffTorParams->V3 = 0.0;
3164     } else {
3165       mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3166     }
3167   }
3168 
3169   // rule (g)
3170   else if (((bond->getBondType() == Bond::SINGLE) && jMMFFProp->mltb &&
3171             kMMFFProp->mltb) ||
3172            (jMMFFProp->mltb && kMMFFProp->pilp) ||
3173            (jMMFFProp->pilp && kMMFFProp->mltb)) {
3174     // case (1)
3175     if (jMMFFProp->pilp && kMMFFProp->pilp) {
3176       mmffTorParams->V1 = 0.0;
3177       mmffTorParams->V2 = 0.0;
3178       mmffTorParams->V3 = 0.0;
3179     }
3180     // case (2)
3181     else if (jMMFFProp->pilp && kMMFFProp->mltb) {
3182       beta = 6.0;
3183       if (jMMFFProp->mltb == 1) {
3184         pi_jk = 0.5;
3185       } else if ((getPeriodicTableRow(atomicNum[0]) == 2) &&
3186                  (getPeriodicTableRow(atomicNum[1]) == 2)) {
3187         pi_jk = 0.3;
3188       } else if ((getPeriodicTableRow(atomicNum[0]) != 2) ||
3189                  (getPeriodicTableRow(atomicNum[1]) != 2)) {
3190         pi_jk = 0.15;
3191       }
3192       mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3193     }
3194     // case (3)
3195     else if (kMMFFProp->pilp && jMMFFProp->mltb) {
3196       beta = 6.0;
3197       if (kMMFFProp->mltb == 1) {
3198         pi_jk = 0.5;
3199       } else if ((getPeriodicTableRow(atomicNum[0]) == 2) &&
3200                  (getPeriodicTableRow(atomicNum[1]) == 2)) {
3201         pi_jk = 0.3;
3202       } else if ((getPeriodicTableRow(atomicNum[0]) != 2) ||
3203                  (getPeriodicTableRow(atomicNum[1]) != 2)) {
3204         pi_jk = 0.15;
3205       }
3206       mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3207     }
3208     // case (4)
3209     else if (((jMMFFProp->mltb == 1) || (kMMFFProp->mltb == 1)) &&
3210              ((atomicNum[0] != 6) || (atomicNum[1] != 6))) {
3211       beta = 6.0;
3212       pi_jk = 0.4;
3213       mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3214     }
3215     // case (5)
3216     else {
3217       beta = 6.0;
3218       pi_jk = 0.15;
3219       mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3220     }
3221   }
3222 
3223   // rule (h)
3224   else {
3225     if (((atomicNum[0] == 8) || (atomicNum[0] == 16)) &&
3226         ((atomicNum[1] == 8) || (atomicNum[1] == 16))) {
3227       mmffTorParams->V2 = -sqrt(W[0] * W[1]);
3228     } else {
3229       mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3230     }
3231   }
3232 
3233   return (const MMFFTor *)mmffTorParams;
3234 }
3235 
3236 // populates the MMFFMolProperties object with MMFF
3237 // formal and partial charges
computeMMFFCharges(const ROMol & mol)3238 void MMFFMolProperties::computeMMFFCharges(const ROMol &mol) {
3239   PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
3240 
3241   unsigned int idx;
3242   unsigned int i;
3243   unsigned int j;
3244   unsigned int atomType;
3245   unsigned int nbrAtomType;
3246   unsigned int nConj = 0;
3247   unsigned int old_nConj = 0;
3248   double pChg = 0.0;
3249   double fChg = 0.0;
3250   boost::dynamic_bitset<> conjNBitVect(mol.getNumAtoms());
3251   VECT_INT_VECT atomRings = mol.getRingInfo()->atomRings();
3252   ROMol::ADJ_ITER nbrIdx;
3253   ROMol::ADJ_ITER endNbrs;
3254   ROMol::ADJ_ITER nbr2Idx;
3255   ROMol::ADJ_ITER end2Nbrs;
3256   const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3257   const MMFFPBCICollection *mmffPBCI = DefaultParameters::getMMFFPBCI();
3258   const MMFFChgCollection *mmffChg = DefaultParameters::getMMFFChg();
3259 
3260   // We need to set formal charges upfront
3261   for (idx = 0; idx < mol.getNumAtoms(); ++idx) {
3262     const Atom *atom = mol.getAtomWithIdx(idx);
3263     atomType = this->getMMFFAtomType(idx);
3264     fChg = 0.0;
3265     switch (atomType) {
3266       // special cases
3267       case 32:
3268       // O2CM
3269       // Oxygen in carboxylate group
3270       case 72:
3271         // SM
3272         // Anionic terminal sulfur
3273         // loop over neighbors
3274         boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3275         for (; nbrIdx != endNbrs; ++nbrIdx) {
3276           const Atom *nbrAtom = mol[*nbrIdx];
3277           nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3278           // loop over neighbors of the neighbor
3279           // count how many terminal oxygen/sulfur atoms
3280           // or secondary nitrogens
3281           // are bonded to the neighbor of ipso
3282           int nSecNbondedToNbr = 0;
3283           int nTermOSbondedToNbr = 0;
3284           boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
3285           for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
3286             const Atom *nbr2Atom = mol[*nbr2Idx];
3287             // if it's nitrogen with 2 neighbors and it is not aromatic,
3288             // increment the counter of secondary nitrogens
3289             if ((nbr2Atom->getAtomicNum() == 7) &&
3290                 (nbr2Atom->getDegree() == 2) &&
3291                 (!(nbr2Atom->getIsAromatic()))) {
3292               ++nSecNbondedToNbr;
3293             }
3294             // if it's terminal oxygen/sulfur,
3295             // increment the terminal oxygen/sulfur counter
3296             if (((nbr2Atom->getAtomicNum() == 8) ||
3297                  (nbr2Atom->getAtomicNum() == 16)) &&
3298                 (nbr2Atom->getDegree() == 1)) {
3299               ++nTermOSbondedToNbr;
3300             }
3301           }
3302           // in case its sulfur with two terminal oxygen/sulfur atoms and one
3303           // secondary
3304           // nitrogen, this is a deprotonated sulfonamide, so we should not
3305           // consider
3306           // nitrogen as a replacement for oxygen/sulfur in a sulfone
3307           if ((nbrAtom->getAtomicNum() == 16) && (nTermOSbondedToNbr == 2) &&
3308               (nSecNbondedToNbr == 1)) {
3309             nSecNbondedToNbr = 0;
3310           }
3311           // if the neighbor is carbon
3312           if ((nbrAtom->getAtomicNum() == 6) && nTermOSbondedToNbr) {
3313             // O2CM
3314             // Oxygen in (thio)carboxylate group: charge is shared
3315             // across 2 oxygens/sulfur atoms in (thio)carboxylate,
3316             // 3 oxygen/sulfur atoms in (thio)carbonate
3317             // SM
3318             // Anionic terminal sulfur: charge is localized
3319             fChg = ((nTermOSbondedToNbr == 1)
3320                         ? -1.0
3321                         : -((double)(nTermOSbondedToNbr - 1) /
3322                             (double)nTermOSbondedToNbr));
3323             break;
3324           }
3325           // if the neighbor is NO2 or NO3
3326           if ((nbrAtomType == 45) && (nTermOSbondedToNbr == 3)) {
3327             // O3N
3328             // Nitrate anion oxygen
3329             fChg = -1.0 / 3.0;
3330             break;
3331           }
3332           // if the neighbor is PO2, PO3, PO4
3333           if ((nbrAtomType == 25) && nTermOSbondedToNbr) {
3334             // OP
3335             // Oxygen in phosphine oxide
3336             // O2P
3337             // One of 2 terminal O's on P
3338             // O3P
3339             // One of 3 terminal O's on P
3340             // O4P
3341             // One of 4 terminal O's on P
3342             fChg = ((nTermOSbondedToNbr == 1)
3343                         ? 0.0
3344                         : -((double)(nTermOSbondedToNbr - 1) /
3345                             (double)nTermOSbondedToNbr));
3346             break;
3347           }
3348           // if the neighbor is SO2, SO2N, SO3, SO4, SO2M, SSOM
3349           if ((nbrAtomType == 18) && nTermOSbondedToNbr) {
3350             // SO2
3351             // Sulfone sulfur
3352             // SO2N
3353             // Sulfonamide sulfur
3354             // SO3
3355             // Sulfonate group sulfur
3356             // SO4
3357             // Sulfate group sulfur
3358             // SNO
3359             // Sulfur in nitrogen analog of a sulfone
3360             fChg =
3361                 (((nSecNbondedToNbr + nTermOSbondedToNbr) == 2)
3362                      ? 0.0
3363                      : -((double)((nSecNbondedToNbr + nTermOSbondedToNbr) - 2) /
3364                          (double)nTermOSbondedToNbr));
3365             break;
3366           }
3367           if ((nbrAtomType == 73) && nTermOSbondedToNbr) {
3368             // SO2M
3369             // Sulfur in anionic sulfinate group
3370             // SSOM
3371             // Tricoordinate sulfur in anionic thiosulfinate group
3372             fChg = ((nTermOSbondedToNbr == 1)
3373                         ? 0.0
3374                         : -((double)(nTermOSbondedToNbr - 1) /
3375                             (double)nTermOSbondedToNbr));
3376             break;
3377           }
3378           if ((nbrAtomType == 77) && nTermOSbondedToNbr) {
3379             // O4Cl
3380             // Oxygen in perchlorate anion
3381             fChg = -(1.0 / (double)nTermOSbondedToNbr);
3382             break;
3383           }
3384         }
3385         break;
3386 
3387       case 76:
3388         // N5M
3389         // Nitrogen in 5-ring aromatic anion
3390         // we don't need to bother about the neighbors with N5M
3391         for (i = 0; i < atomRings.size(); ++i) {
3392           if ((std::find(atomRings[i].begin(), atomRings[i].end(), idx) !=
3393                atomRings[i].end())) {
3394             break;
3395           }
3396         }
3397         // find how many nitrogens with atom type 76 we have
3398         // and share the formal charge accordingly
3399         if (i < atomRings.size()) {
3400           unsigned int nNitrogensIn5Ring = 0;
3401           for (j = 0; j < atomRings[i].size(); ++j) {
3402             if (this->getMMFFAtomType(atomRings[i][j]) == 76) {
3403               ++nNitrogensIn5Ring;
3404             }
3405           }
3406           if (nNitrogensIn5Ring) {
3407             fChg = -(1.0 / (double)nNitrogensIn5Ring);
3408           }
3409         }
3410         break;
3411 
3412       case 55:
3413       case 56:
3414       case 81:
3415         // NIM+
3416         // Aromatic nitrogen in imidazolium
3417         // N5A+
3418         // Positive nitrogen in 5-ring alpha position
3419         // N5B+
3420         // Positive nitrogen in 5-ring beta position
3421         // N5+
3422         // Positive nitrogen in other 5-ring position
3423         // we need to loop over all molecule atoms
3424         // and find all those nitrogens with atom type
3425         // 81, 55 or 56, check whether they are conjugated
3426         // with ipso and keep on looping until no more
3427         // conjugated atoms can be found. Finally, we divide
3428         // the total formal charge that was found on the
3429         // conjugated system by the number of conjugated nitrogens
3430         // of types 81, 55 or 56 that were found.
3431         // This is not strictly what is described
3432         // in the MMFF papers, but it is the only way to get an
3433         // integer total formal charge, which makes sense to me
3434         // probably such conjugated systems are anyway out of the
3435         // scope of MMFF, but this is an attempt to correctly
3436         // deal with them somehow
3437         fChg = (double)(atom->getFormalCharge());
3438         nConj = 1;
3439         old_nConj = 0;
3440         conjNBitVect.reset();
3441         conjNBitVect[idx] = 1;
3442         while (nConj > old_nConj) {
3443           old_nConj = nConj;
3444           for (i = 0; i < mol.getNumAtoms(); ++i) {
3445             // if this atom is not marked as conj, move on
3446             if (!conjNBitVect[i]) {
3447               continue;
3448             }
3449             // loop over neighbors
3450             boost::tie(nbrIdx, endNbrs) =
3451                 mol.getAtomNeighbors(mol.getAtomWithIdx(i));
3452             for (; nbrIdx != endNbrs; ++nbrIdx) {
3453               const Atom *nbrAtom = mol[*nbrIdx];
3454               nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3455               // if atom type is not 80 or 57, move on
3456               if ((nbrAtomType != 57) && (nbrAtomType != 80)) {
3457                 continue;
3458               }
3459               // loop over neighbors of the neighbor
3460               // if they are nitrogens of type 81, 55 or 56 and
3461               // they are not not marked as conjugated yet, do it
3462               // and increment the nConj counter by 1
3463               boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
3464               for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
3465                 const Atom *nbr2Atom = mol[*nbr2Idx];
3466                 // if atom type is not 81, 55 or 56, move on
3467                 nbrAtomType = this->getMMFFAtomType(nbr2Atom->getIdx());
3468                 if ((nbrAtomType != 55) && (nbrAtomType != 56) &&
3469                     (nbrAtomType != 81)) {
3470                   continue;
3471                 }
3472                 j = nbr2Atom->getIdx();
3473                 // if this nitrogen is not yet marked as conjugated,
3474                 // mark it and increment the counter and eventually
3475                 // adjust the total formal charge of the conjugated system
3476                 if (!conjNBitVect[j]) {
3477                   conjNBitVect[j] = 1;
3478                   fChg += (double)(nbr2Atom->getFormalCharge());
3479                   ++nConj;
3480                 }
3481               }
3482             }
3483           }
3484         }
3485         if (nConj) {
3486           fChg /= (double)nConj;
3487         }
3488         break;
3489 
3490       case 61:
3491         // loop over neighbors
3492         boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3493         for (; nbrIdx != endNbrs; ++nbrIdx) {
3494           const Atom *nbrAtom = mol[*nbrIdx];
3495           // if it is diazonium, set a +1 formal charge on
3496           // the secondary nitrogen
3497           if (this->getMMFFAtomType(nbrAtom->getIdx()) == 42) {
3498             fChg = 1.0;
3499           }
3500         }
3501         break;
3502 
3503       // non-complicated +1 atom types
3504       case 34:
3505       // NR+
3506       // Quaternary nitrogen
3507       case 49:
3508       // O+
3509       // Oxonium oxygen
3510       case 51:
3511       // O=+
3512       // Oxenium oxygen
3513       case 54:
3514       // N+=C
3515       // Iminium nitrogen
3516       // N+=N
3517       // Positively charged nitrogen doubly bonded to N
3518       case 58:
3519       // NPD+
3520       // Aromatic nitrogen in pyridinium
3521       case 92:
3522       // LI+
3523       // Lithium cation
3524       case 93:
3525       // NA+
3526       // Sodium cation
3527       case 94:
3528       // K+
3529       // Potassium cation
3530       case 97:
3531         // CU+1
3532         // Monopositive copper cation
3533         fChg = 1.0;
3534         break;
3535 
3536       // non-complicated +2 atom types
3537       case 87:
3538       // FE+2
3539       // Dipositive iron cation
3540       case 95:
3541       // ZN+2
3542       // Dipositive zinc cation
3543       case 96:
3544       // CA+2
3545       // Dipositive calcium cation
3546       case 98:
3547       // CU+2
3548       // Dipositive copper cation
3549       case 99:
3550         // MG+2
3551         // Dipositive magnesium cation
3552         fChg = 2.0;
3553         break;
3554 
3555       // non-complicated +3 atom types
3556       case 88:
3557         // FE+3
3558         // Tripositive iron cation
3559         fChg = 3.0;
3560         break;
3561 
3562       // non-complicated -1 atom types
3563       case 35:
3564       // OM
3565       // Oxide oxygen on sp3 carbon
3566       // OM2
3567       // Oxide oxygen on sp2 carbon
3568       // OM
3569       // Oxide oxygen on sp3 nitrogen (not in original MMFF.I Table III)
3570       // OM2
3571       // Oxide oxygen on sp2 nitrogen (not in original MMFF.I Table III)
3572       case 62:
3573       // NM
3574       // Anionic divalent nitrogen
3575       case 89:
3576       // F-
3577       // Fluoride anion
3578       case 90:
3579       // Cl-
3580       // Chloride anion
3581       case 91:
3582         // BR-
3583         // Bromide anion
3584         fChg = -1.0;
3585         break;
3586     }
3587     this->setMMFFFormalCharge(idx, fChg);
3588   }
3589   // now we compute partial charges
3590   // See Halgren, T. MMFF.V, J. Comput. Chem. 1996, 17, 616-641
3591   // https://doi.org/10.1002/(SICI)1096-987X(199604)17:5/6<616::AID-JCC5>3.0.CO;2-X
3592   for (idx = 0; idx < mol.getNumAtoms(); ++idx) {
3593     const Atom *atom = mol.getAtomWithIdx(idx);
3594     atomType = this->getMMFFAtomType(idx);
3595     double q0 = this->getMMFFFormalCharge(idx);
3596     auto M = (double)((*mmffProp)(atomType)->crd);
3597     double v = (*mmffPBCI)(atomType)->fcadj;
3598     double sumFormalCharge = 0.0;
3599     double sumPartialCharge = 0.0;
3600     double nbrFormalCharge;
3601     std::pair<int, const MMFFChg *> mmffChgParams;
3602 
3603     if (isDoubleZero(v)) {
3604       // loop over neighbors
3605       boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3606       for (; nbrIdx != endNbrs; ++nbrIdx) {
3607         const Atom *nbrAtom = mol[*nbrIdx];
3608         nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3609         // if neighbors have a negative formal charge, the latter
3610         // influences the charge on ipso
3611         if (nbrFormalCharge < 0.0) {
3612           q0 += (nbrFormalCharge / (2.0 * (double)(nbrAtom->getDegree())));
3613         }
3614       }
3615     }
3616     // there is a special case for anionic divalent nitrogen
3617     // with positively charged neighbor
3618     if (atomType == 62) {
3619       boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3620       for (; nbrIdx != endNbrs; ++nbrIdx) {
3621         const Atom *nbrAtom = mol[*nbrIdx];
3622         nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3623         if (nbrFormalCharge > 0.0) {
3624           q0 -= (nbrFormalCharge / 2.0);
3625         }
3626       }
3627     }
3628     // loop over neighbors
3629     boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3630     for (; nbrIdx != endNbrs; ++nbrIdx) {
3631       const Atom *nbrAtom = mol[*nbrIdx];
3632       const Bond *bond =
3633           mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx());
3634       // we need to determine the sign of bond charge
3635       // increments depending on the bonding relationship
3636       // i.e. we have parameters for [a,b] bonds
3637       // but it depends whether ipso is a or b
3638       unsigned int nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3639       unsigned int bondType = this->getMMFFBondType(bond);
3640       mmffChgParams =
3641           mmffChg->getMMFFChgParams(bondType, atomType, nbrAtomType);
3642       sumPartialCharge +=
3643           (mmffChgParams.second
3644                ? (double)(mmffChgParams.first) * ((mmffChgParams.second)->bci)
3645                : ((*mmffPBCI)(atomType)->pbci -
3646                   (*mmffPBCI)(nbrAtomType)->pbci));
3647       nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3648       sumFormalCharge += nbrFormalCharge;
3649     }
3650     // we compute ipso partial charge according to
3651     // equation 15, page 622 MMFF.V paper
3652     pChg = (1.0 - M * v) * q0 + v * sumFormalCharge + sumPartialCharge;
3653     this->setMMFFPartialCharge(atom->getIdx(), pChg);
3654   }
3655 }
3656 
getMMFFBondStretchParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,unsigned int & bondType,MMFFBond & mmffBondStretchParams)3657 bool MMFFMolProperties::getMMFFBondStretchParams(
3658     const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3659     unsigned int &bondType, MMFFBond &mmffBondStretchParams) {
3660   const MMFFBondCollection *mmffBond = DefaultParameters::getMMFFBond();
3661   bool res = false;
3662   if (isValid()) {
3663     unsigned int iAtomType = getMMFFAtomType(idx1);
3664     unsigned int jAtomType = getMMFFAtomType(idx2);
3665     const Bond *bond = mol.getBondBetweenAtoms(idx1, idx2);
3666     if (bond) {
3667       bondType = getMMFFBondType(bond);
3668       bool areMMFFBondParamsEmpirical = false;
3669       const MMFFBond *mmffBondParams =
3670           (*mmffBond)(bondType, iAtomType, jAtomType);
3671       if (!mmffBondParams) {
3672         mmffBondParams = getMMFFBondStretchEmpiricalRuleParams(mol, bond);
3673         areMMFFBondParamsEmpirical = true;
3674       }
3675       if (mmffBondParams) {
3676         mmffBondStretchParams = *mmffBondParams;
3677         if (areMMFFBondParamsEmpirical) {
3678           delete mmffBondParams;
3679         }
3680         res = true;
3681       }
3682     }
3683   }
3684   return res;
3685 }
3686 
getMMFFAngleBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,unsigned int & angleType,MMFFAngle & mmffAngleBendParams)3687 bool MMFFMolProperties::getMMFFAngleBendParams(const ROMol &mol,
3688                                                const unsigned int idx1,
3689                                                const unsigned int idx2,
3690                                                const unsigned int idx3,
3691                                                unsigned int &angleType,
3692                                                MMFFAngle &mmffAngleBendParams) {
3693   bool res = false;
3694   if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3695       mol.getBondBetweenAtoms(idx2, idx3)) {
3696     const MMFFAngleCollection *mmffAngle = DefaultParameters::getMMFFAngle();
3697     const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3698     unsigned int idx[3] = {idx1, idx2, idx3};
3699     MMFFBond mmffBondParams[2];
3700     unsigned int atomType[3];
3701     unsigned int i;
3702     angleType = getMMFFAngleType(mol, idx1, idx2, idx3);
3703     bool areMMFFAngleParamsEmpirical = false;
3704     for (i = 0; i < 3; ++i) {
3705       atomType[i] = getMMFFAtomType(idx[i]);
3706     }
3707     const MMFFAngle *mmffAngleParams =
3708         (*mmffAngle)(DefaultParameters::getMMFFDef(), angleType, atomType[0],
3709                      atomType[1], atomType[2]);
3710     const MMFFProp *mmffPropParamsCentralAtom = (*mmffProp)(atomType[1]);
3711     if ((!mmffAngleParams) || (isDoubleZero(mmffAngleParams->ka))) {
3712       areMMFFAngleParamsEmpirical = true;
3713       for (i = 0; areMMFFAngleParamsEmpirical && (i < 2); ++i) {
3714         unsigned int bondType;
3715         areMMFFAngleParamsEmpirical = getMMFFBondStretchParams(
3716             mol, idx[i], idx[i + 1], bondType, mmffBondParams[i]);
3717       }
3718       if (areMMFFAngleParamsEmpirical) {
3719         mmffAngleParams = getMMFFAngleBendEmpiricalRuleParams(
3720             mol, mmffAngleParams, mmffPropParamsCentralAtom, &mmffBondParams[0],
3721             &mmffBondParams[1], idx[0], idx[1], idx[2]);
3722       }
3723     }
3724     if (mmffAngleParams) {
3725       mmffAngleBendParams = *mmffAngleParams;
3726       res = true;
3727       if (areMMFFAngleParamsEmpirical) {
3728         delete mmffAngleParams;
3729       }
3730     }
3731   }
3732   return res;
3733 }
3734 
getMMFFStretchBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,unsigned int & stretchBendType,MMFFStbn & mmffStretchBendParams,MMFFBond mmffBondStretchParams[2],MMFFAngle & mmffAngleBendParams)3735 bool MMFFMolProperties::getMMFFStretchBendParams(
3736     const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3737     const unsigned int idx3, unsigned int &stretchBendType,
3738     MMFFStbn &mmffStretchBendParams, MMFFBond mmffBondStretchParams[2],
3739     MMFFAngle &mmffAngleBendParams) {
3740   bool res = false;
3741   if (isValid()) {
3742     const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3743     const MMFFStbnCollection *mmffStbn = DefaultParameters::getMMFFStbn();
3744     const MMFFDfsbCollection *mmffDfsb = DefaultParameters::getMMFFDfsb();
3745     unsigned int idx[3] = {idx1, idx2, idx3};
3746     unsigned int atomType[3];
3747     unsigned int bondType[2];
3748     unsigned int angleType;
3749     const MMFFProp *mmffPropParamsCentralAtom =
3750         (*mmffProp)(getMMFFAtomType(idx[1]));
3751     if (!(mmffPropParamsCentralAtom->linh)) {
3752       res = true;
3753       unsigned int i = 0;
3754       for (i = 0; i < 3; ++i) {
3755         atomType[i] = getMMFFAtomType(idx[i]);
3756       }
3757       for (i = 0; res && (i < 2); ++i) {
3758         res = getMMFFBondStretchParams(mol, idx[i], idx[i + 1], bondType[i],
3759                                        mmffBondStretchParams[i]);
3760       }
3761       if (res) {
3762         res = getMMFFAngleBendParams(mol, idx1, idx2, idx3, angleType,
3763                                      mmffAngleBendParams);
3764       }
3765       std::pair<bool, const MMFFStbn *> mmffStbnParams;
3766       if (res) {
3767         stretchBendType = getMMFFStretchBendType(
3768             angleType, (atomType[0] <= atomType[2]) ? bondType[0] : bondType[1],
3769             (atomType[0] < atomType[2]) ? bondType[1] : bondType[0]);
3770         mmffStbnParams = mmffStbn->getMMFFStbnParams(
3771             stretchBendType, bondType[0], bondType[1], atomType[0], atomType[1],
3772             atomType[2]);
3773         if (!(mmffStbnParams.second)) {
3774           mmffStbnParams = mmffDfsb->getMMFFDfsbParams(
3775               getPeriodicTableRow(mol.getAtomWithIdx(idx1)->getAtomicNum()),
3776               getPeriodicTableRow(mol.getAtomWithIdx(idx2)->getAtomicNum()),
3777               getPeriodicTableRow(mol.getAtomWithIdx(idx3)->getAtomicNum()));
3778         }
3779         res = (!(isDoubleZero((mmffStbnParams.second)->kbaIJK) &&
3780                  isDoubleZero((mmffStbnParams.second)->kbaKJI)));
3781       }
3782       if (res) {
3783         if (mmffStbnParams.first) {
3784           mmffStretchBendParams.kbaIJK = (mmffStbnParams.second)->kbaKJI;
3785           mmffStretchBendParams.kbaKJI = (mmffStbnParams.second)->kbaIJK;
3786         } else {
3787           mmffStretchBendParams = *(mmffStbnParams.second);
3788         }
3789       }
3790     }
3791   }
3792   return res;
3793 }
3794 
getMMFFTorsionParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4,unsigned int & torsionType,MMFFTor & mmffTorsionParams)3795 bool MMFFMolProperties::getMMFFTorsionParams(
3796     const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3797     const unsigned int idx3, const unsigned int idx4, unsigned int &torsionType,
3798     MMFFTor &mmffTorsionParams) {
3799   bool res = false;
3800   if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3801       mol.getBondBetweenAtoms(idx2, idx3) &&
3802       mol.getBondBetweenAtoms(idx3, idx4)) {
3803     unsigned int i;
3804     unsigned int idx[4] = {idx1, idx2, idx3, idx4};
3805     unsigned int atomType[4];
3806     const MMFFTorCollection *mmffTor =
3807         DefaultParameters::getMMFFTor(getMMFFVariant() == "MMFF94s");
3808     for (i = 0; i < 4; ++i) {
3809       atomType[i] = getMMFFAtomType(idx[i]);
3810     }
3811     const std::pair<unsigned int, unsigned int> torTypePair =
3812         getMMFFTorsionType(mol, idx1, idx2, idx3, idx4);
3813     bool areMMFFTorParamsEmpirical = false;
3814     const std::pair<const unsigned int, const MMFFTor *> mmffTorPair =
3815         mmffTor->getMMFFTorParams(DefaultParameters::getMMFFDef(), torTypePair,
3816                                   atomType[0], atomType[1], atomType[2],
3817                                   atomType[3]);
3818     torsionType = (mmffTorPair.first ? mmffTorPair.first : torTypePair.first);
3819     const MMFFTor *mmffTorParams = mmffTorPair.second;
3820     if (!mmffTorParams) {
3821       torsionType = torTypePair.first;
3822       mmffTorParams = getMMFFTorsionEmpiricalRuleParams(mol, idx2, idx3);
3823       areMMFFTorParamsEmpirical = true;
3824     }
3825     res =
3826         (!(isDoubleZero(mmffTorParams->V1) && isDoubleZero(mmffTorParams->V2) &&
3827            isDoubleZero(mmffTorParams->V3)));
3828     if (res) {
3829       mmffTorsionParams = *mmffTorParams;
3830     }
3831     if (areMMFFTorParamsEmpirical) {
3832       delete mmffTorParams;
3833     }
3834   }
3835   return res;
3836 }
3837 
getMMFFOopBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4,MMFFOop & mmffOopBendParams)3838 bool MMFFMolProperties::getMMFFOopBendParams(const ROMol &mol,
3839                                              const unsigned int idx1,
3840                                              const unsigned int idx2,
3841                                              const unsigned int idx3,
3842                                              const unsigned int idx4,
3843                                              MMFFOop &mmffOopBendParams) {
3844   bool res = false;
3845   if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3846       mol.getBondBetweenAtoms(idx2, idx3) &&
3847       mol.getBondBetweenAtoms(idx2, idx4)) {
3848     unsigned int i;
3849     unsigned int idx[4] = {idx1, idx2, idx3, idx4};
3850     unsigned int atomType[4];
3851 
3852     const MMFFOopCollection *mmffOop =
3853         DefaultParameters::getMMFFOop(getMMFFVariant() == "MMFF94s");
3854     for (i = 0; i < 4; ++i) {
3855       atomType[i] = getMMFFAtomType(idx[i]);
3856     }
3857     const MMFFOop *mmffOopParams =
3858         (*mmffOop)(DefaultParameters::getMMFFDef(), atomType[0], atomType[1],
3859                    atomType[2], atomType[3]);
3860     // if no parameters could be found, we exclude this term (SURDOX02)
3861     if (mmffOopParams) {
3862       mmffOopBendParams = *mmffOopParams;
3863       res = true;
3864     }
3865   }
3866   return res;
3867 }
3868 
getMMFFVdWParams(const unsigned int idx1,const unsigned int idx2,MMFFVdWRijstarEps & mmffVdWParams)3869 bool MMFFMolProperties::getMMFFVdWParams(const unsigned int idx1,
3870                                          const unsigned int idx2,
3871                                          MMFFVdWRijstarEps &mmffVdWParams) {
3872   bool res = false;
3873   if (isValid()) {
3874     const MMFFVdWCollection *mmffVdW = DefaultParameters::getMMFFVdW();
3875     const unsigned int iAtomType = getMMFFAtomType(idx1);
3876     const unsigned int jAtomType = getMMFFAtomType(idx2);
3877     const MMFFVdW *mmffVdWParamsIAtom = (*mmffVdW)(iAtomType);
3878     const MMFFVdW *mmffVdWParamsJAtom = (*mmffVdW)(jAtomType);
3879     if (mmffVdWParamsIAtom && mmffVdWParamsJAtom) {
3880       mmffVdWParams.R_ij_starUnscaled = MMFF::Utils::calcUnscaledVdWMinimum(
3881           mmffVdW, mmffVdWParamsIAtom, mmffVdWParamsJAtom);
3882       mmffVdWParams.epsilonUnscaled = MMFF::Utils::calcUnscaledVdWWellDepth(
3883           mmffVdWParams.R_ij_starUnscaled, mmffVdWParamsIAtom,
3884           mmffVdWParamsJAtom);
3885       mmffVdWParams.R_ij_star = mmffVdWParams.R_ij_starUnscaled;
3886       mmffVdWParams.epsilon = mmffVdWParams.epsilonUnscaled;
3887       MMFF::Utils::scaleVdWParams(mmffVdWParams.R_ij_star,
3888                                   mmffVdWParams.epsilon, mmffVdW,
3889                                   mmffVdWParamsIAtom, mmffVdWParamsJAtom);
3890       res = true;
3891     }
3892   }
3893   return res;
3894 }
3895 }  // namespace MMFF
3896 }  // namespace RDKit
3897