1 //
2 // Copyright (C) 2013-2021 Paolo Tosco
3 //
4 // Copyright (C) 2004-2006 Rational Discovery LLC
5 //
6 // @@ All Rights Reserved @@
7 // This file is part of the RDKit.
8 // The contents are covered by the terms of the BSD license
9 // which is included in the file license.txt, found at the root
10 // of the RDKit source tree.
11 //
12 #include <GraphMol/RDKitBase.h>
13 #include <GraphMol/MolOps.h>
14 #include <ForceField/MMFF/Nonbonded.h>
15 #include <RDGeneral/Invariant.h>
16 #include <RDGeneral/RDLog.h>
17 #include <boost/dynamic_bitset.hpp>
18
19 #include <GraphMol/QueryOps.h>
20 #include "AtomTyper.h"
21 #include <cstdarg>
22
23 namespace RDKit {
24 namespace MMFF {
25 using namespace ForceFields::MMFF;
26 namespace DefaultParameters {
27
getMMFFProp()28 const MMFFPropCollection *getMMFFProp() {
29 static MMFFPropCollection ds_instance("");
30 return &ds_instance;
31 }
32
getMMFFArom()33 const MMFFAromCollection *getMMFFArom() {
34 static MMFFAromCollection ds_instance;
35 return &ds_instance;
36 }
37
getMMFFBndk()38 const MMFFBndkCollection *getMMFFBndk() {
39 static MMFFBndkCollection ds_instance;
40 return &ds_instance;
41 }
42
getMMFFBond()43 const MMFFBondCollection *getMMFFBond() {
44 static MMFFBondCollection ds_instance;
45 return &ds_instance;
46 }
47
getMMFFChg()48 const MMFFChgCollection *getMMFFChg() {
49 static MMFFChgCollection ds_instance;
50 return &ds_instance;
51 }
52
getMMFFDef()53 const MMFFDefCollection *getMMFFDef() {
54 static MMFFDefCollection ds_instance;
55 return &ds_instance;
56 }
57
getMMFFHerschbachLaurie()58 const MMFFHerschbachLaurieCollection *getMMFFHerschbachLaurie() {
59 static MMFFHerschbachLaurieCollection ds_instance;
60 return &ds_instance;
61 }
62
getMMFFPBCI()63 const MMFFPBCICollection *getMMFFPBCI() {
64 static MMFFPBCICollection ds_instance;
65 return &ds_instance;
66 }
67
getMMFFAngle()68 const MMFFAngleCollection *getMMFFAngle() {
69 static MMFFAngleCollection ds_instance;
70 return &ds_instance;
71 }
72
getMMFFStbn()73 const MMFFStbnCollection *getMMFFStbn() {
74 static MMFFStbnCollection ds_instance;
75 return &ds_instance;
76 }
77
getMMFFDfsb()78 const MMFFDfsbCollection *getMMFFDfsb() {
79 static MMFFDfsbCollection ds_instance;
80 return &ds_instance;
81 }
82
getMMFFCovRadPauEle()83 const MMFFCovRadPauEleCollection *getMMFFCovRadPauEle() {
84 static MMFFCovRadPauEleCollection ds_instance;
85 return &ds_instance;
86 }
87
getMMFFTor(const bool isMMFFs)88 const MMFFTorCollection *getMMFFTor(const bool isMMFFs) {
89 static MMFFTorCollection MMFF94(false, "");
90 static MMFFTorCollection MMFF94s(true, "");
91 return (isMMFFs) ? &MMFF94s : &MMFF94;
92 }
93
getMMFFOop(const bool isMMFFs)94 const MMFFOopCollection *getMMFFOop(const bool isMMFFs) {
95 static MMFFOopCollection MMFF94(false, "");
96 static MMFFOopCollection MMFF94s(true, "");
97 return (isMMFFs) ? &MMFF94s : &MMFF94;
98 }
99
getMMFFVdW()100 const MMFFVdWCollection *getMMFFVdW() {
101 static MMFFVdWCollection ds_instance;
102 return &ds_instance;
103 }
104
105 } // namespace DefaultParameters
106 class RingMembership {
107 public:
RingMembership()108 RingMembership(){};
getIsInAromaticRing() const109 bool getIsInAromaticRing() const { return d_isInAromaticRing; }
setIsInAromaticRing(bool isInAromaticRing)110 void setIsInAromaticRing(bool isInAromaticRing) {
111 d_isInAromaticRing = isInAromaticRing;
112 }
getRingIdxSet() const113 const std::set<std::uint32_t> &getRingIdxSet() const { return d_ringIdxSet; }
getRingIdxSet()114 std::set<std::uint32_t> &getRingIdxSet() { return d_ringIdxSet; }
115
116 private:
117 bool d_isInAromaticRing{false};
118 std::set<std::uint32_t> d_ringIdxSet;
119 };
120
121 class RingMembershipSize {
122 typedef std::map<unsigned int, RingMembership> RingMembershipMap;
123 typedef std::map<unsigned int, RingMembershipMap> RingSizeMembershipMap;
124
125 public:
126 static const std::uint32_t IS_AROMATIC_BIT;
127 RingMembershipSize(const ROMol &mol);
128 bool isAtomInAromaticRingOfSize(const Atom *atom,
129 const unsigned int ringSize) const;
130 bool areAtomsInSameAromaticRing(const Atom *atom1, const Atom *atom2) const;
131 bool areAtomsInSameRingOfSize(const unsigned int ringSize,
132 const unsigned int numAtoms, ...) const;
133
134 private:
135 RingSizeMembershipMap d_ringSizeMembershipMap;
136 };
137
138 const std::uint32_t RingMembershipSize::IS_AROMATIC_BIT = (1 << 31);
139
RingMembershipSize(const ROMol & mol)140 RingMembershipSize::RingMembershipSize(const ROMol &mol) {
141 static const unsigned int MAX_NUM_RINGS = (0xFFFFFFFF >> 1);
142 const RingInfo *ringInfo = mol.getRingInfo();
143 const VECT_INT_VECT &atomRings = ringInfo->atomRings();
144 PRECONDITION(atomRings.size() < MAX_NUM_RINGS, "Too many rings");
145 for (std::uint32_t ringIdx = 0; ringIdx < atomRings.size(); ++ringIdx) {
146 unsigned int ringSize = atomRings[ringIdx].size();
147 std::uint32_t ringIdxWithAromaticFlag = ringIdx;
148 bool ringIsAromatic = isRingAromatic(mol, atomRings[ringIdx]);
149 if (ringIsAromatic) {
150 ringIdxWithAromaticFlag |= IS_AROMATIC_BIT;
151 }
152 auto it = d_ringSizeMembershipMap.find(ringSize);
153 if (it == d_ringSizeMembershipMap.end()) {
154 it = d_ringSizeMembershipMap
155 .insert(std::make_pair(ringSize, RingMembershipMap()))
156 .first;
157 }
158 for (int atomIdxIt : atomRings[ringIdx]) {
159 auto it2 = it->second.find(atomIdxIt);
160 if (it2 == it->second.end()) {
161 it2 = it->second.insert(std::make_pair(atomIdxIt, RingMembership()))
162 .first;
163 }
164 it2->second.getRingIdxSet().insert(ringIdxWithAromaticFlag);
165 if (ringIsAromatic) {
166 it2->second.setIsInAromaticRing(true);
167 }
168 }
169 }
170 }
171
172 // returns true if the atom is in an aromatic ring of size ringSize
isAtomInAromaticRingOfSize(const Atom * atom,const unsigned int ringSize) const173 bool RingMembershipSize::isAtomInAromaticRingOfSize(
174 const Atom *atom, const unsigned int ringSize) const {
175 auto it = d_ringSizeMembershipMap.find(ringSize);
176 bool isAromatic = (it != d_ringSizeMembershipMap.end());
177 if (isAromatic) {
178 auto it2 = it->second.find(atom->getIdx());
179 isAromatic = (it2 != it->second.end());
180 if (isAromatic) {
181 isAromatic = it2->second.getIsInAromaticRing();
182 }
183 }
184
185 return isAromatic;
186 }
187
areAtomsInSameAromaticRing(const Atom * atom1,const Atom * atom2) const188 bool RingMembershipSize::areAtomsInSameAromaticRing(const Atom *atom1,
189 const Atom *atom2) const {
190 bool areInSameAromaticRing = false;
191
192 for (auto it = d_ringSizeMembershipMap.begin();
193 !areInSameAromaticRing && (it != d_ringSizeMembershipMap.end()); ++it) {
194 std::vector<std::uint32_t> intersectVect;
195 auto it1 = it->second.find(atom1->getIdx());
196 auto it2 = it->second.find(atom2->getIdx());
197 if ((it1 != it->second.end()) && (it2 != it->second.end())) {
198 std::set_intersection(it1->second.getRingIdxSet().begin(),
199 it1->second.getRingIdxSet().end(),
200 it2->second.getRingIdxSet().begin(),
201 it2->second.getRingIdxSet().end(),
202 std::back_inserter(intersectVect));
203 for (std::vector<std::uint32_t>::const_iterator ivIt =
204 intersectVect.begin();
205 !areInSameAromaticRing && (ivIt != intersectVect.end()); ++ivIt) {
206 areInSameAromaticRing = *ivIt & IS_AROMATIC_BIT;
207 }
208 }
209 }
210
211 return areInSameAromaticRing;
212 }
213
areAtomsInSameRingOfSize(const unsigned int ringSize,const unsigned int numAtoms,...) const214 bool RingMembershipSize::areAtomsInSameRingOfSize(const unsigned int ringSize,
215 const unsigned int numAtoms,
216 ...) const {
217 va_list atoms;
218 bool areInSameRingOfSize = false;
219
220 auto it = d_ringSizeMembershipMap.find(ringSize);
221 if (it != d_ringSizeMembershipMap.end()) {
222 va_start(atoms, numAtoms);
223 unsigned int idx1 = va_arg(atoms, const Atom *)->getIdx();
224 auto it1 = it->second.find(idx1);
225 if (it1 != it->second.end()) {
226 std::set<std::uint32_t> commonSet = it1->second.getRingIdxSet();
227 for (unsigned int i = 1; !commonSet.empty() && (i < numAtoms); ++i) {
228 areInSameRingOfSize = false;
229 unsigned int idx2 = va_arg(atoms, const Atom *)->getIdx();
230 auto it2 = it->second.find(idx2);
231 if (it2 == it->second.end()) {
232 break;
233 }
234 std::set<std::uint32_t> intersect;
235 std::set_intersection(commonSet.begin(), commonSet.end(),
236 it2->second.getRingIdxSet().begin(),
237 it2->second.getRingIdxSet().end(),
238 std::inserter(intersect, intersect.end()));
239 commonSet = intersect;
240 areInSameRingOfSize = !commonSet.empty();
241 }
242 }
243 va_end(atoms);
244 }
245
246 return areInSameRingOfSize;
247 }
248
249 // given the atomic num, this function returns the periodic
250 // table row number, starting from 0 for hydrogen
getPeriodicTableRow(const int atomicNum)251 unsigned int getPeriodicTableRow(const int atomicNum) {
252 unsigned int periodicTableRow = 0;
253
254 if ((atomicNum >= 3) && (atomicNum <= 10)) {
255 periodicTableRow = 1;
256 } else if ((atomicNum >= 11) && (atomicNum <= 18)) {
257 periodicTableRow = 2;
258 } else if ((atomicNum >= 19) && (atomicNum <= 36)) {
259 periodicTableRow = 3;
260 } else if ((atomicNum >= 37) && (atomicNum <= 54)) {
261 periodicTableRow = 4;
262 }
263
264 return periodicTableRow;
265 }
266
267 // given the atomic num, this function returns the periodic
268 // table row number, starting from 1 for helium
269 // Hydrogen has a special row number (0), while transition
270 // metals have the row number multiplied by 10
getPeriodicTableRowHL(const int atomicNum)271 unsigned int getPeriodicTableRowHL(const int atomicNum) {
272 unsigned int periodicTableRow = 0;
273
274 if (atomicNum == 2) {
275 periodicTableRow = 1;
276 } else if ((atomicNum >= 3) && (atomicNum <= 10)) {
277 periodicTableRow = 2;
278 } else if ((atomicNum >= 11) && (atomicNum <= 18)) {
279 periodicTableRow = 3;
280 } else if ((atomicNum >= 19) && (atomicNum <= 36)) {
281 periodicTableRow = 4;
282 } else if ((atomicNum >= 37) && (atomicNum <= 54)) {
283 periodicTableRow = 5;
284 }
285 if (((atomicNum >= 21) && (atomicNum <= 30)) ||
286 ((atomicNum >= 39) && (atomicNum <= 48))) {
287 periodicTableRow *= 10;
288 }
289
290 return periodicTableRow;
291 }
292
293 // given the MMFF atom type, this function returns true
294 // if it is aromatic
isAromaticAtomType(const unsigned int atomType)295 bool isAromaticAtomType(const unsigned int atomType) {
296 static const unsigned int aromatic_array[] = {
297 37, 38, 39, 44, 58, 59, 63, 64, 65, 66, 69, 76, 78, 79, 80, 81, 82};
298 const std::set<unsigned int> aromaticTypes(
299 aromatic_array,
300 aromatic_array + sizeof(aromatic_array) / sizeof(aromatic_array[0]));
301
302 return (aromaticTypes.find(atomType) != aromaticTypes.end());
303 }
304
isRingAromatic(const ROMol & mol,const INT_VECT & ringIndxVect)305 bool isRingAromatic(const ROMol &mol, const INT_VECT &ringIndxVect) {
306 bool isAromatic = true;
307 for (unsigned int i = 0; isAromatic && (i < ringIndxVect.size() - 1); ++i) {
308 isAromatic = (mol.getBondBetweenAtoms(ringIndxVect[i], ringIndxVect[i + 1])
309 ->getBondType() == Bond::AROMATIC);
310 }
311 return isAromatic;
312 }
313
314 // returns true if the atom is in a ring of size ringSize
isAtomInAromaticRingOfSize(const Atom * atom,const unsigned int ringSize)315 bool isAtomInAromaticRingOfSize(const Atom *atom, const unsigned int ringSize) {
316 bool isAromatic = false;
317 const ROMol &mol = atom->getOwningMol();
318 const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
319
320 if (atom->getIsAromatic()) {
321 for (unsigned int i = 0; (!isAromatic) && (i < atomRings.size()); ++i) {
322 if ((atomRings[i].size() != ringSize) ||
323 (std::find(atomRings[i].begin(), atomRings[i].end(),
324 atom->getIdx()) == atomRings[i].end())) {
325 continue;
326 }
327 isAromatic = isRingAromatic(mol, atomRings[i]);
328 }
329 }
330
331 return isAromatic;
332 }
333
334 // returns true if the atom is an N-oxide
isAtomNOxide(const Atom * atom)335 bool isAtomNOxide(const Atom *atom) {
336 bool isNOxide = false;
337 const ROMol &mol = atom->getOwningMol();
338 ROMol::ADJ_ITER nbrIdx;
339 ROMol::ADJ_ITER endNbrs;
340
341 if ((atom->getAtomicNum() == 7) && (atom->getTotalDegree() >= 3)) {
342 // loop over neighbors
343 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
344 for (; (!isNOxide) && (nbrIdx != endNbrs); ++nbrIdx) {
345 const Atom *nbrAtom = mol[*nbrIdx];
346 isNOxide =
347 ((nbrAtom->getAtomicNum() == 8) && (nbrAtom->getTotalDegree() == 1));
348 }
349 }
350
351 return isNOxide;
352 }
353
354 // if the angle formed by atoms with indexes idx1, idx2, idx3
355 // is in a ring of {3,4} atoms returns 3 or 4, respectively;
356 // otherwise it returns 0
isAngleInRingOfSize3or4(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3)357 unsigned int isAngleInRingOfSize3or4(const ROMol &mol, const unsigned int idx1,
358 const unsigned int idx2,
359 const unsigned int idx3) {
360 unsigned int ringSize = 0;
361
362 if (mol.getBondBetweenAtoms(idx1, idx2) &&
363 mol.getBondBetweenAtoms(idx2, idx3)) {
364 if (mol.getBondBetweenAtoms(idx3, idx1)) {
365 ringSize = 3;
366 } else {
367 std::set<unsigned int> s1;
368 std::set<unsigned int> s2;
369 std::vector<int> intersect;
370 ROMol::ADJ_ITER nbrIdx;
371 ROMol::ADJ_ITER endNbrs;
372 unsigned int newIdx;
373 boost::tie(nbrIdx, endNbrs) =
374 mol.getAtomNeighbors(mol.getAtomWithIdx(idx1));
375 for (; nbrIdx != endNbrs; ++nbrIdx) {
376 newIdx = mol[*nbrIdx]->getIdx();
377 if (newIdx != idx2) {
378 s1.insert(newIdx);
379 }
380 }
381 boost::tie(nbrIdx, endNbrs) =
382 mol.getAtomNeighbors(mol.getAtomWithIdx(idx3));
383 for (; nbrIdx != endNbrs; ++nbrIdx) {
384 newIdx = mol[*nbrIdx]->getIdx();
385 if (newIdx != idx2) {
386 s2.insert(newIdx);
387 }
388 }
389 std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
390 std::back_inserter(intersect));
391 if (intersect.size()) {
392 ringSize = 4;
393 }
394 }
395 }
396
397 return ringSize;
398 }
399
400 // if the dihedral angle formed by atoms with indexes idx1,
401 // idx2, idx3, idx4 is in a ring of {4,5} atoms returns 4 or 5,
402 // respectively; otherwise it returns 0
isTorsionInRingOfSize4or5(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4)403 unsigned int isTorsionInRingOfSize4or5(const ROMol &mol,
404 const unsigned int idx1,
405 const unsigned int idx2,
406 const unsigned int idx3,
407 const unsigned int idx4) {
408 unsigned int ringSize = 0;
409
410 if (mol.getBondBetweenAtoms(idx1, idx2) &&
411 mol.getBondBetweenAtoms(idx2, idx3) &&
412 mol.getBondBetweenAtoms(idx3, idx4)) {
413 if (mol.getBondBetweenAtoms(idx4, idx1)) {
414 ringSize = 4;
415 } else {
416 std::set<unsigned int> s1;
417 std::set<unsigned int> s2;
418 std::vector<int> intersect;
419 ROMol::ADJ_ITER nbrIdx;
420 ROMol::ADJ_ITER endNbrs;
421 unsigned int newIdx;
422 boost::tie(nbrIdx, endNbrs) =
423 mol.getAtomNeighbors(mol.getAtomWithIdx(idx1));
424 for (; nbrIdx != endNbrs; ++nbrIdx) {
425 newIdx = mol[*nbrIdx]->getIdx();
426 if (newIdx != idx2) {
427 s1.insert(newIdx);
428 }
429 }
430 boost::tie(nbrIdx, endNbrs) =
431 mol.getAtomNeighbors(mol.getAtomWithIdx(idx4));
432 for (; nbrIdx != endNbrs; ++nbrIdx) {
433 newIdx = mol[*nbrIdx]->getIdx();
434 if (newIdx != idx3) {
435 s2.insert(newIdx);
436 }
437 }
438 std::set_intersection(s1.begin(), s1.end(), s2.begin(), s2.end(),
439 std::back_inserter(intersect));
440 if (intersect.size()) {
441 ringSize = 5;
442 }
443 }
444 }
445
446 return ringSize;
447 }
448
449 // return true if atoms are in the same ring of size ringSize
areAtomsInSameRingOfSize(const ROMol & mol,const unsigned int ringSize,const unsigned int numAtoms,...)450 bool areAtomsInSameRingOfSize(const ROMol &mol, const unsigned int ringSize,
451 const unsigned int numAtoms, ...) {
452 unsigned int i;
453 bool areInSameRingOfSize = false;
454 const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
455 unsigned int idx;
456 va_list atomIdxs;
457
458 for (i = 0; (!areInSameRingOfSize) && (i < atomRings.size()); ++i) {
459 if (atomRings[i].size() != ringSize) {
460 continue;
461 }
462 areInSameRingOfSize = true;
463 va_start(atomIdxs, numAtoms);
464 for (unsigned int j = 0; areInSameRingOfSize && (j < numAtoms); ++j) {
465 idx = va_arg(atomIdxs, unsigned int);
466 areInSameRingOfSize = (std::find(atomRings[i].begin(), atomRings[i].end(),
467 idx) != atomRings[i].end());
468 }
469 va_end(atomIdxs);
470 }
471
472 return areInSameRingOfSize;
473 }
474
475 // return true if atoms are in the same aromatic ring
areAtomsInSameAromaticRing(const ROMol & mol,const unsigned int idx1,const unsigned int idx2)476 bool areAtomsInSameAromaticRing(const ROMol &mol, const unsigned int idx1,
477 const unsigned int idx2) {
478 unsigned int i;
479 unsigned int j;
480 bool areInSameAromatic = false;
481 const VECT_INT_VECT &atomRings = mol.getRingInfo()->atomRings();
482
483 if (mol.getAtomWithIdx(idx1)->getIsAromatic() &&
484 mol.getAtomWithIdx(idx2)->getIsAromatic()) {
485 for (i = 0; (!areInSameAromatic) && (i < atomRings.size()); ++i) {
486 if ((std::find(atomRings[i].begin(), atomRings[i].end(), idx1) !=
487 atomRings[i].end()) &&
488 (std::find(atomRings[i].begin(), atomRings[i].end(), idx2) !=
489 atomRings[i].end())) {
490 areInSameAromatic = true;
491 for (j = 0; areInSameAromatic && (j < (atomRings[i].size() - 1)); ++j) {
492 areInSameAromatic =
493 (mol.getBondBetweenAtoms(atomRings[i][j], atomRings[i][j + 1])
494 ->getBondType() == Bond::AROMATIC);
495 }
496 }
497 }
498 }
499
500 return areInSameAromatic;
501 }
502
503 // sets the aromaticity flags according to MMFF
setMMFFAromaticity(RWMol & mol)504 void setMMFFAromaticity(RWMol &mol) {
505 bool moveToNextRing = false;
506 bool isNOSinRing = false;
507 bool aromRingsAllSet = false;
508 bool exoDoubleBond = false;
509 bool canBeAromatic = false;
510 unsigned int i;
511 unsigned int j;
512 unsigned int nextInRing;
513 unsigned int pi_e = 0;
514 int nAromSet = 0;
515 int old_nAromSet = -1;
516 RingInfo *ringInfo = mol.getRingInfo();
517 Atom *atom;
518 Bond *bond;
519 const VECT_INT_VECT &atomRings = ringInfo->atomRings();
520 ROMol::ADJ_ITER nbrIdx;
521 ROMol::ADJ_ITER endNbrs;
522 boost::dynamic_bitset<> aromBitVect(mol.getNumAtoms());
523 boost::dynamic_bitset<> aromRingBitVect(atomRings.size());
524
525 while ((!aromRingsAllSet) && atomRings.size() && (nAromSet > old_nAromSet)) {
526 // loop over all rings
527 for (i = 0; i < atomRings.size(); ++i) {
528 // add 2 pi electrons for each double bond in the ring
529 for (j = 0, pi_e = 0, moveToNextRing = false, isNOSinRing = false,
530 exoDoubleBond = false;
531 (!moveToNextRing) && (j < atomRings[i].size()); ++j) {
532 atom = mol.getAtomWithIdx(atomRings[i][j]);
533 // remember if this atom is nitrogen, oxygen or divalent sulfur
534 if ((atom->getAtomicNum() == 7) || (atom->getAtomicNum() == 8) ||
535 ((atom->getAtomicNum() == 16) && (atom->getDegree() == 2))) {
536 isNOSinRing = true;
537 }
538 // check whether this atom is double-bonded to next one in the ring
539 nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
540 : atomRings[i][j + 1];
541 if (mol.getBondBetweenAtoms(atomRings[i][j], nextInRing)
542 ->getBondType() == Bond::DOUBLE) {
543 pi_e += 2;
544 }
545 // if this is not a double bond, check whether this is carbon
546 // or nitrogen with total bond order = 4
547 else {
548 atom = mol.getAtomWithIdx(atomRings[i][j]);
549 // if not, move on
550 if ((atom->getAtomicNum() != 6) &&
551 (!((atom->getAtomicNum() == 7) &&
552 ((atom->getExplicitValence() + atom->getNumImplicitHs()) ==
553 4)))) {
554 continue;
555 }
556 // loop over neighbors
557 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
558 for (; nbrIdx != endNbrs; ++nbrIdx) {
559 const Atom *nbrAtom = mol[*nbrIdx];
560 // if the neighbor is one of the ring atoms, skip it
561 // since we are looking for exocyclic neighbors
562 if (std::find(atomRings[i].begin(), atomRings[i].end(),
563 nbrAtom->getIdx()) != atomRings[i].end()) {
564 continue;
565 }
566 // it the neighbor is single-bonded, skip it
567 if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
568 ->getBondType() == Bond::SINGLE) {
569 continue;
570 }
571 // if the neighbor is in a ring and its aromaticity
572 // bit has not yet been set, then move to the next ring
573 // we'll take care of this later
574 if (queryIsAtomInRing(nbrAtom) &&
575 (!(aromBitVect[nbrAtom->getIdx()]))) {
576 moveToNextRing = true;
577 break;
578 }
579 // if the neighbor is in an aromatic ring and is
580 // double-bonded to the current atom, add 1 pi electron
581 if (mol.getBondBetweenAtoms(atomRings[i][j], nbrAtom->getIdx())
582 ->getBondType() == Bond::DOUBLE) {
583 if (nbrAtom->getIsAromatic()) {
584 ++pi_e;
585 } else {
586 exoDoubleBond = true;
587 }
588 }
589 }
590 }
591 }
592 // if we quit the loop at an early stage because aromaticity
593 // had not yet been set, then move to the next ring
594 if (moveToNextRing) {
595 continue;
596 }
597 // loop again over all ring atoms
598 for (j = 0, canBeAromatic = true; j < atomRings[i].size(); ++j) {
599 // set aromaticity as perceived
600 aromBitVect[atomRings[i][j]] = 1;
601 atom = mol.getAtomWithIdx(atomRings[i][j]);
602 // if this is is a non-sp2 carbon or nitrogen
603 // then this ring can't be aromatic
604 if (((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) &&
605 (atom->getHybridization() != Atom::SP2)) {
606 canBeAromatic = false;
607 }
608 }
609 // if this ring can't be aromatic, move to the next one
610 if (!canBeAromatic) {
611 continue;
612 }
613 // if there is N, O, S; no exocyclic double bonds;
614 // the ring has an odd number of terms: add 2 pi electrons
615 if (isNOSinRing && (!exoDoubleBond) && (atomRings[i].size() % 2)) {
616 pi_e += 2;
617 }
618 // if this ring satisfies the 4n+2 rule,
619 // then mark its atoms as aromatic
620 if ((pi_e > 2) && (!((pi_e - 2) % 4))) {
621 aromRingBitVect[i] = 1;
622 for (j = 0; j < atomRings[i].size(); ++j) {
623 atom = mol.getAtomWithIdx(atomRings[i][j]);
624 atom->setIsAromatic(true);
625 }
626 }
627 }
628 // termination criterion: if we did not manage to set any more
629 // aromatic atoms compared to the previous iteration, then
630 // stop looping
631 old_nAromSet = nAromSet;
632 nAromSet = 0;
633 aromRingsAllSet = true;
634 for (i = 0; i < atomRings.size(); ++i) {
635 for (j = 0; j < atomRings[i].size(); ++j) {
636 if (aromBitVect[atomRings[i][j]]) {
637 ++nAromSet;
638 } else {
639 aromRingsAllSet = false;
640 }
641 }
642 }
643 }
644 for (i = 0; i < atomRings.size(); ++i) {
645 // if the ring is not aromatic, move to the next one
646 if (!aromRingBitVect[i]) {
647 continue;
648 }
649 for (j = 0; j < atomRings[i].size(); ++j) {
650 // mark all ring bonds as aromatic
651 nextInRing = (j == (atomRings[i].size() - 1)) ? atomRings[i][0]
652 : atomRings[i][j + 1];
653 bond = mol.getBondBetweenAtoms(atomRings[i][j], nextInRing);
654 bond->setBondType(Bond::AROMATIC);
655 bond->setIsAromatic(true);
656 }
657 }
658 for (i = 0; i < atomRings.size(); ++i) {
659 // if the ring is not aromatic, move to the next one
660 if (!aromRingBitVect[i]) {
661 continue;
662 }
663 for (j = 0; j < atomRings[i].size(); ++j) {
664 atom = mol.getAtomWithIdx(atomRings[i][j]);
665 if (atom->getAtomicNum() != 6) {
666 int iv = atom->calcImplicitValence(false);
667 atom->calcExplicitValence(false);
668 if (iv) {
669 atom->setNumExplicitHs(iv);
670 atom->calcImplicitValence(false);
671 }
672 }
673 }
674 }
675 }
676
677 // sets the MMFF atomType for a heavy atom
setMMFFHeavyAtomType(const RingMembershipSize & rmSize,const Atom * atom)678 void MMFFMolProperties::setMMFFHeavyAtomType(const RingMembershipSize &rmSize,
679 const Atom *atom) {
680 unsigned int atomType = 0;
681 unsigned int i;
682 unsigned int j;
683 unsigned int nTermObondedToN = 0;
684 bool alphaOrBetaInSameRing = false;
685 bool isAlphaOS = false;
686 bool isBetaOS = false;
687 bool isNSO2orNSO3orNCN = false;
688 const ROMol &mol = atom->getOwningMol();
689 RingInfo *ringInfo = mol.getRingInfo();
690 ROMol::ADJ_ITER nbrIdx;
691 ROMol::ADJ_ITER endNbrs;
692 ROMol::ADJ_ITER nbr2Idx;
693 ROMol::ADJ_ITER end2Nbrs;
694 ROMol::ADJ_ITER nbr3Idx;
695 ROMol::ADJ_ITER end3Nbrs;
696 std::vector<const Atom *> alphaHet;
697 std::vector<const Atom *> betaHet;
698
699 if (atom->getIsAromatic()) {
700 if (rmSize.isAtomInAromaticRingOfSize(atom, 5)) {
701 // 5-membered aromatic rings
702 // if ipso is carbon or nitrogen, find eventual alpha and beta heteroatoms
703 if ((atom->getAtomicNum() == 6) || (atom->getAtomicNum() == 7)) {
704 // loop over alpha neighbors
705 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
706 for (; nbrIdx != endNbrs; ++nbrIdx) {
707 const Atom *nbrAtom = mol[*nbrIdx];
708 // if the alpha neighbor is not in a 5-membered aromatic
709 // ring, skip to the next neighbor
710 if (!rmSize.isAtomInAromaticRingOfSize(nbrAtom, 5)) {
711 continue;
712 }
713 // if the alpha neighbor belongs to the same ring of ipso atom
714 // and it is either oxygen, sulfur, or non-N-oxide trivalent nitrogen,
715 // add it to the alpha atom vector
716 if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbrAtom) &&
717 ((nbrAtom->getAtomicNum() == 8) ||
718 (nbrAtom->getAtomicNum() == 16) ||
719 ((nbrAtom->getAtomicNum() == 7) &&
720 (nbrAtom->getTotalDegree() == 3) &&
721 (!isAtomNOxide(nbrAtom))))) {
722 alphaHet.push_back(nbrAtom);
723 }
724 // loop over beta neighbors
725 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
726 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
727 const Atom *nbr2Atom = mol[*nbr2Idx];
728 // if we have gone back to the ipso atom, move on
729 if (nbr2Atom->getIdx() == atom->getIdx()) {
730 continue;
731 }
732 // if the beta neighbor is not in a 5-membered aromatic
733 // ring, skip to the next neighbor
734 if (!rmSize.isAtomInAromaticRingOfSize(nbr2Atom, 5)) {
735 continue;
736 }
737 // if the beta neighbor belongs to the same ring of ipso atom
738 // and it is either oxygen, sulfur, or non-N-oxide trivalent
739 // nitrogen,
740 // add it to the beta atom vector
741 if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbr2Atom) &&
742 ((nbr2Atom->getAtomicNum() == 8) ||
743 (nbr2Atom->getAtomicNum() == 16) ||
744 ((nbr2Atom->getAtomicNum() == 7) &&
745 (nbr2Atom->getTotalDegree() == 3) &&
746 (!isAtomNOxide(nbr2Atom))))) {
747 betaHet.push_back(nbr2Atom);
748 }
749 }
750 }
751 isAlphaOS = false;
752 for (i = 0; (!isAlphaOS) && (i < alphaHet.size()); ++i) {
753 isAlphaOS = ((alphaHet[i]->getAtomicNum() == 8) ||
754 (alphaHet[i]->getAtomicNum() == 16));
755 }
756 isBetaOS = false;
757 for (i = 0; (!isBetaOS) && (i < betaHet.size()); ++i) {
758 isBetaOS = ((betaHet[i]->getAtomicNum() == 8) ||
759 (betaHet[i]->getAtomicNum() == 16));
760 }
761 if (alphaHet.size() && betaHet.size()) {
762 // do alpha and beta heteroatoms belong to the same ring?
763 for (i = 0; (!alphaOrBetaInSameRing) && (i < alphaHet.size()); ++i) {
764 for (j = 0; (!alphaOrBetaInSameRing) && (j < betaHet.size()); ++j) {
765 alphaOrBetaInSameRing = rmSize.areAtomsInSameRingOfSize(
766 5, 2, alphaHet[i], betaHet[j]);
767 }
768 }
769 }
770 }
771
772 switch (atom->getAtomicNum()) {
773 // Carbon
774 case 6:
775 // if there are no beta heteroatoms
776 if (!(betaHet.size())) {
777 // count how many 3-neighbor nitrogens we have
778 // to be CIM+, there must be at least two such nitrogens,
779 // one of which in a 5-membered aromatic ring and none
780 // in a 6-membered aromatic ring; additionally, one
781 // one of the hydrogens must be protonated
782 unsigned int nN = 0;
783 unsigned int nFormalCharge = 0;
784 unsigned int nInAromatic5Ring = 0;
785 unsigned int nInAromatic6Ring = 0;
786 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
787 for (; nbrIdx != endNbrs; ++nbrIdx) {
788 const Atom *nbrAtom = mol[*nbrIdx];
789 if ((nbrAtom->getAtomicNum() == 7) &&
790 (nbrAtom->getTotalDegree() == 3)) {
791 ++nN;
792 if ((nbrAtom->getFormalCharge() > 0) &&
793 (!isAtomNOxide(nbrAtom))) {
794 ++nFormalCharge;
795 }
796 if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 5)) {
797 ++nInAromatic5Ring;
798 }
799 if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 6)) {
800 ++nInAromatic6Ring;
801 }
802 }
803 }
804 if ((((nN == 2) && nInAromatic5Ring) ||
805 ((nN == 3) && (nInAromatic5Ring == 2))) &&
806 nFormalCharge && (!nInAromatic6Ring)) {
807 // CIM+
808 // Aromatic carbon between N's in imidazolium
809 atomType = 80;
810 break;
811 }
812 }
813 // if there are neither alpha nor beta heteroatoms
814 // or if there are both, but they belong to different rings
815 if (alphaHet.size() == betaHet.size()) {
816 bool surroundedByBenzeneC = true;
817 bool surroundedByArom = true;
818 // loop over neighbors
819 // are all neighbors aromatic?
820 // are all neighbors benzene carbons?
821 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
822 for (; nbrIdx != endNbrs; ++nbrIdx) {
823 const Atom *nbrAtom = mol[*nbrIdx];
824
825 if ((nbrAtom->getAtomicNum() != 6) ||
826 (!(ringInfo->isAtomInRingOfSize(nbrAtom->getIdx(), 6)))) {
827 surroundedByBenzeneC = false;
828 }
829 if (rmSize.areAtomsInSameRingOfSize(5, 2, atom, nbrAtom) &&
830 (!(nbrAtom->getIsAromatic()))) {
831 surroundedByArom = false;
832 }
833 }
834 // if there are no alpha and beta heteroatoms and
835 // all neighbors are aromatic but not all of them
836 // benzene carbons, or if there are alpha and beta
837 // atoms but they belong to different rings, or if
838 // there are alpha and beta heteroatoms but no alpha
839 // oxygen or sulfur, then it's C5
840 if (((!(alphaHet.size())) && (!(betaHet.size())) &&
841 (!surroundedByBenzeneC) && surroundedByArom) ||
842 (alphaHet.size() && betaHet.size() &&
843 ((!alphaOrBetaInSameRing) || ((!isAlphaOS) && (!isBetaOS))))) {
844 // C5
845 // General carbon in 5-membered heteroaromatic ring
846 atomType = 78;
847 break;
848 }
849 }
850 if (alphaHet.size() && ((!(betaHet.size())) || isAlphaOS)) {
851 // C5A
852 // Aromatic 5-ring C, alpha to N:, O: or S:
853 atomType = 63;
854 break;
855 }
856 if (betaHet.size() && ((!(alphaHet.size())) || isBetaOS)) {
857 // C5B
858 // Aromatic 5-ring C, alpha to N:, O: or S:
859 atomType = 64;
860 break;
861 }
862 break;
863
864 // Nitrogen
865 case 7:
866 if (isAtomNOxide(atom)) {
867 // N5AX
868 // N-oxide nitrogen in 5-ring alpha position
869 // N5BX
870 // N-oxide nitrogen in 5-ring beta position
871 // N5OX
872 // N-oxide nitrogen in other 5-ring position
873 atomType = 82;
874 break;
875 }
876 // if there are neither alpha nor beta heteroatoms
877 if ((!(alphaHet.size())) && (!(betaHet.size()))) {
878 // if it is nitrogen
879 // if valence is 3, it's pyrrole nitrogen
880 if (atom->getTotalDegree() == 3) {
881 // NPYL
882 // Aromatic 5-ring nitrogen with pi lone pair
883 atomType = 39;
884 break;
885 }
886 // otherwise it is anionic
887 // N5M
888 // Nitrogen in 5-ring aromatic anion
889 atomType = 76;
890 break;
891 }
892 if ((atom->getTotalDegree() == 3) &&
893 alphaHet.size() != betaHet.size()) {
894 // NIM+
895 // Aromatic nitrogen in imidazolium
896 // N5A+
897 // Positive nitrogen in 5-ring alpha position
898 // N5B+
899 // Positive nitrogen in 5-ring beta position
900 // N5+
901 // Positive nitrogen in other 5-ring position
902 atomType = 81;
903 break;
904 }
905 // if there are alpha heteroatoms and either no beta heteroatoms
906 // or no alpha oxygen/sulfur
907 if (alphaHet.size() && ((!(betaHet.size())) || isAlphaOS)) {
908 // N5A
909 // Aromatic 5-ring N, alpha to N:, O: or S:
910 atomType = 65;
911 break;
912 }
913 // if there are beta heteroatoms and either no alpha heteroatoms
914 // or no beta oxygen/sulfur
915 if (betaHet.size() && ((!(alphaHet.size())) || isBetaOS)) {
916 // N5B
917 // Aromatic 5-ring N, beta to N:, O: or S:
918 atomType = 66;
919 break;
920 }
921 // if there are both alpha and beta heteroatoms
922 if (alphaHet.size() && betaHet.size()) {
923 // N5
924 // General nitrogen in 5-memebered heteroaromatic ring
925 atomType = 79;
926 break;
927 }
928 break;
929
930 // Oxygen
931 case 8:
932 // OFUR
933 // Aromatic 5-ring oxygen with pi lone pair
934 atomType = 59;
935 break;
936
937 // Sulfur
938 case 16:
939 // STHI
940 // Aromatic 5-ring sulfur with pi lone pair
941 atomType = 44;
942 break;
943 }
944 }
945
946 if (!atomType && (rmSize.isAtomInAromaticRingOfSize(atom, 6))) {
947 // 6-membered aromatic rings
948 switch (atom->getAtomicNum()) {
949 // Carbon
950 case 6:
951 // CB
952 // Aromatic carbon, e.g., in benzene
953 atomType = 37;
954 break;
955
956 // Nitrogen
957 case 7:
958 if (isAtomNOxide(atom)) {
959 // NPOX
960 // Pyridinium N-oxide nitrogen
961 atomType = 69;
962 break;
963 }
964 if (atom->getTotalDegree() == 3) {
965 // NPD+
966 // Aromatic nitrogen in pyridinium
967 atomType = 58;
968 break;
969 }
970 // NPYD
971 // Aromatic nitrogen with sigma lone pair
972 atomType = 38;
973 break;
974 }
975 }
976 }
977
978 if (!atomType) {
979 // Aliphatic heavy atom types
980
981 switch (atom->getAtomicNum()) {
982 // Lithium
983 case 3:
984 if (atom->getDegree() == 0) {
985 // LI+
986 // Lithium cation
987 atomType = 92;
988 break;
989 }
990 break;
991
992 // Carbon
993 case 6:
994 // 4 neighbors
995 if (atom->getTotalDegree() == 4) {
996 if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 3)) {
997 // CR3R
998 // Aliphatic carbon in 3-membered ring
999 atomType = 22;
1000 break;
1001 }
1002 if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 4)) {
1003 // CR4R
1004 // Aliphatic carbon in 4-membered ring
1005 atomType = 20;
1006 break;
1007 }
1008 // CR
1009 // Alkyl carbon
1010 atomType = 1;
1011 break;
1012 }
1013 // 3 neighbors
1014 if (atom->getTotalDegree() == 3) {
1015 unsigned int nN2 = 0;
1016 unsigned int nN3 = 0;
1017 unsigned int nO = 0;
1018 unsigned int nS = 0;
1019 unsigned int doubleBondedElement = 0;
1020 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1021 for (; nbrIdx != endNbrs; ++nbrIdx) {
1022 const Atom *nbrAtom = mol[*nbrIdx];
1023 // find if there is a double-bonded element
1024 if ((mol.getBondBetweenAtoms(nbrAtom->getIdx(), atom->getIdx()))
1025 ->getBondType() == Bond::DOUBLE) {
1026 doubleBondedElement = nbrAtom->getAtomicNum();
1027 }
1028 // count how many terminal oxygen/sulfur atoms
1029 // are bonded to ipso
1030 if (nbrAtom->getTotalDegree() == 1) {
1031 if (nbrAtom->getAtomicNum() == 8) {
1032 ++nO;
1033 } else if (nbrAtom->getAtomicNum() == 16) {
1034 ++nS;
1035 }
1036 } else if (nbrAtom->getAtomicNum() == 7) {
1037 // count how many nitrogens with 3 neighbors
1038 // are bonded to ipso
1039 if (nbrAtom->getTotalDegree() == 3) {
1040 ++nN3;
1041 }
1042 // count how many nitrogens with 2 neighbors
1043 // are double-bonded to ipso
1044 else if ((nbrAtom->getTotalDegree() == 2) &&
1045 ((mol.getBondBetweenAtoms(nbrAtom->getIdx(),
1046 atom->getIdx()))
1047 ->getBondType() == Bond::DOUBLE)) {
1048 ++nN2;
1049 }
1050 }
1051 }
1052 // if there are two or more nitrogens with 3 neighbors each,
1053 // and there are no nitrogens with two neighbors only,
1054 // and carbon is double-bonded to nitrogen
1055 if ((nN3 >= 2) && (!nN2) && (doubleBondedElement == 7)) {
1056 // CNN+
1057 // Carbon in +N=C-N: resonance structures
1058 // CGD+
1059 // Guanidinium carbon
1060 atomType = 57;
1061 break;
1062 }
1063 // if there are two terminal oxygen/sulfur atoms
1064 if ((nO == 2) || (nS == 2)) {
1065 // CO2M
1066 // Carbon in carboxylate anion
1067 // CS2M
1068 // Carbon in thiocarboxylate anion
1069 atomType = 41;
1070 break;
1071 }
1072 // if this carbon is in a 4-membered ring and
1073 // is double-bonded to another carbon
1074 if (ringInfo->isAtomInRingOfSize(atom->getIdx(), 4) &&
1075 (doubleBondedElement == 6)) {
1076 // CR4E
1077 // Olefinic carbon in 4-membered ring
1078 atomType = 30;
1079 break;
1080 }
1081 // if this carbon is is double-bonded to nitrogen,
1082 // oxygen, phosphorus or sulfur
1083 if ((doubleBondedElement == 7) || (doubleBondedElement == 8) ||
1084 (doubleBondedElement == 15) || (doubleBondedElement == 16)) {
1085 // C=N
1086 // Imine-atomType carbon
1087 // CGD
1088 // Guanidine carbon
1089 // C=O
1090 // Generic carbonyl carbon
1091 // C=OR
1092 // Ketone or aldehyde carbonyl carbon
1093 // C=ON
1094 // Amide carbonyl carbon
1095 // COO
1096 // Carboxylic acid or ester carbonyl carbon
1097 // COON
1098 // Carbamate carbonyl carbon
1099 // COOO
1100 // Carbonic acid or ester carbonyl function
1101 // C=OS
1102 // Thioester carbonyl carbon, double bonded to O
1103 // C=P
1104 // Carbon doubly bonded to P
1105 // C=S
1106 // Thioester carbon, double bonded to S
1107 // C=SN
1108 // Thioamide carbon, double bonded to S
1109 // CSO2
1110 // Carbon in >C=SO2
1111 // CS=O
1112 // Sulfinyl carbon in >C=S=O
1113 // CSS
1114 // Thiocarboxylic acid or ester carbon
1115 atomType = 3;
1116 break;
1117 }
1118 // otherwise it must be generic sp2 carbon
1119 // C=C
1120 // Vinylic carbon
1121 // CSP2
1122 // Generic sp2 carbon
1123 atomType = 2;
1124 break;
1125 }
1126 // 2 neighbors
1127 if (atom->getTotalDegree() == 2) {
1128 // CSP
1129 // Acetylenic carbon
1130 // =C=
1131 // Allenic carbon
1132 atomType = 4;
1133 break;
1134 }
1135 // 1 neighbor
1136 if (atom->getTotalDegree() == 1) {
1137 // C%-
1138 // Isonitrile carbon
1139 atomType = 60;
1140 break;
1141 }
1142 break;
1143
1144 // Nitrogen
1145 case 7:
1146 // if the neighbor is phosphorus or sulfur
1147 // count the number of terminal oxygens bonded
1148 // to that phosphorus or sulfur atom
1149 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1150 for (; nbrIdx != endNbrs; ++nbrIdx) {
1151 const Atom *nbrAtom = mol[*nbrIdx];
1152 // count how many terminal oxygen atoms
1153 // are bonded to ipso
1154 if ((nbrAtom->getAtomicNum() == 8) &&
1155 (nbrAtom->getTotalDegree() == 1)) {
1156 ++nTermObondedToN;
1157 }
1158 if (((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 3) &&
1159 ((nbrAtom->getAtomicNum() == 15) ||
1160 (nbrAtom->getAtomicNum() == 16))) {
1161 unsigned int nObondedToSP = 0;
1162 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1163 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1164 const Atom *nbr2Atom = mol[*nbr2Idx];
1165 if ((nbr2Atom->getAtomicNum() == 8) &&
1166 (nbr2Atom->getTotalDegree() == 1)) {
1167 ++nObondedToSP;
1168 }
1169 }
1170 // if there are two or more oxygens, ipso is a sulfonamide nitrogen
1171 if (!isNSO2orNSO3orNCN) {
1172 isNSO2orNSO3orNCN = (nObondedToSP >= 2);
1173 }
1174 }
1175 }
1176 // 4 neighbors
1177 if (atom->getTotalDegree() == 4) {
1178 if (isAtomNOxide(atom)) {
1179 // N3OX
1180 // sp3-hybridized N-oxide nitrogen
1181 atomType = 68;
1182 break;
1183 }
1184 // NR+
1185 // Quaternary nitrogen
1186 atomType = 34;
1187 break;
1188 }
1189 // 3 neighbors
1190 if (atom->getTotalDegree() == 3) {
1191 // total bond order >= 4
1192 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 4) {
1193 bool doubleBondedCN = false;
1194 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1195 for (; nbrIdx != endNbrs; ++nbrIdx) {
1196 const Atom *nbrAtom = mol[*nbrIdx];
1197 // find if there is a double-bonded nitrogen,
1198 // or a carbon which is not bonded to other
1199 // nitrogen atoms with 3 neighbors
1200 if ((mol.getBondBetweenAtoms(nbrAtom->getIdx(), atom->getIdx()))
1201 ->getBondType() == Bond::DOUBLE) {
1202 doubleBondedCN = ((nbrAtom->getAtomicNum() == 7) ||
1203 (nbrAtom->getAtomicNum() == 6));
1204 if (nbrAtom->getAtomicNum() == 6) {
1205 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1206 for (; doubleBondedCN && (nbr2Idx != end2Nbrs); ++nbr2Idx) {
1207 const Atom *nbr2Atom = mol[*nbr2Idx];
1208 if (nbr2Atom->getIdx() == atom->getIdx()) {
1209 continue;
1210 }
1211 doubleBondedCN = (!((nbr2Atom->getAtomicNum() == 7) &&
1212 (nbr2Atom->getTotalDegree() == 3)));
1213 }
1214 }
1215 }
1216 }
1217 // if there is a single terminal oxygen
1218 if (nTermObondedToN == 1) {
1219 // N2OX
1220 // sp2-hybridized N-oxide nitrogen
1221 atomType = 67;
1222 break;
1223 }
1224 // if there are two or more terminal oxygens
1225 if (nTermObondedToN >= 2) {
1226 // NO2
1227 // Nitrogen in nitro group
1228 // NO3
1229 // Nitrogen in nitrate group
1230 atomType = 45;
1231 break;
1232 }
1233 // if the carbon bonded to ipso is bonded to 1 nitrogen
1234 // with 3 neighbors, that nitrogen is ipso (>N+=C)
1235 // alternatively, if there is no carbon but ipso is
1236 // double bonded to nitrogen, we have >N+=N
1237 if (doubleBondedCN) {
1238 // N+=C
1239 // Iminium nitrogen
1240 // N+=N
1241 // Positively charged nitrogen doubly bonded to N
1242 atomType = 54;
1243 break;
1244 }
1245 }
1246 // total bond order >= 3
1247 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 3) {
1248 bool isNCOorNCS = false;
1249 bool isNCNplus = false;
1250 bool isNGDplus = false;
1251 bool isNNNorNNC = false;
1252 bool isNbrC = false;
1253 bool isNbrBenzeneC = false;
1254 unsigned int elementDoubleBondedToC = 0;
1255 unsigned int elementTripleBondedToC = 0;
1256 unsigned int nN2bondedToC = 0;
1257 unsigned int nN3bondedToC = 0;
1258 unsigned int nObondedToC = 0;
1259 unsigned int nSbondedToC = 0;
1260 // loop over neighbors
1261 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1262 for (; nbrIdx != endNbrs; ++nbrIdx) {
1263 const Atom *nbrAtom = mol[*nbrIdx];
1264 // if the neighbor is carbon
1265 if (nbrAtom->getAtomicNum() == 6) {
1266 isNbrC = true;
1267 // check if we have a benzene carbon close to ipso
1268 if (nbrAtom->getIsAromatic() &&
1269 ringInfo->isAtomInRingOfSize(nbrAtom->getIdx(), 6)) {
1270 isNbrBenzeneC = true;
1271 }
1272 nN2bondedToC = 0;
1273 nN3bondedToC = 0;
1274 nObondedToC = 0;
1275 nSbondedToC = 0;
1276 unsigned int nFormalCharge = 0;
1277 unsigned int nInAromatic6Ring = 0;
1278 // loop over carbon neighbors
1279 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1280 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1281 const Atom *nbr2Atom = mol[*nbr2Idx];
1282 const Bond *bond = mol.getBondBetweenAtoms(
1283 nbrAtom->getIdx(), nbr2Atom->getIdx());
1284 // check if we have oxygen or sulfur double-bonded to this
1285 // carbon
1286 if ((bond->getBondType() == Bond::DOUBLE) &&
1287 ((nbr2Atom->getAtomicNum() == 8) ||
1288 (nbr2Atom->getAtomicNum() == 16))) {
1289 isNCOorNCS = true;
1290 }
1291 // check if there is an atom double-bonded to this carbon,
1292 // and if so find which element; if it is carbon or
1293 // nitrogen (provided that the latter does not belong to
1294 // multiple rings), also an aromatic bond is acceptable
1295 if ((bond->getBondType() == Bond::DOUBLE) ||
1296 ((bond->getBondType() == Bond::AROMATIC) &&
1297 ((nbr2Atom->getAtomicNum() == 6) ||
1298 ((nbr2Atom->getAtomicNum() == 7) &&
1299 (queryIsAtomInNRings(nbr2Atom) == 1))))) {
1300 elementDoubleBondedToC = nbr2Atom->getAtomicNum();
1301 }
1302 // check there is an atom triple-bonded to this carbon,
1303 // and if so find which element
1304 if (bond->getBondType() == Bond::TRIPLE) {
1305 elementTripleBondedToC = nbr2Atom->getAtomicNum();
1306 }
1307 // if this carbon is bonded to a nitrogen with 3 neighbors
1308 if ((nbr2Atom->getAtomicNum() == 7) &&
1309 (nbr2Atom->getTotalDegree() == 3)) {
1310 // count the number of +1 formal charges that we have
1311 if (nbr2Atom->getFormalCharge() == 1) {
1312 ++nFormalCharge;
1313 }
1314 if (rmSize.isAtomInAromaticRingOfSize(nbrAtom, 6)) {
1315 ++nInAromatic6Ring;
1316 }
1317 // count how many oxygens are bonded to this nitrogen
1318 // with 3 neighbors
1319 unsigned int nObondedToN3 = 0;
1320 boost::tie(nbr3Idx, end3Nbrs) =
1321 mol.getAtomNeighbors(nbr2Atom);
1322 for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
1323 const Atom *nbr3Atom = mol[*nbr3Idx];
1324 if (nbr3Atom->getAtomicNum() == 8) {
1325 ++nObondedToN3;
1326 }
1327 }
1328 // if there are less than 2 oxygens, this is neither
1329 // a nitro group nor a nitrate, so increment the counter
1330 // of nitrogens with 3 neighbors bonded to this carbon
1331 // (C-N<)
1332 if (nObondedToN3 < 2) {
1333 ++nN3bondedToC;
1334 }
1335 }
1336 // if this carbon is bonded to a nitrogen with 2 neighbors
1337 // via a double or aromatic bond, increment the counter
1338 // of nitrogens with 2 neighbors bonded to this carbon
1339 // via a double or aromatic bond (C=N-)
1340 if ((nbr2Atom->getAtomicNum() == 7) &&
1341 (nbr2Atom->getTotalDegree() == 2) &&
1342 ((bond->getBondType() == Bond::DOUBLE) ||
1343 (bond->getBondType() == Bond::AROMATIC))) {
1344 ++nN2bondedToC;
1345 }
1346 // if this carbon is bonded to an aromatic atom
1347 if (nbr2Atom->getIsAromatic()) {
1348 // if it is oxygen, increment the counter of
1349 // aromatic oxygen atoms bonded to this carbon
1350 if (nbr2Atom->getAtomicNum() == 8) {
1351 ++nObondedToC;
1352 }
1353 // if it is sulfur, increment the counter of
1354 // aromatic sulfur atoms bonded to this carbon
1355 if (nbr2Atom->getAtomicNum() == 16) {
1356 ++nSbondedToC;
1357 }
1358 }
1359 }
1360 // if nitrogen is bonded to this carbon via a double or aromatic
1361 // bond
1362 if (elementDoubleBondedToC == 7) {
1363 // if 2 nitrogens with 3 neighbors and no nitrogens with 2
1364 // neighbors
1365 // are bonded to this carbon, and we have a formal charge,
1366 // but not a 6-membered aromatic ring, and the carbon atom
1367 // is not sp3, then this is an amidinium nitrogen (>N-C=N+<)
1368 if ((nN3bondedToC == 2) && (!nN2bondedToC) && nFormalCharge &&
1369 (!nInAromatic6Ring) && (nbrAtom->getTotalDegree() < 4)) {
1370 isNCNplus = true;
1371 }
1372 // if 3 nitrogens with 3 neighbors are bonded
1373 // to this carbon, then this is a guanidinium nitrogen
1374 // ((>N-)2-C=N+<)
1375 if (nN3bondedToC == 3) {
1376 isNGDplus = true;
1377 }
1378 }
1379 }
1380 // if the neighbor is nitrogen
1381 if (nbrAtom->getAtomicNum() == 7) {
1382 unsigned int nNbondedToN = 0;
1383 unsigned int nObondedToN = 0;
1384 unsigned int nSbondedToN = 0;
1385 // loop over nitrogen neighbors
1386 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1387 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1388 const Atom *nbr2Atom = mol[*nbr2Idx];
1389 const Bond *bond = mol.getBondBetweenAtoms(
1390 nbrAtom->getIdx(), nbr2Atom->getIdx());
1391 // if the bond to nitrogen is double
1392 if (bond->getBondType() == Bond::DOUBLE) {
1393 // if the neighbor is carbon (N=N-C)
1394 if (nbr2Atom->getAtomicNum() == 6) {
1395 // loop over carbon neighbors
1396 boost::tie(nbr3Idx, end3Nbrs) =
1397 mol.getAtomNeighbors(nbr2Atom);
1398 for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
1399 const Atom *nbr3Atom = mol[*nbr3Idx];
1400 // if the nitrogen neighbor to ipso is met, move on
1401 if (nbr3Atom->getIdx() == nbrAtom->getIdx()) {
1402 continue;
1403 }
1404 // count how many nitrogen, oxygen, sulfur atoms
1405 // are bonded to this carbon
1406 switch (nbr3Atom->getAtomicNum()) {
1407 case 7:
1408 ++nNbondedToN;
1409 break;
1410 case 8:
1411 ++nObondedToN;
1412 break;
1413 case 16:
1414 ++nSbondedToN;
1415 break;
1416 }
1417 }
1418 // if there are no more nitrogens, no oxygen, no sulfur
1419 // bonded
1420 // to carbon, and the latter is not a benzene carbon
1421 // then it is N=N-C
1422 if ((!nObondedToN) && (!nSbondedToN) && (!nNbondedToN) &&
1423 (!isNbrBenzeneC)) {
1424 isNNNorNNC = true;
1425 }
1426 }
1427 // if the neighbor is nitrogen (N=N-N) and ipso is not
1428 // bonded
1429 // to benzene carbon then it is N=N-N
1430 if ((nbr2Atom->getAtomicNum() == 7) && (!isNbrBenzeneC)) {
1431 isNNNorNNC = true;
1432 }
1433 }
1434 }
1435 }
1436 }
1437 // if ipso nitrogen is bonded to carbon
1438 if (isNbrC) {
1439 // if neighbor carbon is triple-bonded to N, then ipso is N-C%N
1440 if (elementTripleBondedToC == 7) {
1441 isNSO2orNSO3orNCN = true;
1442 }
1443 // if neighbor carbon is amidinium
1444 if (isNCNplus) {
1445 // NCN+
1446 // Either nitrogen in N+=C-N
1447 atomType = 55;
1448 break;
1449 }
1450 // if neighbor carbon is guanidinium
1451 if (isNGDplus) {
1452 // NGD+
1453 // Guanidinium nitrogen
1454 atomType = 56;
1455 break;
1456 }
1457 // if neighbor carbon is not bonded to oxygen or sulfur
1458 // and is not cyano, there two possibilities:
1459 // 1) ipso nitrogen is bonded to benzene carbon while no oxygen
1460 // or sulfur are bonded to the latter: ipso is aniline nitrogen
1461 // 2) ipso nitrogen is bonded to a carbon which is double-bonded
1462 // to
1463 // carbon, nitrogen or phosphorus, or triple-bonded to carbon
1464 if (((!isNCOorNCS) && (!isNSO2orNSO3orNCN)) &&
1465 (((!nObondedToC) && (!nSbondedToC) && isNbrBenzeneC) ||
1466 ((elementDoubleBondedToC == 6) ||
1467 (elementDoubleBondedToC == 7) ||
1468 (elementDoubleBondedToC == 15) ||
1469 (elementTripleBondedToC == 6)))) {
1470 // NC=C
1471 // Enamine or aniline nitrogen, deloc. lp
1472 // NC=N
1473 // Nitrogen in N-C=N with deloc. lp
1474 // NC=P
1475 // Nitrogen in N-C=P with deloc. lp
1476 // NC%C
1477 // Nitrogen attached to C-C triple bond
1478 atomType = 40;
1479 break;
1480 }
1481 }
1482 // if ipso is not sulfonamide while it is either amide/thioamide
1483 // or >N-N=N-/>N-N=C<
1484 if ((!isNSO2orNSO3orNCN) && (isNCOorNCS || isNNNorNNC)) {
1485 // NC=O
1486 // Amide nitrogen
1487 // NC=S
1488 // Thioamide nitrogen
1489 // NN=C
1490 // Nitrogen in N-N=C moiety with deloc. lp
1491 // NN=N
1492 // Nitrogen in N-N=N moiety with deloc. lp
1493 atomType = 10;
1494 break;
1495 }
1496 }
1497 }
1498 // 2 neighbors
1499 if (atom->getTotalDegree() == 2) {
1500 // total bond order = 4
1501 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 4) {
1502 // loop over neighbors
1503 bool isIsonitrile = false;
1504 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1505 for (; (!isIsonitrile) && (nbrIdx != endNbrs); ++nbrIdx) {
1506 const Atom *nbrAtom = mol[*nbrIdx];
1507 // if neighbor is triple-bonded
1508 isIsonitrile =
1509 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1510 ->getBondType() == Bond::TRIPLE);
1511 }
1512 if (isIsonitrile) {
1513 // NR%
1514 // Isonitrile nitrogen
1515 atomType = 61;
1516 break;
1517 }
1518 // =N=
1519 // Central nitrogen in C=N=N or N=N=N
1520 atomType = 53;
1521 break;
1522 }
1523 // total bond order = 3
1524 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 3) {
1525 // loop over neighbors
1526 bool isNitroso = false;
1527 bool isImineOrAzo = false;
1528 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1529 for (; nbrIdx != endNbrs; ++nbrIdx) {
1530 const Atom *nbrAtom = mol[*nbrIdx];
1531 // if the neighbor is double bonded (-N=)
1532 if ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1533 ->getBondType() == Bond::DOUBLE) {
1534 // if it is terminal oxygen (-N=O)
1535 isNitroso =
1536 ((nbrAtom->getAtomicNum() == 8) && (nTermObondedToN == 1));
1537 // if it is carbon or nitrogen (-N=N-, -N=C<),
1538 // ipso is imine or azo
1539 isImineOrAzo = ((nbrAtom->getAtomicNum() == 6) ||
1540 (nbrAtom->getAtomicNum() == 7));
1541 }
1542 }
1543 if (isNitroso && (!isImineOrAzo)) {
1544 // N=O
1545 // Nitrogen in nitroso group
1546 atomType = 46;
1547 break;
1548 }
1549 if (isImineOrAzo) {
1550 // N=C
1551 // Imine nitrogen
1552 // N=N
1553 // Azo-group nitrogen
1554 atomType = 9;
1555 break;
1556 }
1557 }
1558 // total bond order >= 2
1559 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) >= 2) {
1560 // loop over neighbors
1561 bool isNSO = false;
1562 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1563 for (; (!isNSO) && (nbrIdx != endNbrs); ++nbrIdx) {
1564 const Atom *nbrAtom = mol[*nbrIdx];
1565 // if the neighbor is sulfur bonded to a single terminal oxygen
1566 if (nbrAtom->getAtomicNum() == 16) {
1567 // loop over neighbors and count how many
1568 // terminal oxygens are bonded to sulfur
1569 unsigned int nTermObondedToS = 0;
1570 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1571 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1572 const Atom *nbr2Atom = mol[*nbr2Idx];
1573 if ((nbr2Atom->getAtomicNum() == 8) &&
1574 (nbr2Atom->getTotalDegree() == 1)) {
1575 ++nTermObondedToS;
1576 }
1577 }
1578 isNSO = (nTermObondedToS == 1);
1579 }
1580 }
1581 if (isNSO) {
1582 // NSO
1583 // Divalent nitrogen replacing monovalent O in SO2 group
1584 atomType = 48;
1585 break;
1586 }
1587 if (!isNSO2orNSO3orNCN) {
1588 // If it is not sulfonamide deprotonated nitrogen,
1589 // it is anionic nitrogen (>N::-)
1590 // NM
1591 // Anionic divalent nitrogen
1592 atomType = 62;
1593 break;
1594 }
1595 }
1596 }
1597 // if it is sulfonamide (3 neighbors) or cyano (2 neighbors)
1598 if (isNSO2orNSO3orNCN) {
1599 // NSO2
1600 // Sulfonamide nitrogen
1601 // NSO3
1602 // Sulfonamide nitrogen
1603 // NC%N
1604 // Nitrogen attached to cyano group
1605 atomType = 43;
1606 break;
1607 }
1608 // 1 neighbor
1609 if (atom->getTotalDegree() == 1) {
1610 bool isNSP = false;
1611 bool isNAZT = false;
1612 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1613 for (; (!isNSP) && (!isNAZT) && (nbrIdx != endNbrs); ++nbrIdx) {
1614 const Atom *nbrAtom = mol[*nbrIdx];
1615 // if ipso is triple-bonded to its only neighbor
1616 isNSP =
1617 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1618 ->getBondType() == Bond::TRIPLE);
1619 // ipso is bonded to a nitrogen atom with 2 neighbors
1620 if ((nbrAtom->getAtomicNum() == 7) &&
1621 (nbrAtom->getTotalDegree() == 2)) {
1622 // loop over nitrogen neighbors
1623 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1624 for (; (!isNAZT) && (nbr2Idx != end2Nbrs); ++nbr2Idx) {
1625 const Atom *nbr2Atom = mol[*nbr2Idx];
1626 // if another nitrogen with 2 neighbors, or a carbon
1627 // with 3 neighbors is found, ipso is NAZT
1628 isNAZT = (((nbr2Atom->getAtomicNum() == 7) &&
1629 (nbr2Atom->getTotalDegree() == 2)) ||
1630 ((nbr2Atom->getAtomicNum() == 6) &&
1631 (nbr2Atom->getTotalDegree() == 3)));
1632 }
1633 }
1634 }
1635 if (isNSP) {
1636 // NSP
1637 // Triply bonded nitrogen
1638 atomType = 42;
1639 break;
1640 }
1641 if (isNAZT) {
1642 // NAZT
1643 // Terminal nitrogen in azido or diazo group
1644 atomType = 47;
1645 break;
1646 }
1647 }
1648 // if nothing else was found
1649 // NR
1650 // Amine nitrogen
1651 atomType = 8;
1652 break;
1653
1654 // Oxygen
1655 case 8:
1656 // 3 neighbors
1657 if (atom->getTotalDegree() == 3) {
1658 // O+
1659 // Oxonium oxygen
1660 atomType = 49;
1661 break;
1662 }
1663 // 2 neighbors
1664 if (atom->getTotalDegree() == 2) {
1665 if ((atom->getExplicitValence() + atom->getNumImplicitHs()) == 3) {
1666 // O=+
1667 // Oxenium oxygen
1668 atomType = 51;
1669 break;
1670 }
1671 // count how many hydrogens are bound to ipso
1672 unsigned int nHbondedToO = 0;
1673 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1674 for (; nbrIdx != endNbrs; ++nbrIdx) {
1675 const Atom *nbrAtom = mol[*nbrIdx];
1676 if (nbrAtom->getAtomicNum() == 1) {
1677 ++nHbondedToO;
1678 }
1679 }
1680 if ((nHbondedToO + atom->getNumImplicitHs()) == 2) {
1681 // OH2
1682 // Oxygen in water
1683 atomType = 70;
1684 break;
1685 }
1686 // otherwise, ipso must be one of the following
1687 // OC=O
1688 // Carboxylic acid or ester oxygen
1689 // OC=C
1690 // Enolic or phenolic oxygen
1691 // OC=N
1692 // Oxygen in -O-C=N- moiety
1693 // OC=S
1694 // Divalent oxygen in thioacid or ester
1695 // ONO2
1696 // Divalent nitrate "ether" oxygen
1697 // ON=O
1698 // Divalent nitrate "ether" oxygen
1699 // OSO3
1700 // Divalent oxygen in sulfate group
1701 // OSO2
1702 // Divalent oxygen in sulfite group
1703 // OSO
1704 // One of two divalent oxygens attached to sulfur
1705 // OS=O
1706 // Divalent oxygen in R(RO)S=O
1707 // -OS
1708 // Other divalent oxygen attached to sulfur
1709 // OPO3
1710 // Divalent oxygen in phosphate group
1711 // OPO2
1712 // Divalent oxygen in phosphite group
1713 // OPO
1714 // Divalent oxygen, one of two oxygens attached to P
1715 // -OP
1716 // Other divalent oxygen attached to phosphorus
1717 atomType = 6;
1718 break;
1719 }
1720 // 1 neighbor
1721 if (atom->getDegree() <= 1) {
1722 unsigned int nNbondedToCorNorS = 0;
1723 unsigned int nObondedToCorNorS = 0;
1724 unsigned int nSbondedToCorNorS = 0;
1725 bool isOxideOBondedToH =
1726 atom->getNumExplicitHs() + atom->getNumImplicitHs() > 0;
1727 bool isCarboxylateO = false;
1728 bool isCarbonylO = false;
1729 bool isOxideOBondedToC = false;
1730 bool isNitrosoO = false;
1731 bool isOxideOBondedToN = false;
1732 bool isNOxideO = false;
1733 bool isNitroO = false;
1734 bool isThioSulfinateO = false;
1735 bool isSulfateO = false;
1736 bool isSulfoxideO = false;
1737 bool isPhosphateOrPerchlorateO = false;
1738 // loop over neighbors
1739 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1740 for (; (nbrIdx != endNbrs) && (!isOxideOBondedToC) &&
1741 (!isOxideOBondedToN) && (!isOxideOBondedToH) &&
1742 (!isCarboxylateO) && (!isNitroO) && (!isNOxideO) &&
1743 (!isThioSulfinateO) && (!isSulfateO) &&
1744 (!isPhosphateOrPerchlorateO) && (!isCarbonylO) &&
1745 (!isNitrosoO) && (!isSulfoxideO);
1746 ++nbrIdx) {
1747 const Atom *nbrAtom = mol[*nbrIdx];
1748 const Bond *bond =
1749 mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx());
1750 // if the neighbor is carbon, nitrogen or sulfur
1751 if ((nbrAtom->getAtomicNum() == 6) ||
1752 (nbrAtom->getAtomicNum() == 7) ||
1753 (nbrAtom->getAtomicNum() == 16)) {
1754 // count how many terminal oxygen/sulfur atoms
1755 // or secondary nitrogens
1756 // are bonded to the carbon or nitrogen neighbor of ipso
1757 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
1758 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
1759 const Atom *nbr2Atom = mol[*nbr2Idx];
1760 if ((nbr2Atom->getAtomicNum() == 7) &&
1761 (nbr2Atom->getTotalDegree() == 2)) {
1762 ++nNbondedToCorNorS;
1763 }
1764 if ((nbr2Atom->getAtomicNum() == 8) &&
1765 (nbr2Atom->getTotalDegree() == 1)) {
1766 ++nObondedToCorNorS;
1767 }
1768 if ((nbr2Atom->getAtomicNum() == 16) &&
1769 (nbr2Atom->getTotalDegree() == 1)) {
1770 ++nSbondedToCorNorS;
1771 }
1772 }
1773 }
1774 // if ipso neighbor is hydrogen
1775 isOxideOBondedToH = (nbrAtom->getAtomicNum() == 1);
1776
1777 // if ipso neighbor is carbon
1778 if (nbrAtom->getAtomicNum() == 6) {
1779 // if carbon neighbor is bonded to 2 oxygens,
1780 // ipso is carboxylate oxygen
1781 isCarboxylateO = (nObondedToCorNorS == 2);
1782 // if ipso oxygen is bonded to carbon
1783 // via a double bond, ipso is carbonyl oxygen
1784 isCarbonylO = (bond->getBondType() == Bond::DOUBLE);
1785 // if ipso oxygen is bonded to carbon via a
1786 // single bond, and there are no other bonded oxygens,
1787 // ipso is oxide oxygen
1788 isOxideOBondedToC = ((bond->getBondType() == Bond::SINGLE) &&
1789 (nObondedToCorNorS == 1));
1790 }
1791
1792 // if ipso neighbor is nitrogen
1793 if (nbrAtom->getAtomicNum() == 7) {
1794 // if ipso oxygen is bonded to nitrogen
1795 // via a double bond, ipso is nitroso oxygen
1796 isNitrosoO = (bond->getBondType() == Bond::DOUBLE);
1797 // if ipso oxygen is bonded to nitrogen via a single bond
1798 // and there are no other bonded oxygens
1799 if ((bond->getBondType() == Bond::SINGLE) &&
1800 (nObondedToCorNorS == 1)) {
1801 // if nitrogen has 2 neighbors or, if the neighbors are 3,
1802 // the total bond order on nitrogen is 3, ipso is oxide oxygen
1803 isOxideOBondedToN = ((nbrAtom->getTotalDegree() == 2) ||
1804 ((nbrAtom->getExplicitValence() +
1805 nbrAtom->getNumImplicitHs()) == 3));
1806 // if the total bond order on nitrogen is 4, ipso is N-oxide
1807 // oxygen
1808 isNOxideO = ((nbrAtom->getExplicitValence() +
1809 nbrAtom->getNumImplicitHs()) == 4);
1810 }
1811 // if ipso oxygen is bonded to nitrogen which is bonded
1812 // to multiple oxygens, ipso is nitro/nitrate oxygen
1813 isNitroO = (nObondedToCorNorS >= 2);
1814 }
1815
1816 // if ipso neighbor is sulfur
1817 if (nbrAtom->getAtomicNum() == 16) {
1818 // if ipso oxygen is bonded to sulfur and
1819 // the latter is bonded to another sulfur,
1820 // ipso is thiosulfinate oxygen
1821 isThioSulfinateO = (nSbondedToCorNorS == 1);
1822 // if ipso oxygen is bonded to sulfur via a single
1823 // bond or, if the bond is double, there are multiple
1824 // oxygen/nitrogen atoms bonded to that sulfur,
1825 // ipso is sulfate oxygen
1826 isSulfateO = ((bond->getBondType() == Bond::SINGLE) ||
1827 ((bond->getBondType() == Bond::DOUBLE) &&
1828 ((nObondedToCorNorS + nNbondedToCorNorS) > 1)));
1829 // if ipso oxygen is bonded to sulfur via a double
1830 // bond and the sum of oxygen/nitrogen atoms bonded
1831 // to that sulfur is 1, ipso is sulfoxide oxygen
1832 isSulfoxideO = ((bond->getBondType() == Bond::DOUBLE) &&
1833 ((nObondedToCorNorS + nNbondedToCorNorS) == 1));
1834 }
1835
1836 // if ipso neighbor is phosphorus or chlorine
1837 isPhosphateOrPerchlorateO = ((nbrAtom->getAtomicNum() == 15) ||
1838 (nbrAtom->getAtomicNum() == 17));
1839 }
1840 if (isOxideOBondedToC || isOxideOBondedToN || isOxideOBondedToH) {
1841 // OM
1842 // Oxide oxygen on sp3 carbon
1843 // OM2
1844 // Oxide oxygen on sp2 carbon
1845 // OM
1846 // Oxide oxygen on sp3 nitrogen (not in original MMFF.I Table III)
1847 // OM2
1848 // Oxide oxygen on sp2 nitrogen (not in original MMFF.I Table III)
1849 atomType = 35;
1850 break;
1851 }
1852 if (isCarboxylateO || isNitroO || isNOxideO || isThioSulfinateO ||
1853 isSulfateO || isPhosphateOrPerchlorateO) {
1854 // O2CM
1855 // Oxygen in carboxylate group
1856 // ONX
1857 // Oxygen in N-oxides
1858 // O2N
1859 // Oxygen in nitro group
1860 // O2NO
1861 // Nitro-group oxygen in nitrate
1862 // O3N
1863 // Nitrate anion oxygen
1864 // OSMS
1865 // Terminal oxygen in thiosulfinate anion
1866 // O-S
1867 // Single terminal O on tetracoordinate sulfur
1868 // O2S
1869 // One of 2 terminal O's on sulfur
1870 // O3S
1871 // One of 3 terminal O's on sulfur
1872 // O4S
1873 // Terminal O in sulfate anion
1874 // OP
1875 // Oxygen in phosphine oxide
1876 // O2P
1877 // One of 2 terminal O's on P
1878 // O3P
1879 // One of 3 terminal O's on P
1880 // O4P
1881 // One of 4 terminal O's on P
1882 // O4Cl
1883 // Oxygen in perchlorate anion
1884 atomType = 32;
1885 break;
1886 }
1887 if (isCarbonylO || isNitrosoO || isSulfoxideO) {
1888 // O=C
1889 // Generic carbonyl oxygen
1890 // O=CN
1891 // Carbonyl oxygen in amides
1892 // O=CR
1893 // Carbonyl oxygen in aldehydes and ketones
1894 // O=CO
1895 // Carbonyl oxygen in acids and esters
1896 // O=N
1897 // Nitroso oxygen
1898 // O=S
1899 // Doubly bonded sulfoxide oxygen
1900 atomType = 7;
1901 break;
1902 }
1903 }
1904 break;
1905
1906 // Fluorine
1907 case 9:
1908 // 1 neighbor
1909 if (atom->getDegree() == 1) {
1910 // F
1911 // Fluorine
1912 atomType = 11;
1913 break;
1914 }
1915 if (atom->getDegree() == 0) {
1916 // F-
1917 // Fluoride anion
1918 atomType = 89;
1919 break;
1920 }
1921 break;
1922
1923 // Sodium
1924 case 11:
1925 if (atom->getDegree() == 0) {
1926 // NA+
1927 // Sodium cation
1928 atomType = 93;
1929 break;
1930 }
1931 break;
1932
1933 // Magnesium
1934 case 12:
1935 if (atom->getDegree() == 0) {
1936 // MG+2
1937 // Dipositive magnesium cation
1938 atomType = 99;
1939 break;
1940 }
1941 break;
1942
1943 // Silicon
1944 case 14:
1945 // SI
1946 // Silicon
1947 atomType = 19;
1948 break;
1949
1950 // Phosphorus
1951 case 15:
1952 if (atom->getTotalDegree() == 4) {
1953 // PO4
1954 // Phosphate group phosphorus
1955 // PO3
1956 // Phosphorus with 3 attached oxygens
1957 // PO2
1958 // Phosphorus with 2 attached oxygens
1959 // PO
1960 // Phosphine oxide phosphorus
1961 // PTET
1962 // General tetracoordinate phosphorus
1963 atomType = 25;
1964 break;
1965 }
1966 if (atom->getTotalDegree() == 3) {
1967 // P
1968 // Phosphorus in phosphines
1969 atomType = 26;
1970 break;
1971 }
1972 if (atom->getTotalDegree() == 2) {
1973 // -P=C
1974 // Phosphorus doubly bonded to C
1975 atomType = 75;
1976 break;
1977 }
1978 break;
1979
1980 // Sulfur
1981 case 16:
1982 // 3 or 4 neighbors
1983 if ((atom->getTotalDegree() == 3) || (atom->getTotalDegree() == 4)) {
1984 unsigned int nOorNbondedToS = 0;
1985 unsigned int nSbondedToS = 0;
1986 bool isCDoubleBondedToS = false;
1987 // loop over neighbors
1988 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
1989 for (; nbrIdx != endNbrs; ++nbrIdx) {
1990 const Atom *nbrAtom = mol[*nbrIdx];
1991 // check if ipso sulfur is double-bonded to carbon
1992 if ((nbrAtom->getAtomicNum() == 6) &&
1993 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
1994 ->getBondType() == Bond::DOUBLE)) {
1995 isCDoubleBondedToS = true;
1996 }
1997 // if the neighbor is terminal oxygen/sulfur
1998 // or secondary nitrogen, increment the respective counter
1999 if (((nbrAtom->getDegree() == 1) &&
2000 (nbrAtom->getAtomicNum() == 8)) ||
2001 ((nbrAtom->getTotalDegree() == 2) &&
2002 (nbrAtom->getAtomicNum() == 7))) {
2003 ++nOorNbondedToS;
2004 }
2005 if ((nbrAtom->getDegree() == 1) &&
2006 (nbrAtom->getAtomicNum() == 16)) {
2007 ++nSbondedToS;
2008 }
2009 }
2010 // if ipso sulfur has 3 neighbors and is bonded to
2011 // two atoms of oxygen/nitrogen and double-bonded
2012 // to carbon, or if it has 4 neighbors
2013 if (((atom->getTotalDegree() == 3) && (nOorNbondedToS == 2) &&
2014 (isCDoubleBondedToS)) ||
2015 (atom->getTotalDegree() == 4)) {
2016 // =SO2
2017 // Sulfone sulfur, doubly bonded to carbon
2018 atomType = 18;
2019 break;
2020 }
2021 // if ipso sulfur is bonded to both oxygen/nitrogen and sulfur
2022 if ((nOorNbondedToS && nSbondedToS) ||
2023 ((nOorNbondedToS == 2) && (!isCDoubleBondedToS))) {
2024 // SSOM
2025 // Tricoordinate sulfur in anionic thiosulfinate group
2026 atomType = 73;
2027 break;
2028 }
2029 // otherwise ipso sulfur is double bonded to oxygen or nitrogen
2030 // S=O
2031 // Sulfoxide sulfur
2032 // >S=N
2033 // Tricoordinate sulfur doubly bonded to N
2034 atomType = 17;
2035 break;
2036 }
2037 // 2 neighbors
2038 if (atom->getTotalDegree() == 2) {
2039 // loop over neighbors
2040 bool isODoubleBondedToS = false;
2041 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2042 for (; nbrIdx != endNbrs; ++nbrIdx) {
2043 const Atom *nbrAtom = mol[*nbrIdx];
2044 // check if ipso sulfur is double-bonded to oxygen
2045 if ((nbrAtom->getAtomicNum() == 8) &&
2046 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
2047 ->getBondType() == Bond::DOUBLE)) {
2048 isODoubleBondedToS = true;
2049 }
2050 }
2051 // if ipso sulfur is double-bonded to oxygen
2052 if (isODoubleBondedToS) {
2053 // =S=O
2054 // Sulfinyl sulfur, e.g., in C=S=O
2055 atomType = 74;
2056 break;
2057 }
2058 // otherwise it is a thiol, sulfide or disulfide
2059 // S
2060 // Thiol, sulfide, or disulfide sulfur
2061 atomType = 15;
2062 break;
2063 }
2064 // 1 neighbor
2065 if (atom->getDegree() == 1) {
2066 unsigned int nTermSbondedToNbr = 0;
2067 bool isCDoubleBondedToS = false;
2068 // find the neighbor and count how many terminal sulfur
2069 // atoms are there, including ipso
2070 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2071 for (; nbrIdx != endNbrs; ++nbrIdx) {
2072 const Atom *nbrAtom = mol[*nbrIdx];
2073 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
2074 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
2075 const Atom *nbr2Atom = mol[*nbr2Idx];
2076 if ((nbr2Atom->getAtomicNum() == 16) &&
2077 (nbr2Atom->getTotalDegree() == 1)) {
2078 ++nTermSbondedToNbr;
2079 }
2080 }
2081 // check if ipso sulfur is double-bonded to carbon
2082 if ((nbrAtom->getAtomicNum() == 6) &&
2083 ((mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx()))
2084 ->getBondType() == Bond::DOUBLE)) {
2085 isCDoubleBondedToS = true;
2086 }
2087 }
2088 // if ipso sulfur is double bonded to carbon and the latter
2089 // is not bonded to other terminal sulfur atoms, then it is
2090 // not a dithiocarboxylate, but a thioketone, etc.
2091 if (isCDoubleBondedToS && (nTermSbondedToNbr != 2)) {
2092 // S=C
2093 // Sulfur doubly bonded to carbon
2094 atomType = 16;
2095 break;
2096 }
2097 // otherwise ipso must be one of these
2098 // S-P
2099 // Terminal sulfur bonded to P
2100 // SM
2101 // Anionic terminal sulfur
2102 // SSMO
2103 // Terminal sulfur in thiosulfinate group
2104 atomType = 72;
2105 break;
2106 }
2107 break;
2108
2109 // Chlorine
2110 case 17:
2111 // 4 neighbors
2112 if (atom->getTotalDegree() == 4) {
2113 // loop over neighbors and count the number
2114 // of bonded oxygens
2115 unsigned int nObondedToCl = 0;
2116 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2117 for (; nbrIdx != endNbrs; ++nbrIdx) {
2118 const Atom *nbrAtom = mol[*nbrIdx];
2119 if (nbrAtom->getAtomicNum() == 8) {
2120 ++nObondedToCl;
2121 }
2122 }
2123 // if there are 4 oxygens
2124 if (nObondedToCl == 4) {
2125 // CLO4
2126 // Perchlorate anione chlorine
2127 atomType = 77;
2128 break;
2129 }
2130 }
2131 // 1 neighbor
2132 if (atom->getTotalDegree() == 1) {
2133 // Cl
2134 // Chlorine
2135 atomType = 12;
2136 break;
2137 }
2138 // 0 neighbors
2139 if (atom->getDegree() == 0) {
2140 // Cl-
2141 // Chloride anion
2142 atomType = 90;
2143 break;
2144 }
2145 break;
2146
2147 // Potassium
2148 case 19:
2149 if (atom->getDegree() == 0) {
2150 // K+
2151 // Potassium cation
2152 atomType = 94;
2153 break;
2154 }
2155 break;
2156
2157 // Calcium
2158 case 20:
2159 if (atom->getDegree() == 0) {
2160 // CA+2
2161 // Dipositive calcium cation
2162 atomType = 96;
2163 break;
2164 }
2165 break;
2166
2167 // Iron
2168 case 26:
2169 if (atom->getDegree() == 0) {
2170 if (atom->getFormalCharge() == 2) {
2171 // FE+2
2172 // Dipositive iron cation
2173 atomType = 87;
2174 break;
2175 }
2176 if (atom->getFormalCharge() == 3) {
2177 // FE+3
2178 // Tripositive iron cation
2179 atomType = 88;
2180 break;
2181 }
2182 }
2183 break;
2184
2185 // Copper
2186 case 29:
2187 if (atom->getDegree() == 0) {
2188 if (atom->getFormalCharge() == 1) {
2189 // CU+1
2190 // Monopositive copper cation
2191 atomType = 97;
2192 break;
2193 }
2194 if (atom->getFormalCharge() == 2) {
2195 // CU+2
2196 // Dipositive copper cation
2197 atomType = 98;
2198 break;
2199 }
2200 }
2201 break;
2202
2203 // Zinc
2204 case 30:
2205 if (atom->getDegree() == 0) {
2206 // ZN+2
2207 // Dipositive zinc cation
2208 atomType = 95;
2209 break;
2210 }
2211 break;
2212
2213 // Bromine
2214 case 35:
2215 if (atom->getDegree() == 1) {
2216 // Br
2217 // Bromine
2218 atomType = 13;
2219 break;
2220 }
2221 if (atom->getDegree() == 0) {
2222 // BR-
2223 // Bromide anion
2224 atomType = 91;
2225 break;
2226 }
2227 break;
2228
2229 // Iodine
2230 case 53:
2231 if (atom->getDegree() == 1) {
2232 // I
2233 // Iodine
2234 atomType = 14;
2235 break;
2236 }
2237 break;
2238 }
2239 }
2240 d_MMFFAtomPropertiesPtrVect[atom->getIdx()]->mmffAtomType = atomType;
2241 if (!atomType) {
2242 d_valid = false;
2243 }
2244 }
2245
2246 // finds the MMFF atomType for a hydrogen atom
setMMFFHydrogenType(const Atom * atom)2247 void MMFFMolProperties::setMMFFHydrogenType(const Atom *atom) {
2248 unsigned int atomType = 0;
2249 bool isHOCCorHOCN = false;
2250 bool isHOCO = false;
2251 bool isHOP = false;
2252 bool isHOS = false;
2253 const ROMol &mol = atom->getOwningMol();
2254 ROMol::ADJ_ITER nbrIdx;
2255 ROMol::ADJ_ITER endNbrs;
2256 ROMol::ADJ_ITER nbr2Idx;
2257 ROMol::ADJ_ITER end2Nbrs;
2258 ROMol::ADJ_ITER nbr3Idx;
2259 ROMol::ADJ_ITER end3Nbrs;
2260
2261 // loop over neighbors (actually there can be only one)
2262 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
2263 for (; nbrIdx != endNbrs; ++nbrIdx) {
2264 const Atom *nbrAtom = mol[*nbrIdx];
2265 switch (nbrAtom->getAtomicNum()) {
2266 // carbon, silicon
2267 case 6:
2268 case 14:
2269 // HC
2270 // Hydrogen attached to carbon
2271 // HSI
2272 // Hydrogen attached to silicon
2273 atomType = 5;
2274 break;
2275
2276 // nitrogen
2277 case 7:
2278 switch (this->getMMFFAtomType(nbrAtom->getIdx())) {
2279 case 8:
2280 // HNR
2281 // Generic hydrogen on sp3 nitrogen, e.g. in amines
2282 // H3N
2283 // Hydrogen in ammonia
2284 case 39:
2285 // HPYL
2286 // Hydrogen on nitrogen in pyrrole
2287 case 62:
2288 // HNR
2289 // Generic hydrogen on sp3 nitrogen, e.g. in amines
2290 case 67:
2291 case 68:
2292 // HNOX
2293 // Hydrogen on N in a N-oxide
2294 atomType = 23;
2295 break;
2296
2297 case 34:
2298 // NR+
2299 // Quaternary nitrogen
2300 case 54:
2301 // N+=C
2302 // Iminium nitrogen
2303 // N+=N
2304 // Positively charged nitrogen doubly bonded to N
2305 case 55:
2306 // HNN+
2307 // Hydrogen on amidinium nitrogen
2308 case 56:
2309 // HGD+
2310 // Hydrogen on guanidinium nitrogen
2311 case 58:
2312 // NPD+
2313 // Aromatic nitrogen in pyridinium
2314 case 81:
2315 // HIM+
2316 // Hydrogen on imidazolium nitrogen
2317 atomType = 36;
2318 break;
2319
2320 case 9:
2321 // HN=N
2322 // Hydrogen on azo nitrogen
2323 // HN=C
2324 // Hydrogen on imine nitrogen
2325 atomType = 27;
2326 break;
2327
2328 default:
2329 // HNCC
2330 // Hydrogen on enamine nitrogen
2331 // HNCN
2332 // Hydrogen in H-N-C=N moiety
2333 // HNCO
2334 // Hydrogen on amide nitrogen
2335 // HNCS
2336 // Hydrogen on thioamide nitrogen
2337 // HNNC
2338 // Hydrogen in H-N-N=C moiety
2339 // HNNN
2340 // Hydrogen in H-N-N=N moiety
2341 // HNSO
2342 // Hydrogen on NSO, NSO2, or NSO3 nitrogen
2343 // HNC%
2344 // Hydrogen on N triply bonded to C
2345 // HSP2
2346 // Generic hydrogen on sp2 nitrogen
2347 atomType = 28;
2348 break;
2349 }
2350 break;
2351
2352 // oxygen
2353 case 8:
2354 switch (this->getMMFFAtomType(nbrAtom->getIdx())) {
2355 case 49:
2356 // HO+
2357 // Hydrogen on oxonium oxygen
2358 atomType = 50;
2359 break;
2360
2361 case 51:
2362 // HO=+
2363 // Hydrogen on oxenium oxygen
2364 atomType = 52;
2365 break;
2366
2367 case 70:
2368 // HOH
2369 // Hydroxyl hydrogen in water
2370 atomType = 31;
2371 break;
2372
2373 case 6:
2374 // for hydrogen bonded to atomType 6 oxygen we need to distinguish
2375 // among acidic hydrogens belonging to carboxylic/phospho acids,
2376 // enolic/phenolic/hydroxamic hydrogens and hydrogens whose oxygen
2377 // partner is bonded to sulfur. If none of these is found
2378 // it is either an alcohol or a generic hydroxyl hydrogen
2379 // loop over oxygen neighbors
2380 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
2381 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
2382 const Atom *nbr2Atom = mol[*nbr2Idx];
2383 // if the neighbor of oxygen is carbon, loop over the carbon
2384 // neighbors
2385 if (nbr2Atom->getAtomicNum() == 6) {
2386 boost::tie(nbr3Idx, end3Nbrs) = mol.getAtomNeighbors(nbr2Atom);
2387 for (; nbr3Idx != end3Nbrs; ++nbr3Idx) {
2388 const Atom *nbr3Atom = mol[*nbr3Idx];
2389 const Bond *bond = mol.getBondBetweenAtoms(
2390 nbr2Atom->getIdx(), nbr3Atom->getIdx());
2391 // if the starting oxygen is met, move on
2392 if (nbr3Atom->getIdx() == nbrAtom->getIdx()) {
2393 continue;
2394 }
2395 // if the carbon neighbor is another carbon or nitrogen
2396 // bonded via a double or aromatic bond, ipso is HOCC/HOCN
2397 if (((nbr3Atom->getAtomicNum() == 6) ||
2398 (nbr3Atom->getAtomicNum() == 7)) &&
2399 ((bond->getBondType() == Bond::DOUBLE) ||
2400 (bond->getBondType() == Bond::AROMATIC))) {
2401 isHOCCorHOCN = true;
2402 }
2403 // if the carbon neighbor is an oxygen bonded
2404 // via a double bond, ipso is HOCO
2405 if ((nbr3Atom->getAtomicNum() == 8) &&
2406 (bond->getBondType() == Bond::DOUBLE)) {
2407 isHOCO = true;
2408 }
2409 }
2410 }
2411 // if the neighbor of oxygen is phosphorus, ipso is HOCO
2412 if (nbr2Atom->getAtomicNum() == 15) {
2413 isHOP = true;
2414 }
2415 // if the neighbor of oxygen is sulfur, ipso is HOS
2416 if (nbr2Atom->getAtomicNum() == 16) {
2417 isHOS = true;
2418 }
2419 }
2420 if (isHOCO || isHOP) {
2421 // HOCO
2422 // Hydroxyl hydrogen in carboxylic acids
2423 atomType = 24;
2424 break;
2425 }
2426 if (isHOCCorHOCN) {
2427 // HOCC
2428 // Enolic or phenolic hydroxyl hydrogen
2429 // HOCN
2430 // Hydroxyl hydrogen in HO-C=N moiety
2431 atomType = 29;
2432 break;
2433 }
2434 if (isHOS) {
2435 // HOS
2436 // Hydrogen on oxygen attached to sulfur
2437 atomType = 33;
2438 break;
2439 }
2440 /* FALLTHRU */
2441 default:
2442 // HO
2443 // Generic hydroxyl hydrogen
2444 // HOR
2445 // Hydroxyl hydrogen in alcohols
2446 atomType = 21;
2447 break;
2448 }
2449 break;
2450
2451 // phosphorus and sulfur
2452 case 15:
2453 case 16:
2454 // HP
2455 // Hydrogen attached to phosphorus
2456 // HS
2457 // Hydrogen attached to sulfur
2458 // HS=N
2459 // Hydrogen attached to >S= sulfur doubly bonded to N
2460 atomType = 71;
2461 break;
2462 }
2463 }
2464 d_MMFFAtomPropertiesPtrVect[atom->getIdx()]->mmffAtomType = atomType;
2465 if (!atomType) {
2466 d_valid = false;
2467 }
2468 }
2469
2470 // sanitizes molecule according to MMFF requirements
2471 // returns MolOps::SANITIZE_NONE on success, the flag
2472 // which caused trouble in case of failure
sanitizeMMFFMol(RWMol & mol)2473 unsigned int sanitizeMMFFMol(RWMol &mol) {
2474 unsigned int error = 0;
2475
2476 try {
2477 MolOps::sanitizeMol(
2478 mol, error,
2479 (unsigned int)(MolOps::SANITIZE_CLEANUP | MolOps::SANITIZE_PROPERTIES |
2480 MolOps::SANITIZE_SYMMRINGS | MolOps::SANITIZE_KEKULIZE |
2481 MolOps::SANITIZE_FINDRADICALS |
2482 MolOps::SANITIZE_SETCONJUGATION |
2483 MolOps::SANITIZE_SETHYBRIDIZATION |
2484 MolOps::SANITIZE_CLEANUPCHIRALITY |
2485 MolOps::SANITIZE_ADJUSTHS));
2486 if (!(mol.hasProp(common_properties::_MMFFSanitized))) {
2487 mol.setProp(common_properties::_MMFFSanitized, 1, true);
2488 }
2489 } catch (MolSanitizeException &) {
2490 }
2491
2492 return error;
2493 }
2494
2495 // constructs a MMFFMolProperties object for ROMol mol filled
2496 // with MMFF atom types, formal and partial charges
2497 // in case atom types are missing, d_valid is set to false,
2498 // charges are set to 0.0 and the force-field is unusable
MMFFMolProperties(ROMol & mol,const std::string & mmffVariant,std::uint8_t verbosity,std::ostream & oStream)2499 MMFFMolProperties::MMFFMolProperties(ROMol &mol, const std::string &mmffVariant,
2500 std::uint8_t verbosity,
2501 std::ostream &oStream)
2502 : d_valid(true),
2503 d_mmffs(mmffVariant == "MMFF94s" ? true : false),
2504 d_bondTerm(true),
2505 d_angleTerm(true),
2506 d_stretchBendTerm(true),
2507 d_oopTerm(true),
2508 d_torsionTerm(true),
2509 d_vdWTerm(true),
2510 d_eleTerm(true),
2511 d_dielConst(1.0),
2512 d_dielModel(CONSTANT),
2513 d_verbosity(verbosity),
2514 d_oStream(&oStream),
2515 d_MMFFAtomPropertiesPtrVect(mol.getNumAtoms()) {
2516 if (MolOps::needsHs(mol)) {
2517 BOOST_LOG(rdWarningLog)
2518 << "Molecule does not have explicit Hs. Consider calling AddHs()"
2519 << std::endl;
2520 }
2521 if (!mol.hasProp(common_properties::_MMFFSanitized)) {
2522 bool isAromaticSet = false;
2523 for (const auto atom : mol.atoms()) {
2524 if (atom->getIsAromatic()) {
2525 isAromaticSet = true;
2526 break;
2527 }
2528 }
2529 if (isAromaticSet) {
2530 MolOps::Kekulize((RWMol &)mol, true);
2531 }
2532 mol.setProp(common_properties::_MMFFSanitized, 1, true);
2533 }
2534 for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) {
2535 d_MMFFAtomPropertiesPtrVect[i] =
2536 MMFFAtomPropertiesPtr(new MMFFAtomProperties());
2537 }
2538 setMMFFAromaticity((RWMol &)mol);
2539 RingMembershipSize rmSize(mol);
2540 for (const auto atom : mol.atoms()) {
2541 if (atom->getAtomicNum() != 1) {
2542 this->setMMFFHeavyAtomType(rmSize, atom);
2543 }
2544 }
2545 for (const auto atom : mol.atoms()) {
2546 if (atom->getAtomicNum() == 1) {
2547 this->setMMFFHydrogenType(atom);
2548 }
2549 }
2550 if (this->isValid()) {
2551 this->computeMMFFCharges(mol);
2552 }
2553 if (verbosity == MMFF_VERBOSITY_HIGH) {
2554 oStream << "\n"
2555 "A T O M T Y P E S A N D C H A R G E S\n\n"
2556 " ATOM FORMAL PARTIAL\n"
2557 " ATOM TYPE CHARGE CHARGE\n"
2558 "-----------------------------------"
2559 << std::endl;
2560 for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) {
2561 oStream << std::left << std::setw(2)
2562 << mol.getAtomWithIdx(idx)->getSymbol() << std::left << " #"
2563 << std::setw(5) << idx + 1 << std::right << std::setw(5)
2564 << (unsigned int)(this->getMMFFAtomType(idx)) << std::right
2565 << std::setw(10) << std::fixed << std::setprecision(3)
2566 << this->getMMFFFormalCharge(idx) << std::right << std::setw(10)
2567 << this->getMMFFPartialCharge(idx) << std::endl;
2568 }
2569 if (!(this->isValid())) {
2570 oStream << "\nMissing atom types - charges were not computed"
2571 << std::endl;
2572 }
2573 }
2574 }
2575
2576 // returns the MMFF angle type of the angle formed
2577 // by atoms with indexes idx1, idx2, idx3
getMMFFAngleType(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3)2578 unsigned int MMFFMolProperties::getMMFFAngleType(const ROMol &mol,
2579 const unsigned int idx1,
2580 const unsigned int idx2,
2581 const unsigned int idx3) {
2582 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2583
2584 // ftp://ftp.wiley.com/public/journals/jcc/suppmat/17/553/MMFF-III_AppendixA.html
2585 //
2586 // AT[IJK] Structural significance
2587 //--------------------------------------------------------------------------
2588 // 0 The angle i-j-k is a "normal" bond angle
2589 // 1 Either bond i-j or bond j-k has a bond type of 1
2590 // 2 Bonds i-j and j-k each have bond types of 1; the
2591 // sum is 2. 3 The angle occurs in a three-membered ring
2592 // 4 The angle occurs in a four-membered ring
2593 // 5 Is in a three-membered ring and the sum of the
2594 // bond types is
2595 // 1
2596 // 6 Is in a three-membered ring and the sum of the
2597 // bond types is
2598 // 2
2599 // 7 Is in a four-membered ring and the sum of the bond
2600 // types is
2601 // 1
2602 // 8 Is in a four-membered ring and the sum of the bond
2603 // types is
2604 // 2
2605
2606 unsigned int bondTypeSum =
2607 this->getMMFFBondType(mol.getBondBetweenAtoms(idx1, idx2)) +
2608 this->getMMFFBondType(mol.getBondBetweenAtoms(idx2, idx3));
2609 unsigned int angleType = bondTypeSum;
2610
2611 unsigned int size = isAngleInRingOfSize3or4(mol, idx1, idx2, idx3);
2612 if (size) {
2613 angleType = size;
2614 if (bondTypeSum) {
2615 angleType += (bondTypeSum + size - 2);
2616 }
2617 }
2618
2619 return angleType;
2620 }
2621
2622 // returns the MMFF bond type of the bond
getMMFFBondType(const Bond * bond)2623 unsigned int MMFFMolProperties::getMMFFBondType(const Bond *bond) {
2624 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2625 PRECONDITION(bond, "invalid bond");
2626
2627 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
2628 const ForceFields::MMFF::MMFFProp *mmffPropAtom1 =
2629 (*mmffProp)(this->getMMFFAtomType(bond->getBeginAtomIdx()));
2630 const ForceFields::MMFF::MMFFProp *mmffPropAtom2 =
2631 (*mmffProp)(this->getMMFFAtomType(bond->getEndAtomIdx()));
2632
2633 // return 1 if the bond is single and the properties for this
2634 // single bond match either those of sbmb or aromatic bonds
2635 // for this atom pair, 0 if they don't
2636 return (unsigned int)(((bond->getBondType() == Bond::SINGLE) &&
2637 ((mmffPropAtom1->sbmb && mmffPropAtom2->sbmb) ||
2638 (mmffPropAtom1->arom && mmffPropAtom2->arom)))
2639 ? 1
2640 : 0);
2641 }
2642
2643 // given the angle type and the two bond types of the bond
2644 // which compose the angle, it returns the MMFF stretch-bend
2645 // type of the angle
getMMFFStretchBendType(const unsigned int angleType,const unsigned int bondType1,const unsigned int bondType2)2646 unsigned int getMMFFStretchBendType(const unsigned int angleType,
2647 const unsigned int bondType1,
2648 const unsigned int bondType2) {
2649 unsigned int stretchBendType = 0;
2650
2651 switch (angleType) {
2652 case 1:
2653 stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 1 : 2);
2654 break;
2655
2656 case 2:
2657 stretchBendType = 3;
2658 break;
2659
2660 case 4:
2661 stretchBendType = 4;
2662 break;
2663
2664 case 3:
2665 stretchBendType = 5;
2666 break;
2667
2668 case 5:
2669 stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 6 : 7);
2670 break;
2671
2672 case 6:
2673 stretchBendType = 8;
2674 break;
2675
2676 case 7:
2677 stretchBendType = ((bondType1 || (bondType1 == bondType2)) ? 9 : 10);
2678 break;
2679
2680 case 8:
2681 stretchBendType = 11;
2682 break;
2683 }
2684
2685 return stretchBendType;
2686 }
2687
2688 // given a dihedral angle formed by 4 atoms with indexes
2689 // idx1, idx2, idx3, idx4, it returns a std::pair whose first element
2690 // is the principal torsion type, and the second is the secondary
2691 // torsion type, to be used only if parameters could not be found
2692 // (empirically found - this is not mentioned either in MMFF.IV
2693 // nor in MMFF.V)
2694 const std::pair<unsigned int, unsigned int>
getMMFFTorsionType(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4)2695 MMFFMolProperties::getMMFFTorsionType(const ROMol &mol, const unsigned int idx1,
2696 const unsigned int idx2,
2697 const unsigned int idx3,
2698 const unsigned int idx4) {
2699 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2700
2701 const Bond *bondJK = mol.getBondBetweenAtoms(idx2, idx3);
2702 unsigned int bondTypeIJ =
2703 this->getMMFFBondType(mol.getBondBetweenAtoms(idx1, idx2));
2704 unsigned int bondTypeJK = this->getMMFFBondType(bondJK);
2705 unsigned int bondTypeKL =
2706 this->getMMFFBondType(mol.getBondBetweenAtoms(idx3, idx4));
2707 unsigned int torsionType = bondTypeJK;
2708 unsigned int secondTorsionType = 0;
2709
2710 // according to MMFF.IV page 609 the condition should be as simple as
2711 // if ((bondTypeJK == 0) && ((bondTypeIJ == 1) || (bondTypeKL == 1))) {
2712 // but CYGUAN01 fails the test, so the following condition was
2713 // empirically determined to be the correct one
2714 if ((bondTypeJK == 0) && (bondJK->getBondType() == Bond::SINGLE) &&
2715 ((bondTypeIJ == 1) || (bondTypeKL == 1))) {
2716 torsionType = 2;
2717 }
2718 unsigned int size = isTorsionInRingOfSize4or5(mol, idx1, idx2, idx3, idx4);
2719 // the additional check on the existence of a bond between I and K or J and
2720 // L is to avoid assigning torsionType 4 to those torsions in a 4-membered
2721 // ring constituted by the fusion of two 3-membered rings, even though it
2722 // would be harmless for the energy calculation since parameters for
2723 // 4,22,22,22,22 and 0,22,22,22,22 are identical
2724 if ((size == 4) && (!(mol.getBondBetweenAtoms(idx1, idx3) ||
2725 mol.getBondBetweenAtoms(idx2, idx4)))) {
2726 secondTorsionType = torsionType;
2727 torsionType = 4;
2728 } else if ((size == 5) && ((this->getMMFFAtomType(idx1) == 1) ||
2729 (this->getMMFFAtomType(idx2) == 1) ||
2730 (this->getMMFFAtomType(idx3) == 1) ||
2731 (this->getMMFFAtomType(idx4) == 1))) {
2732 secondTorsionType = torsionType;
2733 torsionType = 5;
2734 }
2735
2736 return std::make_pair(torsionType, secondTorsionType);
2737 }
2738
2739 // empirical rule to compute bond stretching parameters if
2740 // tabulated parameters could not be found. The returned
2741 // pointer to a MMFFBond object must be freed by the caller
2742 const ForceFields::MMFF::MMFFBond *
getMMFFBondStretchEmpiricalRuleParams(const ROMol & mol,const Bond * bond)2743 MMFFMolProperties::getMMFFBondStretchEmpiricalRuleParams(const ROMol &mol,
2744 const Bond *bond) {
2745 RDUNUSED_PARAM(mol);
2746 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
2747
2748 const MMFFBond *mmffBndkParams;
2749 const MMFFHerschbachLaurie *mmffHerschbachLaurieParams;
2750 const MMFFProp *mmffAtomPropParams[2];
2751 const MMFFCovRadPauEle *mmffAtomCovRadPauEleParams[2];
2752 const MMFFBndkCollection *mmffBndk = DefaultParameters::getMMFFBndk();
2753 const MMFFHerschbachLaurieCollection *mmffHerschbachLaurie =
2754 DefaultParameters::getMMFFHerschbachLaurie();
2755 const MMFFCovRadPauEleCollection *mmffCovRadPauEle =
2756 DefaultParameters::getMMFFCovRadPauEle();
2757 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
2758
2759 unsigned int atomicNum1 = bond->getBeginAtom()->getAtomicNum();
2760 unsigned int atomicNum2 = bond->getEndAtom()->getAtomicNum();
2761 mmffBndkParams = (*mmffBndk)(atomicNum1, atomicNum2);
2762 mmffAtomCovRadPauEleParams[0] = (*mmffCovRadPauEle)(atomicNum1);
2763 mmffAtomCovRadPauEleParams[1] = (*mmffCovRadPauEle)(atomicNum2);
2764 mmffAtomPropParams[0] =
2765 (*mmffProp)(this->getMMFFAtomType(bond->getBeginAtomIdx()));
2766 mmffAtomPropParams[1] =
2767 (*mmffProp)(this->getMMFFAtomType(bond->getEndAtomIdx()));
2768
2769 PRECONDITION(mmffAtomCovRadPauEleParams[0],
2770 "covalent radius/Pauling electronegativity parameters for atom "
2771 "1 not found");
2772 PRECONDITION(mmffAtomCovRadPauEleParams[1],
2773 "covalent radius/Pauling electronegativity parameters for atom "
2774 "2 not found");
2775 PRECONDITION(mmffAtomPropParams[0],
2776 "property parameters for atom 1 not found");
2777 PRECONDITION(mmffAtomPropParams[1],
2778 "property parameters for atom 2 not found");
2779
2780 auto *mmffBondParams = new ForceFields::MMFF::MMFFBond();
2781 const double c = (((atomicNum1 == 1) || (atomicNum2 == 1)) ? 0.050 : 0.085);
2782 const double n = 1.4;
2783 #if 0
2784 const double delta = 0.008;
2785 #endif
2786 #if 1
2787 const double delta = 0.0;
2788 #endif
2789 double r0_i[2];
2790
2791 // MMFF.V, page 625
2792 for (unsigned int i = 0; i < 2; ++i) {
2793 r0_i[i] = mmffAtomCovRadPauEleParams[i]->r0;
2794 // the part of the empirical rule concerning H
2795 // parameters appears not to be used - tests are
2796 // passed only in its absence, hence it is
2797 // currently excluded
2798 #if 0
2799 switch (mmffAtomPropParams[i]->mltb) {
2800 case 1:
2801 case 2:
2802 H_i[i] = 2;
2803 break;
2804
2805 case 3:
2806 H_i[i] = 1;
2807
2808 default:
2809 H_i[i] = 3;
2810 }
2811 #endif
2812 }
2813 // also the part of the empirical rule concerning BO
2814 // parameters appears not to be used - tests are
2815 // passed only in its absence, hence it is
2816 // currently excluded
2817 #if 0
2818 unsigned int BO_ij = (unsigned int)(bond->getBondTypeAsDouble());
2819 if ((mmffAtomPropParams[0]->mltb == 1)
2820 && (mmffAtomPropParams[1]->mltb == 1)) {
2821 BO_ij = 4;
2822 }
2823 if (((mmffAtomPropParams[0]->mltb == 1)
2824 && (mmffAtomPropParams[1]->mltb == 2))
2825 || ((mmffAtomPropParams[0]->mltb == 2)
2826 && (mmffAtomPropParams[1]->mltb == 1))) {
2827 BO_ij = 5;
2828 }
2829 if (areAtomsInSameAromaticRing(mol,
2830 bond->getBeginAtomIdx(), bond->getEndAtomIdx())) {
2831 BO_ij = (((mmffAtomPropParams[0]->pilp == 0)
2832 && (mmffAtomPropParams[1]->pilp == 0)) ? 4 : 5);
2833 }
2834 if (BO_ij == 1) {
2835 for (unsigned int i = 0; i < 2; ++i) {
2836 std::cout << "H" << i << "=" << H_i[i] << std::endl;
2837 switch (H_i[i]) {
2838 case 1:
2839 r0_i[i] -= 0.08;
2840 break;
2841
2842 case 2:
2843 r0_i[i] -= 0.03;
2844 break;
2845 }
2846 }
2847 }
2848 else {
2849 double dec = 0.0;
2850 switch (BO_ij) {
2851 case 5:
2852 dec = 0.04;
2853 break;
2854
2855 case 4:
2856 dec = 0.075;
2857 break;
2858
2859 case 3:
2860 dec = 0.17;
2861 break;
2862
2863 case 2:
2864 dec = 0.10;
2865 break;
2866 }
2867 r0_i[0] -= dec;
2868 r0_i[1] -= dec;
2869 }
2870 #endif
2871 // equation (18) - MMFF.V, page 625
2872 mmffBondParams->r0 = (r0_i[0] + r0_i[1] -
2873 c * pow(fabs(mmffAtomCovRadPauEleParams[0]->chi -
2874 mmffAtomCovRadPauEleParams[1]->chi),
2875 n) -
2876 delta);
2877 if (mmffBndkParams) {
2878 // equation (19) - MMFF.V, page 625
2879 double coeff = mmffBndkParams->r0 / mmffBondParams->r0;
2880 double coeff2 = coeff * coeff;
2881 double coeff6 = coeff2 * coeff2 * coeff2;
2882 mmffBondParams->kb = mmffBndkParams->kb * coeff6;
2883 } else {
2884 // MMFF.V, page 627
2885 // Herschbach-Laurie version of Badger's rule
2886 // J. Chem. Phys. 35, 458 (1961); https://doi.org/10.1063/1.1731952
2887 // equation (8), page 5
2888 mmffHerschbachLaurieParams = (*mmffHerschbachLaurie)(
2889 getPeriodicTableRowHL(atomicNum1), getPeriodicTableRowHL(atomicNum2));
2890 mmffBondParams->kb =
2891 pow(10.0, -(mmffBondParams->r0 - mmffHerschbachLaurieParams->a_ij) /
2892 mmffHerschbachLaurieParams->d_ij);
2893 }
2894
2895 return (const ForceFields::MMFF::MMFFBond *)mmffBondParams;
2896 }
2897
2898 // empirical rule to compute angle bending parameters if
2899 // tabulated parameters could not be found. The returned
2900 // pointer to a MMFFAngle object must be freed by the caller
getMMFFAngleBendEmpiricalRuleParams(const ROMol & mol,const ForceFields::MMFF::MMFFAngle * oldMMFFAngleParams,const ForceFields::MMFF::MMFFProp * mmffPropParamsCentralAtom,const ForceFields::MMFF::MMFFBond * mmffBondParams1,const ForceFields::MMFF::MMFFBond * mmffBondParams2,unsigned int idx1,unsigned int idx2,unsigned int idx3)2901 const ForceFields::MMFF::MMFFAngle *getMMFFAngleBendEmpiricalRuleParams(
2902 const ROMol &mol, const ForceFields::MMFF::MMFFAngle *oldMMFFAngleParams,
2903 const ForceFields::MMFF::MMFFProp *mmffPropParamsCentralAtom,
2904 const ForceFields::MMFF::MMFFBond *mmffBondParams1,
2905 const ForceFields::MMFF::MMFFBond *mmffBondParams2, unsigned int idx1,
2906 unsigned int idx2, unsigned int idx3) {
2907 int atomicNum[3];
2908 atomicNum[0] = mol.getAtomWithIdx(idx1)->getAtomicNum();
2909 atomicNum[1] = mol.getAtomWithIdx(idx2)->getAtomicNum();
2910 atomicNum[2] = mol.getAtomWithIdx(idx3)->getAtomicNum();
2911 auto *mmffAngleParams = new ForceFields::MMFF::MMFFAngle();
2912 unsigned int ringSize = isAngleInRingOfSize3or4(mol, idx1, idx2, idx3);
2913 if (!oldMMFFAngleParams) {
2914 // angle rest value empirical rule
2915 mmffAngleParams->theta0 = 120.0;
2916 switch (mmffPropParamsCentralAtom->crd) {
2917 case 4:
2918 // if the central atom has crd = 4
2919 mmffAngleParams->theta0 = 109.45;
2920 break;
2921
2922 case 2:
2923 // if the central atom is oxygen
2924 if (atomicNum[1] == 8) {
2925 mmffAngleParams->theta0 = 105.0;
2926 }
2927 // if the central atom is linear
2928 else if (mmffPropParamsCentralAtom->linh == 1) {
2929 mmffAngleParams->theta0 = 180.0;
2930 }
2931 break;
2932
2933 case 3:
2934 if ((mmffPropParamsCentralAtom->val == 3) &&
2935 (mmffPropParamsCentralAtom->mltb == 0)) {
2936 // if the central atom is nitrogen
2937 if (atomicNum[1] == 7) {
2938 mmffAngleParams->theta0 = 107.0;
2939 } else {
2940 mmffAngleParams->theta0 = 92.0;
2941 }
2942 }
2943 break;
2944 }
2945 if (ringSize == 3) {
2946 mmffAngleParams->theta0 = 60.0;
2947 } else if (ringSize == 4) {
2948 mmffAngleParams->theta0 = 90.0;
2949 }
2950 } else {
2951 mmffAngleParams->theta0 = oldMMFFAngleParams->theta0;
2952 }
2953 // angle force constant empirical rule
2954 double Z[3] = {0.0, 0.0, 0.0};
2955 double C[3] = {0.0, 0.0, 0.0};
2956 double beta = 1.75;
2957 for (unsigned int i = 0; i < 3; ++i) {
2958 // Table VI - MMFF.V, page 628
2959 switch (atomicNum[i]) {
2960 // Hydrogen
2961 case 1:
2962 Z[i] = 1.395;
2963 break;
2964
2965 // Carbon
2966 case 6:
2967 Z[i] = 2.494;
2968 C[i] = 1.016;
2969 break;
2970
2971 // Nitrogen
2972 case 7:
2973 Z[i] = 2.711;
2974 C[i] = 1.113;
2975 break;
2976
2977 // Oxygen
2978 case 8:
2979 Z[i] = 3.045;
2980 C[i] = 1.337;
2981 break;
2982
2983 // Fluorine
2984 case 9:
2985 Z[i] = 2.847;
2986 break;
2987
2988 // Silicon
2989 case 14:
2990 Z[i] = 2.350;
2991 C[i] = 0.811;
2992 break;
2993
2994 // Phosphorus
2995 case 15:
2996 Z[i] = 2.350;
2997 C[i] = 1.068;
2998 break;
2999
3000 // Sulfur
3001 case 16:
3002 Z[i] = 2.980;
3003 C[i] = 1.249;
3004 break;
3005
3006 // Chlorine
3007 case 17:
3008 Z[i] = 2.909;
3009 C[i] = 1.078;
3010 break;
3011
3012 // Bromine
3013 case 35:
3014 Z[i] = 3.017;
3015 break;
3016
3017 // Iodine
3018 case 53:
3019 Z[i] = 3.086;
3020 break;
3021 }
3022 }
3023 double r0_ij = mmffBondParams1->r0;
3024 double r0_jk = mmffBondParams2->r0;
3025 double D =
3026 (r0_ij - r0_jk) * (r0_ij - r0_jk) / ((r0_ij + r0_jk) * (r0_ij + r0_jk));
3027 double theta0_rad = DEG2RAD * mmffAngleParams->theta0;
3028 if (ringSize == 4) {
3029 beta *= 0.85;
3030 } else if (ringSize == 3) {
3031 beta *= 0.05;
3032 }
3033 // equation (20) - MMFF.V, page 628
3034 mmffAngleParams->ka =
3035 beta * Z[0] * C[1] * Z[2] /
3036 ((r0_ij + r0_jk) * theta0_rad * theta0_rad * exp(2.0 * D));
3037
3038 return (const ForceFields::MMFF::MMFFAngle *)mmffAngleParams;
3039 }
3040
3041 // empirical rule to compute torsional parameters if
3042 // tabulated parameters could not be found
3043 // the indexes of the two central atoms J and K
3044 // idx2 and idx3 must be supplied. The returned pointer
3045 // to a MMFFTor object must be freed by the caller
3046 const ForceFields::MMFF::MMFFTor *
getMMFFTorsionEmpiricalRuleParams(const ROMol & mol,unsigned int idx2,unsigned int idx3)3047 MMFFMolProperties::getMMFFTorsionEmpiricalRuleParams(const ROMol &mol,
3048 unsigned int idx2,
3049 unsigned int idx3) {
3050 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
3051
3052 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3053 const MMFFAromCollection *mmffArom = DefaultParameters::getMMFFArom();
3054 auto *mmffTorParams = new ForceFields::MMFF::MMFFTor();
3055 unsigned int jAtomType = this->getMMFFAtomType(idx2);
3056 unsigned int kAtomType = this->getMMFFAtomType(idx3);
3057 const MMFFProp *jMMFFProp = (*mmffProp)(jAtomType);
3058 const MMFFProp *kMMFFProp = (*mmffProp)(kAtomType);
3059 const Bond *bond = mol.getBondBetweenAtoms(idx2, idx3);
3060 double U[2] = {0.0, 0.0};
3061 double V[2] = {0.0, 0.0};
3062 double W[2] = {0.0, 0.0};
3063 double beta = 0.0;
3064 double pi_jk = 0.0;
3065 const auto N_jk = (double)((jMMFFProp->crd - 1) * (kMMFFProp->crd - 1));
3066 int atomicNum[2] = {mol.getAtomWithIdx(idx2)->getAtomicNum(),
3067 mol.getAtomWithIdx(idx3)->getAtomicNum()};
3068
3069 for (unsigned int i = 0; i < 2; ++i) {
3070 switch (atomicNum[i]) {
3071 // carbon
3072 case 6:
3073 U[i] = 2.0;
3074 V[i] = 2.12;
3075 break;
3076
3077 // nitrogen
3078 case 7:
3079 U[i] = 2.0;
3080 V[i] = 1.5;
3081 break;
3082
3083 // oxygen
3084 case 8:
3085 U[i] = 2.0;
3086 V[i] = 0.2;
3087 W[i] = 2.0;
3088 break;
3089
3090 // silicon
3091 case 14:
3092 U[i] = 1.25;
3093 V[i] = 1.22;
3094 break;
3095
3096 // phosphorus
3097 case 15:
3098 U[i] = 1.25;
3099 V[i] = 2.40;
3100 break;
3101
3102 // sulfur
3103 case 16:
3104 U[i] = 1.25;
3105 V[i] = 0.49;
3106 W[i] = 8.0;
3107 break;
3108 }
3109 }
3110
3111 // rule (a)
3112 if (jMMFFProp->linh || kMMFFProp->linh) {
3113 mmffTorParams->V1 = 0.0;
3114 mmffTorParams->V2 = 0.0;
3115 mmffTorParams->V3 = 0.0;
3116 }
3117
3118 // rule (b)
3119 else if (mmffArom->isMMFFAromatic(jAtomType) &&
3120 mmffArom->isMMFFAromatic(kAtomType) && bond->getIsAromatic()) {
3121 beta = ((((jMMFFProp->val == 3) && (kMMFFProp->val == 4)) ||
3122 ((jMMFFProp->val == 4) && (kMMFFProp->val == 3)))
3123 ? 3.0
3124 : 6.0);
3125 pi_jk = (((jMMFFProp->pilp == 0) && (kMMFFProp->pilp == 0)) ? 0.5 : 0.3);
3126 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3127 }
3128
3129 // rule (c)
3130 else if (bond->getBondType() == Bond::DOUBLE) {
3131 beta = 6.0;
3132 pi_jk = (((jMMFFProp->mltb == 2) && (kMMFFProp->mltb == 2)) ? 1.0 : 0.4);
3133 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3134 }
3135
3136 // rule (d)
3137 else if ((jMMFFProp->crd == 4) && (kMMFFProp->crd == 4)) {
3138 mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3139 }
3140
3141 // rule (e)
3142 else if ((jMMFFProp->crd == 4) && (kMMFFProp->crd != 4)) {
3143 if (((kMMFFProp->crd == 3) &&
3144 (((kMMFFProp->val == 4) || (kMMFFProp->val == 34)) ||
3145 kMMFFProp->mltb)) ||
3146 ((kMMFFProp->crd == 2) && ((kMMFFProp->val == 3) || kMMFFProp->mltb))) {
3147 mmffTorParams->V1 = 0.0;
3148 mmffTorParams->V2 = 0.0;
3149 mmffTorParams->V3 = 0.0;
3150 } else {
3151 mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3152 }
3153 }
3154
3155 // rule (f)
3156 else if ((kMMFFProp->crd == 4) && (jMMFFProp->crd != 4)) {
3157 if (((jMMFFProp->crd == 3) &&
3158 (((jMMFFProp->val == 4) || (jMMFFProp->val == 34)) ||
3159 jMMFFProp->mltb)) ||
3160 ((jMMFFProp->crd == 2) && ((jMMFFProp->val == 3) || jMMFFProp->mltb))) {
3161 mmffTorParams->V1 = 0.0;
3162 mmffTorParams->V2 = 0.0;
3163 mmffTorParams->V3 = 0.0;
3164 } else {
3165 mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3166 }
3167 }
3168
3169 // rule (g)
3170 else if (((bond->getBondType() == Bond::SINGLE) && jMMFFProp->mltb &&
3171 kMMFFProp->mltb) ||
3172 (jMMFFProp->mltb && kMMFFProp->pilp) ||
3173 (jMMFFProp->pilp && kMMFFProp->mltb)) {
3174 // case (1)
3175 if (jMMFFProp->pilp && kMMFFProp->pilp) {
3176 mmffTorParams->V1 = 0.0;
3177 mmffTorParams->V2 = 0.0;
3178 mmffTorParams->V3 = 0.0;
3179 }
3180 // case (2)
3181 else if (jMMFFProp->pilp && kMMFFProp->mltb) {
3182 beta = 6.0;
3183 if (jMMFFProp->mltb == 1) {
3184 pi_jk = 0.5;
3185 } else if ((getPeriodicTableRow(atomicNum[0]) == 2) &&
3186 (getPeriodicTableRow(atomicNum[1]) == 2)) {
3187 pi_jk = 0.3;
3188 } else if ((getPeriodicTableRow(atomicNum[0]) != 2) ||
3189 (getPeriodicTableRow(atomicNum[1]) != 2)) {
3190 pi_jk = 0.15;
3191 }
3192 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3193 }
3194 // case (3)
3195 else if (kMMFFProp->pilp && jMMFFProp->mltb) {
3196 beta = 6.0;
3197 if (kMMFFProp->mltb == 1) {
3198 pi_jk = 0.5;
3199 } else if ((getPeriodicTableRow(atomicNum[0]) == 2) &&
3200 (getPeriodicTableRow(atomicNum[1]) == 2)) {
3201 pi_jk = 0.3;
3202 } else if ((getPeriodicTableRow(atomicNum[0]) != 2) ||
3203 (getPeriodicTableRow(atomicNum[1]) != 2)) {
3204 pi_jk = 0.15;
3205 }
3206 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3207 }
3208 // case (4)
3209 else if (((jMMFFProp->mltb == 1) || (kMMFFProp->mltb == 1)) &&
3210 ((atomicNum[0] != 6) || (atomicNum[1] != 6))) {
3211 beta = 6.0;
3212 pi_jk = 0.4;
3213 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3214 }
3215 // case (5)
3216 else {
3217 beta = 6.0;
3218 pi_jk = 0.15;
3219 mmffTorParams->V2 = beta * pi_jk * sqrt(U[0] * U[1]);
3220 }
3221 }
3222
3223 // rule (h)
3224 else {
3225 if (((atomicNum[0] == 8) || (atomicNum[0] == 16)) &&
3226 ((atomicNum[1] == 8) || (atomicNum[1] == 16))) {
3227 mmffTorParams->V2 = -sqrt(W[0] * W[1]);
3228 } else {
3229 mmffTorParams->V3 = sqrt(V[0] * V[1]) / N_jk;
3230 }
3231 }
3232
3233 return (const MMFFTor *)mmffTorParams;
3234 }
3235
3236 // populates the MMFFMolProperties object with MMFF
3237 // formal and partial charges
computeMMFFCharges(const ROMol & mol)3238 void MMFFMolProperties::computeMMFFCharges(const ROMol &mol) {
3239 PRECONDITION(this->isValid(), "missing atom types - invalid force-field");
3240
3241 unsigned int idx;
3242 unsigned int i;
3243 unsigned int j;
3244 unsigned int atomType;
3245 unsigned int nbrAtomType;
3246 unsigned int nConj = 0;
3247 unsigned int old_nConj = 0;
3248 double pChg = 0.0;
3249 double fChg = 0.0;
3250 boost::dynamic_bitset<> conjNBitVect(mol.getNumAtoms());
3251 VECT_INT_VECT atomRings = mol.getRingInfo()->atomRings();
3252 ROMol::ADJ_ITER nbrIdx;
3253 ROMol::ADJ_ITER endNbrs;
3254 ROMol::ADJ_ITER nbr2Idx;
3255 ROMol::ADJ_ITER end2Nbrs;
3256 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3257 const MMFFPBCICollection *mmffPBCI = DefaultParameters::getMMFFPBCI();
3258 const MMFFChgCollection *mmffChg = DefaultParameters::getMMFFChg();
3259
3260 // We need to set formal charges upfront
3261 for (idx = 0; idx < mol.getNumAtoms(); ++idx) {
3262 const Atom *atom = mol.getAtomWithIdx(idx);
3263 atomType = this->getMMFFAtomType(idx);
3264 fChg = 0.0;
3265 switch (atomType) {
3266 // special cases
3267 case 32:
3268 // O2CM
3269 // Oxygen in carboxylate group
3270 case 72:
3271 // SM
3272 // Anionic terminal sulfur
3273 // loop over neighbors
3274 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3275 for (; nbrIdx != endNbrs; ++nbrIdx) {
3276 const Atom *nbrAtom = mol[*nbrIdx];
3277 nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3278 // loop over neighbors of the neighbor
3279 // count how many terminal oxygen/sulfur atoms
3280 // or secondary nitrogens
3281 // are bonded to the neighbor of ipso
3282 int nSecNbondedToNbr = 0;
3283 int nTermOSbondedToNbr = 0;
3284 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
3285 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
3286 const Atom *nbr2Atom = mol[*nbr2Idx];
3287 // if it's nitrogen with 2 neighbors and it is not aromatic,
3288 // increment the counter of secondary nitrogens
3289 if ((nbr2Atom->getAtomicNum() == 7) &&
3290 (nbr2Atom->getDegree() == 2) &&
3291 (!(nbr2Atom->getIsAromatic()))) {
3292 ++nSecNbondedToNbr;
3293 }
3294 // if it's terminal oxygen/sulfur,
3295 // increment the terminal oxygen/sulfur counter
3296 if (((nbr2Atom->getAtomicNum() == 8) ||
3297 (nbr2Atom->getAtomicNum() == 16)) &&
3298 (nbr2Atom->getDegree() == 1)) {
3299 ++nTermOSbondedToNbr;
3300 }
3301 }
3302 // in case its sulfur with two terminal oxygen/sulfur atoms and one
3303 // secondary
3304 // nitrogen, this is a deprotonated sulfonamide, so we should not
3305 // consider
3306 // nitrogen as a replacement for oxygen/sulfur in a sulfone
3307 if ((nbrAtom->getAtomicNum() == 16) && (nTermOSbondedToNbr == 2) &&
3308 (nSecNbondedToNbr == 1)) {
3309 nSecNbondedToNbr = 0;
3310 }
3311 // if the neighbor is carbon
3312 if ((nbrAtom->getAtomicNum() == 6) && nTermOSbondedToNbr) {
3313 // O2CM
3314 // Oxygen in (thio)carboxylate group: charge is shared
3315 // across 2 oxygens/sulfur atoms in (thio)carboxylate,
3316 // 3 oxygen/sulfur atoms in (thio)carbonate
3317 // SM
3318 // Anionic terminal sulfur: charge is localized
3319 fChg = ((nTermOSbondedToNbr == 1)
3320 ? -1.0
3321 : -((double)(nTermOSbondedToNbr - 1) /
3322 (double)nTermOSbondedToNbr));
3323 break;
3324 }
3325 // if the neighbor is NO2 or NO3
3326 if ((nbrAtomType == 45) && (nTermOSbondedToNbr == 3)) {
3327 // O3N
3328 // Nitrate anion oxygen
3329 fChg = -1.0 / 3.0;
3330 break;
3331 }
3332 // if the neighbor is PO2, PO3, PO4
3333 if ((nbrAtomType == 25) && nTermOSbondedToNbr) {
3334 // OP
3335 // Oxygen in phosphine oxide
3336 // O2P
3337 // One of 2 terminal O's on P
3338 // O3P
3339 // One of 3 terminal O's on P
3340 // O4P
3341 // One of 4 terminal O's on P
3342 fChg = ((nTermOSbondedToNbr == 1)
3343 ? 0.0
3344 : -((double)(nTermOSbondedToNbr - 1) /
3345 (double)nTermOSbondedToNbr));
3346 break;
3347 }
3348 // if the neighbor is SO2, SO2N, SO3, SO4, SO2M, SSOM
3349 if ((nbrAtomType == 18) && nTermOSbondedToNbr) {
3350 // SO2
3351 // Sulfone sulfur
3352 // SO2N
3353 // Sulfonamide sulfur
3354 // SO3
3355 // Sulfonate group sulfur
3356 // SO4
3357 // Sulfate group sulfur
3358 // SNO
3359 // Sulfur in nitrogen analog of a sulfone
3360 fChg =
3361 (((nSecNbondedToNbr + nTermOSbondedToNbr) == 2)
3362 ? 0.0
3363 : -((double)((nSecNbondedToNbr + nTermOSbondedToNbr) - 2) /
3364 (double)nTermOSbondedToNbr));
3365 break;
3366 }
3367 if ((nbrAtomType == 73) && nTermOSbondedToNbr) {
3368 // SO2M
3369 // Sulfur in anionic sulfinate group
3370 // SSOM
3371 // Tricoordinate sulfur in anionic thiosulfinate group
3372 fChg = ((nTermOSbondedToNbr == 1)
3373 ? 0.0
3374 : -((double)(nTermOSbondedToNbr - 1) /
3375 (double)nTermOSbondedToNbr));
3376 break;
3377 }
3378 if ((nbrAtomType == 77) && nTermOSbondedToNbr) {
3379 // O4Cl
3380 // Oxygen in perchlorate anion
3381 fChg = -(1.0 / (double)nTermOSbondedToNbr);
3382 break;
3383 }
3384 }
3385 break;
3386
3387 case 76:
3388 // N5M
3389 // Nitrogen in 5-ring aromatic anion
3390 // we don't need to bother about the neighbors with N5M
3391 for (i = 0; i < atomRings.size(); ++i) {
3392 if ((std::find(atomRings[i].begin(), atomRings[i].end(), idx) !=
3393 atomRings[i].end())) {
3394 break;
3395 }
3396 }
3397 // find how many nitrogens with atom type 76 we have
3398 // and share the formal charge accordingly
3399 if (i < atomRings.size()) {
3400 unsigned int nNitrogensIn5Ring = 0;
3401 for (j = 0; j < atomRings[i].size(); ++j) {
3402 if (this->getMMFFAtomType(atomRings[i][j]) == 76) {
3403 ++nNitrogensIn5Ring;
3404 }
3405 }
3406 if (nNitrogensIn5Ring) {
3407 fChg = -(1.0 / (double)nNitrogensIn5Ring);
3408 }
3409 }
3410 break;
3411
3412 case 55:
3413 case 56:
3414 case 81:
3415 // NIM+
3416 // Aromatic nitrogen in imidazolium
3417 // N5A+
3418 // Positive nitrogen in 5-ring alpha position
3419 // N5B+
3420 // Positive nitrogen in 5-ring beta position
3421 // N5+
3422 // Positive nitrogen in other 5-ring position
3423 // we need to loop over all molecule atoms
3424 // and find all those nitrogens with atom type
3425 // 81, 55 or 56, check whether they are conjugated
3426 // with ipso and keep on looping until no more
3427 // conjugated atoms can be found. Finally, we divide
3428 // the total formal charge that was found on the
3429 // conjugated system by the number of conjugated nitrogens
3430 // of types 81, 55 or 56 that were found.
3431 // This is not strictly what is described
3432 // in the MMFF papers, but it is the only way to get an
3433 // integer total formal charge, which makes sense to me
3434 // probably such conjugated systems are anyway out of the
3435 // scope of MMFF, but this is an attempt to correctly
3436 // deal with them somehow
3437 fChg = (double)(atom->getFormalCharge());
3438 nConj = 1;
3439 old_nConj = 0;
3440 conjNBitVect.reset();
3441 conjNBitVect[idx] = 1;
3442 while (nConj > old_nConj) {
3443 old_nConj = nConj;
3444 for (i = 0; i < mol.getNumAtoms(); ++i) {
3445 // if this atom is not marked as conj, move on
3446 if (!conjNBitVect[i]) {
3447 continue;
3448 }
3449 // loop over neighbors
3450 boost::tie(nbrIdx, endNbrs) =
3451 mol.getAtomNeighbors(mol.getAtomWithIdx(i));
3452 for (; nbrIdx != endNbrs; ++nbrIdx) {
3453 const Atom *nbrAtom = mol[*nbrIdx];
3454 nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3455 // if atom type is not 80 or 57, move on
3456 if ((nbrAtomType != 57) && (nbrAtomType != 80)) {
3457 continue;
3458 }
3459 // loop over neighbors of the neighbor
3460 // if they are nitrogens of type 81, 55 or 56 and
3461 // they are not not marked as conjugated yet, do it
3462 // and increment the nConj counter by 1
3463 boost::tie(nbr2Idx, end2Nbrs) = mol.getAtomNeighbors(nbrAtom);
3464 for (; nbr2Idx != end2Nbrs; ++nbr2Idx) {
3465 const Atom *nbr2Atom = mol[*nbr2Idx];
3466 // if atom type is not 81, 55 or 56, move on
3467 nbrAtomType = this->getMMFFAtomType(nbr2Atom->getIdx());
3468 if ((nbrAtomType != 55) && (nbrAtomType != 56) &&
3469 (nbrAtomType != 81)) {
3470 continue;
3471 }
3472 j = nbr2Atom->getIdx();
3473 // if this nitrogen is not yet marked as conjugated,
3474 // mark it and increment the counter and eventually
3475 // adjust the total formal charge of the conjugated system
3476 if (!conjNBitVect[j]) {
3477 conjNBitVect[j] = 1;
3478 fChg += (double)(nbr2Atom->getFormalCharge());
3479 ++nConj;
3480 }
3481 }
3482 }
3483 }
3484 }
3485 if (nConj) {
3486 fChg /= (double)nConj;
3487 }
3488 break;
3489
3490 case 61:
3491 // loop over neighbors
3492 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3493 for (; nbrIdx != endNbrs; ++nbrIdx) {
3494 const Atom *nbrAtom = mol[*nbrIdx];
3495 // if it is diazonium, set a +1 formal charge on
3496 // the secondary nitrogen
3497 if (this->getMMFFAtomType(nbrAtom->getIdx()) == 42) {
3498 fChg = 1.0;
3499 }
3500 }
3501 break;
3502
3503 // non-complicated +1 atom types
3504 case 34:
3505 // NR+
3506 // Quaternary nitrogen
3507 case 49:
3508 // O+
3509 // Oxonium oxygen
3510 case 51:
3511 // O=+
3512 // Oxenium oxygen
3513 case 54:
3514 // N+=C
3515 // Iminium nitrogen
3516 // N+=N
3517 // Positively charged nitrogen doubly bonded to N
3518 case 58:
3519 // NPD+
3520 // Aromatic nitrogen in pyridinium
3521 case 92:
3522 // LI+
3523 // Lithium cation
3524 case 93:
3525 // NA+
3526 // Sodium cation
3527 case 94:
3528 // K+
3529 // Potassium cation
3530 case 97:
3531 // CU+1
3532 // Monopositive copper cation
3533 fChg = 1.0;
3534 break;
3535
3536 // non-complicated +2 atom types
3537 case 87:
3538 // FE+2
3539 // Dipositive iron cation
3540 case 95:
3541 // ZN+2
3542 // Dipositive zinc cation
3543 case 96:
3544 // CA+2
3545 // Dipositive calcium cation
3546 case 98:
3547 // CU+2
3548 // Dipositive copper cation
3549 case 99:
3550 // MG+2
3551 // Dipositive magnesium cation
3552 fChg = 2.0;
3553 break;
3554
3555 // non-complicated +3 atom types
3556 case 88:
3557 // FE+3
3558 // Tripositive iron cation
3559 fChg = 3.0;
3560 break;
3561
3562 // non-complicated -1 atom types
3563 case 35:
3564 // OM
3565 // Oxide oxygen on sp3 carbon
3566 // OM2
3567 // Oxide oxygen on sp2 carbon
3568 // OM
3569 // Oxide oxygen on sp3 nitrogen (not in original MMFF.I Table III)
3570 // OM2
3571 // Oxide oxygen on sp2 nitrogen (not in original MMFF.I Table III)
3572 case 62:
3573 // NM
3574 // Anionic divalent nitrogen
3575 case 89:
3576 // F-
3577 // Fluoride anion
3578 case 90:
3579 // Cl-
3580 // Chloride anion
3581 case 91:
3582 // BR-
3583 // Bromide anion
3584 fChg = -1.0;
3585 break;
3586 }
3587 this->setMMFFFormalCharge(idx, fChg);
3588 }
3589 // now we compute partial charges
3590 // See Halgren, T. MMFF.V, J. Comput. Chem. 1996, 17, 616-641
3591 // https://doi.org/10.1002/(SICI)1096-987X(199604)17:5/6<616::AID-JCC5>3.0.CO;2-X
3592 for (idx = 0; idx < mol.getNumAtoms(); ++idx) {
3593 const Atom *atom = mol.getAtomWithIdx(idx);
3594 atomType = this->getMMFFAtomType(idx);
3595 double q0 = this->getMMFFFormalCharge(idx);
3596 auto M = (double)((*mmffProp)(atomType)->crd);
3597 double v = (*mmffPBCI)(atomType)->fcadj;
3598 double sumFormalCharge = 0.0;
3599 double sumPartialCharge = 0.0;
3600 double nbrFormalCharge;
3601 std::pair<int, const MMFFChg *> mmffChgParams;
3602
3603 if (isDoubleZero(v)) {
3604 // loop over neighbors
3605 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3606 for (; nbrIdx != endNbrs; ++nbrIdx) {
3607 const Atom *nbrAtom = mol[*nbrIdx];
3608 nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3609 // if neighbors have a negative formal charge, the latter
3610 // influences the charge on ipso
3611 if (nbrFormalCharge < 0.0) {
3612 q0 += (nbrFormalCharge / (2.0 * (double)(nbrAtom->getDegree())));
3613 }
3614 }
3615 }
3616 // there is a special case for anionic divalent nitrogen
3617 // with positively charged neighbor
3618 if (atomType == 62) {
3619 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3620 for (; nbrIdx != endNbrs; ++nbrIdx) {
3621 const Atom *nbrAtom = mol[*nbrIdx];
3622 nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3623 if (nbrFormalCharge > 0.0) {
3624 q0 -= (nbrFormalCharge / 2.0);
3625 }
3626 }
3627 }
3628 // loop over neighbors
3629 boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
3630 for (; nbrIdx != endNbrs; ++nbrIdx) {
3631 const Atom *nbrAtom = mol[*nbrIdx];
3632 const Bond *bond =
3633 mol.getBondBetweenAtoms(atom->getIdx(), nbrAtom->getIdx());
3634 // we need to determine the sign of bond charge
3635 // increments depending on the bonding relationship
3636 // i.e. we have parameters for [a,b] bonds
3637 // but it depends whether ipso is a or b
3638 unsigned int nbrAtomType = this->getMMFFAtomType(nbrAtom->getIdx());
3639 unsigned int bondType = this->getMMFFBondType(bond);
3640 mmffChgParams =
3641 mmffChg->getMMFFChgParams(bondType, atomType, nbrAtomType);
3642 sumPartialCharge +=
3643 (mmffChgParams.second
3644 ? (double)(mmffChgParams.first) * ((mmffChgParams.second)->bci)
3645 : ((*mmffPBCI)(atomType)->pbci -
3646 (*mmffPBCI)(nbrAtomType)->pbci));
3647 nbrFormalCharge = this->getMMFFFormalCharge(nbrAtom->getIdx());
3648 sumFormalCharge += nbrFormalCharge;
3649 }
3650 // we compute ipso partial charge according to
3651 // equation 15, page 622 MMFF.V paper
3652 pChg = (1.0 - M * v) * q0 + v * sumFormalCharge + sumPartialCharge;
3653 this->setMMFFPartialCharge(atom->getIdx(), pChg);
3654 }
3655 }
3656
getMMFFBondStretchParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,unsigned int & bondType,MMFFBond & mmffBondStretchParams)3657 bool MMFFMolProperties::getMMFFBondStretchParams(
3658 const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3659 unsigned int &bondType, MMFFBond &mmffBondStretchParams) {
3660 const MMFFBondCollection *mmffBond = DefaultParameters::getMMFFBond();
3661 bool res = false;
3662 if (isValid()) {
3663 unsigned int iAtomType = getMMFFAtomType(idx1);
3664 unsigned int jAtomType = getMMFFAtomType(idx2);
3665 const Bond *bond = mol.getBondBetweenAtoms(idx1, idx2);
3666 if (bond) {
3667 bondType = getMMFFBondType(bond);
3668 bool areMMFFBondParamsEmpirical = false;
3669 const MMFFBond *mmffBondParams =
3670 (*mmffBond)(bondType, iAtomType, jAtomType);
3671 if (!mmffBondParams) {
3672 mmffBondParams = getMMFFBondStretchEmpiricalRuleParams(mol, bond);
3673 areMMFFBondParamsEmpirical = true;
3674 }
3675 if (mmffBondParams) {
3676 mmffBondStretchParams = *mmffBondParams;
3677 if (areMMFFBondParamsEmpirical) {
3678 delete mmffBondParams;
3679 }
3680 res = true;
3681 }
3682 }
3683 }
3684 return res;
3685 }
3686
getMMFFAngleBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,unsigned int & angleType,MMFFAngle & mmffAngleBendParams)3687 bool MMFFMolProperties::getMMFFAngleBendParams(const ROMol &mol,
3688 const unsigned int idx1,
3689 const unsigned int idx2,
3690 const unsigned int idx3,
3691 unsigned int &angleType,
3692 MMFFAngle &mmffAngleBendParams) {
3693 bool res = false;
3694 if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3695 mol.getBondBetweenAtoms(idx2, idx3)) {
3696 const MMFFAngleCollection *mmffAngle = DefaultParameters::getMMFFAngle();
3697 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3698 unsigned int idx[3] = {idx1, idx2, idx3};
3699 MMFFBond mmffBondParams[2];
3700 unsigned int atomType[3];
3701 unsigned int i;
3702 angleType = getMMFFAngleType(mol, idx1, idx2, idx3);
3703 bool areMMFFAngleParamsEmpirical = false;
3704 for (i = 0; i < 3; ++i) {
3705 atomType[i] = getMMFFAtomType(idx[i]);
3706 }
3707 const MMFFAngle *mmffAngleParams =
3708 (*mmffAngle)(DefaultParameters::getMMFFDef(), angleType, atomType[0],
3709 atomType[1], atomType[2]);
3710 const MMFFProp *mmffPropParamsCentralAtom = (*mmffProp)(atomType[1]);
3711 if ((!mmffAngleParams) || (isDoubleZero(mmffAngleParams->ka))) {
3712 areMMFFAngleParamsEmpirical = true;
3713 for (i = 0; areMMFFAngleParamsEmpirical && (i < 2); ++i) {
3714 unsigned int bondType;
3715 areMMFFAngleParamsEmpirical = getMMFFBondStretchParams(
3716 mol, idx[i], idx[i + 1], bondType, mmffBondParams[i]);
3717 }
3718 if (areMMFFAngleParamsEmpirical) {
3719 mmffAngleParams = getMMFFAngleBendEmpiricalRuleParams(
3720 mol, mmffAngleParams, mmffPropParamsCentralAtom, &mmffBondParams[0],
3721 &mmffBondParams[1], idx[0], idx[1], idx[2]);
3722 }
3723 }
3724 if (mmffAngleParams) {
3725 mmffAngleBendParams = *mmffAngleParams;
3726 res = true;
3727 if (areMMFFAngleParamsEmpirical) {
3728 delete mmffAngleParams;
3729 }
3730 }
3731 }
3732 return res;
3733 }
3734
getMMFFStretchBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,unsigned int & stretchBendType,MMFFStbn & mmffStretchBendParams,MMFFBond mmffBondStretchParams[2],MMFFAngle & mmffAngleBendParams)3735 bool MMFFMolProperties::getMMFFStretchBendParams(
3736 const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3737 const unsigned int idx3, unsigned int &stretchBendType,
3738 MMFFStbn &mmffStretchBendParams, MMFFBond mmffBondStretchParams[2],
3739 MMFFAngle &mmffAngleBendParams) {
3740 bool res = false;
3741 if (isValid()) {
3742 const MMFFPropCollection *mmffProp = DefaultParameters::getMMFFProp();
3743 const MMFFStbnCollection *mmffStbn = DefaultParameters::getMMFFStbn();
3744 const MMFFDfsbCollection *mmffDfsb = DefaultParameters::getMMFFDfsb();
3745 unsigned int idx[3] = {idx1, idx2, idx3};
3746 unsigned int atomType[3];
3747 unsigned int bondType[2];
3748 unsigned int angleType;
3749 const MMFFProp *mmffPropParamsCentralAtom =
3750 (*mmffProp)(getMMFFAtomType(idx[1]));
3751 if (!(mmffPropParamsCentralAtom->linh)) {
3752 res = true;
3753 unsigned int i = 0;
3754 for (i = 0; i < 3; ++i) {
3755 atomType[i] = getMMFFAtomType(idx[i]);
3756 }
3757 for (i = 0; res && (i < 2); ++i) {
3758 res = getMMFFBondStretchParams(mol, idx[i], idx[i + 1], bondType[i],
3759 mmffBondStretchParams[i]);
3760 }
3761 if (res) {
3762 res = getMMFFAngleBendParams(mol, idx1, idx2, idx3, angleType,
3763 mmffAngleBendParams);
3764 }
3765 std::pair<bool, const MMFFStbn *> mmffStbnParams;
3766 if (res) {
3767 stretchBendType = getMMFFStretchBendType(
3768 angleType, (atomType[0] <= atomType[2]) ? bondType[0] : bondType[1],
3769 (atomType[0] < atomType[2]) ? bondType[1] : bondType[0]);
3770 mmffStbnParams = mmffStbn->getMMFFStbnParams(
3771 stretchBendType, bondType[0], bondType[1], atomType[0], atomType[1],
3772 atomType[2]);
3773 if (!(mmffStbnParams.second)) {
3774 mmffStbnParams = mmffDfsb->getMMFFDfsbParams(
3775 getPeriodicTableRow(mol.getAtomWithIdx(idx1)->getAtomicNum()),
3776 getPeriodicTableRow(mol.getAtomWithIdx(idx2)->getAtomicNum()),
3777 getPeriodicTableRow(mol.getAtomWithIdx(idx3)->getAtomicNum()));
3778 }
3779 res = (!(isDoubleZero((mmffStbnParams.second)->kbaIJK) &&
3780 isDoubleZero((mmffStbnParams.second)->kbaKJI)));
3781 }
3782 if (res) {
3783 if (mmffStbnParams.first) {
3784 mmffStretchBendParams.kbaIJK = (mmffStbnParams.second)->kbaKJI;
3785 mmffStretchBendParams.kbaKJI = (mmffStbnParams.second)->kbaIJK;
3786 } else {
3787 mmffStretchBendParams = *(mmffStbnParams.second);
3788 }
3789 }
3790 }
3791 }
3792 return res;
3793 }
3794
getMMFFTorsionParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4,unsigned int & torsionType,MMFFTor & mmffTorsionParams)3795 bool MMFFMolProperties::getMMFFTorsionParams(
3796 const ROMol &mol, const unsigned int idx1, const unsigned int idx2,
3797 const unsigned int idx3, const unsigned int idx4, unsigned int &torsionType,
3798 MMFFTor &mmffTorsionParams) {
3799 bool res = false;
3800 if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3801 mol.getBondBetweenAtoms(idx2, idx3) &&
3802 mol.getBondBetweenAtoms(idx3, idx4)) {
3803 unsigned int i;
3804 unsigned int idx[4] = {idx1, idx2, idx3, idx4};
3805 unsigned int atomType[4];
3806 const MMFFTorCollection *mmffTor =
3807 DefaultParameters::getMMFFTor(getMMFFVariant() == "MMFF94s");
3808 for (i = 0; i < 4; ++i) {
3809 atomType[i] = getMMFFAtomType(idx[i]);
3810 }
3811 const std::pair<unsigned int, unsigned int> torTypePair =
3812 getMMFFTorsionType(mol, idx1, idx2, idx3, idx4);
3813 bool areMMFFTorParamsEmpirical = false;
3814 const std::pair<const unsigned int, const MMFFTor *> mmffTorPair =
3815 mmffTor->getMMFFTorParams(DefaultParameters::getMMFFDef(), torTypePair,
3816 atomType[0], atomType[1], atomType[2],
3817 atomType[3]);
3818 torsionType = (mmffTorPair.first ? mmffTorPair.first : torTypePair.first);
3819 const MMFFTor *mmffTorParams = mmffTorPair.second;
3820 if (!mmffTorParams) {
3821 torsionType = torTypePair.first;
3822 mmffTorParams = getMMFFTorsionEmpiricalRuleParams(mol, idx2, idx3);
3823 areMMFFTorParamsEmpirical = true;
3824 }
3825 res =
3826 (!(isDoubleZero(mmffTorParams->V1) && isDoubleZero(mmffTorParams->V2) &&
3827 isDoubleZero(mmffTorParams->V3)));
3828 if (res) {
3829 mmffTorsionParams = *mmffTorParams;
3830 }
3831 if (areMMFFTorParamsEmpirical) {
3832 delete mmffTorParams;
3833 }
3834 }
3835 return res;
3836 }
3837
getMMFFOopBendParams(const ROMol & mol,const unsigned int idx1,const unsigned int idx2,const unsigned int idx3,const unsigned int idx4,MMFFOop & mmffOopBendParams)3838 bool MMFFMolProperties::getMMFFOopBendParams(const ROMol &mol,
3839 const unsigned int idx1,
3840 const unsigned int idx2,
3841 const unsigned int idx3,
3842 const unsigned int idx4,
3843 MMFFOop &mmffOopBendParams) {
3844 bool res = false;
3845 if (isValid() && mol.getBondBetweenAtoms(idx1, idx2) &&
3846 mol.getBondBetweenAtoms(idx2, idx3) &&
3847 mol.getBondBetweenAtoms(idx2, idx4)) {
3848 unsigned int i;
3849 unsigned int idx[4] = {idx1, idx2, idx3, idx4};
3850 unsigned int atomType[4];
3851
3852 const MMFFOopCollection *mmffOop =
3853 DefaultParameters::getMMFFOop(getMMFFVariant() == "MMFF94s");
3854 for (i = 0; i < 4; ++i) {
3855 atomType[i] = getMMFFAtomType(idx[i]);
3856 }
3857 const MMFFOop *mmffOopParams =
3858 (*mmffOop)(DefaultParameters::getMMFFDef(), atomType[0], atomType[1],
3859 atomType[2], atomType[3]);
3860 // if no parameters could be found, we exclude this term (SURDOX02)
3861 if (mmffOopParams) {
3862 mmffOopBendParams = *mmffOopParams;
3863 res = true;
3864 }
3865 }
3866 return res;
3867 }
3868
getMMFFVdWParams(const unsigned int idx1,const unsigned int idx2,MMFFVdWRijstarEps & mmffVdWParams)3869 bool MMFFMolProperties::getMMFFVdWParams(const unsigned int idx1,
3870 const unsigned int idx2,
3871 MMFFVdWRijstarEps &mmffVdWParams) {
3872 bool res = false;
3873 if (isValid()) {
3874 const MMFFVdWCollection *mmffVdW = DefaultParameters::getMMFFVdW();
3875 const unsigned int iAtomType = getMMFFAtomType(idx1);
3876 const unsigned int jAtomType = getMMFFAtomType(idx2);
3877 const MMFFVdW *mmffVdWParamsIAtom = (*mmffVdW)(iAtomType);
3878 const MMFFVdW *mmffVdWParamsJAtom = (*mmffVdW)(jAtomType);
3879 if (mmffVdWParamsIAtom && mmffVdWParamsJAtom) {
3880 mmffVdWParams.R_ij_starUnscaled = MMFF::Utils::calcUnscaledVdWMinimum(
3881 mmffVdW, mmffVdWParamsIAtom, mmffVdWParamsJAtom);
3882 mmffVdWParams.epsilonUnscaled = MMFF::Utils::calcUnscaledVdWWellDepth(
3883 mmffVdWParams.R_ij_starUnscaled, mmffVdWParamsIAtom,
3884 mmffVdWParamsJAtom);
3885 mmffVdWParams.R_ij_star = mmffVdWParams.R_ij_starUnscaled;
3886 mmffVdWParams.epsilon = mmffVdWParams.epsilonUnscaled;
3887 MMFF::Utils::scaleVdWParams(mmffVdWParams.R_ij_star,
3888 mmffVdWParams.epsilon, mmffVdW,
3889 mmffVdWParamsIAtom, mmffVdWParamsJAtom);
3890 res = true;
3891 }
3892 }
3893 return res;
3894 }
3895 } // namespace MMFF
3896 } // namespace RDKit
3897