1 /**********************************************************************
2 mol.cpp - Handle molecules.
3 
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 Some portions Copyright (C) 2001-2008 by Geoffrey R. Hutchison
6 Some portions Copyright (C) 2003 by Michael Banck
7 
8 This file is part of the Open Babel project.
9 For more information, see <http://openbabel.org/>
10 
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation version 2 of the License.
14 
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19 ***********************************************************************/
20 #include <openbabel/babelconfig.h>
21 
22 #include <openbabel/mol.h>
23 #include <openbabel/bond.h>
24 #include <openbabel/ring.h>
25 #include <openbabel/rotamer.h>
26 #include <openbabel/phmodel.h>
27 #include <openbabel/bondtyper.h>
28 #include <openbabel/obiter.h>
29 #include <openbabel/builder.h>
30 #include <openbabel/kekulize.h>
31 #include <openbabel/internalcoord.h>
32 #include <openbabel/math/matrix3x3.h>
33 #include <openbabel/obfunctions.h>
34 #include <openbabel/elements.h>
35 
36 #include <openbabel/stereo/tetrahedral.h>
37 #include <openbabel/stereo/cistrans.h>
38 
39 #include <sstream>
40 #include <set>
41 
42 using namespace std;
43 
44 namespace OpenBabel
45 {
46 
47   extern bool SwabInt;
48   extern THREAD_LOCAL OBPhModel  phmodel;
49   extern THREAD_LOCAL OBAromaticTyper  aromtyper;
50   extern THREAD_LOCAL OBAtomTyper      atomtyper;
51   extern THREAD_LOCAL OBBondTyper      bondtyper;
52 
53   /** \class OBMol mol.h <openbabel/mol.h>
54       \brief Molecule Class
55 
56       The most important class in Open Babel is OBMol, or the molecule class.
57       The OBMol class is designed to store all the basic information
58       associated with a molecule, to make manipulations on the connection
59       table of a molecule facile, and to provide member functions which
60       automatically perceive information about a molecule. A guided tour
61       of the OBMol class is a good place to start.
62 
63       An OBMol class can be declared:
64       \code
65       OBMol mol;
66       \endcode
67 
68       For example:
69       \code
70       #include <iostream.h>
71 
72       #include <openbabel/mol.h>
73       #include <openbabel/obconversion.h>
74       int main(int argc,char **argv)
75       {
76       OBConversion conv(&cin,&cout);
77       if(conv.SetInAndOutFormats("SDF","MOL2"))
78       {
79       OBMol mol;
80       if(conv.Read(&mol))
81       ...manipulate molecule
82 
83       conv->Write(&mol);
84       }
85       return(1);
86       }
87       \endcode
88 
89       will read in a molecule in SD file format from stdin
90       (or the C++ equivalent cin) and write a MOL2 format file out
91       to standard out. Additionally, The input and output formats can
92       be altered using the OBConversion class
93 
94       Once a molecule has been read into an OBMol (or created via other methods)
95       the atoms and bonds
96       can be accessed by the following methods:
97       \code
98       OBAtom *atom;
99       atom = mol.GetAtom(5); //random access of an atom
100       \endcode
101       or
102       \code
103       OBBond *bond;
104       bond = mol.GetBond(14); //random access of a bond
105       \endcode
106       or
107       \code
108       FOR_ATOMS_OF_MOL(atom, mol) // iterator access (see OBMolAtomIter)
109       \endcode
110       or
111       \code
112       FOR_BONDS_OF_MOL(bond, mol) // iterator access (see OBMolBondIter)
113       \endcode
114       It is important to note that atom arrays currently begin at 1 and bond arrays
115       begin at 0. Requesting atom 0 (\code
116       OBAtom *atom = mol.GetAtom(0); \endcode
117       will result in an error, but
118       \code
119       OBBond *bond = mol.GetBond(0);
120       \endcode
121       is perfectly valid.
122       Note that this is expected to change in the near future to simplify coding
123       and improve efficiency.
124 
125       The ambiguity of numbering issues and off-by-one errors led to the use
126       of iterators in Open Babel. An iterator is essentially just a pointer, but
127       when used in conjunction with Standard Template Library (STL) vectors
128       it provides an unambiguous way to loop over arrays. OBMols store their
129       atom and bond information in STL vectors. Since vectors are template
130       based, a vector of any user defined type can be declared. OBMols declare
131       vector<OBAtom*> and vector<OBBond*> to store atom and bond information.
132       Iterators are then a natural way to loop over the vectors of atoms and bonds.
133 
134       A variety of predefined iterators have been created to simplify
135       common looping requests (e.g., looping over all atoms in a molecule,
136       bonds to a given atom, etc.)
137 
138       \code
139       #include <openbabel/obiter.h>
140       ...
141       #define FOR_ATOMS_OF_MOL(a,m)     for( OBMolAtomIter     a(m); a; ++a )
142       #define FOR_BONDS_OF_MOL(b,m)     for( OBMolBondIter     b(m); b; ++b )
143       #define FOR_NBORS_OF_ATOM(a,p)    for( OBAtomAtomIter    a(p); a; ++a )
144       #define FOR_BONDS_OF_ATOM(b,p)    for( OBAtomBondIter    b(p); b; ++b )
145       #define FOR_RESIDUES_OF_MOL(r,m)  for( OBResidueIter     r(m); r; ++r )
146       #define FOR_ATOMS_OF_RESIDUE(a,r) for( OBResidueAtomIter a(r); a; ++a )
147       ...
148       \endcode
149 
150       These convenience functions can be used like so:
151       \code
152       #include <openbabel/obiter.h>
153       #include <openbabel/mol.h>
154 
155       OBMol mol;
156       double exactMass = 0.0;
157       FOR_ATOMS_OF_MOL(a, mol)
158       {
159       exactMass +=  a->GetExactMass();
160       }
161       \endcode
162 
163       Note that with these convenience macros, the iterator "a" (or
164       whichever name you pick) is declared for you -- you do not need to
165       do it beforehand.
166   */
167 
168   //
169   // OBMol member functions
170   //
SetTitle(const char * title)171   void  OBMol::SetTitle(const char *title)
172   {
173     _title = title;
174     Trim(_title);
175   }
176 
SetTitle(std::string & title)177   void  OBMol::SetTitle(std::string &title)
178   {
179     _title = title;
180     Trim(_title);
181   }
182 
GetTitle(bool replaceNewlines) const183   const char *OBMol::GetTitle(bool replaceNewlines) const
184   {
185     if (!replaceNewlines || _title.find('\n')== string::npos )
186       return(_title.c_str());
187 
188     //Only multiline titles use the following to replace newlines by spaces
189     static string title;
190     title=_title;
191     string::size_type j;
192     for ( ; (j = title.find_first_of( "\n\r" )) != string::npos ; ) {
193       title.replace( j, 1, " ");
194     }
195 
196     return(title.c_str());
197   }
198 
SortVVInt(const vector<int> & a,const vector<int> & b)199   bool SortVVInt(const vector<int> &a,const vector<int> &b)
200   {
201     return(a.size() > b.size());
202   }
203 
SortAtomZ(const pair<OBAtom *,double> & a,const pair<OBAtom *,double> & b)204   bool SortAtomZ(const pair<OBAtom*,double> &a, const pair<OBAtom*,double> &b)
205   {
206     return (a.second < b.second);
207   }
208 
GetAngle(OBAtom * a,OBAtom * b,OBAtom * c)209   double OBMol::GetAngle( OBAtom* a, OBAtom* b, OBAtom* c)
210   {
211     return a->GetAngle( b, c );
212   }
213 
GetTorsion(int a,int b,int c,int d)214   double OBMol::GetTorsion(int a,int b,int c,int d)
215   {
216     return(GetTorsion((OBAtom*)_vatom[a-1],
217                       (OBAtom*)_vatom[b-1],
218                       (OBAtom*)_vatom[c-1],
219                       (OBAtom*)_vatom[d-1]));
220   }
221 
SetTorsion(OBAtom * a,OBAtom * b,OBAtom * c,OBAtom * d,double ang)222   void OBMol::SetTorsion(OBAtom *a,OBAtom *b,OBAtom *c, OBAtom *d, double ang)
223   {
224     vector<int> tor;
225     vector<int> atoms;
226 
227     obErrorLog.ThrowError(__FUNCTION__,
228                           "Ran OpenBabel::SetTorsion", obAuditMsg);
229 
230     tor.push_back(a->GetCoordinateIdx());
231     tor.push_back(b->GetCoordinateIdx());
232     tor.push_back(c->GetCoordinateIdx());
233     tor.push_back(d->GetCoordinateIdx());
234 
235     FindChildren(atoms, b->GetIdx(), c->GetIdx());
236     int j;
237     for (j = 0 ; (unsigned)j < atoms.size() ; j++ )
238       atoms[j] = (atoms[j] - 1) * 3;
239 
240     double v2x,v2y,v2z;
241     double radang,m[9];
242     double x,y,z,mag,rotang,sn,cs,t,tx,ty,tz;
243 
244     //calculate the torsion angle
245     // TODO: fix this calculation for periodic systems
246     radang = CalcTorsionAngle(a->GetVector(),
247                               b->GetVector(),
248                               c->GetVector(),
249                               d->GetVector()) / RAD_TO_DEG;
250     //
251     // now we have the torsion angle (radang) - set up the rot matrix
252     //
253 
254     //find the difference between current and requested
255     rotang = ang - radang;
256 
257     sn = sin(rotang);
258     cs = cos(rotang);
259     t = 1 - cs;
260 
261     v2x = _c[tor[1]]   - _c[tor[2]];
262     v2y = _c[tor[1]+1] - _c[tor[2]+1];
263     v2z = _c[tor[1]+2] - _c[tor[2]+2];
264 
265    //normalize the rotation vector
266     mag = sqrt(SQUARE(v2x)+SQUARE(v2y)+SQUARE(v2z));
267     x = v2x/mag;
268     y = v2y/mag;
269     z = v2z/mag;
270 
271     //set up the rotation matrix
272     m[0]= t*x*x + cs;
273     m[1] = t*x*y + sn*z;
274     m[2] = t*x*z - sn*y;
275     m[3] = t*x*y - sn*z;
276     m[4] = t*y*y + cs;
277     m[5] = t*y*z + sn*x;
278     m[6] = t*x*z + sn*y;
279     m[7] = t*y*z - sn*x;
280     m[8] = t*z*z + cs;
281 
282     //
283     //now the matrix is set - time to rotate the atoms
284     //
285     tx = _c[tor[1]];
286     ty = _c[tor[1]+1];
287     tz = _c[tor[1]+2];
288     vector<int>::iterator i;
289     for (i = atoms.begin(); i != atoms.end(); ++i)
290       {
291         j = *i;
292 
293         _c[j] -= tx;
294         _c[j+1] -= ty;
295         _c[j+2]-= tz;
296         x = _c[j]*m[0] + _c[j+1]*m[1] + _c[j+2]*m[2];
297         y = _c[j]*m[3] + _c[j+1]*m[4] + _c[j+2]*m[5];
298         z = _c[j]*m[6] + _c[j+1]*m[7] + _c[j+2]*m[8];
299         _c[j] = x;
300         _c[j+1] = y;
301         _c[j+2] = z;
302         _c[j] += tx;
303         _c[j+1] += ty;
304         _c[j+2] += tz;
305       }
306   }
307 
308 
GetTorsion(OBAtom * a,OBAtom * b,OBAtom * c,OBAtom * d)309   double OBMol::GetTorsion(OBAtom *a,OBAtom *b,OBAtom *c,OBAtom *d)
310   {
311     if (!IsPeriodic())
312       {
313         return(CalcTorsionAngle(a->GetVector(),
314                                 b->GetVector(),
315                                 c->GetVector(),
316                                 d->GetVector()));
317       }
318     else
319       {
320         vector3 v1, v2, v3, v4;
321         // Wrap the atomic positions in a continuous chain that makes sense based on the unit cell
322         // Start by extracting the absolute Cartesian coordinates
323         v1 = a->GetVector();
324         v2 = b->GetVector();
325         v3 = c->GetVector();
326         v4 = d->GetVector();
327         // Then redefine the positions based on proximity to the previous atom
328         // to build a continuous chain of expanded Cartesian coordinates
329         OBUnitCell *unitCell = (OBUnitCell * ) GetData(OBGenericDataType::UnitCell);
330         v2 = unitCell->UnwrapCartesianNear(v2, v1);
331         v3 = unitCell->UnwrapCartesianNear(v3, v2);
332         v4 = unitCell->UnwrapCartesianNear(v4, v3);
333         return(CalcTorsionAngle(v1, v2, v3, v4));
334       }
335   }
336 
ContigFragList(std::vector<std::vector<int>> & cfl)337   void OBMol::ContigFragList(std::vector<std::vector<int> >&cfl)
338   {
339     int j;
340     OBAtom *atom;
341     OBBond *bond;
342     vector<OBAtom*>::iterator i;
343     vector<OBBond*>::iterator k;
344     OBBitVec used,curr,next,frag;
345     vector<int> tmp;
346 
347     used.Resize(NumAtoms()+1);
348     curr.Resize(NumAtoms()+1);
349     next.Resize(NumAtoms()+1);
350     frag.Resize(NumAtoms()+1);
351 
352     while ((unsigned)used.CountBits() < NumAtoms())
353       {
354         curr.Clear();
355         frag.Clear();
356         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
357           if (!used.BitIsSet(atom->GetIdx()))
358             {
359               curr.SetBitOn(atom->GetIdx());
360               break;
361             }
362 
363         frag |= curr;
364         while (!curr.IsEmpty())
365           {
366             next.Clear();
367             for (j = curr.NextBit(-1);j != curr.EndBit();j = curr.NextBit(j))
368               {
369                 atom = GetAtom(j);
370                 for (bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
371                   if (!used.BitIsSet(bond->GetNbrAtomIdx(atom)))
372                     next.SetBitOn(bond->GetNbrAtomIdx(atom));
373               }
374 
375             used |= curr;
376             used |= next;
377             frag |= next;
378             curr = next;
379           }
380 
381         tmp.clear();
382         frag.ToVecInt(tmp);
383         cfl.push_back(tmp);
384       }
385 
386     sort(cfl.begin(),cfl.end(),SortVVInt);
387   }
388 
FindAngles()389   void OBMol::FindAngles()
390   {
391     //if already has data return
392     if(HasData(OBGenericDataType::AngleData))
393       return;
394 
395     //get new data and attach it to molecule
396     OBAngleData *angles = new OBAngleData;
397     angles->SetOrigin(perceived);
398     SetData(angles);
399 
400     OBAngle angle;
401     OBAtom *b;
402     int unique_angle;
403 
404     unique_angle = 0;
405 
406     FOR_ATOMS_OF_MOL(atom, this) {
407       if(atom->GetAtomicNum() == OBElements::Hydrogen)
408         continue;
409 
410       b = (OBAtom*) &*atom;
411 
412       FOR_NBORS_OF_ATOM(a, b) {
413         FOR_NBORS_OF_ATOM(c, b) {
414           if(&*a == &*c) {
415             unique_angle = 1;
416             continue;
417           }
418 
419           if (unique_angle) {
420             angle.SetAtoms((OBAtom*)b, (OBAtom*)&*a, (OBAtom*)&*c);
421             angles->SetData(angle);
422             angle.Clear();
423           }
424         }
425         unique_angle = 0;
426       }
427     }
428 
429     return;
430   }
431 
FindTorsions()432   void OBMol::FindTorsions()
433   {
434     //if already has data return
435     if(HasData(OBGenericDataType::TorsionData))
436       return;
437 
438     //get new data and attach it to molecule
439     OBTorsionData *torsions = new OBTorsionData;
440     torsions->SetOrigin(perceived);
441     SetData(torsions);
442 
443     OBTorsion torsion;
444     vector<OBBond*>::iterator bi1,bi2,bi3;
445     OBBond* bond;
446     OBAtom *a,*b,*c,*d;
447 
448     //loop through all bonds generating torsions
449     for(bond = BeginBond(bi1);bond;bond = NextBond(bi1))
450       {
451         b = bond->GetBeginAtom();
452         c = bond->GetEndAtom();
453         if(b->GetAtomicNum() == OBElements::Hydrogen || c->GetAtomicNum() == OBElements::Hydrogen)
454           continue;
455 
456         for(a = b->BeginNbrAtom(bi2);a;a = b->NextNbrAtom(bi2))
457           {
458             if(a == c)
459               continue;
460 
461             for(d = c->BeginNbrAtom(bi3);d;d = c->NextNbrAtom(bi3))
462               {
463                 if ((d == b) || (d == a))
464                   continue;
465                 torsion.AddTorsion(a,b,c,d);
466               }
467           }
468         //add torsion to torsionData
469         if(torsion.GetSize())
470           torsions->SetData(torsion);
471         torsion.Clear();
472       }
473 
474     return;
475   }
476 
FindLargestFragment(OBBitVec & lf)477   void OBMol::FindLargestFragment(OBBitVec &lf)
478   {
479     int j;
480     OBAtom *atom;
481     OBBond *bond;
482     vector<OBAtom*>::iterator i;
483     vector<OBBond*>::iterator k;
484     OBBitVec used,curr,next,frag;
485 
486     lf.Clear();
487     while ((unsigned)used.CountBits() < NumAtoms())
488       {
489         curr.Clear();
490         frag.Clear();
491         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
492           if (!used.BitIsSet(atom->GetIdx()))
493             {
494               curr.SetBitOn(atom->GetIdx());
495               break;
496             }
497 
498         frag |= curr;
499         while (!curr.IsEmpty())
500           {
501             next.Clear();
502             for (j = curr.NextBit(-1);j != curr.EndBit();j = curr.NextBit(j))
503               {
504                 atom = GetAtom(j);
505                 for (bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
506                   if (!used.BitIsSet(bond->GetNbrAtomIdx(atom)))
507                     next.SetBitOn(bond->GetNbrAtomIdx(atom));
508               }
509 
510             used |= curr;
511             used |= next;
512             frag |= next;
513             curr = next;
514           }
515 
516         if (lf.IsEmpty() || lf.CountBits() < frag.CountBits())
517           lf = frag;
518       }
519   }
520 
521   //! locates all atoms for which there exists a path to 'end'
522   //! without going through 'bgn'
523   //! children must not include 'end'
FindChildren(vector<OBAtom * > & children,OBAtom * bgn,OBAtom * end)524   void OBMol::FindChildren(vector<OBAtom*> &children,OBAtom *bgn,OBAtom *end)
525   {
526     OBBitVec used,curr,next;
527 
528     used |= bgn->GetIdx();
529     used |= end->GetIdx();
530     curr |= end->GetIdx();
531     children.clear();
532 
533     int i;
534     OBAtom *atom,*nbr;
535     vector<OBBond*>::iterator j;
536 
537     for (;;)
538       {
539         next.Clear();
540         for (i = curr.NextBit(-1);i != curr.EndBit();i = curr.NextBit(i))
541           {
542             atom = GetAtom(i);
543             for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
544               if (!used[nbr->GetIdx()])
545                 {
546                   children.push_back(nbr);
547                   next |= nbr->GetIdx();
548                   used |= nbr->GetIdx();
549                 }
550           }
551         if (next.IsEmpty())
552           break;
553         curr = next;
554       }
555   }
556 
557   //! locates all atoms for which there exists a path to 'second'
558   //! without going through 'first'
559   //! children must not include 'second'
FindChildren(vector<int> & children,int first,int second)560   void OBMol::FindChildren(vector<int> &children,int first,int second)
561   {
562     int i;
563     OBBitVec used,curr,next;
564 
565     used.SetBitOn(first);
566     used.SetBitOn(second);
567     curr.SetBitOn(second);
568 
569     OBAtom *atom;
570     while (!curr.IsEmpty())
571       {
572         next.Clear();
573         for (i = curr.NextBit(-1);i != curr.EndBit();i = curr.NextBit(i))
574           {
575             atom = GetAtom(i);
576             FOR_BONDS_OF_ATOM (bond, atom)
577               if (!used.BitIsSet(bond->GetNbrAtomIdx(atom)))
578                 next.SetBitOn(bond->GetNbrAtomIdx(atom));
579           }
580 
581         used |= next;
582         curr = next;
583       }
584 
585     used.SetBitOff(first);
586     used.SetBitOff(second);
587     used.ToVecInt(children);
588   }
589 
590   /*! \brief Calculates the graph theoretical distance (GTD) of each atom.
591    *
592    * Creates a vector (indexed from zero) containing, for each atom
593    * in the molecule, the number of bonds plus one to the most
594    * distant non-H atom.
595    *
596    * For example, for the molecule H3CC(=O)Cl the GTD value for C1
597    * would be 3, as the most distant non-H atom (either Cl or O) is
598    * 2 bonds away.
599    *
600    * Since the GTD measures the distance to non-H atoms, the GTD values
601    * for terminal H atoms tend to be larger than for non-H terminal atoms.
602    * In the example above, the GTD values for the H atoms are all 4.
603    */
GetGTDVector(vector<int> & gtd)604   bool OBMol::GetGTDVector(vector<int> &gtd)
605   //calculates the graph theoretical distance for every atom
606   //and puts it into gtd
607   {
608     gtd.clear();
609     gtd.resize(NumAtoms());
610 
611     int gtdcount,natom;
612     OBBitVec used,curr,next;
613     OBAtom *atom,*atom1;
614     OBBond *bond;
615     vector<OBAtom*>::iterator i;
616     vector<OBBond*>::iterator j;
617 
618     next.Clear();
619 
620     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
621       {
622         gtdcount = 0;
623         used.Clear();
624         curr.Clear();
625         used.SetBitOn(atom->GetIdx());
626         curr.SetBitOn(atom->GetIdx());
627 
628         while (!curr.IsEmpty())
629           {
630             next.Clear();
631             for (natom = curr.NextBit(-1);natom != curr.EndBit();natom = curr.NextBit(natom))
632               {
633                 atom1 = GetAtom(natom);
634                 for (bond = atom1->BeginBond(j);bond;bond = atom1->NextBond(j))
635                   if (!used.BitIsSet(bond->GetNbrAtomIdx(atom1)) && !curr.BitIsSet(bond->GetNbrAtomIdx(atom1)))
636                     if (bond->GetNbrAtom(atom1)->GetAtomicNum() != OBElements::Hydrogen)
637                       next.SetBitOn(bond->GetNbrAtomIdx(atom1));
638               }
639 
640             used |= next;
641             curr = next;
642             gtdcount++;
643           }
644         gtd[atom->GetIdx()-1] = gtdcount;
645       }
646     return(true);
647   }
648 
649   /*!
650   **\brief Calculates a set of graph invariant indexes using
651   ** the graph theoretical distance, number of connected heavy atoms,
652   ** aromatic boolean, ring boolean, atomic number, and
653   ** summation of bond orders connected to the atom.
654   ** Vector is indexed from zero
655   */
GetGIVector(vector<unsigned int> & vid)656   void OBMol::GetGIVector(vector<unsigned int> &vid)
657   {
658     vid.clear();
659     vid.resize(NumAtoms()+1);
660 
661     vector<int> v;
662     GetGTDVector(v);
663 
664     int i;
665     OBAtom *atom;
666     vector<OBAtom*>::iterator j;
667     for (i=0,atom = BeginAtom(j);atom;atom = NextAtom(j),++i)
668       {
669         vid[i] =  (unsigned int)v[i];
670         vid[i] += (unsigned int)(atom->GetHvyDegree()*100);
671         vid[i] += (unsigned int)(((atom->IsAromatic()) ? 1 : 0)*1000);
672         vid[i] += (unsigned int)(((atom->IsInRing()) ? 1 : 0)*10000);
673         vid[i] += (unsigned int)(atom->GetAtomicNum()*100000);
674         vid[i] += (unsigned int)(atom->GetImplicitHCount()*10000000);
675       }
676   }
677 
OBComparePairSecond(const pair<OBAtom *,unsigned int> & a,const pair<OBAtom *,unsigned int> & b)678   static bool OBComparePairSecond(const pair<OBAtom*,unsigned int> &a,const pair<OBAtom*,unsigned int> &b)
679   {
680     return(a.second < b.second);
681   }
682 
OBComparePairFirst(const pair<OBAtom *,unsigned int> & a,const pair<OBAtom *,unsigned int> & b)683   static bool OBComparePairFirst(const pair<OBAtom*,unsigned int> &a,const pair<OBAtom*,unsigned int> &b)
684   {
685     return(a.first->GetIdx() < b.first->GetIdx());
686   }
687 
688   //! counts the number of unique symmetry classes in a list
ClassCount(vector<pair<OBAtom *,unsigned int>> & vp,unsigned int & count)689   static void ClassCount(vector<pair<OBAtom*,unsigned int> > &vp,unsigned int &count)
690   {
691     count = 0;
692     vector<pair<OBAtom*,unsigned int> >::iterator k;
693     sort(vp.begin(),vp.end(),OBComparePairSecond);
694 #if 0 // original version
695 
696     unsigned int id=0; // [ejk] appease gcc's bogus "might be undef'd" warning
697     for (k = vp.begin();k != vp.end();++k)
698       {
699         if (k == vp.begin())
700           {
701             id = k->second;
702             k->second = count = 0;
703           }
704         else
705           if (k->second != id)
706             {
707               id = k->second;
708               k->second = ++count;
709             }
710           else
711             k->second = count;
712       }
713     count++;
714 #else // get rid of warning, moves test out of loop, returns 0 for empty input
715 
716     k = vp.begin();
717     if (k != vp.end())
718       {
719         unsigned int id = k->second;
720         k->second = 0;
721         ++k;
722         for (;k != vp.end(); ++k)
723           {
724             if (k->second != id)
725               {
726                 id = k->second;
727                 k->second = ++count;
728               }
729             else
730               k->second = count;
731           }
732         ++count;
733       }
734     else
735       {
736         // [ejk] thinks count=0 might be OK for an empty list, but orig code did
737         //++count;
738       }
739 #endif
740   }
741 
742   //! creates a new vector of symmetry classes base on an existing vector
743   //! helper routine to GetGIDVector
CreateNewClassVector(vector<pair<OBAtom *,unsigned int>> & vp1,vector<pair<OBAtom *,unsigned int>> & vp2)744   static void CreateNewClassVector(vector<pair<OBAtom*,unsigned int> > &vp1,vector<pair<OBAtom*,unsigned int> > &vp2)
745   {
746     int m,id;
747     OBAtom *nbr;
748     vector<OBBond*>::iterator j;
749     vector<unsigned int>::iterator k;
750     vector<pair<OBAtom*,unsigned int> >::iterator i;
751     sort(vp1.begin(),vp1.end(),OBComparePairFirst);
752     vp2.clear();
753     for (i = vp1.begin();i != vp1.end();++i)
754       {
755         vector<unsigned int> vtmp;
756         for (nbr = i->first->BeginNbrAtom(j);nbr;nbr = i->first->NextNbrAtom(j))
757           vtmp.push_back(vp1[nbr->GetIdx()-1].second);
758         sort(vtmp.begin(),vtmp.end(),OBCompareUnsigned);
759         for (id=i->second,m=100,k = vtmp.begin();k != vtmp.end();++k,m*=100)
760           id += *k * m;
761 
762         vp2.push_back(pair<OBAtom*,unsigned int> (i->first,id));
763       }
764   }
765 
766   /*!
767   **\brief Calculates a set of symmetry identifiers for a molecule.
768   ** Atoms with the same symmetry ID are symmetrically equivalent.
769   ** Vector is indexed from zero
770   */
GetGIDVector(vector<unsigned int> & vgid)771   void OBMol::GetGIDVector(vector<unsigned int> &vgid)
772   {
773     vector<unsigned int> vgi;
774     GetGIVector(vgi);  //get vector of graph invariants
775 
776     int i;
777     OBAtom *atom;
778     vector<OBAtom*>::iterator j;
779     vector<pair<OBAtom*,unsigned int> > vp1,vp2;
780     for (i=0,atom = BeginAtom(j);atom;atom = NextAtom(j),++i)
781       vp1.push_back(pair<OBAtom*,unsigned int> (atom,vgi[i]));
782 
783     unsigned int nclass1,nclass2; //number of classes
784     ClassCount(vp1,nclass1);
785 
786     if (nclass1 < NumAtoms())
787       {
788         for (i = 0;i < 100;++i) //sanity check - shouldn't ever hit this number
789           {
790             CreateNewClassVector(vp1,vp2);
791             ClassCount(vp2,nclass2);
792             vp1 = vp2;
793             if (nclass1 == nclass2)
794               break;
795             nclass1 = nclass2;
796           }
797       }
798 
799     vgid.clear();
800     sort(vp1.begin(),vp1.end(),OBComparePairFirst);
801     vector<pair<OBAtom*,unsigned int> >::iterator k;
802     for (k = vp1.begin();k != vp1.end();++k)
803       vgid.push_back(k->second);
804   }
805 
NumHvyAtoms()806   unsigned int OBMol::NumHvyAtoms()
807   {
808     OBAtom *atom;
809     vector<OBAtom*>::iterator(i);
810     unsigned int count = 0;
811 
812     for(atom = this->BeginAtom(i);atom;atom = this->NextAtom(i))
813       {
814         if (atom->GetAtomicNum() != OBElements::Hydrogen)
815           count++;
816       }
817 
818     return(count);
819   }
820 
NumRotors(bool sampleRingBonds)821   unsigned int OBMol::NumRotors(bool sampleRingBonds)
822   {
823     OBRotorList rl;
824     rl.FindRotors(*this, sampleRingBonds);
825     return rl.Size();
826   }
827 
828   //! Returns a pointer to the atom after a safety check
829   //! 0 < idx <= NumAtoms
GetAtom(int idx) const830   OBAtom *OBMol::GetAtom(int idx) const
831   {
832     if ((unsigned)idx < 1 || (unsigned)idx > NumAtoms())
833       {
834         obErrorLog.ThrowError(__FUNCTION__, "Requested Atom Out of Range", obDebug);
835         return nullptr;
836       }
837 
838     return((OBAtom*)_vatom[idx-1]);
839   }
840 
GetAtomById(unsigned long id) const841   OBAtom *OBMol::GetAtomById(unsigned long id) const
842   {
843     if (id >= _atomIds.size()) {
844       obErrorLog.ThrowError(__FUNCTION__, "Requested atom with invalid id.", obDebug);
845       return nullptr;
846     }
847 
848     return((OBAtom*)_atomIds[id]);
849   }
850 
GetFirstAtom() const851   OBAtom *OBMol::GetFirstAtom() const
852   {
853     return _vatom.empty() ? nullptr : (OBAtom*)_vatom[0];
854   }
855 
856   //! Returns a pointer to the bond after a safety check
857   //! 0 <= idx < NumBonds
GetBond(int idx) const858   OBBond *OBMol::GetBond(int idx) const
859   {
860     if (idx < 0 || (unsigned)idx >= NumBonds())
861       {
862         obErrorLog.ThrowError(__FUNCTION__, "Requested Bond Out of Range", obDebug);
863         return nullptr;
864       }
865 
866     return((OBBond*)_vbond[idx]);
867   }
868 
GetBondById(unsigned long id) const869   OBBond *OBMol::GetBondById(unsigned long id) const
870   {
871     if (id >= _bondIds.size()) {
872       obErrorLog.ThrowError(__FUNCTION__, "Requested bond with invalid id.", obDebug);
873       return nullptr;
874     }
875 
876     return((OBBond*)_bondIds[id]);
877   }
878 
GetBond(int bgn,int end) const879   OBBond *OBMol::GetBond(int bgn, int end) const
880   {
881     return(GetBond(GetAtom(bgn),GetAtom(end)));
882   }
883 
GetBond(OBAtom * bgn,OBAtom * end) const884   OBBond *OBMol::GetBond(OBAtom *bgn,OBAtom *end) const
885   {
886     OBAtom *nbr;
887     vector<OBBond*>::iterator i;
888 
889     if (!bgn || !end) return nullptr;
890 
891     for (nbr = bgn->BeginNbrAtom(i);nbr;nbr = bgn->NextNbrAtom(i))
892       if (nbr == end)
893         return((OBBond *)*i);
894 
895     return nullptr; //just to keep the SGI compiler happy
896   }
897 
GetResidue(int idx) const898   OBResidue *OBMol::GetResidue(int idx) const
899   {
900     if (idx < 0 || (unsigned)idx >= NumResidues())
901       {
902         obErrorLog.ThrowError(__FUNCTION__, "Requested Residue Out of Range", obDebug);
903         return nullptr;
904       }
905 
906     return (_residue[idx]);
907   }
908 
GetInternalCoord()909   std::vector<OBInternalCoord*> OBMol::GetInternalCoord()
910   {
911     if (_internals.empty())
912       {
913         _internals.push_back(nullptr);
914         for(unsigned int i = 1; i <= NumAtoms(); ++i)
915           {
916             _internals.push_back(new OBInternalCoord);
917           }
918         CartesianToInternal(_internals, *this);
919       }
920     return _internals;
921   }
922 
923   //! Implements <a href="http://qsar.sourceforge.net/dicts/blue-obelisk/index.xhtml#findSmallestSetOfSmallestRings">blue-obelisk:findSmallestSetOfSmallestRings</a>.
GetSSSR()924   vector<OBRing*> &OBMol::GetSSSR()
925   {
926     if (!HasSSSRPerceived())
927       FindSSSR();
928 
929     OBRingData *rd = nullptr;
930     if (!HasData("SSSR")) {
931       rd = new OBRingData();
932       rd->SetAttribute("SSSR");
933       SetData(rd);
934     }
935 
936     rd = (OBRingData *) GetData("SSSR");
937     rd->SetOrigin(perceived);
938     return(rd->GetData());
939   }
940 
GetLSSR()941   vector<OBRing*> &OBMol::GetLSSR()
942   {
943     if (!HasLSSRPerceived())
944       FindLSSR();
945 
946     OBRingData *rd = nullptr;
947     if (!HasData("LSSR")) {
948       rd = new OBRingData();
949       rd->SetAttribute("LSSR");
950       SetData(rd);
951     }
952 
953     rd = (OBRingData *) GetData("LSSR");
954     rd->SetOrigin(perceived);
955     return(rd->GetData());
956   }
957 
GetMolWt(bool implicitH)958   double OBMol::GetMolWt(bool implicitH)
959   {
960     double molwt=0.0;
961     OBAtom *atom;
962     vector<OBAtom*>::iterator i;
963 
964     double hmass = OBElements::GetMass(1);
965     for (atom = BeginAtom(i);atom;atom = NextAtom(i)) {
966       molwt += atom->GetAtomicMass();
967       if (implicitH)
968         molwt += atom->GetImplicitHCount() * hmass;
969     }
970     return(molwt);
971   }
972 
GetExactMass(bool implicitH)973   double OBMol::GetExactMass(bool implicitH)
974   {
975     double mass=0.0;
976     OBAtom *atom;
977     vector<OBAtom*>::iterator i;
978 
979     double hmass = OBElements::GetExactMass(1, 1);
980     for (atom = BeginAtom(i); atom; atom = NextAtom(i)) {
981       mass += atom->GetExactMass();
982       if (implicitH)
983         mass += atom->GetImplicitHCount() * hmass;
984     }
985 
986     return(mass);
987   }
988 
989   //! Stochoimetric formula in spaced format e.g. C 4 H 6 O 1
990   //! No pair data is stored. Normally use without parameters: GetSpacedFormula()
991   //! \since version 2.1
GetSpacedFormula(int ones,const char * sp,bool implicitH)992   string OBMol::GetSpacedFormula(int ones, const char* sp, bool implicitH)
993   {
994     //Default ones=0, sp=" ".
995     //Using ones=1 and sp="" will give unspaced formula (and no pair data entry)
996     // These are the atomic numbers of the elements in alphabetical order, plus
997     // pseudo atomic numbers for D, T isotopes.
998     const int NumElements = 118 + 2;
999     const int alphabetical[NumElements] = {
1000       89, 47, 13, 95, 18, 33, 85, 79, 5, 56, 4, 107, 83, 97, 35, 6, 20, 48,
1001       58, 98, 17, 96, 112, 27, 24, 55, 29, NumElements-1,
1002       105, 110, 66, 68, 99, 63, 9, 26, 114, 100, 87, 31,
1003       64, 32, 1, 2, 72, 80, 67, 108, 53, 49, 77, 19, 36, 57, 3, 103, 71, 116, 115, 101,
1004       12, 25, 42, 109, 7, 11, 41, 60, 10, 113, 28, 102, 93, 8, 118, 76, 15, 91, 82, 46,
1005       61, 84, 59, 78, 94, 88, 37, 75, 104, 111, 45, 86, 44, 16, 51, 21, 34, 106, 14,
1006       62, 50, 38, NumElements, 73, 65, 43, 52, 90, 22, 81, 69, 117, 92, 23, 74, 54, 39, 70,
1007       30, 40 };
1008 
1009     int atomicCount[NumElements];
1010     stringstream formula;
1011 
1012     for (int i = 0; i < NumElements; ++i)
1013       atomicCount[i] = 0;
1014 
1015     bool UseImplicitH = (NumBonds()!=0 || NumAtoms()==1);
1016     // Do not use implicit hydrogens if explicitly required not to
1017     if (!implicitH) UseImplicitH = false;
1018     bool HasHvyAtoms = NumHvyAtoms()>0;
1019     FOR_ATOMS_OF_MOL(a, *this)
1020       {
1021         int anum = a->GetAtomicNum();
1022         if(anum==0)
1023           continue;
1024         if(anum > (NumElements-2)) {
1025           char buffer[BUFF_SIZE];  // error buffer
1026           snprintf(buffer, BUFF_SIZE, "Skipping unknown element with atomic number %d", anum);
1027           obErrorLog.ThrowError(__FUNCTION__, buffer, obWarning);
1028           continue;
1029         }
1030         bool IsHiso = anum == 1 && a->GetIsotope()>=2;
1031         if(UseImplicitH)
1032           {
1033             if (anum == 1 && !IsHiso && HasHvyAtoms)
1034               continue; // skip explicit hydrogens except D,T
1035             if(anum==1)
1036               {
1037                 if (IsHiso && HasHvyAtoms)
1038                   --atomicCount[0]; //one of the implicit hydrogens is now explicit
1039               }
1040             else
1041               atomicCount[0] += a->GetImplicitHCount() + a->ExplicitHydrogenCount();
1042           }
1043         if (IsHiso)
1044           anum = NumElements + a->GetIsotope() - 3; //pseudo AtNo for D, T
1045         atomicCount[anum - 1]++;
1046       }
1047 
1048     if (atomicCount[5] != 0) // Carbon (i.e. 6 - 1 = 5)
1049       {
1050         if (atomicCount[5] > ones)
1051           formula << "C" << sp << atomicCount[5] << sp;
1052         else if (atomicCount[5] == 1)
1053           formula << "C";
1054 
1055         atomicCount[5] = 0; // So we don't output C twice
1056 
1057         // only output H if there's also carbon -- otherwise do it alphabetical
1058         if (atomicCount[0] != 0) // Hydrogen (i.e., 1 - 1 = 0)
1059           {
1060             if (atomicCount[0] > ones)
1061               formula << "H" << sp << atomicCount[0] << sp;
1062             else if (atomicCount[0] == 1)
1063               formula << "H";
1064 
1065             atomicCount[0] = 0;
1066           }
1067       }
1068 
1069     for (int j = 0; j < NumElements; ++j)
1070       {
1071         char DT[4] = {'D',0,'T',0};
1072         const char* symb;
1073         int alph = alphabetical[j]-1;
1074         if (atomicCount[ alph ])
1075           {
1076             if(alph==NumElements-1)
1077               symb = DT + 2;//T
1078             else if (alph==NumElements-2)
1079               symb = DT; //D
1080             else
1081               symb = OBElements::GetSymbol(alphabetical[j]);
1082 
1083             formula << symb << sp;
1084             if(atomicCount[alph] > ones)
1085               formula << atomicCount[alph] << sp;
1086           }
1087       }
1088 
1089     int chg = GetTotalCharge();
1090     char ch = chg>0 ? '+' : '-' ;
1091     chg = abs(chg);
1092     while(chg--)
1093       formula << ch << sp;
1094 
1095     string f_str = formula.str();
1096     return (Trim(f_str));
1097   }
1098 
1099   //! Stochoimetric formula (e.g., C4H6O).
1100   //!   This is either set by OBMol::SetFormula() or generated on-the-fly
1101   //!   using the "Hill order" -- i.e., C first if present, then H if present
1102   //!   all other elements in alphabetical order.
GetFormula()1103   string OBMol::GetFormula()
1104   {
1105     string attr = "Formula";
1106     OBPairData *dp = (OBPairData *) GetData(attr);
1107 
1108     if (dp != nullptr) // we already set the formula (or it was read from a file)
1109       return dp->GetValue();
1110 
1111     obErrorLog.ThrowError(__FUNCTION__,
1112                           "Ran OpenBabel::SetFormula -- Hill order formula",
1113                           obAuditMsg);
1114 
1115     string sformula = GetSpacedFormula(1, "");
1116 
1117     dp = new OBPairData;
1118     dp->SetAttribute(attr);
1119     dp->SetValue( sformula );
1120     dp->SetOrigin( perceived ); // internal generation
1121     SetData(dp);
1122     return sformula;
1123   }
1124 
SetFormula(string molFormula)1125   void OBMol::SetFormula(string molFormula)
1126   {
1127     string attr = "Formula";
1128     OBPairData *dp = (OBPairData *) GetData(attr);
1129     if (dp == nullptr)
1130       {
1131         dp = new OBPairData;
1132         dp->SetAttribute(attr);
1133         SetData(dp);
1134       }
1135     dp->SetValue(molFormula);
1136     // typically file input, but this needs to be revisited
1137     dp->SetOrigin(fileformatInput);
1138   }
1139 
SetTotalCharge(int charge)1140   void OBMol::SetTotalCharge(int charge)
1141   {
1142     SetFlag(OB_TCHARGE_MOL);
1143     _totalCharge = charge;
1144   }
1145 
1146   //! Returns the total molecular charge -- if it has not previously been set
1147   //!  it is calculated from the atomic formal charge information.
1148   //!  (This may or may not be correct!)
1149   //!  If you set atomic charges with OBAtom::SetFormalCharge()
1150   //!   you really should set the molecular charge with OBMol::SetTotalCharge()
GetTotalCharge()1151   int OBMol::GetTotalCharge()
1152   {
1153     if(HasFlag(OB_TCHARGE_MOL))
1154       return(_totalCharge);
1155     else // calculate from atomic formal charges (seems the best default)
1156       {
1157         obErrorLog.ThrowError(__FUNCTION__,
1158                               "Ran OpenBabel::GetTotalCharge -- calculated from formal charges",
1159                               obAuditMsg);
1160 
1161         OBAtom *atom;
1162         vector<OBAtom*>::iterator i;
1163         int chg = 0;
1164 
1165         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1166           chg += atom->GetFormalCharge();
1167         return (chg);
1168       }
1169   }
1170 
SetTotalSpinMultiplicity(unsigned int spin)1171   void   OBMol::SetTotalSpinMultiplicity(unsigned int spin)
1172   {
1173     SetFlag(OB_TSPIN_MOL);
1174     _totalSpin = spin;
1175   }
1176 
SetInternalCoord(std::vector<OBInternalCoord * > int_coord)1177   void OBMol::SetInternalCoord(std::vector<OBInternalCoord*> int_coord) {
1178     if (int_coord[0] != nullptr) {
1179       std::vector<OBInternalCoord*>::iterator it = int_coord.begin();
1180       int_coord.insert(it, nullptr);
1181     }
1182 
1183     if (int_coord.size() != _natoms + 1) {
1184       string error = "Number of internal coordinates is not the same as";
1185       error += " the number of atoms in molecule";
1186       obErrorLog.ThrowError(__FUNCTION__, error, obError);
1187       return;
1188     }
1189 
1190     _internals = int_coord;
1191 
1192     return;
1193   }
1194 
1195   //! Returns the total spin multiplicity -- if it has not previously been set
1196   //!  It is calculated from the atomic spin multiplicity information
1197   //!  assuming the high-spin case (i.e. it simply sums the number of unpaired
1198   //!  electrons assuming no further pairing of spins.
1199   //!  if it fails (gives singlet for odd number of electronic systems),
1200   //!  then assign wrt parity of the total electrons.
GetTotalSpinMultiplicity()1201   unsigned int OBMol::GetTotalSpinMultiplicity()
1202   {
1203     if (HasFlag(OB_TSPIN_MOL))
1204       return(_totalSpin);
1205     else // calculate from atomic spin information (assuming high-spin case)
1206       {
1207         obErrorLog.ThrowError(__FUNCTION__,
1208                               "Ran OpenBabel::GetTotalSpinMultiplicity -- calculating from atomic spins assuming high spin case",
1209                               obAuditMsg);
1210 
1211         OBAtom *atom;
1212         vector<OBAtom*>::iterator i;
1213         unsigned int unpaired_electrons = 0;
1214         int chg = GetTotalCharge();
1215         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1216           {
1217             if (atom->GetSpinMultiplicity() > 1)
1218               unpaired_electrons += (atom->GetSpinMultiplicity() - 1);
1219            chg += atom->GetAtomicNum();
1220           }
1221         if (chg % 2 != unpaired_electrons %2)
1222           return ((abs(chg) % 2) + 1);
1223         else
1224           return (unpaired_electrons + 1);
1225       }
1226   }
1227 
operator =(const OBMol & source)1228   OBMol &OBMol::operator=(const OBMol &source)
1229   //atom and bond info is copied from src to dest
1230   //Conformers are now copied also, MM 2/7/01
1231   //Residue information are copied, MM 4-27-01
1232   //All OBGenericData incl OBRotameterList is copied, CM 2006
1233   //Zeros all flags except OB_TCHARGE_MOL, OB_PCHARGE_MOL, OB_HYBRID_MOL
1234   //OB_TSPIN_MOL, OB_AROMATIC_MOL, OB_PERIODIC_MOL, OB_CHAINS_MOL and OB_PATTERN_STRUCTURE which are copied
1235   {
1236     if (this == &source)
1237       return *this;
1238 
1239     OBMol &src = (OBMol &)source;
1240     vector<OBAtom*>::iterator i;
1241     vector<OBBond*>::iterator j;
1242     OBAtom *atom;
1243     OBBond *bond;
1244 
1245     Clear();
1246     BeginModify();
1247 
1248     _vatom.reserve(src.NumAtoms());
1249     _atomIds.reserve(src.NumAtoms());
1250     _vbond.reserve(src.NumBonds());
1251     _bondIds.reserve(src.NumBonds());
1252 
1253     for (atom = src.BeginAtom(i);atom;atom = src.NextAtom(i))
1254       AddAtom(*atom);
1255     for (bond = src.BeginBond(j);bond;bond = src.NextBond(j))
1256       AddBond(*bond);
1257 
1258     this->_title  = src.GetTitle();
1259     this->_energy = src.GetEnergy();
1260     this->_dimension = src.GetDimension();
1261     this->SetTotalCharge(src.GetTotalCharge()); //also sets a flag
1262     this->SetTotalSpinMultiplicity(src.GetTotalSpinMultiplicity()); //also sets a flag
1263 
1264     EndModify(); //zeros flags!
1265 
1266     if (src.HasFlag(OB_PATTERN_STRUCTURE))
1267       this->SetFlag(OB_PATTERN_STRUCTURE);
1268     if (src.HasFlag(OB_TSPIN_MOL))
1269       this->SetFlag(OB_TSPIN_MOL);
1270     if (src.HasFlag(OB_TCHARGE_MOL))
1271       this->SetFlag(OB_TCHARGE_MOL);
1272     if (src.HasFlag(OB_PCHARGE_MOL))
1273       this->SetFlag(OB_PCHARGE_MOL);
1274     if (src.HasFlag(OB_PERIODIC_MOL))
1275       this->SetFlag(OB_PERIODIC_MOL);
1276     if (src.HasFlag(OB_HYBRID_MOL))
1277       this->SetFlag(OB_HYBRID_MOL);
1278     if (src.HasFlag(OB_AROMATIC_MOL))
1279       this->SetFlag(OB_AROMATIC_MOL);
1280     if (src.HasFlag(OB_CHAINS_MOL))
1281       this->SetFlag(OB_CHAINS_MOL);
1282     //this->_flags = src.GetFlags(); //Copy all flags. Perhaps too drastic a change
1283 
1284 
1285     //Copy Residue information
1286     unsigned int NumRes = src.NumResidues();
1287     if (NumRes)
1288       {
1289         unsigned int k;
1290         OBResidue *src_res = nullptr;
1291         OBResidue *res = nullptr;
1292         OBAtom *src_atom = nullptr;
1293         OBAtom *atom = nullptr;
1294         vector<OBAtom*>::iterator ii;
1295         for (k=0 ; k<NumRes ; ++k)
1296           {
1297             res = NewResidue();
1298             src_res = src.GetResidue(k);
1299             *res = *src_res; //does not copy atoms
1300             for (src_atom=src_res->BeginAtom(ii) ; src_atom ; src_atom=src_res->NextAtom(ii))
1301               {
1302                 atom = GetAtom(src_atom->GetIdx());
1303                 res->AddAtom(atom);
1304                 res->SetAtomID(atom,src_res->GetAtomID(src_atom));
1305                 res->SetHetAtom(atom,src_res->IsHetAtom(src_atom));
1306                 res->SetSerialNum(atom,src_res->GetSerialNum(src_atom));
1307               }
1308           }
1309       }
1310 
1311     //Copy conformer information
1312     if (src.NumConformers() > 1) {
1313       int k;//,l;
1314       vector<double*> conf;
1315       int currConf = -1;
1316       double* xyz = nullptr;
1317       for (k=0 ; k<src.NumConformers() ; ++k) {
1318         xyz = new double [3*src.NumAtoms()];
1319         memcpy( xyz, src.GetConformer(k), sizeof( double )*3*src.NumAtoms() );
1320         conf.push_back(xyz);
1321 
1322         if( src.GetConformer(k) == src._c ) {
1323           currConf = k;
1324         }
1325       }
1326 
1327       SetConformers(conf);
1328       if( currConf >= 0 && _vconf.size() ) {
1329         _c = _vconf[currConf];
1330       }
1331     }
1332 
1333     //Copy all the OBGenericData, providing the new molecule, this,
1334     //for those classes like OBRotameterList which contain Atom pointers
1335     //OBGenericData classes can choose not to be cloned by returning NULL
1336     vector<OBGenericData*>::iterator itr;
1337     for(itr=src.BeginData();itr!=src.EndData();++itr)
1338       {
1339         OBGenericData* pCopiedData = (*itr)->Clone(this);
1340         SetData(pCopiedData);
1341       }
1342 
1343     if (src.HasChiralityPerceived())
1344       SetChiralityPerceived();
1345 
1346     return(*this);
1347   }
1348 
operator +=(const OBMol & source)1349   OBMol &OBMol::operator+=(const OBMol &source)
1350   {
1351     OBMol &src = (OBMol &)source;
1352     vector<OBAtom*>::iterator i;
1353     vector<OBBond*>::iterator j;
1354     vector<OBResidue*>::iterator k;
1355     OBAtom *atom;
1356     OBBond *bond;
1357     OBResidue *residue;
1358 
1359     BeginModify();
1360 
1361     int prevatms = NumAtoms();
1362 
1363     string extitle(src.GetTitle());
1364     if(!extitle.empty())
1365       _title += "_" + extitle;
1366 
1367     // First, handle atoms and bonds
1368     map<unsigned long int, unsigned long int> correspondingId;
1369     for (atom = src.BeginAtom(i) ; atom ; atom = src.NextAtom(i)) {
1370       AddAtom(*atom, true); // forceNewId=true (don't reuse the original Id)
1371       OBAtom *addedAtom = GetAtom(NumAtoms());
1372       correspondingId[atom->GetId()] = addedAtom->GetId();
1373     }
1374     correspondingId[OBStereo::ImplicitRef] = OBStereo::ImplicitRef;
1375 
1376     for (bond = src.BeginBond(j) ; bond ; bond = src.NextBond(j)) {
1377       bond->SetId(NoId);//Need to remove ID which relates to source mol rather than this mol
1378       AddBond(bond->GetBeginAtomIdx() + prevatms,
1379               bond->GetEndAtomIdx() + prevatms,
1380               bond->GetBondOrder(), bond->GetFlags());
1381     }
1382 
1383     // Now update all copied residues too
1384     for (residue = src.BeginResidue(k); residue; residue = src.NextResidue(k)) {
1385       AddResidue(*residue);
1386 
1387       FOR_ATOMS_OF_RESIDUE(resAtom, residue)
1388         {
1389           // This is the equivalent atom in our combined molecule
1390           atom = GetAtom(resAtom->GetIdx() + prevatms);
1391           // So we add this to the last-added residue
1392           // (i.e., what we just copied)
1393           (_residue[_residue.size() - 1])->AddAtom(atom);
1394         }
1395     }
1396 
1397     // Copy the stereo
1398     std::vector<OBGenericData*> vdata = src.GetAllData(OBGenericDataType::StereoData);
1399     for (std::vector<OBGenericData*>::iterator data = vdata.begin(); data != vdata.end(); ++data) {
1400       OBStereo::Type datatype = ((OBStereoBase*)*data)->GetType();
1401       if (datatype == OBStereo::CisTrans) {
1402         OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
1403         OBCisTransStereo *nct = new OBCisTransStereo(this);
1404         OBCisTransStereo::Config ct_cfg = ct->GetConfig();
1405         ct_cfg.begin = correspondingId[ct_cfg.begin];
1406         ct_cfg.end = correspondingId[ct_cfg.end];
1407         for(OBStereo::RefIter ri = ct_cfg.refs.begin(); ri != ct_cfg.refs.end(); ++ri)
1408           *ri = correspondingId[*ri];
1409         nct->SetConfig(ct_cfg);
1410         SetData(nct);
1411       }
1412       else if (datatype == OBStereo::Tetrahedral) {
1413         OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
1414         OBTetrahedralStereo *nts = new OBTetrahedralStereo(this);
1415         OBTetrahedralStereo::Config ts_cfg = ts->GetConfig();
1416         ts_cfg.center = correspondingId[ts_cfg.center];
1417         ts_cfg.from = correspondingId[ts_cfg.from];
1418         for(OBStereo::RefIter ri = ts_cfg.refs.begin(); ri != ts_cfg.refs.end(); ++ri)
1419           *ri = correspondingId[*ri];
1420         nts->SetConfig(ts_cfg);
1421         SetData(nts);
1422       }
1423     }
1424 
1425     // TODO: This is actually a weird situation (e.g., adding a 2D mol to 3D one)
1426     // We should do something to update the src coordinates if they're not 3D
1427     if(src.GetDimension()<_dimension)
1428       _dimension = src.GetDimension();
1429     // TODO: Periodicity is similarly weird (e.g., adding nonperiodic data to
1430     // a crystal, or two incompatible lattice parameters).  For now, just assume
1431     // we intend to keep the lattice of the source (no updates necessary)
1432 
1433     EndModify();
1434 
1435     return(*this);
1436   }
1437 
Clear()1438   bool OBMol::Clear()
1439   {
1440     if (obErrorLog.GetOutputLevel() >= obAuditMsg)
1441       obErrorLog.ThrowError(__FUNCTION__,
1442                             "Ran OpenBabel::Clear Molecule", obAuditMsg);
1443 
1444     vector<OBAtom*>::iterator i;
1445     vector<OBBond*>::iterator j;
1446     for (i = _vatom.begin();i != _vatom.end();++i)
1447       {
1448         DestroyAtom(*i);
1449         *i = nullptr;
1450       }
1451     for (j = _vbond.begin();j != _vbond.end();++j)
1452       {
1453         DestroyBond(*j);
1454         *j = nullptr;
1455       }
1456 
1457     _atomIds.clear();
1458     _bondIds.clear();
1459     _natoms = _nbonds = 0;
1460 
1461     //Delete residues
1462     unsigned int ii;
1463     for (ii=0 ; ii<_residue.size() ; ++ii)
1464       {
1465         DestroyResidue(_residue[ii]);
1466       }
1467     _residue.clear();
1468 
1469     //clear out the multiconformer data
1470     vector<double*>::iterator k;
1471     for (k = _vconf.begin();k != _vconf.end();++k)
1472       delete [] *k;
1473     _vconf.clear();
1474 
1475     //Clear flags except OB_PATTERN_STRUCTURE which is left the same
1476     _flags &= OB_PATTERN_STRUCTURE;
1477 
1478     _c = nullptr;
1479     _mod = 0;
1480 
1481     // Clean up generic data via the base class
1482     return(OBBase::Clear());
1483   }
1484 
BeginModify()1485   void OBMol::BeginModify()
1486   {
1487     //suck coordinates from _c into _v for each atom
1488     if (!_mod && !Empty())
1489       {
1490         OBAtom *atom;
1491         vector<OBAtom*>::iterator i;
1492         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1493           {
1494             atom->SetVector();
1495             atom->ClearCoordPtr();
1496           }
1497 
1498         vector<double*>::iterator j;
1499         for (j = _vconf.begin();j != _vconf.end();++j)
1500           delete [] *j;
1501 
1502         _c = nullptr;
1503         _vconf.clear();
1504 
1505         //Destroy rotamer list if necessary
1506         if ((OBRotamerList *)GetData(OBGenericDataType::RotamerList))
1507           {
1508             delete (OBRotamerList *)GetData(OBGenericDataType::RotamerList);
1509             DeleteData(OBGenericDataType::RotamerList);
1510           }
1511       }
1512 
1513     _mod++;
1514   }
1515 
EndModify(bool nukePerceivedData)1516   void OBMol::EndModify(bool nukePerceivedData)
1517   {
1518     if (_mod == 0)
1519       {
1520         obErrorLog.ThrowError(__FUNCTION__, "_mod is negative - EndModify() called too many times", obDebug);
1521         return;
1522       }
1523 
1524     _mod--;
1525 
1526     if (_mod)
1527       return;
1528 
1529     // wipe all but whether it has aromaticity perceived, is a reaction, or has periodic boundaries enabled
1530     if (nukePerceivedData)
1531       _flags = _flags & (OB_AROMATIC_MOL|OB_REACTION_MOL|OB_PERIODIC_MOL);
1532 
1533     _c = nullptr;
1534 
1535     if (Empty())
1536       return;
1537 
1538     //if atoms present convert coords into array
1539     double *c = new double [NumAtoms()*3];
1540     _c = c;
1541 
1542     unsigned int idx;
1543     OBAtom *atom;
1544     vector<OBAtom*>::iterator j;
1545     for (idx=0,atom = BeginAtom(j);atom;atom = NextAtom(j),++idx)
1546       {
1547         atom->SetIdx(idx+1);
1548         (atom->GetVector()).Get(&_c[idx*3]);
1549         atom->SetCoordPtr(&_c);
1550       }
1551     _vconf.push_back(c);
1552 
1553     // Always remove angle and torsion data, since they will interfere with the iterators
1554     // PR#2812013
1555     DeleteData(OBGenericDataType::AngleData);
1556     DeleteData(OBGenericDataType::TorsionData);
1557   }
1558 
DestroyAtom(OBAtom * atom)1559   void OBMol::DestroyAtom(OBAtom *atom)
1560   {
1561     if (atom)
1562       {
1563         delete atom;
1564         atom = nullptr;
1565       }
1566   }
1567 
DestroyBond(OBBond * bond)1568   void OBMol::DestroyBond(OBBond *bond)
1569   {
1570     if (bond)
1571       {
1572         delete bond;
1573         bond = nullptr;
1574       }
1575   }
1576 
DestroyResidue(OBResidue * residue)1577   void OBMol::DestroyResidue(OBResidue *residue)
1578   {
1579     if (residue)
1580       {
1581         delete residue;
1582         residue = nullptr;
1583       }
1584   }
1585 
NewAtom()1586   OBAtom *OBMol::NewAtom()
1587   {
1588     return NewAtom(_atomIds.size());
1589   }
1590 
1591   //! \brief Instantiate a New Atom and add it to the molecule
1592   //!
1593   //! Checks bond_queue for any bonds that should be made to the new atom
1594   //! and updates atom indexes.
NewAtom(unsigned long id)1595   OBAtom *OBMol::NewAtom(unsigned long id)
1596   {
1597     //   BeginModify();
1598 
1599     // resize _atomIds if needed
1600     if (id >= _atomIds.size()) {
1601       unsigned int size = _atomIds.size();
1602       _atomIds.resize(id+1);
1603       for (unsigned long i = size; i < id; ++i)
1604         _atomIds[i] = nullptr;
1605     }
1606 
1607     if (_atomIds.at(id))
1608       return nullptr;
1609 
1610     OBAtom *obatom = new OBAtom;
1611     obatom->SetIdx(_natoms+1);
1612     obatom->SetParent(this);
1613 
1614     _atomIds[id] = obatom;
1615     obatom->SetId(id);
1616 
1617 #define OBAtomIncrement 100
1618 
1619     if (_natoms+1 >= _vatom.size())
1620       {
1621         _vatom.resize(_natoms+OBAtomIncrement);
1622         vector<OBAtom*>::iterator j;
1623         for (j = _vatom.begin(),j+=(_natoms+1);j != _vatom.end();++j)
1624           *j = nullptr;
1625       }
1626 #undef OBAtomIncrement
1627 
1628 
1629     _vatom[_natoms] = obatom;
1630     _natoms++;
1631 
1632     if (HasData(OBGenericDataType::VirtualBondData))
1633       {
1634         /*add bonds that have been queued*/
1635         OBVirtualBond *vb;
1636         vector<OBGenericData*> verase;
1637         vector<OBGenericData*>::iterator i;
1638         for (i = BeginData();i != EndData();++i)
1639           if ((*i)->GetDataType() == OBGenericDataType::VirtualBondData)
1640             {
1641               vb = (OBVirtualBond*)*i;
1642               if (vb->GetBgn() > _natoms || vb->GetEnd() > _natoms)
1643                 continue;
1644               if (obatom->GetIdx() == static_cast<unsigned int>(vb->GetBgn())
1645                   || obatom->GetIdx() == static_cast<unsigned int>(vb->GetEnd()))
1646                 {
1647                   AddBond(vb->GetBgn(),vb->GetEnd(),vb->GetOrder());
1648                   verase.push_back(*i);
1649                 }
1650             }
1651 
1652         if (!verase.empty())
1653           DeleteData(verase);
1654       }
1655 
1656     // EndModify();
1657 
1658     return(obatom);
1659   }
1660 
NewResidue()1661   OBResidue *OBMol::NewResidue()
1662   {
1663     OBResidue *obresidue = new OBResidue;
1664     obresidue->SetIdx(_residue.size());
1665     _residue.push_back(obresidue);
1666     return(obresidue);
1667   }
1668 
NewBond()1669   OBBond *OBMol::NewBond()
1670   {
1671     return NewBond(_bondIds.size());
1672   }
1673 
1674   //! \since version 2.1
1675   //! \brief Instantiate a New Bond and add it to the molecule
1676   //!
1677   //! Sets the proper Bond index and insures this molecule is set as the parent.
NewBond(unsigned long id)1678   OBBond *OBMol::NewBond(unsigned long id)
1679   {
1680     // resize _bondIds if needed
1681     if (id >= _bondIds.size()) {
1682       unsigned int size = _bondIds.size();
1683       _bondIds.resize(id+1);
1684       for (unsigned long i = size; i < id; ++i)
1685         _bondIds[i] = nullptr;
1686     }
1687 
1688     if (_bondIds.at(id))
1689       return nullptr;
1690 
1691     OBBond *pBond = new OBBond;
1692     pBond->SetParent(this);
1693     pBond->SetIdx(_nbonds);
1694 
1695     _bondIds[id] = pBond;
1696     pBond->SetId(id);
1697 
1698 #define OBBondIncrement 100
1699     if (_nbonds+1 >= _vbond.size())
1700       {
1701         _vbond.resize(_nbonds+OBBondIncrement);
1702         vector<OBBond*>::iterator i;
1703         for (i = _vbond.begin(),i+=(_nbonds+1);i != _vbond.end();++i)
1704           *i = nullptr;
1705       }
1706 #undef  OBBondIncrement
1707 
1708     _vbond[_nbonds] = (OBBond*)pBond;
1709     _nbonds++;
1710 
1711     return(pBond);
1712   }
1713 
1714   //! \brief Add an atom to a molecule
1715   //!
1716   //! Also checks bond_queue for any bonds that should be made to the new atom
AddAtom(OBAtom & atom,bool forceNewId)1717   bool OBMol::AddAtom(OBAtom &atom, bool forceNewId)
1718   {
1719     //    BeginModify();
1720 
1721     // Use the existing atom Id unless either it's invalid or forceNewId has been specified
1722     unsigned long id;
1723     if (forceNewId)
1724       id = _atomIds.size();
1725     else {
1726       id = atom.GetId();
1727       if (id == NoId)
1728         id = _atomIds.size();
1729     }
1730 
1731     OBAtom *obatom = new OBAtom;
1732     *obatom = atom;
1733     obatom->SetIdx(_natoms+1);
1734     obatom->SetParent(this);
1735 
1736     // resize _atomIds if needed
1737     if (id >= _atomIds.size()) {
1738       unsigned int size = _atomIds.size();
1739       _atomIds.resize(id+1);
1740       for (unsigned long i = size; i < id; ++i)
1741         _atomIds[i] = nullptr;
1742     }
1743 
1744     obatom->SetId(id);
1745     _atomIds[id] = obatom;
1746 
1747 #define OBAtomIncrement 100
1748 
1749     if (_natoms+1 >= _vatom.size())
1750       {
1751         _vatom.resize(_natoms+OBAtomIncrement);
1752         vector<OBAtom*>::iterator j;
1753         for (j = _vatom.begin(),j+=(_natoms+1);j != _vatom.end();++j)
1754           *j = nullptr;
1755       }
1756 #undef OBAtomIncrement
1757 
1758     _vatom[_natoms] = (OBAtom*)obatom;
1759     _natoms++;
1760 
1761     if (HasData(OBGenericDataType::VirtualBondData))
1762       {
1763         /*add bonds that have been queued*/
1764         OBVirtualBond *vb;
1765         vector<OBGenericData*> verase;
1766         vector<OBGenericData*>::iterator i;
1767         for (i = BeginData();i != EndData();++i)
1768           if ((*i)->GetDataType() == OBGenericDataType::VirtualBondData)
1769             {
1770               vb = (OBVirtualBond*)*i;
1771               if (vb->GetBgn() > _natoms || vb->GetEnd() > _natoms)
1772                 continue;
1773               if (obatom->GetIdx() == static_cast<unsigned int>(vb->GetBgn())
1774                   || obatom->GetIdx() == static_cast<unsigned int>(vb->GetEnd()))
1775                 {
1776                   AddBond(vb->GetBgn(),vb->GetEnd(),vb->GetOrder());
1777                   verase.push_back(*i);
1778                 }
1779             }
1780 
1781         if (!verase.empty())
1782           DeleteData(verase);
1783       }
1784 
1785     //    EndModify();
1786 
1787     return(true);
1788   }
1789 
InsertAtom(OBAtom & atom)1790   bool OBMol::InsertAtom(OBAtom &atom)
1791   {
1792     BeginModify();
1793     AddAtom(atom);
1794     EndModify();
1795 
1796     return(true);
1797   }
1798 
AddResidue(OBResidue & residue)1799   bool OBMol::AddResidue(OBResidue &residue)
1800   {
1801     BeginModify();
1802 
1803     OBResidue *obresidue = new OBResidue;
1804     *obresidue = residue;
1805 
1806     obresidue->SetIdx(_residue.size());
1807 
1808     _residue.push_back(obresidue);
1809 
1810     EndModify();
1811 
1812     return(true);
1813   }
1814 
StripSalts(unsigned int threshold)1815   bool OBMol::StripSalts(unsigned int threshold)
1816   {
1817     vector<vector<int> > cfl;
1818     vector<vector<int> >::iterator i,max;
1819 
1820     ContigFragList(cfl);
1821     if (cfl.empty() || cfl.size() == 1)
1822       {
1823         return(false);
1824       }
1825 
1826 
1827     obErrorLog.ThrowError(__FUNCTION__, "Ran OpenBabel::StripSalts", obAuditMsg);
1828 
1829     max = cfl.begin();
1830     for (i = cfl.begin();i != cfl.end();++i)
1831       {
1832         if ((*max).size() < (*i).size())
1833           max = i;
1834       }
1835 
1836     vector<int>::iterator j;
1837     vector<OBAtom*> delatoms;
1838     set<int> atomIndices;
1839     for (i = cfl.begin(); i != cfl.end(); ++i)
1840       {
1841         if (i->size() < threshold || (threshold == 0 && i != max))
1842           {
1843             for (j = (*i).begin(); j != (*i).end(); ++j)
1844               {
1845                 if (atomIndices.find( *j ) == atomIndices.end())
1846                   {
1847                     delatoms.push_back(GetAtom(*j));
1848                     atomIndices.insert(*j);
1849                   }
1850               }
1851           }
1852       }
1853 
1854     if (!delatoms.empty())
1855       {
1856         //      int tmpflags = _flags & (~(OB_SSSR_MOL));
1857         BeginModify();
1858         vector<OBAtom*>::iterator k;
1859         for (k = delatoms.begin(); k != delatoms.end(); ++k)
1860           DeleteAtom((OBAtom*)*k);
1861         EndModify();
1862         //      _flags = tmpflags;  // Gave crash when SmartsPattern::Match()
1863         // was called susequently
1864         // Hans De Winter; 03-nov-2010
1865       }
1866 
1867     return (true);
1868   }
1869 
1870   // Convenience function used by the DeleteHydrogens methods
IsSuppressibleHydrogen(OBAtom * atom)1871   static bool IsSuppressibleHydrogen(OBAtom *atom)
1872   {
1873     if (atom->GetIsotope() == 0 && atom->GetHvyDegree() == 1 && atom->GetFormalCharge() == 0
1874         && !atom->GetData("Atom Class"))
1875       return true;
1876     else
1877       return false;
1878   }
1879 
DeletePolarHydrogens()1880   bool OBMol::DeletePolarHydrogens()
1881   {
1882     OBAtom *atom;
1883     vector<OBAtom*>::iterator i;
1884     vector<OBAtom*> delatoms;
1885 
1886     obErrorLog.ThrowError(__FUNCTION__,
1887                           "Ran OpenBabel::DeleteHydrogens -- polar",
1888                           obAuditMsg);
1889 
1890     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1891       if (atom->IsPolarHydrogen() && IsSuppressibleHydrogen(atom))
1892         delatoms.push_back(atom);
1893 
1894     if (delatoms.empty())
1895       return(true);
1896 
1897     IncrementMod();
1898 
1899     for (i = delatoms.begin();i != delatoms.end();++i)
1900       DeleteAtom((OBAtom *)*i);
1901 
1902     DecrementMod();
1903 
1904     SetSSSRPerceived(false);
1905     SetLSSRPerceived(false);
1906     return(true);
1907   }
1908 
1909 
DeleteNonPolarHydrogens()1910   bool OBMol::DeleteNonPolarHydrogens()
1911   {
1912     OBAtom *atom;
1913     vector<OBAtom*>::iterator i;
1914     vector<OBAtom*> delatoms;
1915 
1916     obErrorLog.ThrowError(__FUNCTION__,
1917                           "Ran OpenBabel::DeleteHydrogens -- nonpolar",
1918                           obAuditMsg);
1919 
1920 
1921     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1922       if (atom->IsNonPolarHydrogen() && IsSuppressibleHydrogen(atom))
1923         delatoms.push_back(atom);
1924 
1925     if (delatoms.empty())
1926       return(true);
1927 
1928     /*
1929       int idx1,idx2;
1930       vector<double*>::iterator j;
1931       for (idx1=0,idx2=0,atom = BeginAtom(i);atom;atom = NextAtom(i),++idx1)
1932       if (atom->GetAtomicNum() != OBElements::Hydrogen)
1933       {
1934       for (j = _vconf.begin();j != _vconf.end();++j)
1935       memcpy((char*)&((*j)[idx2*3]),(char*)&((*j)[idx1*3]),sizeof(double)*3);
1936       idx2++;
1937       }
1938     */
1939 
1940     IncrementMod();
1941 
1942     for (i = delatoms.begin();i != delatoms.end();++i)
1943       DeleteAtom((OBAtom *)*i);
1944 
1945     DecrementMod();
1946 
1947     SetSSSRPerceived(false);
1948     SetLSSRPerceived(false);
1949     return(true);
1950   }
1951 
DeleteHydrogens()1952   bool OBMol::DeleteHydrogens()
1953   {
1954     OBAtom *atom;//,*nbr;
1955     vector<OBAtom*>::iterator i;
1956     vector<OBAtom*> delatoms,va;
1957 
1958     obErrorLog.ThrowError(__FUNCTION__,
1959                           "Ran OpenBabel::DeleteHydrogens", obAuditMsg);
1960 
1961     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
1962       if (atom->GetAtomicNum() == OBElements::Hydrogen && IsSuppressibleHydrogen(atom))
1963         delatoms.push_back(atom);
1964 
1965     SetHydrogensAdded(false);
1966 
1967     if (delatoms.empty())
1968       return(true);
1969 
1970     /* decide whether these flags need to be reset
1971        _flags &= (~(OB_ATOMTYPES_MOL));
1972        _flags &= (~(OB_HYBRID_MOL));
1973        _flags &= (~(OB_PCHARGE_MOL));
1974        _flags &= (~(OB_IMPVAL_MOL));
1975     */
1976 
1977     IncrementMod();
1978 
1979     // This is slow -- we need methods to delete a set of atoms
1980     //  and to delete a set of bonds
1981     // Calling this sequentially does result in correct behavior
1982     //  (e.g., fixing PR# 1704551)
1983     OBBondIterator bi;
1984     for (i = delatoms.begin(); i != delatoms.end(); ++i) {
1985       OBAtom* nbr = (*i)->BeginNbrAtom(bi);
1986       if (nbr) // defensive
1987         nbr->SetImplicitHCount(nbr->GetImplicitHCount() + 1);
1988       DeleteAtom((OBAtom *)*i);
1989     }
1990 
1991     DecrementMod();
1992 
1993     SetSSSRPerceived(false);
1994     SetLSSRPerceived(false);
1995     return(true);
1996   }
1997 
DeleteHydrogens(OBAtom * atom)1998   bool OBMol::DeleteHydrogens(OBAtom *atom)
1999   //deletes all hydrogens attached to the atom passed to the function
2000   {
2001     OBAtom *nbr;
2002     vector<OBAtom*>::iterator i;
2003     vector<OBBond*>::iterator k;
2004     vector<OBAtom*> delatoms;
2005 
2006     for (nbr = atom->BeginNbrAtom(k);nbr;nbr = atom->NextNbrAtom(k))
2007       if (nbr->GetAtomicNum() == OBElements::Hydrogen && IsSuppressibleHydrogen(atom))
2008         delatoms.push_back(nbr);
2009 
2010     if (delatoms.empty())
2011       return(true);
2012 
2013     IncrementMod();
2014     for (i = delatoms.begin();i != delatoms.end();++i)
2015       DeleteHydrogen((OBAtom*)*i);
2016     DecrementMod();
2017 
2018     SetHydrogensAdded(false);
2019     SetSSSRPerceived(false);
2020     SetLSSRPerceived(false);
2021     return(true);
2022   }
2023 
DeleteHydrogen(OBAtom * atom)2024   bool OBMol::DeleteHydrogen(OBAtom *atom)
2025   //deletes the hydrogen atom passed to the function
2026   {
2027     if (atom->GetAtomicNum() != OBElements::Hydrogen)
2028       return false;
2029 
2030     unsigned atomidx = atom->GetIdx();
2031 
2032     //find bonds to delete
2033     OBAtom *nbr;
2034     vector<OBBond*> vdb;
2035     vector<OBBond*>::iterator j;
2036     for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
2037       vdb.push_back(*j);
2038 
2039     IncrementMod();
2040     for (j = vdb.begin();j != vdb.end();++j)
2041       DeleteBond((OBBond*)*j); //delete bonds
2042     DecrementMod();
2043 
2044     int idx;
2045     if (atomidx != NumAtoms())
2046       {
2047         idx = atom->GetCoordinateIdx();
2048         int size = NumAtoms()-atom->GetIdx();
2049         vector<double*>::iterator k;
2050         for (k = _vconf.begin();k != _vconf.end();++k)
2051           memmove((char*)&(*k)[idx],(char*)&(*k)[idx+3],sizeof(double)*3*size);
2052 
2053       }
2054 
2055     // Deleting hydrogens does not invalidate the stereo objects
2056     // - however, any explicit refs to the hydrogen atom must be
2057     //   converted to implicit refs
2058     OBStereo::Ref id = atom->GetId();
2059     StereoRefToImplicit(*this, id);
2060 
2061     _atomIds[id] = nullptr;
2062     _vatom.erase(_vatom.begin()+(atomidx-1));
2063     _natoms--;
2064 
2065     //reset all the indices to the atoms
2066     vector<OBAtom*>::iterator i;
2067     OBAtom *atomi;
2068     for (idx=1,atomi = BeginAtom(i);atomi;atomi = NextAtom(i),++idx)
2069       atomi->SetIdx(idx);
2070 
2071     SetHydrogensAdded(false);
2072 
2073     DestroyAtom(atom);
2074 
2075     SetSSSRPerceived(false);
2076     SetLSSRPerceived(false);
2077     return(true);
2078   }
2079 
2080   /*
2081   this has become a wrapper for backward compatibility
2082   */
AddHydrogens(bool polaronly,bool correctForPH,double pH)2083   bool OBMol::AddHydrogens(bool polaronly, bool correctForPH, double pH)
2084   {
2085     return(AddNewHydrogens(polaronly ? PolarHydrogen : AllHydrogen, correctForPH, pH));
2086   }
2087 
AtomIsNSOP(OBAtom * atom)2088   static bool AtomIsNSOP(OBAtom *atom)
2089   {
2090     switch (atom->GetAtomicNum()) {
2091     case OBElements::Nitrogen:
2092     case OBElements::Sulfur:
2093     case OBElements::Oxygen:
2094     case OBElements::Phosphorus:
2095       return true;
2096     default:
2097       return false;
2098     }
2099   }
2100 
2101   //! \return a "corrected" bonding radius based on the hybridization.
2102   //! Scales the covalent radius by 0.95 for sp2 and 0.90 for sp hybrids
CorrectedBondRad(unsigned int elem,unsigned int hyb)2103   static double CorrectedBondRad(unsigned int elem, unsigned int hyb)
2104   {
2105     double rad = OBElements::GetCovalentRad(elem);
2106     switch (hyb) {
2107     case 2:
2108       return rad * 0.95;
2109     case 1:
2110       return rad * 0.90;
2111     default:
2112       return rad;
2113     }
2114   }
2115 
AddNewHydrogens(HydrogenType whichHydrogen,bool correctForPH,double pH)2116   bool OBMol::AddNewHydrogens(HydrogenType whichHydrogen, bool correctForPH, double pH)
2117   {
2118     if (!IsCorrectedForPH() && correctForPH)
2119       CorrectForPH(pH);
2120 
2121     if (HasHydrogensAdded())
2122       return(true);
2123 
2124     bool hasChiralityPerceived = this->HasChiralityPerceived(); // remember
2125 
2126     /*
2127     //
2128     // This was causing bug #1892844 in avogadro. We also want to add hydrogens if the molecule has no bonds.
2129     //
2130     if(NumBonds()==0 && NumAtoms()!=1)
2131     {
2132     obErrorLog.ThrowError(__FUNCTION__,
2133     "Did not run OpenBabel::AddHydrogens on molecule with no bonds", obAuditMsg);
2134     return true;
2135     }
2136     */
2137     if (whichHydrogen == AllHydrogen)
2138       obErrorLog.ThrowError(__FUNCTION__,
2139                             "Ran OpenBabel::AddHydrogens", obAuditMsg);
2140     else if (whichHydrogen == PolarHydrogen)
2141       obErrorLog.ThrowError(__FUNCTION__,
2142                             "Ran OpenBabel::AddHydrogens -- polar only", obAuditMsg);
2143     else
2144       obErrorLog.ThrowError(__FUNCTION__,
2145                             "Ran OpenBabel::AddHydrogens -- nonpolar only", obAuditMsg);
2146 
2147     // Make sure we have conformers (PR#1665519)
2148     if (!_vconf.empty() && !Empty()) {
2149       OBAtom *atom;
2150       vector<OBAtom*>::iterator i;
2151       for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2152         {
2153           atom->SetVector();
2154         }
2155     }
2156 
2157     SetHydrogensAdded(); // This must come after EndModify() as EndModify() wipes the flags
2158     // If chirality was already perceived, remember this (to avoid wiping information
2159     if (hasChiralityPerceived)
2160       this->SetChiralityPerceived();
2161 
2162     //count up number of hydrogens to add
2163     OBAtom *atom,*h;
2164     int hcount,count=0;
2165     vector<pair<OBAtom*,int> > vhadd;
2166     vector<OBAtom*>::iterator i;
2167     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2168       {
2169         if (whichHydrogen == PolarHydrogen && !AtomIsNSOP(atom))
2170           continue;
2171         if (whichHydrogen == NonPolarHydrogen && AtomIsNSOP(atom))
2172           continue;
2173 
2174         hcount = atom->GetImplicitHCount();
2175         atom->SetImplicitHCount(0);
2176 
2177         if (hcount)
2178           {
2179             vhadd.push_back(pair<OBAtom*,int>(atom,hcount));
2180             count += hcount;
2181           }
2182       }
2183 
2184     if (count == 0) {
2185       // Make sure to clear SSSR and aromatic flags we may have tripped above
2186       _flags &= (~(OB_SSSR_MOL|OB_AROMATIC_MOL));
2187       return(true);
2188     }
2189     bool hasCoords = HasNonZeroCoords();
2190 
2191     //realloc memory in coordinate arrays for new hydrogens
2192     double *tmpf;
2193     vector<double*>::iterator j;
2194     for (j = _vconf.begin();j != _vconf.end();++j)
2195       {
2196         tmpf = new double [(NumAtoms()+count)*3];
2197         memset(tmpf,'\0',sizeof(double)*(NumAtoms()+count)*3);
2198         if (hasCoords)
2199           memcpy(tmpf,(*j),sizeof(double)*NumAtoms()*3);
2200         delete []*j;
2201         *j = tmpf;
2202       }
2203 
2204     IncrementMod();
2205 
2206     int m,n;
2207     vector3 v;
2208     vector<pair<OBAtom*,int> >::iterator k;
2209     double hbrad = CorrectedBondRad(1, 0);
2210 
2211     for (k = vhadd.begin();k != vhadd.end();++k)
2212       {
2213         atom = k->first;
2214         double bondlen = hbrad + CorrectedBondRad(atom->GetAtomicNum(), atom->GetHyb());
2215         for (m = 0;m < k->second;++m)
2216           {
2217             int badh = 0;
2218             for (n = 0;n < NumConformers();++n)
2219               {
2220                 SetConformer(n);
2221                 if (hasCoords)
2222                   {
2223                     // Ensure that add hydrogens only returns finite coords
2224                     //atom->GetNewBondVector(v,bondlen);
2225                     v = OBBuilder::GetNewBondVector(atom,bondlen);
2226                     if (isfinite(v.x()) || isfinite(v.y()) || isfinite(v.z())) {
2227                       _c[(NumAtoms())*3]   = v.x();
2228                       _c[(NumAtoms())*3+1] = v.y();
2229                       _c[(NumAtoms())*3+2] = v.z();
2230                     }
2231                     else {
2232                       _c[(NumAtoms())*3]   = 0.0;
2233                       _c[(NumAtoms())*3+1] = 0.0;
2234                       _c[(NumAtoms())*3+2] = 0.0;
2235                       obErrorLog.ThrowError(__FUNCTION__,
2236                                             "Ran OpenBabel::AddHydrogens -- no reasonable bond geometry for desired hydrogen.",
2237                                             obAuditMsg);
2238                       badh++;
2239                     }
2240                   }
2241                 else
2242                   memset((char*)&_c[NumAtoms()*3],'\0',sizeof(double)*3);
2243               }
2244             if(badh == 0 || badh < NumConformers())
2245               {
2246                 // Add the new H atom to the appropriate residue list
2247                 //but avoid doing perception by checking for existence of residue
2248                 //just in case perception is trigger, make sure GetResidue is called
2249                 //before adding the hydrogen to the molecule
2250                 OBResidue *res = atom->HasResidue() ? atom->GetResidue() : nullptr;
2251                 h = NewAtom();
2252                 h->SetType("H");
2253                 h->SetAtomicNum(1);
2254                 string aname = "H";
2255 
2256                 if(res)
2257                 {
2258                   res->AddAtom(h);
2259                   res->SetAtomID(h,aname);
2260 
2261                   //hydrogen should inherit hetatm status of heteroatom (default is false)
2262                   if(res->IsHetAtom(atom))
2263                   {
2264                       res->SetHetAtom(h, true);
2265                   }
2266                 }
2267 
2268                 int bondFlags = 0;
2269                 AddBond(atom->GetIdx(),h->GetIdx(),1, bondFlags);
2270                 h->SetCoordPtr(&_c);
2271                 OpenBabel::ImplicitRefToStereo(*this, atom->GetId(), h->GetId());
2272               }
2273           }
2274       }
2275 
2276     DecrementMod();
2277 
2278     //reset atom type and partial charge flags
2279     _flags &= (~(OB_PCHARGE_MOL|OB_ATOMTYPES_MOL|OB_SSSR_MOL|OB_AROMATIC_MOL|OB_HYBRID_MOL));
2280 
2281     return(true);
2282   }
2283 
AddPolarHydrogens()2284   bool OBMol::AddPolarHydrogens()
2285   {
2286     return(AddNewHydrogens(PolarHydrogen));
2287   }
2288 
AddNonPolarHydrogens()2289   bool OBMol::AddNonPolarHydrogens()
2290   {
2291     return(AddNewHydrogens(NonPolarHydrogen));
2292   }
2293 
AddHydrogens(OBAtom * atom)2294   bool OBMol::AddHydrogens(OBAtom *atom)
2295   {
2296     int hcount = atom->GetImplicitHCount();
2297     if (hcount == 0)
2298       return true;
2299 
2300     atom->SetImplicitHCount(0);
2301 
2302     vector<pair<OBAtom*, int> > vhadd;
2303     vhadd.push_back(pair<OBAtom*,int>(atom, hcount));
2304 
2305     //realloc memory in coordinate arrays for new hydroges
2306     double *tmpf;
2307     vector<double*>::iterator j;
2308     for (j = _vconf.begin();j != _vconf.end();++j)
2309       {
2310         tmpf = new double [(NumAtoms()+hcount)*3+10];
2311         memcpy(tmpf,(*j),sizeof(double)*NumAtoms()*3);
2312         delete []*j;
2313         *j = tmpf;
2314       }
2315 
2316     IncrementMod();
2317 
2318     int m,n;
2319     vector3 v;
2320     vector<pair<OBAtom*,int> >::iterator k;
2321     double hbrad = CorrectedBondRad(1,0);
2322 
2323     OBAtom *h;
2324     for (k = vhadd.begin();k != vhadd.end();++k)
2325       {
2326         atom = k->first;
2327         double bondlen = hbrad + CorrectedBondRad(atom->GetAtomicNum(),atom->GetHyb());
2328         for (m = 0;m < k->second;++m)
2329           {
2330             for (n = 0;n < NumConformers();++n)
2331               {
2332                 SetConformer(n);
2333                 //atom->GetNewBondVector(v,bondlen);
2334                 v = OBBuilder::GetNewBondVector(atom,bondlen);
2335                 _c[(NumAtoms())*3]   = v.x();
2336                 _c[(NumAtoms())*3+1] = v.y();
2337                 _c[(NumAtoms())*3+2] = v.z();
2338               }
2339             h = NewAtom();
2340             h->SetType("H");
2341             h->SetAtomicNum(1);
2342 
2343             int bondFlags = 0;
2344             AddBond(atom->GetIdx(),h->GetIdx(),1, bondFlags);
2345             h->SetCoordPtr(&_c);
2346             OpenBabel::ImplicitRefToStereo(*this, atom->GetId(), h->GetId());
2347           }
2348       }
2349 
2350     DecrementMod();
2351     SetConformer(0);
2352 
2353     //reset atom type and partial charge flags
2354     //_flags &= (~(OB_PCHARGE_MOL|OB_ATOMTYPES_MOL));
2355 
2356     return(true);
2357   }
2358 
CorrectForPH(double pH)2359   bool OBMol::CorrectForPH(double pH)
2360   {
2361     if (IsCorrectedForPH())
2362       return(true);
2363     phmodel.CorrectForPH(*this, pH);
2364 
2365     obErrorLog.ThrowError(__FUNCTION__,
2366                           "Ran OpenBabel::CorrectForPH", obAuditMsg);
2367 
2368     return(true);
2369   }
2370 
2371   //! \brief set spin multiplicity for H-deficient atoms
2372   /**
2373      If NoImplicitH is true then the molecule has no implicit hydrogens. Individual atoms
2374      on which ForceNoH() has been called also have no implicit hydrogens.
2375      If NoImplicitH is false (the default), then if there are any explicit hydrogens
2376      on an atom then they constitute all the hydrogen on that atom. However, a hydrogen
2377      atom with its _isotope!=0 is not considered explicit hydrogen for this purpose.
2378      In addition, an atom which has had ForceImplH()called for it is never considered
2379      hydrogen deficient, e.g. unbracketed atoms in SMILES.
2380      Any discrepancy with the expected atom valency is interpreted as the atom being a
2381      radical of some sort and iits _spinMultiplicity is set to 2 when it is one hydrogen short
2382      and 3 when it is two hydrogens short and similarly for greater hydrogen deficiency.
2383 
2384      So SMILES C[CH] is interpreted as methyl carbene, CC[H][H] as ethane, and CC[2H] as CH3CH2D.
2385   **/
2386 
2387 
2388 
AssignSpinMultiplicity(bool NoImplicitH)2389   bool OBMol::AssignSpinMultiplicity(bool NoImplicitH)
2390   {
2391     // TODO: The following functions simply returns true, as it has been made
2392     // redundant by changes to the handling of implicit hydrogens, and spin.
2393     // This needs to be sorted out properly at some point.
2394     return true;
2395   }
2396 
2397   // Used by DeleteAtom below. Code based on StereoRefToImplicit
DeleteStereoOnAtom(OBMol & mol,OBStereo::Ref atomId)2398   static void DeleteStereoOnAtom(OBMol& mol, OBStereo::Ref atomId)
2399   {
2400     std::vector<OBGenericData*> vdata = mol.GetAllData(OBGenericDataType::StereoData);
2401     for (std::vector<OBGenericData*>::iterator data = vdata.begin(); data != vdata.end(); ++data) {
2402       OBStereo::Type datatype = ((OBStereoBase*)*data)->GetType();
2403 
2404       if (datatype != OBStereo::CisTrans && datatype != OBStereo::Tetrahedral) {
2405         obErrorLog.ThrowError(__FUNCTION__,
2406             "This function should be updated to handle additional stereo types.\nSome stereochemistry objects may contain explicit refs to hydrogens which have been removed.", obWarning);
2407         continue;
2408       }
2409 
2410       if (datatype == OBStereo::CisTrans) {
2411         OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
2412         OBCisTransStereo::Config ct_cfg = ct->GetConfig();
2413         if (ct_cfg.begin == atomId || ct_cfg.end == atomId ||
2414             std::find(ct_cfg.refs.begin(), ct_cfg.refs.end(), atomId) != ct_cfg.refs.end())
2415           mol.DeleteData(ct);
2416       }
2417       else if (datatype == OBStereo::Tetrahedral) {
2418         OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
2419         OBTetrahedralStereo::Config ts_cfg = ts->GetConfig();
2420         if (ts_cfg.from == atomId ||
2421             std::find(ts_cfg.refs.begin(), ts_cfg.refs.end(), atomId) != ts_cfg.refs.end())
2422           mol.DeleteData(ts);
2423       }
2424     }
2425   }
2426 
DeleteAtom(OBAtom * atom,bool destroyAtom)2427   bool OBMol::DeleteAtom(OBAtom *atom, bool destroyAtom)
2428   {
2429     if (atom->GetAtomicNum() == OBElements::Hydrogen)
2430       return(DeleteHydrogen(atom));
2431 
2432     BeginModify();
2433     //don't need to do anything with coordinates b/c
2434     //BeginModify() blows away coordinates
2435 
2436     //find bonds to delete
2437     OBAtom *nbr;
2438     vector<OBBond*> vdb;
2439     vector<OBBond*>::iterator j;
2440     for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
2441       vdb.push_back(*j);
2442 
2443     for (j = vdb.begin();j != vdb.end();++j)
2444       DeleteBond((OBBond *)*j); //delete bonds
2445 
2446     _atomIds[atom->GetId()] = nullptr;
2447     _vatom.erase(_vatom.begin()+(atom->GetIdx()-1));
2448     _natoms--;
2449 
2450     //reset all the indices to the atoms
2451     int idx;
2452     vector<OBAtom*>::iterator i;
2453     OBAtom *atomi;
2454     for (idx=1,atomi = BeginAtom(i);atomi;atomi = NextAtom(i),++idx)
2455       atomi->SetIdx(idx);
2456 
2457     EndModify();
2458 
2459     // Delete any stereo objects involving this atom
2460     OBStereo::Ref id = atom->GetId();
2461     DeleteStereoOnAtom(*this, id);
2462 
2463     if (destroyAtom)
2464       DestroyAtom(atom);
2465 
2466     SetSSSRPerceived(false);
2467     SetLSSRPerceived(false);
2468     return(true);
2469   }
2470 
DeleteResidue(OBResidue * residue,bool destroyResidue)2471   bool OBMol::DeleteResidue(OBResidue *residue, bool destroyResidue)
2472   {
2473     unsigned short idx = residue->GetIdx();
2474     _residue.erase(_residue.begin() + idx);
2475 
2476     for ( unsigned short i = idx ; i < _residue.size() ; i++ )
2477       _residue[i]->SetIdx(i);
2478 
2479     if (destroyResidue)
2480       DestroyResidue(residue);
2481 
2482     SetSSSRPerceived(false);
2483     SetLSSRPerceived(false);
2484     return(true);
2485   }
2486 
DeleteBond(OBBond * bond,bool destroyBond)2487   bool OBMol::DeleteBond(OBBond *bond, bool destroyBond)
2488   {
2489     BeginModify();
2490 
2491     (bond->GetBeginAtom())->DeleteBond(bond);
2492     (bond->GetEndAtom())->DeleteBond(bond);
2493     _bondIds[bond->GetId()] = nullptr;
2494     _vbond.erase(_vbond.begin() + bond->GetIdx()); // bond index starts at 0!!!
2495     _nbonds--;
2496 
2497     vector<OBBond*>::iterator i;
2498     int j;
2499     OBBond *bondi;
2500     for (bondi = BeginBond(i),j=0;bondi;bondi = NextBond(i),++j)
2501       bondi->SetIdx(j);
2502 
2503     EndModify();
2504 
2505     if (destroyBond)
2506       DestroyBond(bond);
2507 
2508     SetSSSRPerceived(false);
2509     SetLSSRPerceived(false);
2510     return(true);
2511   }
2512 
AddBond(int first,int second,int order,int flags,int insertpos)2513   bool OBMol::AddBond(int first,int second,int order,int flags,int insertpos)
2514   {
2515     // Don't add the bond if it already exists
2516     if (first == second || GetBond(first, second) != nullptr)
2517       return(false);
2518 
2519     //    BeginModify();
2520 
2521     if ((unsigned)first <= NumAtoms() && (unsigned)second <= NumAtoms())
2522       //atoms exist and bond doesn't
2523       {
2524         OBBond *bond = new OBBond;
2525         if (!bond)
2526           {
2527             //EndModify();
2528             return(false);
2529           }
2530 
2531         OBAtom *bgn,*end;
2532         bgn = GetAtom(first);
2533         end = GetAtom(second);
2534         if (!bgn || !end)
2535           {
2536             obErrorLog.ThrowError(__FUNCTION__, "Unable to add bond - invalid atom index", obDebug);
2537             return(false);
2538           }
2539         bond->Set(_nbonds,bgn,end,order,flags);
2540         bond->SetParent(this);
2541 
2542         bond->SetId(_bondIds.size());
2543         _bondIds.push_back(bond);
2544 
2545 #define OBBondIncrement 100
2546         if (_nbonds+1 >= _vbond.size())
2547           {
2548             _vbond.resize(_nbonds+OBBondIncrement);
2549             vector<OBBond*>::iterator i;
2550             for (i = _vbond.begin(),i+=(_nbonds+1);i != _vbond.end();++i)
2551               *i = nullptr;
2552           }
2553 #undef  OBBondIncrement
2554 
2555         _vbond[_nbonds] = (OBBond*)bond;
2556         _nbonds++;
2557 
2558         if (insertpos == -1)
2559           {
2560             bgn->AddBond(bond);
2561             end->AddBond(bond);
2562           }
2563         else
2564           {
2565             if (insertpos >= static_cast<int>(bgn->GetExplicitDegree()))
2566               bgn->AddBond(bond);
2567             else //need to insert the bond for the connectivity order to be preserved
2568               {    //otherwise stereochemistry gets screwed up
2569                 vector<OBBond*>::iterator bi;
2570                 bgn->BeginNbrAtom(bi);
2571                 bi += insertpos;
2572                 bgn->InsertBond(bi,bond);
2573               }
2574             end->AddBond(bond);
2575           }
2576       }
2577     else //at least one atom doesn't exist yet - add to bond_q
2578       SetData(new OBVirtualBond(first,second,order,flags));
2579 
2580     //    EndModify();
2581 
2582     return(true);
2583   }
2584 
AddBond(OBBond & bond)2585   bool OBMol::AddBond(OBBond &bond)
2586   {
2587     if(!AddBond(bond.GetBeginAtomIdx(),
2588                    bond.GetEndAtomIdx(),
2589                    bond.GetBondOrder(),
2590                    bond.GetFlags()))
2591       return false;
2592     //copy the bond's generic data
2593     OBDataIterator diter;
2594     for(diter=bond.BeginData(); diter!=bond.EndData();++diter)
2595       GetBond(NumBonds()-1)->CloneData(*diter);
2596     return true;
2597   }
2598 
Align(OBAtom * a1,OBAtom * a2,vector3 & p1,vector3 & p2)2599   void OBMol::Align(OBAtom *a1,OBAtom *a2,vector3 &p1,vector3 &p2)
2600   {
2601     vector<int> children;
2602 
2603     obErrorLog.ThrowError(__FUNCTION__,
2604                           "Ran OpenBabel::Align", obAuditMsg);
2605 
2606     //find which atoms to rotate
2607     FindChildren(children,a1->GetIdx(),a2->GetIdx());
2608     children.push_back(a2->GetIdx());
2609 
2610     //find the rotation vector and angle
2611     vector3 v1,v2,v3;
2612     v1 = p2 - p1;
2613     v2 = a2->GetVector() - a1->GetVector();
2614     v3 = cross(v1,v2);
2615     double angle = vectorAngle(v1,v2);
2616 
2617     //find the rotation matrix
2618     matrix3x3 m;
2619     m.RotAboutAxisByAngle(v3,angle);
2620 
2621     //rotate atoms
2622     vector3 v;
2623     OBAtom *atom;
2624     vector<int>::iterator i;
2625     for (i = children.begin();i != children.end();++i)
2626       {
2627         atom = GetAtom(*i);
2628         v = atom->GetVector();
2629         v -= a1->GetVector();
2630         v *= m;   //rotate the point
2631         v += p1;  //translate the vector
2632         atom->SetVector(v);
2633       }
2634     //set a1 = p1
2635     a1->SetVector(p1);
2636   }
2637 
ToInertialFrame()2638   void OBMol::ToInertialFrame()
2639   {
2640     double m[9];
2641     for (int i = 0;i < NumConformers();++i)
2642       ToInertialFrame(i,m);
2643   }
2644 
ToInertialFrame(int conf,double * rmat)2645   void OBMol::ToInertialFrame(int conf,double *rmat)
2646   {
2647     unsigned int i;
2648     double x,y,z;
2649     double mi;
2650     double mass = 0.0;
2651     double center[3],m[3][3];
2652 
2653     obErrorLog.ThrowError(__FUNCTION__,
2654                           "Ran OpenBabel::ToInertialFrame", obAuditMsg);
2655 
2656     for (i = 0;i < 3;++i)
2657       memset(&m[i],'\0',sizeof(double)*3);
2658     memset(center,'\0',sizeof(double)*3);
2659 
2660     SetConformer(conf);
2661     OBAtom *atom;
2662     vector<OBAtom*>::iterator j;
2663     //find center of mass
2664     for (atom = BeginAtom(j);atom;atom = NextAtom(j))
2665       {
2666         mi = atom->GetAtomicMass();
2667         center[0] += mi*atom->x();
2668         center[1] += mi*atom->y();
2669         center[2] += mi*atom->z();
2670         mass += mi;
2671       }
2672 
2673     center[0] /= mass;
2674     center[1] /= mass;
2675     center[2] /= mass;
2676 
2677     //calculate inertial tensor
2678     for (atom = BeginAtom(j);atom;atom = NextAtom(j))
2679       {
2680         x = atom->x()-center[0];
2681         y = atom->y()-center[1];
2682         z = atom->z()-center[2];
2683         mi = atom->GetAtomicMass();
2684 
2685         m[0][0] += mi*(y*y+z*z);
2686         m[0][1] -= mi*x*y;
2687         m[0][2] -= mi*x*z;
2688         //        m[1][0] -= mi*x*y;
2689         m[1][1] += mi*(x*x+z*z);
2690         m[1][2] -= mi*y*z;
2691         //        m[2][0] -= mi*x*z;
2692         //        m[2][1] -= mi*y*z;
2693         m[2][2] += mi*(x*x+y*y);
2694       }
2695     // Fill in the lower triangle using symmetry across the diagonal
2696     m[1][0] = m[0][1];
2697     m[2][0] = m[0][2];
2698     m[2][1] = m[1][2];
2699 
2700     /* find rotation matrix for moment of inertia */
2701     ob_make_rmat(m,rmat);
2702 
2703     /* rotate all coordinates */
2704     double *c = GetConformer(conf);
2705     for(i=0; i < NumAtoms();++i)
2706       {
2707         x = c[i*3]-center[0];
2708         y = c[i*3+1]-center[1];
2709         z = c[i*3+2]-center[2];
2710         c[i*3]   = x*rmat[0] + y*rmat[1] + z*rmat[2];
2711         c[i*3+1] = x*rmat[3] + y*rmat[4] + z*rmat[5];
2712         c[i*3+2] = x*rmat[6] + y*rmat[7] + z*rmat[8];
2713       }
2714   }
2715 
OBMol()2716   OBMol::OBMol()
2717   {
2718     _natoms = _nbonds = 0;
2719     _mod = 0;
2720     _totalCharge = 0;
2721     _dimension = 3;
2722     _vatom.clear();
2723     _atomIds.clear();
2724     _vbond.clear();
2725     _bondIds.clear();
2726     _vdata.clear();
2727     _title = "";
2728     _c = nullptr;
2729     _flags = 0;
2730     _vconf.clear();
2731     _autoPartialCharge = true;
2732     _autoFormalCharge = true;
2733     _energy = 0.0;
2734   }
2735 
OBMol(const OBMol & mol)2736   OBMol::OBMol(const OBMol &mol) : OBBase(mol)
2737   {
2738     _natoms = _nbonds = 0;
2739     _mod = 0;
2740     _totalCharge = 0;
2741     _dimension = 3;
2742     _vatom.clear();
2743     _atomIds.clear();
2744     _vbond.clear();
2745     _bondIds.clear();
2746     _vdata.clear();
2747     _title = "";
2748     _c = nullptr;
2749     _flags = 0;
2750     _vconf.clear();
2751     _autoPartialCharge = true;
2752     _autoFormalCharge = true;
2753     //NF  _compressed = false;
2754     _energy = 0.0;
2755     *this = mol;
2756   }
2757 
~OBMol()2758   OBMol::~OBMol()
2759   {
2760     OBAtom    *atom;
2761     OBBond    *bond;
2762     OBResidue *residue;
2763     vector<OBAtom*>::iterator i;
2764     vector<OBBond*>::iterator j;
2765     vector<OBResidue*>::iterator r;
2766     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2767       DestroyAtom(atom);
2768     for (bond = BeginBond(j);bond;bond = NextBond(j))
2769       DestroyBond(bond);
2770     for (residue = BeginResidue(r);residue;residue = NextResidue(r))
2771       DestroyResidue(residue);
2772 
2773     //clear out the multiconformer data
2774     vector<double*>::iterator k;
2775     for (k = _vconf.begin();k != _vconf.end();++k)
2776       delete [] *k;
2777     _vconf.clear();
2778   }
2779 
HasNonZeroCoords()2780   bool OBMol::HasNonZeroCoords()
2781   {
2782     OBAtom *atom;
2783     vector<OBAtom*>::iterator i;
2784 
2785     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2786       if (atom->GetVector() != VZero)
2787         return(true);
2788 
2789     return(false);
2790   }
2791 
Has2D(bool Not3D)2792   bool OBMol::Has2D(bool Not3D)
2793   {
2794     bool hasX,hasY;
2795     OBAtom *atom;
2796     vector<OBAtom*>::iterator i;
2797 
2798     hasX = hasY = false;
2799     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2800       {
2801         if (!hasX && !IsNearZero(atom->x()))
2802           hasX = true;
2803         if (!hasY && !IsNearZero(atom->y()))
2804           hasY = true;
2805         if(Not3D && atom->z())
2806           return false;
2807       }
2808     if (hasX || hasY) //was && but this excluded vertically or horizontally aligned linear mols
2809       return(true);
2810     return(false);
2811   }
2812 
Has3D()2813   bool OBMol::Has3D()
2814   {
2815     bool hasX,hasY,hasZ;
2816     OBAtom *atom;
2817     vector<OBAtom*>::iterator i;
2818 
2819     hasX = hasY = hasZ = false;
2820     //    if (this->_c == NULL) **Test removed** Prevented function use during molecule construction
2821     //      return(false);
2822     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2823       {
2824         if (!hasX && !IsNearZero(atom->x()))
2825           hasX = true;
2826         if (!hasY && !IsNearZero(atom->y()))
2827           hasY = true;
2828         if (!hasZ && !IsNearZero(atom->z()))
2829           hasZ = true;
2830 
2831         if (hasX && hasY && hasZ)
2832           return(true);
2833       }
2834     return(false);
2835   }
2836 
SetCoordinates(double * newCoords)2837   void OBMol::SetCoordinates(double *newCoords)
2838   {
2839     bool noCptr = (_c == nullptr); // did we previously have a coordinate ptr
2840     if (noCptr) {
2841       _c = new double [NumAtoms()*3];
2842     }
2843 
2844     // copy from external to internal
2845     memcpy((char*)_c, (char*)newCoords, sizeof(double)*3*NumAtoms());
2846 
2847     if (noCptr) {
2848       OBAtom *atom;
2849       vector<OBAtom*>::iterator i;
2850       for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2851         atom->SetCoordPtr(&_c);
2852       _vconf.push_back(newCoords);
2853     }
2854   }
2855 
2856   //! Renumber the atoms according to the order of indexes in the supplied vector
2857   //! This with assemble an atom vector and call RenumberAtoms(vector<OBAtom*>)
2858   //! It will return without action if the supplied vector is empty or does not
2859   //! have the same number of atoms as the molecule.
2860   //!
2861   //! \since version 2.3
RenumberAtoms(vector<int> v)2862   void OBMol::RenumberAtoms(vector<int> v)
2863   {
2864     if (Empty() || v.size() != NumAtoms())
2865       return;
2866 
2867     vector <OBAtom*> va;
2868     va.reserve(NumAtoms());
2869 
2870     vector<int>::iterator i;
2871     for (i = v.begin(); i != v.end(); ++i)
2872       va.push_back( GetAtom(*i) );
2873 
2874     this->RenumberAtoms(va);
2875   }
2876 
2877   //! Renumber the atoms in this molecule according to the order in the supplied
2878   //! vector. This will return without action if the supplied vector is empty or
2879   //! does not have the same number of atoms as the molecule.
RenumberAtoms(vector<OBAtom * > & v)2880   void OBMol::RenumberAtoms(vector<OBAtom*> &v)
2881   {
2882     if (Empty())
2883       return;
2884 
2885     obErrorLog.ThrowError(__FUNCTION__,
2886                           "Ran OpenBabel::RenumberAtoms", obAuditMsg);
2887 
2888     OBAtom *atom;
2889     vector<OBAtom*> va;
2890     vector<OBAtom*>::iterator i;
2891 
2892     va = v;
2893 
2894     //make sure all atoms are represented in the vector
2895     if (va.empty() || va.size() != NumAtoms())
2896       return;
2897 
2898     OBBitVec bv;
2899     for (i = va.begin();i != va.end();++i)
2900       bv |= (*i)->GetIdx();
2901 
2902     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
2903       if (!bv[atom->GetIdx()])
2904         va.push_back(atom);
2905 
2906     int j,k;
2907     double *c;
2908     double *ctmp = new double [NumAtoms()*3];
2909 
2910     for (j = 0;j < NumConformers();++j)
2911       {
2912         c = GetConformer(j);
2913         for (k=0,i = va.begin();i != va.end(); ++i,++k)
2914           memcpy((char*)&ctmp[k*3],(char*)&c[((OBAtom*)*i)->GetCoordinateIdx()],sizeof(double)*3);
2915         memcpy((char*)c,(char*)ctmp,sizeof(double)*3*NumAtoms());
2916       }
2917 
2918     for (k=1,i = va.begin();i != va.end(); ++i,++k)
2919       (*i)->SetIdx(k);
2920 
2921     delete [] ctmp;
2922 
2923     _vatom.clear();
2924     for (i = va.begin();i != va.end();++i)
2925       _vatom.push_back(*i);
2926 
2927     DeleteData(OBGenericDataType::RingData);
2928     DeleteData("OpenBabel Symmetry Classes");
2929     DeleteData("LSSR");
2930     DeleteData("SSSR");
2931     UnsetFlag(OB_LSSR_MOL);
2932     UnsetFlag(OB_SSSR_MOL);
2933   }
2934 
WriteTitles(ostream & ofs,OBMol & mol)2935   bool WriteTitles(ostream &ofs, OBMol &mol)
2936   {
2937     ofs << mol.GetTitle() << endl;
2938     return true;
2939   }
2940 
2941   //check that unreasonable bonds aren't being added
validAdditionalBond(OBAtom * a,OBAtom * n)2942   static bool validAdditionalBond(OBAtom *a, OBAtom *n)
2943   {
2944     if(a->GetExplicitValence() == 5 && a->GetAtomicNum() == 15)
2945     {
2946       //only allow octhedral bonding for F and Cl
2947       if(n->GetAtomicNum() == 9 || n->GetAtomicNum() == 17)
2948         return true;
2949       else
2950         return false;
2951     }
2952     //other things to check?
2953     return true;
2954   }
2955 
2956   /*! This method adds single bonds between all atoms
2957     closer than their combined atomic covalent radii,
2958     then "cleans up" making sure bonded atoms are not
2959     closer than 0.4A and the atom does not exceed its valence.
2960     It implements blue-obelisk:rebondFrom3DCoordinates.
2961 
2962   */
ConnectTheDots(void)2963   void OBMol::ConnectTheDots(void)
2964   {
2965     if (Empty())
2966       return;
2967     if (_dimension != 3) return; // not useful on non-3D structures
2968 
2969     if (IsPeriodic())
2970       obErrorLog.ThrowError(__FUNCTION__,
2971                             "Ran OpenBabel::ConnectTheDots -- using periodic boundary conditions",
2972                             obAuditMsg);
2973     else
2974       obErrorLog.ThrowError(__FUNCTION__,
2975                             "Ran OpenBabel::ConnectTheDots", obAuditMsg);
2976 
2977 
2978     int j,k,max;
2979     double maxrad = 0;
2980     bool unset = false;
2981     OBAtom *atom,*nbr;
2982     vector<OBAtom*>::iterator i;
2983     vector<pair<OBAtom*,double> > zsortedAtoms;
2984     vector<double> rad;
2985     vector<int> zsorted;
2986     vector<int> bondCount; // existing bonds (e.g., from residues in PDB)
2987 
2988     double *c = new double [NumAtoms()*3];
2989     rad.resize(_natoms);
2990 
2991     for (j = 0, atom = BeginAtom(i) ; atom ; atom = NextAtom(i), ++j)
2992       {
2993         bondCount.push_back(atom->GetExplicitDegree());
2994         //don't consider atoms with a full valance already
2995         //this is both for correctness (trust existing bonds) and performance
2996         if(atom->GetExplicitValence() >= OBElements::GetMaxBonds(atom->GetAtomicNum()))
2997           continue;
2998         if(atom->GetAtomicNum() == 7 && atom->GetFormalCharge() == 0 && atom->GetExplicitValence() >= 3)
2999           continue;
3000         (atom->GetVector()).Get(&c[j*3]);
3001         pair<OBAtom*,double> entry(atom, atom->GetVector().z());
3002         zsortedAtoms.push_back(entry);
3003       }
3004     sort(zsortedAtoms.begin(), zsortedAtoms.end(), SortAtomZ);
3005 
3006     max = zsortedAtoms.size();
3007 
3008     for ( j = 0 ; j < max ; j++ )
3009       {
3010         atom   = zsortedAtoms[j].first;
3011         rad[j] = OBElements::GetCovalentRad(atom->GetAtomicNum());
3012         maxrad = std::max(rad[j],maxrad);
3013         zsorted.push_back(atom->GetIdx()-1);
3014       }
3015 
3016     int idx1, idx2;
3017     double d2,cutoff,zd;
3018     vector3 atom1, atom2, wrapped_coords;  // Only used for periodic coords
3019     for (j = 0 ; j < max ; ++j)
3020       {
3021         double maxcutoff = SQUARE(rad[j]+maxrad+0.45);
3022         idx1 = zsorted[j];
3023         for (k = j + 1 ; k < max ; k++ )
3024           {
3025             idx2 = zsorted[k];
3026 
3027             // bonded if closer than elemental Rcov + tolerance
3028             cutoff = SQUARE(rad[j] + rad[k] + 0.45);
3029 
3030             // Use minimum image convention if the unit cell is periodic
3031             // Otherwise, use a simpler (faster) distance calculation based on raw coordinates
3032             if (IsPeriodic())
3033               {
3034                 atom1 = vector3(c[idx1*3], c[idx1*3+1], c[idx1*3+2]);
3035                 atom2 = vector3(c[idx2*3], c[idx2*3+1], c[idx2*3+2]);
3036                 OBUnitCell *unitCell = (OBUnitCell * ) GetData(OBGenericDataType::UnitCell);
3037                 wrapped_coords = unitCell->MinimumImageCartesian(atom1 - atom2);
3038                 d2 = wrapped_coords.length_2();
3039               }
3040             else
3041               {
3042                 zd  = SQUARE(c[idx1*3+2] - c[idx2*3+2]);
3043                 // bigger than max cutoff, which is determined using largest radius,
3044                 // not the radius of k (which might be small, ie H, and cause an early  termination)
3045                 // since we sort by z, anything beyond k will also fail
3046                 if (zd > maxcutoff )
3047                   break;
3048 
3049                 d2  = SQUARE(c[idx1*3]   - c[idx2*3]);
3050                 if (d2 > cutoff)
3051                   continue; // x's bigger than cutoff
3052                 d2 += SQUARE(c[idx1*3+1] - c[idx2*3+1]);
3053                 if (d2 > cutoff)
3054                   continue; // x^2 + y^2 bigger than cutoff
3055                 d2 += zd;
3056               }
3057 
3058             if (d2 > cutoff)
3059               continue;
3060             if (d2 < 0.16) // 0.4 * 0.4 = 0.16
3061               continue;
3062 
3063             atom = GetAtom(idx1+1);
3064             nbr  = GetAtom(idx2+1);
3065 
3066             if (atom->IsConnected(nbr))
3067               continue;
3068 
3069             if (!validAdditionalBond(atom,nbr) || !validAdditionalBond(nbr, atom))
3070               continue;
3071 
3072             AddBond(idx1+1,idx2+1,1);
3073           }
3074       }
3075 
3076     // If between BeginModify and EndModify, coord pointers are NULL
3077     // setup molecule to handle current coordinates
3078 
3079     if (_c == nullptr)
3080       {
3081         _c = c;
3082         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3083           atom->SetCoordPtr(&_c);
3084         _vconf.push_back(c);
3085         unset = true;
3086       }
3087 
3088     // Cleanup -- delete long bonds that exceed max valence
3089     OBBond *maxbond, *bond;
3090     double maxlength;
3091     vector<OBBond*>::iterator l, m;
3092     int valCount;
3093     bool changed;
3094     BeginModify(); //prevent needless re-perception in DeleteBond
3095     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3096       {
3097         while (atom->GetExplicitValence() > static_cast<unsigned int>(OBElements::GetMaxBonds(atom->GetAtomicNum()))
3098                || atom->SmallestBondAngle() < 45.0)
3099           {
3100             bond = atom->BeginBond(l);
3101             maxbond = bond;
3102             // Fix from Liu Zhiguo 2008-01-26
3103             // loop past any bonds
3104             // which existed before ConnectTheDots was called
3105             // (e.g., from PDB resdata.txt)
3106             valCount = 0;
3107             while (valCount < bondCount[atom->GetIdx() - 1]) {
3108               bond = atom->NextBond(l);
3109               // timvdm: 2008-03-05
3110               // NextBond only returns NULL if the iterator l == _bonds.end().
3111               // This was casuing problems as follows:
3112               // NextBond = 0x????????
3113               // NextBond = 0x????????
3114               // NextBond = 0x????????
3115               // NextBond = 0x????????
3116               // NextBond = NULL  <-- this NULL was not detected
3117               // NextBond = 0x????????
3118               if (!bond) // so we add an additional check
3119                 break;
3120               maxbond = bond;
3121               valCount++;
3122             }
3123             if (!bond) // no new bonds added for this atom, just skip it
3124               break;
3125 
3126             // delete bonds between hydrogens when over max valence
3127             if (atom->GetAtomicNum() == OBElements::Hydrogen)
3128               {
3129                 m = l;
3130                 changed = false;
3131                 for (;bond;bond = atom->NextBond(m))
3132                   {
3133                     if (bond->GetNbrAtom(atom)->GetAtomicNum() == OBElements::Hydrogen)
3134                       {
3135                         DeleteBond(bond);
3136                         changed = true;
3137                         break;
3138                       }
3139                   }
3140                 if (changed)
3141                   {
3142                     // bond deleted, reevaluate BOSum
3143                     continue;
3144                   }
3145                 else
3146                   {
3147                     // reset to first new bond
3148                     bond = maxbond;
3149                   }
3150               }
3151 
3152             maxlength = maxbond->GetLength();
3153             for (bond = atom->NextBond(l);bond;bond = atom->NextBond(l))
3154               {
3155                 if (bond->GetLength() > maxlength)
3156                   {
3157                     maxbond = bond;
3158                     maxlength = bond->GetLength();
3159                   }
3160               }
3161             DeleteBond(maxbond); // delete the new bond with the longest length
3162           }
3163       }
3164     EndModify();
3165     if (unset)
3166       {
3167         _c = nullptr;
3168         for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3169           atom->ClearCoordPtr();
3170 	if (_vconf.size() > 0)
3171 	  _vconf.resize(_vconf.size()-1);
3172       }
3173 
3174     if (_c != nullptr)
3175       delete [] c;
3176   }
3177 
3178   /*! This method uses bond angles and geometries from current
3179     connectivity to guess atom types and then filling empty valences
3180     with multiple bonds. It currently has a pass to detect some
3181     frequent functional groups. It still needs a pass to detect aromatic
3182     rings to "clean up."
3183     AssignSpinMultiplicity(true) is called at the end of the function. The true
3184     states that there are no implict hydrogens in the molecule.
3185   */
PerceiveBondOrders()3186   void OBMol::PerceiveBondOrders()
3187   {
3188     if (Empty())
3189       return;
3190     if (_dimension != 3) return; // not useful on non-3D structures
3191 
3192     obErrorLog.ThrowError(__FUNCTION__,
3193                           "Ran OpenBabel::PerceiveBondOrders", obAuditMsg);
3194 
3195     OBAtom *atom, *b, *c;
3196     vector3 v1, v2;
3197     double angle;//, dist1, dist2;
3198     vector<OBAtom*>::iterator i;
3199     vector<OBBond*>::iterator j;//,k;
3200 
3201     //  BeginModify();
3202 
3203     // Pass 1: Assign estimated hybridization based on avg. angles
3204     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3205       {
3206         angle = atom->AverageBondAngle();
3207 
3208         //        cout << atom->GetAtomicNum() << " " << angle << endl;
3209 
3210         if (angle > 155.0)
3211           atom->SetHyb(1);
3212         else if (angle <= 155.0 && angle > 115.0)
3213           atom->SetHyb(2);
3214 
3215         // special case for imines
3216         if (atom->GetAtomicNum() == OBElements::Nitrogen
3217             && atom->ExplicitHydrogenCount() == 1
3218             && atom->GetExplicitDegree() == 2
3219             && angle > 109.5)
3220           atom->SetHyb(2);
3221 
3222       } // pass 1
3223 
3224     // Make sure upcoming calls to GetHyb() don't kill these temporary values
3225     SetHybridizationPerceived();
3226 
3227     // Pass 2: look for 5-member rings with torsions <= 7.5 degrees
3228     //         and 6-member rings with torsions <= 12 degrees
3229     //         (set all atoms with at least two bonds to sp2)
3230 
3231     vector<OBRing*> rlist;
3232     vector<OBRing*>::iterator ringit;
3233     vector<int> path;
3234     double torsions = 0.0;
3235 
3236     if (!HasSSSRPerceived())
3237       FindSSSR();
3238     rlist = GetSSSR();
3239     for (ringit = rlist.begin(); ringit != rlist.end(); ++ringit)
3240       {
3241         if ((*ringit)->PathSize() == 5)
3242           {
3243             path = (*ringit)->_path;
3244             torsions =
3245               ( fabs(GetTorsion(path[0], path[1], path[2], path[3])) +
3246                 fabs(GetTorsion(path[1], path[2], path[3], path[4])) +
3247                 fabs(GetTorsion(path[2], path[3], path[4], path[0])) +
3248                 fabs(GetTorsion(path[3], path[4], path[0], path[1])) +
3249                 fabs(GetTorsion(path[4], path[0], path[1], path[2])) ) / 5.0;
3250             if (torsions <= 7.5)
3251               {
3252                 for (unsigned int ringAtom = 0; ringAtom != path.size(); ++ringAtom)
3253                   {
3254                     b = GetAtom(path[ringAtom]);
3255                     // if an aromatic ring atom has valence 3, it is already set
3256                     // to sp2 because the average angles should be 120 anyway
3257                     // so only look for valence 2
3258                     if (b->GetExplicitDegree() == 2)
3259                       b->SetHyb(2);
3260                   }
3261               }
3262           }
3263         else if ((*ringit)->PathSize() == 6)
3264           {
3265             path = (*ringit)->_path;
3266             torsions =
3267               ( fabs(GetTorsion(path[0], path[1], path[2], path[3])) +
3268                 fabs(GetTorsion(path[1], path[2], path[3], path[4])) +
3269                 fabs(GetTorsion(path[2], path[3], path[4], path[5])) +
3270                 fabs(GetTorsion(path[3], path[4], path[5], path[0])) +
3271                 fabs(GetTorsion(path[4], path[5], path[0], path[1])) +
3272                 fabs(GetTorsion(path[5], path[0], path[1], path[2])) ) / 6.0;
3273             if (torsions <= 12.0)
3274               {
3275                 for (unsigned int ringAtom = 0; ringAtom != path.size(); ++ringAtom)
3276                   {
3277                     b = GetAtom(path[ringAtom]);
3278                     if (b->GetExplicitDegree() == 2 || b->GetExplicitDegree() == 3)
3279                       b->SetHyb(2);
3280                   }
3281               }
3282           }
3283       }
3284 
3285     // Pass 3: "Antialiasing" If an atom marked as sp hybrid isn't
3286     //          bonded to another or an sp2 hybrid isn't bonded
3287     //          to another (or terminal atoms in both cases)
3288     //          mark them to a lower hybridization for now
3289     bool openNbr;
3290     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3291       {
3292         if (atom->GetHyb() == 2 || atom->GetHyb() == 1)
3293           {
3294             openNbr = false;
3295             for (b = atom->BeginNbrAtom(j); b; b = atom->NextNbrAtom(j))
3296               {
3297                 if (b->GetHyb() < 3 || b->GetExplicitDegree() == 1)
3298                   {
3299                     openNbr = true;
3300                     break;
3301                   }
3302               }
3303             if (!openNbr && atom->GetHyb() == 2)
3304               atom->SetHyb(3);
3305             else if (!openNbr && atom->GetHyb() == 1)
3306               atom->SetHyb(2);
3307           }
3308       } // pass 3
3309 
3310     // Pass 4: Check for known functional group patterns and assign bonds
3311     //         to the canonical form
3312     //      Currently we have explicit code to do this, but a "bond typer"
3313     //      is in progress to make it simpler to test and debug.
3314     bondtyper.AssignFunctionalGroupBonds(*this);
3315 
3316     // Pass 5: Check for aromatic rings and assign bonds as appropriate
3317     // This is just a quick and dirty approximation that marks everything
3318     //  as potentially aromatic
3319 
3320     // This doesn't work perfectly, but it's pretty decent.
3321     //  Need to have a list of SMARTS patterns for common rings
3322     //  which would "break ties" on complicated multi-ring systems
3323     // (Most of the current problems lie in the interface with the
3324     //   Kekulize code anyway, not in marking everything as potentially aromatic)
3325 
3326     bool needs_kekulization = false; // are there any aromatic bonds?
3327     bool typed; // has this ring been typed?
3328     unsigned int loop, loopSize;
3329     for (ringit = rlist.begin(); ringit != rlist.end(); ++ringit)
3330       {
3331         typed = false;
3332         loopSize = (*ringit)->PathSize();
3333         if (loopSize == 5 || loopSize == 6 || loopSize == 7)
3334           {
3335             path = (*ringit)->_path;
3336             for(loop = 0; loop < loopSize; ++loop)
3337               {
3338                 atom = GetAtom(path[loop]);
3339                 if(atom->HasBondOfOrder(2) || atom->HasBondOfOrder(3)
3340                    || atom->GetHyb() != 2)
3341                   {
3342                     typed = true;
3343                     break;
3344                   }
3345               }
3346 
3347             if (!typed)
3348               for(loop = 0; loop < loopSize; ++loop)
3349                 {
3350                   //    cout << " set aromatic " << path[loop] << endl;
3351                   (GetBond(path[loop], path[(loop+1) % loopSize]))->SetAromatic();
3352                   needs_kekulization = true;
3353                 }
3354           }
3355       }
3356 
3357     // Kekulization is necessary if an aromatic bond is present
3358     if (needs_kekulization) {
3359       this->SetAromaticPerceived();
3360       // First of all, set the atoms at the ends of the aromatic bonds to also
3361       // be aromatic. This information is required for OBKekulize.
3362       FOR_BONDS_OF_MOL(bond, this) {
3363         if (bond->IsAromatic()) {
3364           bond->GetBeginAtom()->SetAromatic();
3365           bond->GetEndAtom()->SetAromatic();
3366         }
3367       }
3368       bool ok = OBKekulize(this);
3369       if (!ok) {
3370         stringstream errorMsg;
3371         errorMsg << "Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders";
3372         std::string title = this->GetTitle();
3373         if (!title.empty())
3374           errorMsg << " (title is " << title << ")";
3375         errorMsg << endl;
3376         obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
3377         // return false; Should we return false for a kekulization failure?
3378       }
3379       this->SetAromaticPerceived(false);
3380     }
3381 
3382     // Quick pass.. eliminate inter-ring sulfur atom multiple bonds
3383     // dkoes - I have removed this code - if double bonds are set,
3384     // we should trust them.  See pdb_ligands_sdf/4iph_1fj.sdf for
3385     // a case where the charge isn't set, but we break the molecule
3386     // if we remove the double bond.  Also, the previous code was
3387     // fragile - relying on the total mol charge being set.  If we
3388     // are going to do anything, we should "perceive" a formal charge
3389     // in the case of a ring sulfur with a double bond (thiopyrylium)
3390 
3391     // Pass 6: Assign remaining bond types, ordered by atom electronegativity
3392     vector<pair<OBAtom*,double> > sortedAtoms;
3393     vector<double> rad;
3394     vector<int> sorted;
3395     int iter, max;
3396     double maxElNeg, shortestBond, currentElNeg;
3397     double bondLength, testLength;
3398 
3399     for (atom = BeginAtom(i) ; atom ; atom = NextAtom(i))
3400       {
3401         // if atoms have the same electronegativity, make sure those with shorter bonds
3402         // are handled first (helps with assignment of conjugated single/double bonds)
3403         shortestBond = 1.0e5;
3404         for (b = atom->BeginNbrAtom(j); b; b = atom->NextNbrAtom(j))
3405           {
3406             if (b->GetAtomicNum()!=1) shortestBond =
3407                                         std::min(shortestBond,(atom->GetBond(b))->GetLength());
3408           }
3409         pair<OBAtom*,double> entry(atom,
3410                                    OBElements::GetElectroNeg(atom->GetAtomicNum())*1e6+shortestBond);
3411 
3412         sortedAtoms.push_back(entry);
3413       }
3414     sort(sortedAtoms.begin(), sortedAtoms.end(), SortAtomZ);
3415 
3416     max = sortedAtoms.size();
3417     for (iter = 0 ; iter < max ; iter++ )
3418       {
3419         atom = sortedAtoms[iter].first;
3420         // Debugging statement
3421         //        cout << " atom->Hyb " << atom->GetAtomicNum() << " " << atom->GetIdx() << " " << atom->GetHyb()
3422         //             << " BO: " << atom->GetExplicitValence() << endl;
3423 
3424         // Possible sp-hybrids
3425         if ( (atom->GetHyb() == 1 || atom->GetExplicitDegree() == 1)
3426              && atom->GetExplicitValence() + 2  <= static_cast<unsigned int>(OBElements::GetMaxBonds(atom->GetAtomicNum()))
3427              )
3428           {
3429 
3430             // loop through the neighbors looking for a hybrid or terminal atom
3431             // (and pick the one with highest electronegativity first)
3432             // *or* pick a neighbor that's a terminal atom
3433             if (atom->HasNonSingleBond() ||
3434                 (atom->GetAtomicNum() == 7 && atom->GetExplicitValence() + 2 > 3))
3435               continue;
3436 
3437             maxElNeg = 0.0;
3438             shortestBond = 5000.0;
3439             c = nullptr;
3440             for (b = atom->BeginNbrAtom(j); b; b = atom->NextNbrAtom(j))
3441               {
3442                 currentElNeg = OBElements::GetElectroNeg(b->GetAtomicNum());
3443                 if ( (b->GetHyb() == 1 || b->GetExplicitDegree() == 1)
3444                      && b->GetExplicitValence() + 2 <= static_cast<unsigned int>(OBElements::GetMaxBonds(b->GetAtomicNum()))
3445                      && (currentElNeg > maxElNeg ||
3446                          (IsApprox(currentElNeg,maxElNeg, 1.0e-6)
3447                           && (atom->GetBond(b))->GetLength() < shortestBond)) )
3448                   {
3449                     if (b->HasNonSingleBond() ||
3450                         (b->GetAtomicNum() == 7 && b->GetExplicitValence() + 2 > 3))
3451                       continue;
3452 
3453                     // Test terminal bonds against expected triple bond lengths
3454                     bondLength = (atom->GetBond(b))->GetLength();
3455                     if (atom->GetExplicitDegree() == 1 || b->GetExplicitDegree() == 1) {
3456                       testLength = CorrectedBondRad(atom->GetAtomicNum(), atom->GetHyb())
3457                         + CorrectedBondRad(b->GetAtomicNum(), b->GetHyb());
3458                       if (bondLength > 0.9 * testLength)
3459                         continue; // too long, ignore it
3460                     }
3461 
3462                     shortestBond = bondLength;
3463                     maxElNeg = OBElements::GetElectroNeg(b->GetAtomicNum());
3464                     c = b; // save this atom for later use
3465                   }
3466               }
3467             if (c)
3468               (atom->GetBond(c))->SetBondOrder(3);
3469           }
3470         // Possible sp2-hybrid atoms
3471         else if ( (atom->GetHyb() == 2 || atom->GetExplicitDegree() == 1)
3472                   && atom->GetExplicitValence() + 1 <= static_cast<unsigned int>(OBElements::GetMaxBonds(atom->GetAtomicNum())) )
3473           {
3474             // as above
3475             if (atom->HasNonSingleBond() ||
3476                 (atom->GetAtomicNum() == 7 && atom->GetExplicitValence() + 1 > 3))
3477               continue;
3478 
3479             // Don't build multiple bonds to ring sulfurs
3480             //  except thiopyrylium
3481             if (atom->IsInRing() && atom->GetAtomicNum() == 16) {
3482               if (_totalCharge > 1 && atom->GetFormalCharge() == 0)
3483                 atom->SetFormalCharge(+1);
3484               else
3485                 continue;
3486             }
3487 
3488             maxElNeg = 0.0;
3489             shortestBond = 5000.0;
3490             c = nullptr;
3491             for (b = atom->BeginNbrAtom(j); b; b = atom->NextNbrAtom(j))
3492               {
3493                 currentElNeg = OBElements::GetElectroNeg(b->GetAtomicNum());
3494                 if ( (b->GetHyb() == 2 || b->GetExplicitDegree() == 1)
3495                      && b->GetExplicitValence() + 1 <= static_cast<unsigned int>(OBElements::GetMaxBonds(b->GetAtomicNum()))
3496                      && (GetBond(atom, b))->IsDoubleBondGeometry()
3497                      && (currentElNeg > maxElNeg || (IsApprox(currentElNeg,maxElNeg, 1.0e-6)) ) )
3498                   {
3499                     if (b->HasNonSingleBond() ||
3500                         (b->GetAtomicNum() == 7 && b->GetExplicitValence() + 1 > 3))
3501                       continue;
3502 
3503                     if (b->IsInRing() && b->GetAtomicNum() == 16) {
3504                       if (_totalCharge > 1 && b->GetFormalCharge() == 0)
3505                         b->SetFormalCharge(+1);
3506                       else
3507                         continue;
3508                     }
3509 
3510                     // Test terminal bonds against expected double bond lengths
3511                     bondLength = (atom->GetBond(b))->GetLength();
3512                     if (atom->GetExplicitDegree() == 1 || b->GetExplicitDegree() == 1) {
3513                       testLength = CorrectedBondRad(atom->GetAtomicNum(), atom->GetHyb())
3514                         + CorrectedBondRad(b->GetAtomicNum(), b->GetHyb());
3515                       if (bondLength > 0.93 * testLength)
3516                         continue; // too long, ignore it
3517                     }
3518 
3519                     // OK, see if this is better than the previous choice
3520                     // If it's much shorter, pick it (e.g., fulvene)
3521                     // If they're close (0.1A) then prefer the bond in the ring
3522                     double difference = shortestBond - (atom->GetBond(b))->GetLength();
3523                     if ( (difference > 0.1)
3524                          || ( (difference > -0.01) &&
3525                               ( (!atom->IsInRing() || !c || !c->IsInRing() || b->IsInRing())
3526                                 || (atom->IsInRing() && c && !c->IsInRing() && b->IsInRing()) ) ) ) {
3527                       shortestBond = (atom->GetBond(b))->GetLength();
3528                       maxElNeg = OBElements::GetElectroNeg(b->GetAtomicNum());
3529                       c = b; // save this atom for later use
3530                     } // is this bond better than previous choices
3531                   }
3532               } // loop through neighbors
3533             if (c)
3534               (atom->GetBond(c))->SetBondOrder(2);
3535           }
3536       } // pass 6
3537 
3538     // Now let the atom typer go to work again
3539     _flags &= (~(OB_HYBRID_MOL));
3540     _flags &= (~(OB_AROMATIC_MOL));
3541     _flags &= (~(OB_ATOMTYPES_MOL));
3542     //  EndModify(true); // "nuke" perceived data
3543 
3544     //Set _spinMultiplicity other than zero for atoms which are hydrogen
3545     //deficient and which have implicit valency definitions (essentially the
3546     //organic subset in SMILES). There are assumed to no implicit hydrogens.
3547     //AssignSpinMultiplicity(true); // TODO: sort out radicals
3548     }
3549 
Center()3550   void OBMol::Center()
3551   {
3552     for (int i = 0;i < NumConformers();++i)
3553       Center(i);
3554   }
3555 
Center(int nconf)3556   vector3 OBMol::Center(int nconf)
3557   {
3558     obErrorLog.ThrowError(__FUNCTION__,
3559                           "Ran OpenBabel::Center", obAuditMsg);
3560 
3561     SetConformer(nconf);
3562 
3563     OBAtom *atom;
3564     vector<OBAtom*>::iterator i;
3565 
3566     double x=0.0,y=0.0,z=0.0;
3567     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3568       {
3569         x += atom->x();
3570         y += atom->y();
3571         z += atom->z();
3572       }
3573 
3574     x /= (double)NumAtoms();
3575     y /= (double)NumAtoms();
3576     z /= (double)NumAtoms();
3577 
3578     vector3 vtmp;
3579     vector3 v(x,y,z);
3580 
3581     for (atom = BeginAtom(i);atom;atom = NextAtom(i))
3582       {
3583         vtmp = atom->GetVector() - v;
3584         atom->SetVector(vtmp);
3585       }
3586 
3587     return(v);
3588   }
3589 
3590 
3591   /*! this method adds the vector v to all atom positions in all conformers */
Translate(const vector3 & v)3592   void OBMol::Translate(const vector3 &v)
3593   {
3594     for (int i = 0;i < NumConformers();++i)
3595       Translate(v,i);
3596   }
3597 
3598   /*! this method adds the vector v to all atom positions in the
3599     conformer nconf. If nconf == OB_CURRENT_CONFORMER, then the atom
3600     positions in the current conformer are translated. */
Translate(const vector3 & v,int nconf)3601   void OBMol::Translate(const vector3 &v, int nconf)
3602   {
3603     obErrorLog.ThrowError(__FUNCTION__,
3604                           "Ran OpenBabel::Translate", obAuditMsg);
3605 
3606     int i,size;
3607     double x,y,z;
3608     double *c = (nconf == OB_CURRENT_CONFORMER)? _c : GetConformer(nconf);
3609 
3610     x = v.x();
3611     y = v.y();
3612     z = v.z();
3613     size = NumAtoms();
3614     for (i = 0;i < size;++i)
3615       {
3616         c[i*3  ] += x;
3617         c[i*3+1] += y;
3618         c[i*3+2] += z;
3619       }
3620   }
3621 
Rotate(const double u[3][3])3622   void OBMol::Rotate(const double u[3][3])
3623   {
3624     int i,j,k;
3625     double m[9];
3626     for (k=0,i = 0;i < 3;++i)
3627       for (j = 0;j < 3;++j)
3628         m[k++] = u[i][j];
3629 
3630     for (i = 0;i < NumConformers();++i)
3631       Rotate(m,i);
3632   }
3633 
Rotate(const double m[9])3634   void OBMol::Rotate(const double m[9])
3635   {
3636     for (int i = 0;i < NumConformers();++i)
3637       Rotate(m,i);
3638   }
3639 
Rotate(const double m[9],int nconf)3640   void OBMol::Rotate(const double m[9],int nconf)
3641   {
3642     int i,size;
3643     double x,y,z;
3644     double *c = (nconf == OB_CURRENT_CONFORMER)? _c : GetConformer(nconf);
3645 
3646     obErrorLog.ThrowError(__FUNCTION__,
3647                           "Ran OpenBabel::Rotate", obAuditMsg);
3648 
3649     size = NumAtoms();
3650     for (i = 0;i < size;++i)
3651       {
3652         x = c[i*3  ];
3653         y = c[i*3+1];
3654         z = c[i*3+2];
3655         c[i*3  ] = m[0]*x + m[1]*y + m[2]*z;
3656         c[i*3+1] = m[3]*x + m[4]*y + m[5]*z;
3657         c[i*3+2] = m[6]*x + m[7]*y + m[8]*z;
3658       }
3659   }
3660 
SetEnergies(std::vector<double> & energies)3661   void OBMol::SetEnergies(std::vector<double> &energies)
3662   {
3663     if (!HasData(OBGenericDataType::ConformerData))
3664       SetData(new OBConformerData);
3665     OBConformerData *cd = (OBConformerData*) GetData(OBGenericDataType::ConformerData);
3666     cd->SetEnergies(energies);
3667   }
3668 
GetEnergies()3669   vector<double> OBMol::GetEnergies()
3670   {
3671     if (!HasData(OBGenericDataType::ConformerData))
3672       SetData(new OBConformerData);
3673     OBConformerData *cd = (OBConformerData*) GetData(OBGenericDataType::ConformerData);
3674     vector<double> energies = cd->GetEnergies();
3675 
3676     return energies;
3677   }
3678 
GetEnergy(int ci)3679   double OBMol::GetEnergy(int ci)
3680   {
3681     if (!HasData(OBGenericDataType::ConformerData))
3682       SetData(new OBConformerData);
3683     OBConformerData *cd = (OBConformerData*) GetData(OBGenericDataType::ConformerData);
3684     vector<double> energies = cd->GetEnergies();
3685 
3686     if (((unsigned int)ci >= energies.size()) || (ci < 0))
3687       return 0.0;
3688 
3689     return energies[ci];
3690   }
3691 
SetConformers(vector<double * > & v)3692   void OBMol::SetConformers(vector<double*> &v)
3693   {
3694     vector<double*>::iterator i;
3695     for (i = _vconf.begin();i != _vconf.end();++i)
3696       delete [] *i;
3697 
3698     _vconf = v;
3699     _c = _vconf.empty() ? nullptr : _vconf[0];
3700 
3701   }
3702 
SetConformer(unsigned int i)3703   void OBMol::SetConformer(unsigned int i)
3704   {
3705     if (i < _vconf.size())
3706       _c = _vconf[i];
3707   }
3708 
CopyConformer(double * c,int idx)3709   void OBMol::CopyConformer(double *c,int idx)
3710   {
3711     //    obAssert(!_vconf.empty() && (unsigned)idx < _vconf.size());
3712     memcpy((char*)c, (char*)_vconf[idx], sizeof(double)*3*NumAtoms());
3713   }
3714 
3715   // void OBMol::CopyConformer(double *c,int idx)
3716   // {
3717   //   obAssert(!_vconf.empty() && (unsigned)idx < _vconf.size());
3718 
3719   //   unsigned int i;
3720   //   for (i = 0;i < NumAtoms();++i)
3721   //     {
3722   //       _vconf[idx][i*3  ] = (double)c[i*3  ];
3723   //       _vconf[idx][i*3+1] = (double)c[i*3+1];
3724   //       _vconf[idx][i*3+2] = (double)c[i*3+2];
3725   //     }
3726   // }
3727 
DeleteConformer(int idx)3728   void OBMol::DeleteConformer(int idx)
3729   {
3730     if (idx < 0 || idx >= (signed)_vconf.size())
3731       return;
3732 
3733     delete [] _vconf[idx];
3734     _vconf.erase((_vconf.begin()+idx));
3735   }
3736 
3737   ///Converts for instance [N+]([O-])=O to N(=O)=O
ConvertDativeBonds()3738   bool OBMol::ConvertDativeBonds()
3739   {
3740     obErrorLog.ThrowError(__FUNCTION__,
3741                           "Ran OpenBabel::ConvertDativeBonds", obAuditMsg);
3742 
3743     //Look for + and - charges on adjacent atoms
3744     OBAtom* patom;
3745     vector<OBAtom*>::iterator i;
3746     bool converted = false;
3747     for (patom = BeginAtom(i);patom;patom = NextAtom(i))
3748       {
3749         vector<OBBond*>::iterator itr;
3750         OBBond *pbond;
3751         for (pbond = patom->BeginBond(itr);patom->GetFormalCharge() && pbond;pbond = patom->NextBond(itr))
3752           {
3753             OBAtom* pNbratom = pbond->GetNbrAtom(patom);
3754             int chg1 = patom->GetFormalCharge();
3755             int chg2 = pNbratom->GetFormalCharge();
3756             if((chg1>0 && chg2<0)|| (chg1<0 && chg2>0))
3757               {
3758                 //dative bond. Reduce charges and increase bond order
3759                 converted =true;
3760                 if(chg1>0)
3761                   --chg1;
3762                 else
3763                   ++chg1;
3764                 patom->SetFormalCharge(chg1);
3765                 if(chg2>0)
3766                   --chg2;
3767                 else
3768                   ++chg2;
3769                 pNbratom->SetFormalCharge(chg2);
3770                 pbond->SetBondOrder(pbond->GetBondOrder()+1);
3771               }
3772           }
3773       }
3774     return converted; //false if no changes made
3775   }
3776 
IsNotCorH(OBAtom * atom)3777   static bool IsNotCorH(OBAtom* atom)
3778   {
3779     switch (atom->GetAtomicNum())
3780     {
3781     case OBElements::Hydrogen:
3782     case OBElements::Carbon:
3783       return false;
3784     }
3785     return true;
3786   }
3787 
3788   //This maybe would be better using smirks from a datafile
MakeDativeBonds()3789   bool OBMol::MakeDativeBonds()
3790   {
3791     //! Converts 5-valent N to charged form of dative bonds,
3792     //! e.g. -N(=O)=O converted to -[N+]([O-])=O. Returns true if conversion occurs.
3793     BeginModify();
3794     //AddHydrogens();
3795     bool converted = false;
3796     OBAtom* patom;
3797     vector<OBAtom*>::iterator ai;
3798     for (patom = BeginAtom(ai);patom;patom = NextAtom(ai)) //all atoms
3799     {
3800       if(patom->GetAtomicNum() == OBElements::Nitrogen // || patom->GetAtomicNum() == OBElements::Phosphorus) not phosphorus!
3801         && (patom->GetExplicitValence()==5 || (patom->GetExplicitValence()==4 && patom->GetFormalCharge()==0)))
3802       {
3803         // Find the bond to be modified. Prefer a bond to a hetero-atom,
3804         // and the highest order bond if there is a choice.
3805         OBBond *bond, *bestbond;
3806         OBBondIterator bi;
3807         for (bestbond = bond = patom->BeginBond(bi); bond; bond = patom->NextBond(bi))
3808         {
3809           unsigned int bo = bond->GetBondOrder();
3810           if(bo>=2 && bo<=4)
3811           {
3812             bool het = IsNotCorH(bond->GetNbrAtom(patom));
3813             bool oldhet = IsNotCorH(bestbond->GetNbrAtom(patom));
3814             bool higherorder = bo > bestbond->GetBondOrder();
3815             if((het && !oldhet) || (((het && oldhet) || (!het && !oldhet)) && higherorder))
3816               bestbond = bond;
3817           }
3818         }
3819         //Make the charged form
3820         bestbond->SetBondOrder(bestbond->GetBondOrder()-1);
3821         patom->SetFormalCharge(+1);
3822         OBAtom* at = bestbond->GetNbrAtom(patom);
3823         at->SetFormalCharge(-1);
3824         converted=true;
3825       }
3826     }
3827     EndModify();
3828     return converted;
3829   }
3830 
3831   /**
3832    *  This function is useful when writing to legacy formats (such as MDL MOL) that do
3833    *  not support zero-order bonds. It is worth noting that some compounds cannot be
3834    *  well represented using just single, double and triple bonds, even with adjustments
3835    *  to adjacent charges. In these cases, simply converting zero-order bonds to single
3836    *  bonds is all that can be done.
3837    *
3838    @verbatim
3839    Algorithm from:
3840    Clark, A. M. Accurate Specification of Molecular Structures: The Case for
3841    Zero-Order Bonds and Explicit Hydrogen Counting. Journal of Chemical Information
3842    and Modeling, 51, 3149-3157 (2011). http://pubs.acs.org/doi/abs/10.1021/ci200488k
3843    @endverbatim
3844   */
ConvertZeroBonds()3845   bool OBMol::ConvertZeroBonds()
3846   {
3847     // TODO: Option to just remove zero-order bonds entirely
3848 
3849     // TODO: Is it OK to not wrap this in BeginModify() and EndModify()?
3850     // If we must, I think we need to manually remember HasImplicitValencePerceived and
3851     // re-set it after EndModify()
3852 
3853     // Periodic table block for element (1=s, 2=p, 3=d, 4=f)
3854     const int BLOCKS[113] = {0,1,2,1,1,2,2,2,2,2,2,1,1,2,2,2,2,2,2,1,1,3,3,3,3,3,3,3,3,3,
3855                              3,2,2,2,2,2,2,1,1,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,1,1,4,4,4,
3856                              4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,1,1,4,
3857                              4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3};
3858 
3859     bool converted = false;
3860     // Get contiguous fragments of molecule
3861     vector<vector<int> > cfl;
3862     ContigFragList(cfl);
3863     // Iterate over contiguous fragments
3864     for (vector< vector<int> >::iterator i = cfl.begin(); i != cfl.end(); ++i) {
3865       // Get all zero-order bonds in contiguous fragment
3866       vector<OBBond*> bonds;
3867       for(vector<int>::const_iterator j = i->begin(); j != i->end(); ++j) {
3868         FOR_BONDS_OF_ATOM(b, GetAtom(*j)) {
3869           if (b->GetBondOrder() == 0 && !(find(bonds.begin(), bonds.end(), &*b) != bonds.end())) {
3870             bonds.push_back(&*b);
3871           }
3872         }
3873       }
3874       // Convert zero-order bonds
3875       while (bonds.size() > 0) {
3876         // Pick a bond using scoring system
3877         int bi = 0;
3878         if (bonds.size() > 1) {
3879           vector<int> scores(bonds.size());
3880           for (unsigned int n = 0; n < bonds.size(); n++) {
3881             OBAtom *bgn = bonds[n]->GetBeginAtom();
3882             OBAtom *end = bonds[n]->GetEndAtom();
3883             int score = 0;
3884             score += bgn->GetAtomicNum() + end->GetAtomicNum();
3885             score += abs(bgn->GetFormalCharge()) + abs(end->GetFormalCharge());
3886             pair<int, int> lb = bgn->LewisAcidBaseCounts();
3887             pair<int, int> le = end->LewisAcidBaseCounts();
3888             if (lb.first > 0 && lb.second > 0 && le.first > 0 && le.second > 0) {
3889               score += 100;   // Both atoms are Lewis acids *and* Lewis bases
3890             } else if ((lb.first > 0 && le.second > 0) && (lb.second > 0 && le.first > 0)) {
3891               score -= 1000;  // Lewis acid/base direction is mono-directional
3892             }
3893             int bcount = bgn->GetImplicitHCount();
3894             FOR_BONDS_OF_ATOM(b, bgn) { bcount += 1; }
3895             int ecount = end->GetImplicitHCount();
3896             FOR_BONDS_OF_ATOM(b, end) { ecount += 1; }
3897             if (bcount == 1 || ecount == 1) {
3898               score -= 10; // If the start or end atoms have only 1 neighbour
3899             }
3900             scores[n] = score;
3901           }
3902           for (unsigned int n = 1; n < scores.size(); n++) {
3903             if (scores[n] < scores[bi]) {
3904               bi = n;
3905             }
3906           }
3907         }
3908         OBBond *bond = bonds[bi];
3909         bonds.erase(bonds.begin() + bi);
3910         OBAtom *bgn = bond->GetBeginAtom();
3911         OBAtom *end = bond->GetEndAtom();
3912         int blockb = BLOCKS[bgn->GetAtomicNum()];
3913         int blocke = BLOCKS[end->GetAtomicNum()];;
3914         pair<int, int> lb = bgn->LewisAcidBaseCounts();
3915         pair<int, int> le = end->LewisAcidBaseCounts();
3916         int chg = 0; // Amount to adjust atom charges
3917         int ord = 1; // New bond order
3918         if (lb.first > 0 && lb.second > 0 && le.first > 0 && le.second > 0) {
3919           ord = 2;  // both atoms are amphoteric, so turn it into a double bond
3920         } else if (lb.first > 0 && blockb == 2 && blocke >= 3) {
3921           ord = 2;  // p-block lewis acid with d/f-block element: make into double bond
3922         } else if (le.first > 0 && blocke == 2 && blockb >= 3) {
3923           ord = 2;  // p-block lewis acid with d/f-block element: make into double bond
3924         } else if (lb.first > 0 && le.second > 0) {
3925           chg = -1;  // lewis acid/base goes one way only; charge separate it
3926         } else if (lb.second > 0 && le.first > 0) {
3927           chg = 1;  //  no matching capacity; do not charge separate
3928         }
3929         // adjust bond order and atom charges accordingly
3930         bgn->SetFormalCharge(bgn->GetFormalCharge()+chg);
3931         end->SetFormalCharge(end->GetFormalCharge()-chg);
3932         bond->SetBondOrder(ord);
3933         converted = true;
3934       }
3935     }
3936     return converted;
3937   }
3938 
BeginAtom(OBAtomIterator & i)3939   OBAtom *OBMol::BeginAtom(OBAtomIterator &i)
3940   {
3941     i = _vatom.begin();
3942     return i == _vatom.end() ? nullptr : (OBAtom*)*i;
3943   }
3944 
NextAtom(OBAtomIterator & i)3945   OBAtom *OBMol::NextAtom(OBAtomIterator &i)
3946   {
3947     ++i;
3948     return i == _vatom.end() ? nullptr : (OBAtom*)*i;
3949   }
3950 
BeginBond(OBBondIterator & i)3951   OBBond *OBMol::BeginBond(OBBondIterator &i)
3952   {
3953     i = _vbond.begin();
3954     return i == _vbond.end() ? nullptr : (OBBond*)*i;
3955   }
3956 
NextBond(OBBondIterator & i)3957   OBBond *OBMol::NextBond(OBBondIterator &i)
3958   {
3959     ++i;
3960     return i == _vbond.end() ? nullptr : (OBBond*)*i;
3961   }
3962 
3963   //! \since version 2.4
AreInSameRing(OBAtom * a,OBAtom * b)3964   int OBMol::AreInSameRing(OBAtom *a, OBAtom *b)
3965   {
3966     bool a_in, b_in;
3967     vector<OBRing*> vr;
3968     vr = GetLSSR();
3969 
3970     vector<OBRing*>::iterator i;
3971     vector<int>::iterator j;
3972 
3973     for (i = vr.begin();i != vr.end();++i) {
3974       a_in = false;
3975       b_in = false;
3976       // Go through the path of the ring and see if a and/or b match
3977       // each node in the path
3978       for(j = (*i)->_path.begin();j != (*i)->_path.end();++j) {
3979         if ((unsigned)(*j) == a->GetIdx())
3980           a_in = true;
3981         if ((unsigned)(*j) == b->GetIdx())
3982           b_in = true;
3983       }
3984 
3985       if (a_in && b_in)
3986         return (*i)->Size();
3987     }
3988 
3989     return 0;
3990   }
3991 
Separate(int StartIndex)3992   vector<OBMol> OBMol::Separate(int StartIndex)
3993   {
3994     vector<OBMol> result;
3995     if( NumAtoms() == 0 )
3996       return result; // nothing to do, but let's prevent a crash
3997 
3998     OBMolAtomDFSIter iter( this, StartIndex );
3999     OBMol newMol;
4000     while( GetNextFragment( iter, newMol ) ) {
4001       result.push_back( newMol );
4002       newMol.Clear();
4003     }
4004 
4005     return result;
4006   }
4007 
4008   //! \brief Copy part of a molecule to another molecule
4009   /**
4010   This function copies a substructure of a molecule to another molecule. The key
4011   information needed is an OBBitVec indicating which atoms to include and (optionally)
4012   an OBBitVec indicating which bonds to exclude. By default, only bonds joining
4013   included atoms are copied.
4014 
4015   When an atom is copied, but not all of its bonds are, by default hydrogen counts are
4016   adjusted to account for the missing bonds. That is, given the SMILES "CF", if we
4017   copy the two atoms but exclude the bond, we will end up with "C.F". This behavior
4018   can be changed by specifiying a value other than 1 for the \p correctvalence parameter.
4019   A value of 0 will yield "[C].[F]" while 2 will yield "C*.F*" (see \p correctvalence below
4020   for more information).
4021 
4022   Aromaticity is preserved as present in the original OBMol. If this is not desired,
4023   the user should call OBMol::SetAromaticPerceived(false) on the new OBMol.
4024 
4025   Stereochemistry is only preserved if the corresponding elements are wholly present in
4026   the substructure. For example, all four atoms and bonds of a tetrahedral stereocenter
4027   must be copied.
4028 
4029   Residue information is preserved if the original OBMol is marked as having
4030   its residues perceived. If this is not desired, either call
4031   OBMol::SetChainsPerceived(false) in advance on the original OBMol to avoid copying
4032   the residues (and then reset it afterwards), or else call it on the new OBMol so
4033   that residue information will be reperceived (when requested).
4034 
4035   Here is an example of using this method to copy ring systems to a new molecule.
4036   Given the molecule represented by the SMILES string, "FC1CC1c2ccccc2I", we will
4037   end up with a new molecule represented by the SMILES string, "C1CC1.c2ccccc2".
4038   \code{.cpp}
4039   OBBitVec atoms(mol.NumAtoms() + 1); // the maximum size needed
4040   FOR_ATOMS_OF_MOL(atom, mol) {
4041     if(atom->IsInRing())
4042       atoms.SetBitOn(atom->Idx());
4043   }
4044   OBBitVec excludebonds(mol.NumBonds()); // the maximum size needed
4045   FOR_BONDS_OF_MOL(bond, mol) {
4046     if(!bond->IsInRing())
4047       excludebonds.SetBitOn(bond->Idx());
4048   }
4049   OBMol newmol;
4050   mol.CopySubstructure(&newmol, &atoms, &excludebonds);
4051   \endcode
4052 
4053   When used from Python, note that "None" may be used to specify an empty value for
4054   the \p excludebonds parameter.
4055 
4056   \remark Some alternatives to using this function, which may be preferred in some
4057           instances due to efficiency or convenience are:
4058           -# Copying the entire OBMol, and then deleting the unwanted parts
4059           -# Modifiying the original OBMol, and then restoring it
4060           -# Using the SMILES writer option -xf to specify fragment atom idxs
4061 
4062   \return A boolean indicating success or failure. Currently failure is only reported
4063           if one of the specified atoms is not present, or \p atoms is a NULL
4064           pointer.
4065 
4066   \param newmol   The molecule to which to add the substructure. Note that atoms are
4067                   appended to this molecule.
4068   \param atoms    An OBBitVec, indexed by atom Idx, specifying which atoms to copy
4069   \param excludebonds  An OBBitVec, indexed by bond Idx, specifying a list of bonds
4070                        to exclude. By default, all bonds between the specified atoms are
4071                        included - this parameter overrides that.
4072   \param correctvalence  A value of 0, 1 (default) or 2 that indicates how atoms with missing
4073                          bonds are handled:
4074                         0 - Leave the implicit hydrogen count unchanged;
4075                         1 - Adjust the implicit hydrogen count to correct for
4076                             the missing bonds;
4077                         2 - Replace the missing bonds with bonds to dummy atoms
4078   \param atomorder Record the Idxs of the original atoms. That is, the first element
4079                    in this vector will be the Idx of the atom in the original OBMol
4080                    that corresponds to the first atom in the new OBMol. Note that
4081                    the information is appended to this vector.
4082   \param bondorder Record the Idxs of the original bonds. See \p atomorder above.
4083 
4084   **/
4085 
CopySubstructure(OBMol & newmol,OBBitVec * atoms,OBBitVec * excludebonds,unsigned int correctvalence,std::vector<unsigned int> * atomorder,std::vector<unsigned int> * bondorder)4086   bool OBMol::CopySubstructure(OBMol& newmol, OBBitVec *atoms, OBBitVec *excludebonds, unsigned int correctvalence,
4087                                std::vector<unsigned int> *atomorder, std::vector<unsigned int> *bondorder)
4088   {
4089     if (!atoms)
4090       return false;
4091 
4092     bool record_atomorder = atomorder != nullptr;
4093     bool record_bondorder = bondorder != nullptr;
4094     bool bonds_specified = excludebonds != nullptr;
4095 
4096     newmol.SetDimension(GetDimension());
4097 
4098     // If the parent is set to periodic, then also apply boundary conditions to the fragments
4099     if (IsPeriodic()) {
4100       OBUnitCell* parent_uc = (OBUnitCell*)GetData(OBGenericDataType::UnitCell);
4101       newmol.SetData(parent_uc->Clone(nullptr));
4102       newmol.SetPeriodicMol();
4103     }
4104     // If the parent had aromaticity perceived, then retain that for the fragment
4105     newmol.SetFlag(_flags & OB_AROMATIC_MOL);
4106     // The fragment will preserve the "chains perceived" flag of the parent
4107     newmol.SetFlag(_flags & OB_CHAINS_MOL);
4108     // We will check for residues only if the parent has chains perceived already
4109     bool checkresidues = HasChainsPerceived();
4110 
4111     // Now add the atoms
4112     map<OBAtom*, OBAtom*> AtomMap;//key is from old mol; value from new mol
4113     for (int bit = atoms->FirstBit(); bit != atoms->EndBit(); bit = atoms->NextBit(bit)) {
4114       OBAtom* atom = this->GetAtom(bit);
4115       if (!atom)
4116         return false;
4117       newmol.AddAtom(*atom); // each subsequent atom
4118       if (record_atomorder)
4119         atomorder->push_back(bit);
4120       AtomMap[&*atom] = newmol.GetAtom(newmol.NumAtoms());
4121     }
4122 
4123     //Add the residues
4124     if (checkresidues) {
4125       map<OBResidue*, OBResidue*> ResidueMap; // map from old->new
4126       for (int bit = atoms->FirstBit(); bit != atoms->EndBit(); bit = atoms->NextBit(bit)) {
4127         OBAtom* atom = this->GetAtom(bit);
4128         OBResidue* res = atom->GetResidue();
4129         if (!res) continue;
4130         map<OBResidue*, OBResidue*>::iterator mit = ResidueMap.find(res);
4131         OBResidue *newres;
4132         if (mit == ResidueMap.end()) {
4133           newres = newmol.NewResidue();
4134           *newres = *res;
4135           ResidueMap[res] = newres;
4136         } else {
4137           newres = mit->second;
4138         }
4139         OBAtom* newatom = AtomMap[&*atom];
4140         newres->AddAtom(newatom);
4141         newres->SetAtomID(newatom, res->GetAtomID(atom));
4142         newres->SetHetAtom(newatom, res->IsHetAtom(atom));
4143         newres->SetSerialNum(newatom, res->GetSerialNum(atom));
4144       }
4145     }
4146 
4147     // Update Stereo
4148     std::vector<OBGenericData*>::iterator data;
4149     std::vector<OBGenericData*> stereoData = GetAllData(OBGenericDataType::StereoData);
4150     for (data = stereoData.begin(); data != stereoData.end(); ++data) {
4151       if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::CisTrans) {
4152         OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
4153 
4154         // Check that the entirety of this cistrans cfg occurs in this substructure
4155         OBCisTransStereo::Config cfg = ct->GetConfig();
4156         OBAtom* begin = GetAtomById(cfg.begin);
4157         if (AtomMap.find(begin) == AtomMap.end())
4158           continue;
4159         OBAtom* end = GetAtomById(cfg.end);
4160         if (AtomMap.find(end) == AtomMap.end())
4161           continue;
4162         bool skip_cfg = false;
4163         if (bonds_specified) {
4164           FOR_BONDS_OF_ATOM(bond, begin) {
4165             if (excludebonds->BitIsSet(bond->GetIdx())) {
4166               skip_cfg = true;
4167               break;
4168             }
4169           }
4170           if (skip_cfg)
4171             continue;
4172           FOR_BONDS_OF_ATOM(bond, end) {
4173             if (excludebonds->BitIsSet(bond->GetIdx())) {
4174               skip_cfg = true;
4175               break;
4176             }
4177           }
4178           if (skip_cfg)
4179             continue;
4180         }
4181         for (OBStereo::RefIter ri = cfg.refs.begin(); ri != cfg.refs.end(); ++ri) {
4182           if (*ri != OBStereo::ImplicitRef && AtomMap.find(GetAtomById(*ri)) == AtomMap.end()) {
4183             skip_cfg = true;
4184             break;
4185           }
4186         }
4187         if (skip_cfg)
4188           continue;
4189 
4190         OBCisTransStereo::Config newcfg;
4191         newcfg.specified = cfg.specified;
4192         newcfg.begin = cfg.begin == OBStereo::ImplicitRef ? OBStereo::ImplicitRef : AtomMap[GetAtomById(cfg.begin)]->GetId();
4193         newcfg.end = cfg.end == OBStereo::ImplicitRef ? OBStereo::ImplicitRef : AtomMap[GetAtomById(cfg.end)]->GetId();
4194         OBStereo::Refs refs;
4195         for (OBStereo::RefIter ri = cfg.refs.begin(); ri != cfg.refs.end(); ++ri) {
4196           OBStereo::Ref ref = *ri == OBStereo::ImplicitRef ? OBStereo::ImplicitRef : AtomMap[GetAtomById(*ri)]->GetId();
4197           refs.push_back(ref);
4198         }
4199         newcfg.refs = refs;
4200 
4201         OBCisTransStereo *newct = new OBCisTransStereo(this);
4202         newct->SetConfig(newcfg);
4203         newmol.SetData(newct);
4204       }
4205       else if (static_cast<OBStereoBase*>(*data)->GetType() == OBStereo::Tetrahedral) {
4206         OBTetrahedralStereo *tet = dynamic_cast<OBTetrahedralStereo*>(*data);
4207         OBTetrahedralStereo::Config cfg = tet->GetConfig();
4208 
4209         // Check that the entirety of this tet cfg occurs in this substructure
4210         OBAtom *center = GetAtomById(cfg.center);
4211         std::map<OBAtom*, OBAtom*>::iterator centerit = AtomMap.find(center);
4212         if (centerit == AtomMap.end())
4213           continue;
4214         if (cfg.from != OBStereo::ImplicitRef && AtomMap.find(GetAtomById(cfg.from)) == AtomMap.end())
4215           continue;
4216         bool skip_cfg = false;
4217         if (bonds_specified) {
4218           FOR_BONDS_OF_ATOM(bond, center) {
4219             if (excludebonds->BitIsSet(bond->GetIdx())) {
4220               skip_cfg = true;
4221               break;
4222             }
4223           }
4224           if (skip_cfg)
4225             continue;
4226         }
4227         for (OBStereo::RefIter ri = cfg.refs.begin(); ri != cfg.refs.end(); ++ri) {
4228           if (*ri != OBStereo::ImplicitRef && AtomMap.find(GetAtomById(*ri)) == AtomMap.end()) {
4229             skip_cfg = true;
4230             break;
4231           }
4232         }
4233         if (skip_cfg)
4234           continue;
4235 
4236         OBTetrahedralStereo::Config newcfg;
4237         newcfg.specified = cfg.specified;
4238         newcfg.center = centerit->second->GetId();
4239         newcfg.from = cfg.from == OBStereo::ImplicitRef ? OBStereo::ImplicitRef : AtomMap[GetAtomById(cfg.from)]->GetId();
4240         OBStereo::Refs refs;
4241         for (OBStereo::RefIter ri = cfg.refs.begin(); ri != cfg.refs.end(); ++ri) {
4242           OBStereo::Ref ref = *ri == OBStereo::ImplicitRef ? OBStereo::ImplicitRef : AtomMap[GetAtomById(*ri)]->GetId();
4243           refs.push_back(ref);
4244         }
4245         newcfg.refs = refs;
4246 
4247         OBTetrahedralStereo *newtet = new OBTetrahedralStereo(this);
4248         newtet->SetConfig(newcfg);
4249         newmol.SetData(newtet);
4250       }
4251     }
4252 
4253     // Options:
4254     // 1. Bonds that do not connect atoms in the subset are ignored
4255     // 2. As 1. but implicit Hs are added to replace them
4256     // 3. As 1. but asterisks are added to replace them
4257     FOR_BONDS_OF_MOL(bond, this) {
4258       bool skipping_bond = bonds_specified && excludebonds->BitIsSet(bond->GetIdx());
4259       map<OBAtom*, OBAtom*>::iterator posB = AtomMap.find(bond->GetBeginAtom());
4260       map<OBAtom*, OBAtom*>::iterator posE = AtomMap.find(bond->GetEndAtom());
4261       if (posB == AtomMap.end() && posE == AtomMap.end())
4262         continue;
4263 
4264       if (posB == AtomMap.end() || posE == AtomMap.end() || skipping_bond) {
4265         switch(correctvalence) {
4266         case 1:
4267           if (posB == AtomMap.end() || (skipping_bond && posE != AtomMap.end()))
4268             posE->second->SetImplicitHCount(posE->second->GetImplicitHCount() + bond->GetBondOrder());
4269           if (posE == AtomMap.end() || (skipping_bond && posB != AtomMap.end()))
4270             posB->second->SetImplicitHCount(posB->second->GetImplicitHCount() + bond->GetBondOrder());
4271           break;
4272         case 2: {
4273             OBAtom *atomB, *atomE;
4274             if (skipping_bond) {
4275               for(int N=0; N<2; ++N) {
4276                 if (N==0) {
4277                   if (posB != AtomMap.end()) {
4278                     atomB = posB->second;
4279                     atomE = newmol.NewAtom();
4280                     if (record_atomorder)
4281                       atomorder->push_back(bond->GetEndAtomIdx());
4282                   }
4283                 } else if (posE != AtomMap.end()) {
4284                   atomE = posE->second;
4285                   atomB = newmol.NewAtom();
4286                   if (record_atomorder)
4287                     atomorder->push_back(bond->GetBeginAtomIdx());
4288                 }
4289                 newmol.AddBond(atomB->GetIdx(), atomE->GetIdx(),
4290                   bond->GetBondOrder(), bond->GetFlags());
4291                 if (record_bondorder)
4292                   bondorder->push_back(bond->GetIdx());
4293               }
4294             }
4295             else {
4296               atomB = (posB == AtomMap.end()) ? newmol.NewAtom() : posB->second;
4297               atomE = (posE == AtomMap.end()) ? newmol.NewAtom() : posE->second;
4298               if (record_atomorder) {
4299                 if (posB == AtomMap.end())
4300                   atomorder->push_back(bond->GetBeginAtomIdx());
4301                 else
4302                   atomorder->push_back(bond->GetEndAtomIdx());
4303               }
4304               newmol.AddBond(atomB->GetIdx(), atomE->GetIdx(),
4305                 bond->GetBondOrder(), bond->GetFlags());
4306               if (record_bondorder)
4307                 bondorder->push_back(bond->GetIdx());
4308             }
4309           }
4310           break;
4311         default:
4312           break;
4313         }
4314       }
4315       else {
4316         newmol.AddBond((posB->second)->GetIdx(), posE->second->GetIdx(),
4317                        bond->GetBondOrder(), bond->GetFlags());
4318         if (record_bondorder)
4319           bondorder->push_back(bond->GetIdx());
4320       }
4321     }
4322 
4323     return true;
4324   }
4325 
GetNextFragment(OBMolAtomDFSIter & iter,OBMol & newmol)4326   bool OBMol::GetNextFragment( OBMolAtomDFSIter& iter, OBMol& newmol ) {
4327     if( ! iter ) return false;
4328 
4329     // We want to keep the atoms in their original order rather than use
4330     // the DFS order so just record the information first
4331     OBBitVec infragment(this->NumAtoms()+1);
4332     do { //for each atom in fragment
4333       infragment.SetBitOn(iter->GetIdx());
4334     } while ((iter++).next());
4335 
4336     bool ok = CopySubstructure(newmol, &infragment);
4337 
4338     return ok;
4339   }
4340 
4341   // Put the specified molecular charge on a single atom (which is expected for InChIFormat).
4342   // Assumes all the hydrogen is explicitly included in the molecule,
4343   // and that SetTotalCharge() has not been called. (This function is an alternative.)
4344   // Returns false if cannot assign all the charge.
4345   // Not robust in the general case, but see below for the more common simpler cases.
AssignTotalChargeToAtoms(int charge)4346   bool OBMol::AssignTotalChargeToAtoms(int charge)
4347   {
4348     int extraCharge = charge - GetTotalCharge(); //GetTotalCharge() gets charge on atoms
4349 
4350     FOR_ATOMS_OF_MOL (atom, this)
4351     {
4352       unsigned int atomicnum = atom->GetAtomicNum();
4353       if (atomicnum == 1)
4354         continue;
4355       int charge = atom->GetFormalCharge();
4356       unsigned bosum = atom->GetExplicitValence();
4357       unsigned int totalValence = bosum + atom->GetImplicitHCount();
4358       unsigned int typicalValence = GetTypicalValence(atomicnum, bosum, charge);
4359       int diff = typicalValence - totalValence;
4360       if(diff != 0)
4361       {
4362         int c;
4363         if(extraCharge == 0)
4364           c = diff > 0 ? -1 : +1; //e.g. CH3C(=O)O, NH4 respectively
4365         else
4366           c = extraCharge < 0 ? -1 : 1;
4367         if (totalValence == GetTypicalValence(atomicnum, bosum, charge + c)) {
4368           atom->SetFormalCharge(charge + c);
4369           extraCharge -= c;
4370         }
4371       }
4372     }
4373     if(extraCharge != 0)
4374     {
4375       obErrorLog.ThrowError(__FUNCTION__, "Unable to assign all the charge to atoms", obWarning);
4376       return false;
4377     }
4378     return true;
4379  }
4380   /* These cases work ok
4381    original      charge  result
4382   [NH4]             +1   [NH4+]
4383   -C(=O)[O]         -1   -C(=O)[O-]
4384   -[CH2]            +1   -C[CH2+]
4385   -[CH2]            -1   -C[CH2-]
4386   [NH3]CC(=O)[O]     0   [NH3+]CC(=O)[O-]
4387   S(=O)(=O)([O])[O] -2   S(=O)(=O)([O-])[O-]
4388   [NH4].[Cl]         0   [NH4+].[Cl-]
4389   */
4390 
4391 } // end namespace OpenBabel
4392 
4393 //! \file mol.cpp
4394 //! \brief Handle molecules. Implementation of OBMol.
4395