1 /* Copyright (c) 2015  Gerald Knizia
2  *
3  * This file is part of the IboView program (see: http://www.iboview.org)
4  *
5  * IboView is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, version 3.
8  *
9  * IboView is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
16  *
17  * Please see IboView documentation in README.txt for:
18  * -- A list of included external software and their licenses. The included
19  *    external software's copyright is not touched by this agreement.
20  * -- Notes on re-distribution and contributions to/further development of
21  *    the IboView software
22  */
23 
24 #include "pugixml.hpp"
25 #include <iostream>
26 #include <fstream>
27 #include <algorithm>
28 #include <set>
29 #include <boost/format.hpp>
30 #include <boost/algorithm/string.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <sstream>
33 #include <cctype> // for 1-argument std::tolower.
34 #include "CtBasisShell.h"
35 #include "CtMatrix.h"
36 #include "CtConstants.h"
37 #include "IvOrbitalFile.h"
38 
39 namespace ctimp {
40    void Vec_Ca2Sh(double *pSh, double const *pCa, size_t *ls, size_t N);
41    void Vec_CaMolden2CaMolpro(double *pOrb, size_t *ls, size_t N);
42    void Vec_ShMolden2ShMolpro(double *pOrb, size_t *ls, size_t N);
43    void Vec_Ca2Ca_XSqrtNorm(double *pOrb, size_t *ls, size_t N);
44 }
45 
46 using boost::format;
47 using boost::algorithm::trim;
48 using boost::algorithm::trim_left;
49 using boost::algorithm::trim_right;
50 
51 
52 namespace orbital_file {
53 
54 // class FXmlParseError : public FFileLoadError
55 // {
56 // public:
57 //    typedef FFileLoadError
58 //       FBase;
59 // //    FXmlParseError(std::string const &What, std::string const &Where = "");
60 // };
61 
FFileTypeUnrecognizedError(std::string const & What,std::string const & Where)62 FFileTypeUnrecognizedError::FFileTypeUnrecognizedError(std::string const &What, std::string const &Where)
63    : FFileLoadError(What, Where)
64 {}
65 
66 class FUnexpectedFormatError : public FFileLoadError
67 {
68 public:
69    typedef FFileLoadError
70       FBase;
FUnexpectedFormatError(std::string const & Message)71    FUnexpectedFormatError(std::string const &Message)
72       : FBase("Unexpected format: '" + Message)
73    {}
74 };
75 
76 class FVariableNotFoundError : public FFileLoadError
77 {
78 public:
79    typedef FFileLoadError
80       FBase;
FVariableNotFoundError(std::string const & VarName)81    FVariableNotFoundError( std::string const &VarName )
82       : FBase("Variable not found: '" + VarName + "'")
83    {}
84 };
85 
FmtParseError(std::string const & What,std::string const & Where)86 static std::string FmtParseError(std::string const &What, std::string const &Where)
87 {
88    if (Where.empty())
89       return What;
90    else {
91       std::stringstream str;
92       str << boost::format("* ERROR while loading file '%s':\n%s") % Where % What << std::endl;
93       return str.str();
94    }
95 }
96 
FFileLoadError(std::string const & What,std::string const & Where)97 FFileLoadError::FFileLoadError(std::string const &What, std::string const &Where)
98    : FBase(FmtParseError(What,Where))
99 {}
100 
101 
102 // FXmlParseError::FXmlParseError(std::string const &What, std::string const &Where)
103 //    : FBase(FmtParseError(What,Where))
104 // {}
105 
106 
str_tolower(std::string & str)107 static void str_tolower(std::string &str) {
108 //    std::transform(str.begin(), str.end(), str.begin(), std::tolower);
109    for (size_t i = 0; i != str.size(); ++ i)
110       str[i] = std::tolower(str[i]);
111    // ...
112 }
113 
str_lower_and_trim(std::string & str)114 static void str_lower_and_trim(std::string &str)
115 {
116    str_tolower(str);
117    trim(str);
118 }
119 
FOrbitalInfo()120 FOrbitalInfo::FOrbitalInfo()
121 {
122    fEnergy = 0.;
123    fOcc = 0.;
124    iSym = 0;
125    Spin = ORBSPIN_Unknown;
126 }
127 
128 enum FLineReadFlags {
129    LINE_Trim = 0x01,           // if set, output lines are trimmed on both left and right sides (otherwise only right)
130    LINE_ToLower = 0x02,        // if set, output lines are converted to lower case
131    LINE_KeepEmpty = 0x04,      // if set, empty lines are not skipped
132    LINE_AcceptEof = 0x08       // if set, EOF will not raise an exception, but just return 'false'.
133 };
134 
get_next_line(std::string & line,std::istream & in,unsigned flags=0)135 static bool get_next_line(std::string &line, std::istream &in, unsigned flags=0) {
136    // read first line. skip empty lines.
137    line = "";
138    while (in.good() && line.empty()) {
139       if (!std::getline(in, line)) {
140          if (flags & LINE_AcceptEof)
141             return false;
142          else
143             throw FUnexpectedFormatError("Unexpected end of file.");
144       }
145       if (flags & LINE_ToLower)
146          str_tolower(line);
147       if (flags & LINE_Trim)
148          trim(line);
149       else
150          trim_right(line);
151       if (flags & LINE_KeepEmpty)
152          break;
153    }
154    return true;
155 }
156 
ends_with(std::string const & str,std::string const & suffix)157 bool ends_with(std::string const &str, std::string const &suffix)
158 {
159    if (str.size() < suffix.size())
160       return false;
161    return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
162 }
163 
replace_fortran_exponents(std::string & line)164 void replace_fortran_exponents(std::string &line)
165 {
166    // replace fortran exponent format by rest-of-the-world exponent format.
167    boost::algorithm::replace_all(line, "D+", "e+");
168    boost::algorithm::replace_all(line, "D-", "e-");
169    boost::algorithm::replace_all(line, "d+", "e+");
170    boost::algorithm::replace_all(line, "d-", "e-");
171 }
172 
try_read_fortran_float(double & f,std::string s)173 bool try_read_fortran_float(double &f, std::string s)
174 {
175    replace_fortran_exponents(s);
176    std::stringstream ss2(s);
177    std::string MaybeFloat;
178 
179    // this is indeed the most compact C++ way I could come up with
180    // for checking if something "cleanly" converts to 'double'...
181    // (and even this is not really clean, because strtod uses "errno",
182    // which is not threadsafe).
183    ss2 >> MaybeFloat;
184    char const *pBeg = MaybeFloat.c_str();
185    char *pEnd;
186    f = std::strtod(pBeg, &pEnd);
187    if (pEnd - pBeg == ptrdiff_t(MaybeFloat.size()))
188       return true;
189    return false;
190 }
191 
read_fortran_float(std::string const & s)192 double read_fortran_float(std::string const &s)
193 {
194    double f;
195    if (!try_read_fortran_float(f,s))
196       throw FUnexpectedFormatError("Expected float, but found '" + s + "'.");
197    return f;
198 }
199 
200 
201 enum FOrbitalSource {
202    ORBSOURCE_Xml,
203    ORBSOURCE_Molden
204 };
205 
FixOrbitalTypes1(FOrbitalSet::FOrbitalInfoList & OrbList,FOrbitalSource Source)206 static void FixOrbitalTypes1(FOrbitalSet::FOrbitalInfoList &OrbList, FOrbitalSource Source)
207 {
208    size_t
209       nAlpha = 0,
210       nBeta = 0,
211       nClosed = 0,
212       nOther = 0,
213       nNonIntegerOcc = 0;
214    FOrbitalSet::FOrbitalInfoList::iterator
215       it;
216    for (it = OrbList.begin(); it != OrbList.end(); ++ it) {
217       switch((*it)->Spin) {
218          case ORBSPIN_Alpha: nAlpha += 1; continue;
219          case ORBSPIN_Beta: nBeta += 1; continue;
220          case ORBSPIN_SpinFree: nClosed += 1; continue;
221          default: nOther += 1; continue;
222       }
223       if (std::abs(std::modf((*it)->fOcc, 0) > 1e-8))
224          nNonIntegerOcc += 1;
225    }
226 
227    bool RebuildWithOccupations = false;
228    if (Source == ORBSOURCE_Molden) {
229       // there are *only* Alpha orbitals. Most likely we got something R(o)HFy.
230       // Adjust spins based on occupation numbers.
231       if (nBeta == 0 && nOther == 0 && nClosed == 0)
232          RebuildWithOccupations = true;
233    } else if (Source == ORBSOURCE_Xml) {
234       // if there are no non-integer occupations, we probably also have a ROHFy wave function.
235       if (nNonIntegerOcc == 0)
236          RebuildWithOccupations = true;
237    }
238 
239    if (RebuildWithOccupations) {
240       for (it = OrbList.begin(); it != OrbList.end(); ++ it) {
241          double fOcc = (*it)->fOcc;
242          if (fOcc == 2. || fOcc == 0.)
243             (*it)->Spin = ORBSPIN_SpinFree;
244          if (fOcc == 1.)
245             (*it)->Spin = ORBSPIN_Alpha;
246       }
247    }
248 }
249 
250 
251 namespace molpro_xml {
252 
253 typedef ::orbital_file::FFileLoadError
254    FXmlParseError;
255 
256 using boost::format;
257 using boost::str;
258 using ct::TArray;
259 
EnumerateChildren(std::ostream & out,pugi::xml_node Node)260 void EnumerateChildren(std::ostream &out, pugi::xml_node Node)
261 {
262    out << "Node '" << Node.name() << "':\n";
263    for (pugi::xml_node_iterator it = Node.begin(); it != Node.end(); ++it)
264    {
265       out << "-> '" << it->name() << "' Attr: [";
266 
267       for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
268       {
269          if (ait != it->attributes_begin())
270             out << " ";
271          out << ait->name() << "=" << ait->value();
272       }
273       out << "]";
274 
275       out << std::endl;
276    };
277 };
278 
279 // append value list contained in a xml node to output list.
280 template<class FType>
AppendToList(TArray<FType> & Out,std::string const & s)281 void AppendToList(TArray<FType> &Out, std::string const &s)
282 {
283 //    std::cout << "rd list: " << s << std::endl;
284    std::stringstream
285       ss(s);
286    while (1) {
287       FType fValue;
288       ss >> fValue;
289       if (ss.fail())
290          break;
291       Out.push_back(fValue);
292 //       std::cout << format("  Val: %10.5f\n") % fValue;
293    }
294 }
295 
AppendAssociationList(TArray<int> & Out,std::string sText,std::string const StartHere)296 static void AppendAssociationList(TArray<int> &Out, std::string sText, std::string const StartHere)
297 {
298    // example strings:
299    //   #xpointer(//molecule[@id='2AA7']//basisGroup[@id='1' or @id='2' or @id='3'])]
300    //   #xpointer(//molecule[@id='2AA7']//atom[@id='1' or @id='3' or @id='5' or @id='7' or @id='9' or @id='11'])]
301    // skip everything until '//basisGroup['. Then erase "@id" and similar things.
302    sText.erase(0, sText.find(StartHere.c_str())+StartHere.size());
303    sText.erase(sText.find("]"));
304    boost::replace_all(sText, "@id='", "");
305    boost::replace_all(sText, " or", "");
306    boost::replace_all(sText, "'", "");
307    boost::replace_all(sText, "a", "");
308 //    std::cout << format("sText: '%s'\n") % sText;
309    // should now just be a linear list of integers.
310    AppendToList<int>(Out, sText);
311 }
312 
313 template<class T>
PrintArray(std::ostream & out,T const & Array,std::string const & Caption,std::string const & Fmt)314 static std::ostream &PrintArray(std::ostream &out, T const &Array, std::string const &Caption, std::string const &Fmt)
315 {
316    out << Caption << ": ";
317    for (typename T::const_iterator it = Array.begin(); it != Array.end(); ++ it) {
318       if (it != Array.begin())
319          out << " ";
320       out << format(Fmt) % (*it);
321    }
322    return out;
323 }
324 
LoadBasisSet(pugi::xml_node BasisNode,ct::FAtomSet const & Atoms)325 ct::FBasisSetPtr LoadBasisSet(pugi::xml_node BasisNode, ct::FAtomSet const &Atoms)
326 {
327    std::string
328       Name = BasisNode.attribute("id").value(),
329       AngularType = BasisNode.attribute("angular").value();
330 //    uint
331 //       nShells = BasisNode.attribute("groups").as_int();
332 //    std::cout << format("Loading basis '%s' with %i groups.\n") % Name % nShells;
333    // gna... need to convert to spherical anyway.
334 //    if (AngularType != "spherical")
335 //       throw FXmlParseError("Sorry, can only use spherical harmonic basis sets, not cartesians.");
336 //    ct::FBasisSetPtr pBasis(new F
337 //    pugi::xml_node Node
338 
339    // load the shell information: only unique ones are stored.
340    typedef std::map<int, ct::FAtomShellPtr>
341       FIdToShellMap;
342    FIdToShellMap
343       // id -> object.
344       AtomShells;
345    for (pugi::xml_node GroupNode = BasisNode.child("basisGroup"); GroupNode; GroupNode = GroupNode.next_sibling("basisGroup"))
346    {
347 //       EnumerateChildren(std::cout, GroupNode);
348       TArray<double>
349          Exp, Co;
350       int
351          AngMom = GroupNode.attribute("minL").as_int(),
352          nExp = GroupNode.attribute("primitives").as_int(),
353          nCo = GroupNode.attribute("contractions").as_int(),
354          iGroup = GroupNode.attribute("id").as_int();
355       if (AngMom != GroupNode.attribute("maxL").as_int())
356          throw FXmlParseError("basis contains minL != maxL shells. Cannot deal with those.");
357       Exp.reserve(nExp);
358       Co.reserve(nExp * nCo);
359       // read exponents.
360       AppendToList<double>(Exp, GroupNode.child_value("basisExponents"));
361       // read contraction coefficients.
362       int iCo = 1;
363       for (pugi::xml_node CoNode = GroupNode.child("basisContraction"); CoNode; CoNode = CoNode.next_sibling("basisContraction"))
364       {
365 //          std::cout << "contraction " << iCo << "\n";
366          int iCoId = CoNode.attribute("id").as_int();
367          if (iCoId != iCo)
368             throw FXmlParseError(str(format("error while reading basis group %i:"
369                "expected contraction with id %i, but found %i.") % iGroup % iCo % iCoId));
370          AppendToList<double>(Co, CoNode.child_value());
371          iCo += 1;
372       }
373       if (nExp != (signed)Exp.size() || nExp*nCo != (signed)Co.size())
374          throw FXmlParseError(str(format("contraction %i: exponents or co-coefficient number does not match up.") % iGroup));
375       if (AtomShells.find(iGroup) != AtomShells.end())
376          throw FXmlParseError(str(format("multiple occurrences of group id %i") % iGroup));
377       // that's all we need. make a atom basis object
378       // and remember it.
379       ct::FAtomShellPtr
380          pFn(new ct::FAtomShell(AngMom, &Exp[0], nExp, &Co[0], nCo));
381       AtomShells[iGroup] = pFn;
382    }
383 //    std::cout << format("read %i unique atom basis shells.\n") % AtomShells.size();
384 
385 
386    typedef std::multimap<int, int>
387       FAtomToShellIdMap;
388    typedef std::pair<FAtomToShellIdMap::iterator, FAtomToShellIdMap::iterator>
389       FAtomToShellRangeIt;
390    FAtomToShellIdMap
391       // atom index -> list of shell ids for the atom (stored in AtomShells)
392       AtomShellIds;
393 
394    // now this part is... more complicated. we get a list of basis/atom
395    // associations, in the form of 'list of unique shells' to 'list of atoms'.
396    // However, this information is stored the form of cross-referenced xlinks...
397 //    EnumerateChildren(std::cout, BasisNode.child("associ  ation"));
398    for (pugi::xml_node AssocNode = BasisNode.child("association"); AssocNode; AssocNode = AssocNode.next_sibling("association"))
399    {
400       // I here hope that I can just skip some of this information, and assume
401       // that there is only one molecule and basis sets between different
402       // molecules are not mixed (technically they could).
403       std::string const
404          sBases = AssocNode.child("bases").attribute("xlink:href").value(),
405          sAtoms = AssocNode.child("atoms").attribute("xlink:href").value();
406       // example strings:
407       //   #xpointer(//molecule[@id='2AA7']//basisGroup[@id='1' or @id='2' or @id='3'])]
408       //   #xpointer(//molecule[@id='2AA7']//atom[@id='1' or @id='3' or @id='5' or @id='7' or @id='9' or @id='11'])]
409       // skip everything until '//basisGroup['. Then erase "@id" and similar things.
410       TArray<int>
411          iShells, iAtoms;
412       AppendAssociationList(iShells, sBases, "//basisGroup[");
413       AppendAssociationList(iAtoms, sAtoms, "//atom[");
414 //       PrintArray(std::cout, iShells, "Shell ids", "%i") << " <-> ";
415 //       PrintArray(std::cout, iAtoms, "Atom ids", "%i") << "\n";
416 
417 //     <bases xlink:type="locator" xlink:label="bases"
418 //       xlink:href="#xpointer(//molecule[@id='436']//basisGroup[@id='1' or @id='2' or @id='3'])"/>
419 //     <atoms xlink:label="atoms"
420 //       xlink:href="#xpointer(//molecule[@id='436']//atom[@id='a1' or @id='a3' or @id='a5' or @id='a7' or @id='a9' or @id='a11'])"/>
421 
422 
423 //       std::cout << ""
424       for (uint i = 0; i < iAtoms.size(); ++ i) {
425          for (uint j = 0; j < iShells.size(); ++ j)
426             AtomShellIds.insert(FAtomToShellIdMap::value_type(iAtoms[i], iShells[j]));
427          if (iAtoms[i] > (int)Atoms.size() || iAtoms[i] == 0)
428             throw FXmlParseError("encountered basis shell association for non-exitant atom.");
429       }
430    }
431 
432    // in principle we now have all the required information to build the basis set (as long
433    // as all functions come in the order I imagine they do...)
434    // Go through the atoms and make a basis shell for each of its associated atom shells.
435    ct::FBasisSet::FBasisShellArray
436       Shells;
437    for (uint iAt = 0; iAt < Atoms.size(); ++ iAt) {
438       ct::FAtom const
439          &At = Atoms[iAt];
440       FAtomToShellRangeIt itEq = AtomShellIds.equal_range(iAt+1);
441       for (FAtomToShellIdMap::iterator it = itEq.first; it != itEq.second; ++ it) {
442          FIdToShellMap::iterator
443             itSh = AtomShells.find(it->second);
444          if (itSh == AtomShells.end())
445             throw FXmlParseError("encountered a non-extant atom shell assigned to an atom.");
446          ct::FAtomShellPtr
447             pFn = itSh->second;
448          Shells.push_back(ct::FBasisShell(At.vPos, iAt, pFn));
449 //             FBasisShell(FVector3 vCenter_, int iCenter_, FAtomShellCptr pAs_)
450 
451       }
452    }
453 
454 //       FBasisSet(FBasisShell const *pShells_, uint nShells_, FBasisContext Context_, std::string const &Name);
455    return ct::FBasisSetPtr(new ct::FBasisSet(&Shells[0], Shells.size(), ct::BASIS_Orbital, "(external)"));
456 }
457 
458 
LoadGeometry(pugi::xml_node GeometryNode,double Factor=1./ct::ToAng)459 FAtomSetPtr LoadGeometry(pugi::xml_node GeometryNode, double Factor = 1./ct::ToAng)
460 {
461    FAtomSetPtr
462       pAtoms = new ct::FAtomSet();
463    int
464       iAtom = 1; // starts at 1.
465    for (pugi::xml_node_iterator it = GeometryNode.begin(); it != GeometryNode.end(); ++it, ++iAtom)
466    {
467       std::string
468          id = it->attribute("id").value(),
469          element = it->attribute("elementType").value();
470       std::string
471          expected_id = "a" + boost::lexical_cast<std::string>(iAtom);
472       if (id != expected_id)
473          throw FXmlParseError("problem reading atoms: expected atom id '" + expected_id + "', but found '" + id + "'.");
474       double
475          x = boost::lexical_cast<double>(it->attribute("x3").value()),
476          y = boost::lexical_cast<double>(it->attribute("y3").value()),
477          z = boost::lexical_cast<double>(it->attribute("z3").value());
478       pAtoms->Atoms.push_back(ct::FAtom(Factor * ct::FVector3(x,y,z), element, "(external)"));
479 
480 //       std::cout << format("!AT  id = %4s  elem = %4s  x = %10.5f y = %10.5f z = %10.5f") % id % element % x % y % z;
481 //       std::cout << std::endl;
482    }
483 //    std::cout << *pAtoms;
484    return pAtoms;
485 }
486 
487 
488 template<class T>
ReadArrayT(typename ct::TArray<T> & Out,std::istream & str)489 void ReadArrayT(typename ct::TArray<T> &Out, std::istream &str)
490 {
491    for (;;) {
492       T x;
493       str >> x;
494       if (str.good())
495          Out.push_back(x);
496       else
497          break;
498    }
499 }
500 
501 
LoadOrbitals(pugi::xml_node OrbitalsNode,ct::FBasisSetPtr pBasisOrb,FLoadOptions const & LoadOptions)502 FOrbitalSetPtr LoadOrbitals(pugi::xml_node OrbitalsNode, ct::FBasisSetPtr pBasisOrb, FLoadOptions const &LoadOptions)
503 {
504    FOrbitalSetPtr
505       pOrbSet = new FOrbitalSet();
506    uint
507       iOrb = 0;
508    uint
509       OrbsForSym[8] = {0};
510    TArray<size_t>
511       ls;
512    unsigned
513       nBfSph = pBasisOrb->nFn(),
514       nBfCa = 0; // expected number of cartesian basis functions.
515    bool
516       ConvertToSpherical = std::string(OrbitalsNode.attribute("angular").value()) != std::string("spherical");
517       // ^- that's the default setting...
518 //    if (ConvertToSpherical)
519 //       std::cout << "Expecting Cartesian orbital input.\n";
520 //    else {
521 //       std::cout << "Expecting spherical orbital input.\n";
522 //    }
523    for (uint iSh = 0; iSh < pBasisOrb->Shells.size(); ++ iSh)
524       for (uint iCo = 0; iCo < pBasisOrb->Shells[iSh].pAs->nCo(); ++ iCo) {
525          unsigned l = pBasisOrb->Shells[iSh].pAs->AngMom;
526          ls.push_back(l);
527 //          def nCartY(l): return ((l+1)*(l+2))/2
528          nBfCa += ((l+1)*(l+2))/2;
529 //          std::cout << format("iSh = %i  l = %i nBfCa = %i\n") % iSh % l % nBfCa;
530       }
531    for (pugi::xml_node OrbNode = OrbitalsNode.child("orbital"); OrbNode; ++iOrb, OrbNode = OrbNode.next_sibling("orbital"))
532    {
533 //       EnumerateChildren(std::cout, OrbNode);
534       FOrbitalInfoPtr
535          pInfo = new FOrbitalInfo;
536       pInfo->fEnergy = boost::lexical_cast<double>(OrbNode.attribute("energy").value());
537       pInfo->fOcc = boost::lexical_cast<double>(OrbNode.attribute("occupation").value());
538       pInfo->iSym = boost::lexical_cast<int>(OrbNode.attribute("symmetryID").value());
539       pInfo->pBasisSet = pBasisOrb;
540       pInfo->Spin = ORBSPIN_SpinFree;
541       if (pInfo->fOcc < 0.00001 && LoadOptions.SkipVirtuals())
542          continue;
543       // ^- why not break? in symmetry calculations there may be additional
544       //    occupied orbitals later.
545 //       std::cout << format("ORB: E[%3i] = %12.6f H   Occ = %12.6f  Sym = %1i\n") % iOrb % fEnergy % fOcc % iSym;
546 //       std::cout << OrbNode.child_value() << std::endl;
547       if (pInfo->iSym <= 8) {
548          OrbsForSym[pInfo->iSym-1] += 1;
549 //          pInfo->sDesc = boost::str(format("%4i.%1i [E=%8.4f  O= %6.4f]") % OrbsForSym[pInfo->iSym-1] % pInfo->iSym % pInfo->fEnergy % pInfo->fOcc);
550          pInfo->sDesc = boost::str(format("%4i.%1i") % OrbsForSym[pInfo->iSym-1] % pInfo->iSym);
551       } else {
552 //          pInfo->sDesc = boost::str(format("%4i.? [E=%8.4f  O= %6.4f]") % iOrb % pInfo->fEnergy % pInfo->fOcc);
553          pInfo->sDesc = boost::str(format("%4i.?") % iOrb);
554       }
555 
556       std::stringstream
557          str(OrbNode.child_value());
558       TArray<double>
559          OrbCa;
560       ReadArrayT(OrbCa, str);
561 //       std::string s = boost::str(boost::format("import orbital %i: expected %i cartesian components, but got %i.") % (iOrb+1) % nBfCa % OrbCa.size());
562       pInfo->Orb.resize(nBfSph, 777.777);
563       if (ConvertToSpherical) {
564          // transform from cartesian back to spherical. (...hopefully...)
565          if (OrbCa.size() != nBfCa)
566             throw FXmlParseError(boost::str(boost::format("import orbital %i: expected %i cartesian components, but got %i.") % (iOrb+1) % nBfCa % OrbCa.size()));
567          ctimp::Vec_Ca2Sh(&pInfo->Orb[0], &OrbCa[0], &ls[0], ls.size());
568 //          throw FXmlParseError("Cartesian basis input currently not supported. Use {put,xml,...; keepspherical}");
569       } else {
570          // hopefully already is spherical...
571          if (OrbCa.size() != nBfSph)
572             throw FXmlParseError(boost::str(boost::format("import orbital %i: expected %i spherical components, but got %i.") % (iOrb+1) % nBfSph % OrbCa.size()));
573          pInfo->Orb = OrbCa;
574       }
575       pOrbSet->OrbInfos.push_back(pInfo);
576 //       std::cout << pInfo->Desc() << std::endl;
577 //       std::cout << format("   nCoeff: %i  (of %i)") % pInfo->Orb.size() % pBasisOrb->nFn() << std::endl;
578    }
579    FixOrbitalTypes1(pOrbSet->OrbInfos, ORBSOURCE_Xml);
580    return pOrbSet;
581 }
582 
TestOrbitals(ct::FBasisSetPtr pBasis,FAtomSetPtr pAtoms,FOrbitalSetPtr pOrbSet)583 int TestOrbitals(ct::FBasisSetPtr pBasis, FAtomSetPtr pAtoms, FOrbitalSetPtr pOrbSet)
584 {
585    using namespace ct;
586 
587    ct::FMemoryStack2 Mem(200000000);
588    uint
589       nAo = pBasis->nFn(),
590       nOrb = pOrbSet->OrbInfos.size();
591    FStackMatrix
592       S(nAo, nAo, &Mem),
593       COrb(nAo, nOrb, &Mem);
594    pAtoms->MakeOverlapMatrix(S, *pBasis, *pBasis, Mem);
595 //    S.Print(std::cout, "AO overlap");
596    for (uint iOrb = 0; iOrb < pOrbSet->OrbInfos.size(); ++ iOrb) {
597       FOrbitalInfo
598          &oi = *pOrbSet->OrbInfos[iOrb];
599       assert(nAo == oi.Orb.size());
600       for (uint iBf = 0; iBf < nAo; ++ iBf)
601          COrb(iBf, iOrb) = oi.Orb[iBf];
602    }
603 //    COrb.Print(std::cout, "MO coeffs");
604    {
605       double rmsd = 0.;
606       FStackMatrix
607          OrbOvl(nOrb, nOrb, &Mem);
608       ChainMxm(OrbOvl, Transpose(COrb), S, COrb, Mem);
609       for (size_t i = 0; i < OrbOvl.nRows; ++ i)
610          for (size_t j = 0; j < OrbOvl.nCols; ++ j) {
611             double f = OrbOvl(i,j) - ((i==j)? 1. : 0.);
612             rmsd += f*f;
613          }
614       rmsd = std::sqrt(rmsd);
615       OrbOvl.Print(std::cout, "Orbital MO overlap");
616       std::cout << boost::format("  RMSD from orthogonality: %8.2e") % rmsd << std::endl;
617    }
618    return 0;
619 }
620 
LoadMolproXmlFile(std::string const & FileName,FLoadOptions const & LoadOptions)621 FMolproXmlDataPtr LoadMolproXmlFile(std::string const &FileName, FLoadOptions const &LoadOptions)
622 {
623 //    try {
624       FMolproXmlDataPtr
625          pXmlData = new FMolproXmlData();
626       pugi::xml_document doc;
627       pugi::xml_parse_result result = doc.load_file(FileName.c_str());
628       if (result) {
629 //          std::cout << format("* read '%s'.") % FileName << std::endl;
630       } else {
631          throw FXmlParseError(str(format("XML load result: '%s'") % result.description()));
632 //          return FMolproXmlDataPtr(0);
633       }
634 
635       pugi::xml_node MoleculeNode = doc.child("molpro").child("molecule");
636       pugi::xml_node GeometryNode = MoleculeNode.child("cml:atomArray");
637       if (!GeometryNode) {
638          // see if we have the geometry as a sub node in a <cml:molecule> node instead
639          // of directly under <molecule> as in previous versions of Molpro master
640          pugi::xml_node CmlMoleculeNode = MoleculeNode.child("cml:molecule");
641          if (CmlMoleculeNode)
642             GeometryNode = CmlMoleculeNode.child("cml:atomArray");
643       }
644       if (!GeometryNode)
645          throw FXmlParseError("Failed to find <cml:atomArray> node.");
646       pXmlData->pAtoms = LoadGeometry(GeometryNode);
647 
648       pugi::xml_node OrbBasisNode = MoleculeNode.find_child_by_attribute("basisSet", "id", "ORBITAL");
649       if (!OrbBasisNode)
650          throw FXmlParseError("Failed to find orbital basis node (<basisSet id='ORBITAL' ...>).");
651 
652       pXmlData->pBasisOrb = LoadBasisSet(OrbBasisNode, *pXmlData->pAtoms);
653    //    std::cout << *pBasisOrb;
654 
655       pugi::xml_node OrbitalsNode = MoleculeNode.child("orbitals");
656       if (!OrbitalsNode)
657          throw FXmlParseError("Failed to find orbital node (<orbitals ...>).");
658       pXmlData->pOrbSet = LoadOrbitals(OrbitalsNode, pXmlData->pBasisOrb, LoadOptions);
659       return pXmlData;
660 //    } catch(FXmlParseError &e) {
661 //       throw FXmlParseError(e.what(), FileName);
662 //    }
663    return 0;
664 }
665 
666 
667 } // namespace molpro_xml
668 
669 
670 namespace molden_file {
671    typedef ::orbital_file::FFileLoadError
672       FMoldenParseError;
673 
674    using boost::format;
675    using boost::str;
676    using ct::TArray;
677 
678 // the molden files made by different programs look almost the same, but they
679 // are all slightly incompatible due to various different bugs.
680 enum FMoldenSource {
681    MOLDENSOURCE_Molpro,
682    // Molcas thinks the .molden format should have different headings than the Molden spec. E.g. '(AU)' tags instead of 'AU' tags.
683    MOLDENSOURCE_Molcas,
684    // Turbomole uses a different basis function normalization than Molpro and Molcas
685    MOLDENSOURCE_Turbomole,
686    // Orca again uses a different normalization than either Molpro/Molcas or Turbomole
687    MOLDENSOURCE_Orca
688 };
689 
690 
691 struct FMoldenAtom
692 {
693    ct::FVector3
694       vPos; // stored in atomic units
695    std::string
696       Label; // normally corresponds to element name. But might contain suffices
697    int
698       // I don't quite get it, but center numbers are explicitly stored.
699       // I assume therefore they can be out of order, or with holes, although
700       // in the examples I've seen they never were.
701       iCenter,
702       iElement; // 1: H, 2: He, ...
703    int
704       // index this atom will get in the loaded file. We require in various places
705       // that atoms are numbered continously, starting at 0, and that the basis sets
706       // are given in the same order as the atoms. We thus might have to shovel things
707       // around...
708       iCenterOut;
709 };
710 
711 // maps center indices to atom objects. Note comments on iCenter
712 // in FMoldenAtom.
713 struct FMoldenAtomSet : public std::map<int, FMoldenAtom>
714 {
operator []orbital_file::molden_file::FMoldenAtomSet715    FMoldenAtom &operator [] (int iCenter) {
716       return (*this)[iCenter];
717    }
718    FMoldenAtom const &operator [] (int iCenter) const;
719 
sizeorbital_file::molden_file::FMoldenAtomSet720    size_t size() const { return (*this).size(); }
721    void AddAtom(FMoldenAtom const &A);
722 
723    FMoldenAtom &Get(int iCenter);
724 
725    ct::FAtomSetPtr Convert();
726 };
727 
728 
729 struct FMoldenGaussShell
730 {
731    ct::TArray<double>
732       Co,  // nExp x nCo contraction matrix
733       Exp; // nExp primitive exponents
734    size_t
735       nExp,
736       nCo;
737    int
738       iAngMom,
739       iCenter;
740 //    FMoldenGaussShell() {};
741    size_t
742       // position of the function in the original basis order. Output basis
743       // will be ordered by center index and angular momentum.
744       iOffsetIn;
745    bool
746       Spherical;
747    inline size_t nFn() const;
748    inline size_t nFnSph() const;
749 
750    bool operator < (FMoldenGaussShell const &other) const;
751 };
752 
753 struct FMoldenBasis
754 {
755    typedef std::vector<FMoldenGaussShell>
756       FGaussFnList;
757    FGaussFnList
758       Shells;
759    size_t nFn() const; // as declared in actual shell objects
760    size_t nFnSph() const; // as if all shells were declared as spherical
761    size_t nFnCart() const; // as if all shells were declared as cartesian
762 
763    // convert Molden's basis format into our own by rebuilding the general contractions and
764    // joining equivalent shell functions into common FBasisFn objects and deferred FBasisShell objects.
765    FBasisSetPtr Convert(FMoldenAtomSet *pMoldenAtomSet);
766 
767    TArray<size_t>
768       // iOrderedFn = m_InputToOutputOrder[iInputFn].
769       // Make by this->Sort(). This takes care only of the center/AM re-alignment.
770       // individual function orders are dealt with separately.
771       m_InputToOutputOrder;
772    // sort basis functions by center index and angular momentum.
773    void Sort();
774 
Sortedorbital_file::molden_file::FMoldenBasis775    bool Sorted() const { return m_IsSorted; };
FMoldenBasisorbital_file::molden_file::FMoldenBasis776    FMoldenBasis() : m_IsSorted(false) {}
777 private:
778    bool m_IsSorted;
779 };
780 
781 // // that's just the same thing... as FVariableSet. Apparently a random list of
782 // // stuff we can assign to various properties associated with an orbital. Not
783 // // sure if they are supposed to be predefined. Molpro stores stuff like 'Ene'
784 // // (orbital energy), 'Occup' (orbital occupancy), or 'Spin' (for unknown reason
785 // // ``Alpha'' for closed orbitals).
786 // struct FMoldenOrbInfo : public FVariableSet
787 // {
788 //    FScalarData
789 //       Coeffs;
790 // };
791 
792 
793 struct FMoldenFile
794 {
795    FVariableSet
796       Vars;
797    FMoldenAtomSet
798       Atoms;
799    FMoldenBasis
800       Basis;
801 
802    typedef std::vector<FOrbitalInfoPtr>
803       FOrbInfoList;
804    FOrbInfoList
805       // information provided in molden file on the individual molecular orbitals.
806       // There is one object for each MO in Orbs, even if no data is associated with
807       // the orbital.
808       OrbInfo;
809 //    size_t
810 //       // number of *spherical* basis functions and of molecular orbitals
811 //       nAo, nOrb;
812 //    ct::TArray<double>
813 //       // nAo x nOrb matrix.
814 //       Orbs;
815 
816    bool
817       m_InputOrbitalsAreSpherical,
818       m_InputOrbitalsAreSphericalForG,
819       m_AllowLargerThanG;
820    FMoldenSource
821       m_MoldenSource;
822    FLoadOptions const
823       *m_pLoadOptions;
824 
825    explicit FMoldenFile(std::istream &in, FLoadOptions const *pLoadOptions_);
826 private:
827    void SkipCurrentSection(std::istream &in);
828    void AddAtom(FMoldenAtom const &A);
829    void ReadAtomsSection(std::istream &in, double fCoordFactor);
830    void ReadBasisSection(std::istream &in);
831    void ReadMoSection(std::istream &in);
832    void FixOrbitalTypes();
833    void FixBasis(); // fix spherical declarations and basis function order.
834 };
835 
836 // number of cartesians components with angular momentum == l
nCartY(int l)837 static size_t nCartY(int l) { return static_cast<size_t>((l+1)*(l+2)/2); }
838 // number of solid harmonic components with angular momentum == l
nSlmY(int l)839 static size_t nSlmY(int l) { return static_cast<size_t>(2*l+1); }
840 
841 static size_t const npos = std::string::npos;
842 
nFn() const843 size_t FMoldenGaussShell::nFn() const
844 {
845    if (Spherical)
846       return nSlmY(iAngMom);
847    else
848       return nCartY(iAngMom);
849 }
850 
nFnSph() const851 size_t FMoldenGaussShell::nFnSph() const {
852    return nSlmY(iAngMom);
853 }
854 
nFnSph() const855 size_t FMoldenBasis::nFnSph() const
856 {
857    size_t
858       r = 0;
859    FGaussFnList::const_iterator
860       it;
861    _for_each(it, Shells)
862       r += nSlmY(it->iAngMom);
863    return r;
864 }
865 
nFnCart() const866 size_t FMoldenBasis::nFnCart() const
867 {
868    size_t
869       r = 0;
870    FGaussFnList::const_iterator
871       it;
872    _for_each(it, Shells)
873       r += nCartY(it->iAngMom);
874    return r;
875 }
876 
nFn() const877 size_t FMoldenBasis::nFn() const
878 {
879    size_t
880       r = 0;
881    FGaussFnList::const_iterator
882       it;
883    _for_each(it, Shells)
884       r += it->nFn();
885    return r;
886 }
887 
Get(int iCenter)888 FMoldenAtom &FMoldenAtomSet::Get(int iCenter) {
889    iterator
890       itAtom = this->find(iCenter);
891    if ( itAtom == this->end() )
892       throw std::runtime_error(str(boost::format("atomic center not found: %i") % iCenter));
893    return itAtom->second;
894 }
895 
operator [](int iCenter) const896 FMoldenAtom const &FMoldenAtomSet::operator [] (int iCenter) const {
897    const_iterator
898       itAtom = this->find(iCenter);
899    if ( itAtom == this->end() )
900       throw std::runtime_error(str(boost::format("atomic center not found: %i") % iCenter));
901    return itAtom->second;
902 }
903 
FMoldenFile(std::istream & in,FLoadOptions const * pLoadOptions_)904 FMoldenFile::FMoldenFile(std::istream &in, FLoadOptions const *pLoadOptions_)
905    : m_InputOrbitalsAreSpherical(false), // that's the default
906      m_InputOrbitalsAreSphericalForG(false), // that's also a default---independent of the d and f functions.
907      m_AllowLargerThanG(false),
908      m_MoldenSource(MOLDENSOURCE_Molpro),
909      m_pLoadOptions(pLoadOptions_)
910 {
911    std::string
912       line;
913    get_next_line(line, in, LINE_Trim | LINE_ToLower);
914    if (!in.good() || line != "[molden format]")
915       throw FUnexpectedFormatError("expected '[Molden Format]'");
916    for (; in.good(); ) {
917       // read section header
918       if (!get_next_line(line, in, LINE_Trim | LINE_ToLower | LINE_AcceptEof))
919          break; // the end.
920       if (line == "[molpro variables]") {
921          SkipCurrentSection(in);
922       } else if (line == "[title]") {
923          // FIXME: Turbomole molden files have a "[Title]" section and Molpro
924          // files don't. Molcas files also don't. Until recently Molpro files
925          // also  "[Molpro variables]", but these were removed at some point in
926          // late 2014 (and, apparently, this was also backported to 2012.1...).
927          // Not good! Need to find a better way of telling the programs apart.
928          std::string
929             title;
930          get_next_line(title, in, LINE_Trim | LINE_ToLower | LINE_KeepEmpty);
931          // files generated by orca_2mkl xxx -molden have a tag saying
932          // "created by orca_2mkl" in the title. Very helpful! So we now apply some heuristics...
933          // if there is a title and it says "Hi, I'm from Orca" we apply orca hacks, otherwise
934          // we apply Turbomole hacks.
935          if (std::string::npos != title.find("created by orca_2mkl")) {
936             m_MoldenSource = MOLDENSOURCE_Orca;
937 //             std::cout << "Molden file format: Orca" << std::endl;
938          } else if (std::string::npos != title.find("molden2aim,")) {
939             // dunno... let's go with Molpro?  ... yes, that seems to work.
940             m_MoldenSource = MOLDENSOURCE_Molpro;
941          } else {
942             // note: at least in my case the title is empty for files made by turbomole.
943             // but I wouldn't bet on it staying that way.
944             m_MoldenSource = MOLDENSOURCE_Turbomole;
945 //             std::cout << "Molden file format: Turbomole (title='" << title << "')" << std::endl;
946          }
947          SkipCurrentSection(in);
948       } else if (line == "[atoms] angs") {
949 //          std::cout << "Read atoms: ANGS" << std::endl;
950          ReadAtomsSection(in, 1./ct::ToAng);
951       } else if (line == "[atoms] au" || line == "[atoms] (au)") { // second variant used off-spec by Molcas.
952 //          std::cout << "Read atoms: AU" << std::endl;
953          ReadAtomsSection(in, 1.);
954       } else if (line == "[gto] (au)") {
955          // Molcas files can be identified by the parenthesis around "au".
956          // There are not supposed to be any according to spec. Especially not after "[gto]".
957          m_MoldenSource = MOLDENSOURCE_Molcas;
958          ReadBasisSection(in);
959       } else if (line == "[gto]") {
960          ReadBasisSection(in);
961       } else if (line == "[5d]" || line == "[5d7f]" || line == "[7f]") {
962          m_InputOrbitalsAreSpherical = true;
963          // ^- WARNING: Orca emits these AFTER emitting the basis.
964          // That means that we can only fix the basis right before reading the MOs,
965          // not after reading the basis as I'd do otherwise.
966       } else if (line == "[5d10f]]") {
967          throw FMoldenParseError("Mixed cartesian and spherical input functions are not supported. Please use *either* cartesian *or* spherical functions, but not both!");
968       } else if (line == "[9g]") {
969          m_InputOrbitalsAreSphericalForG = true;
970       } else if (line == "[allow_larger_g]") {
971          // Note: This is not an "officially" sactioned tag. But there is little reason
972          // to prevent this if both IboView and the exporting program can do it, only
973          // because Molden can't.
974          // With this we will allow larger-than-g functions to be
975          // specified. For the spherical versions the order is not very
976          // involved and most likely right. For the cartesians we assume
977          // Molpro order (which for d and g functions miraculously seems
978          // to be equal to Molden order).
979          m_AllowLargerThanG = true;
980       } else if (line == "[mo]") {
981          if (m_InputOrbitalsAreSphericalForG && !m_InputOrbitalsAreSpherical)
982             throw FMoldenParseError("Input specifies spherical g functions, but does not specify spherical lower functions. Mixed cartesian and spherical functions are not supported by this loader.");
983          // (^- note: can't check the other error variant here (non-spherical g
984          //  and spherical d/f), since the input might not have any g functions,
985          //  thus there would not be any need to specify them as spherical)
986          FixBasis();
987          ReadMoSection(in);
988       } else if (line.find('[') == npos) {
989          throw FMoldenParseError("Expected section header, but found '" + line + "'");
990       } else
991          SkipCurrentSection(in);
992    }
993 }
994 
995 
IsSectionStart(std::string const & line)996 static bool IsSectionStart(std::string const &line)
997 {
998    size_t iopen = line.find('[');
999    if (iopen != npos && line.find(']',iopen+1) != npos)
1000       return true;
1001    return false;
1002 }
1003 
1004 
SkipCurrentSection(std::istream & in)1005 void FMoldenFile::SkipCurrentSection(std::istream &in)
1006 {
1007    // skipp content of current section. That is, everything until the next section
1008    // header (which should be indicated by a '[..]' in the line)
1009    std::string
1010       line;
1011    while (in.good()) {
1012       std::streampos pos = in.tellg();
1013       if (!get_next_line(line, in, LINE_AcceptEof))
1014          return;
1015       if (IsSectionStart(line)) {
1016          // rewind to start of line.
1017          in.seekg(pos);
1018          return;
1019       }
1020    }
1021 }
1022 
AddAtom(FMoldenAtom const & A)1023 void FMoldenFile::AddAtom(FMoldenAtom const &A)
1024 {
1025    FMoldenAtomSet::iterator
1026       itAtom = Atoms.find(A.iCenter);
1027    if ( itAtom != Atoms.end() )
1028       throw FMoldenParseError(str(boost::format("center %i already assigned to another atom") % A.iCenter));
1029    Atoms.insert(itAtom, FMoldenAtomSet::value_type(A.iCenter,A));
1030 }
1031 
ReadAtomsSection(std::istream & in,double fCoordFactor)1032 void FMoldenFile::ReadAtomsSection(std::istream &in, double fCoordFactor)
1033 {
1034    std::string
1035       line;
1036    while (in.good()) {
1037       std::streampos pos = in.tellg();
1038       if (!get_next_line(line, in, LINE_Trim | LINE_AcceptEof))
1039          break;
1040       if (IsSectionStart(line)) {
1041          // rewind to start of line.
1042          in.seekg(pos);
1043          break;
1044       }
1045       // expected format:
1046       //  C     1    6         0.0000000000        0.0000000000        0.0000000000
1047       //             ^- charge?
1048       //        ^- center index (1 based)
1049       //  ^- label?
1050       FMoldenAtom
1051          At;
1052       std::stringstream ss(line);
1053       ss >> At.Label >> At.iCenter >> At.iElement >> At.vPos[0] >> At.vPos[1] >> At.vPos[2];
1054       At.vPos *= fCoordFactor;
1055 //       bool is_bad = ss.bad();
1056 //       bool is_fail = ss.fail();
1057 //       bool is_eol = ss.eof();
1058 //       bool is_good = ss.good();
1059       if (ss.bad() || ss.fail())
1060          throw FMoldenParseError("Failed to understand atom declaration '" + line + "'");
1061       AddAtom(At);
1062    }
1063 
1064    // make a new set of indices which is ordered continously and comes without holes.
1065    size_t
1066       iCenterOut = 0;
1067    for (FMoldenAtomSet::iterator itAtom = Atoms.begin(); itAtom != Atoms.end(); ++ itAtom) {
1068       itAtom->second.iCenterOut = iCenterOut;
1069       ++ iCenterOut;
1070    }
1071 }
1072 
iAngMomFromStr(std::string const & s_)1073 int iAngMomFromStr(std::string const &s_) {
1074    std::string s(s_);
1075    str_tolower(s);
1076    if (s == "sp")
1077       throw FMoldenParseError("This parser does not supported mixed 'sp' shells. Please use a different basis set (e.g., def2 sets).");
1078    if (s.size() != 1)
1079       throw FMoldenParseError("Expected angular momentum, but found '" + s + "'.");
1080    static char const *pAngMomNames = "spdfghikl";
1081    for (int i = 0; size_t(i) < sizeof(pAngMomNames)/sizeof(pAngMomNames[0]); ++ i)
1082       if (pAngMomNames[i] == s[0])
1083          return i;
1084    throw FMoldenParseError("'" + s + "' is not a recognized angular momentum.");
1085 }
1086 
OrcaRenormFactor(double fExp,unsigned l)1087 static double OrcaRenormFactor(double fExp, unsigned l)
1088 {
1089    // note: this differs from RawGaussNorm(fExp,l) by a missing DoubleFactR(2*l-1) inside the square root.
1090    // I guess they just use a different solid harmonics definition.
1091 //    return pow(M_PI/(2*fExp),.75) * sqrt(1./pow(4.*fExp,l));
1092 
1093    double f = pow(M_PI/(2*fExp),.75) * sqrt(1./pow(4.*fExp,l));
1094    if (l == 4) return f * std::sqrt(3); // .oO(what.)
1095    return f;
1096 }
1097 
1098 
ReadBasisSection(std::istream & in)1099 void FMoldenFile::ReadBasisSection(std::istream &in)
1100 {
1101    Basis.Shells.clear();
1102 
1103    size_t
1104       iInputBasisOffset = 0;
1105    std::string
1106       line;
1107    while (in.good()) {
1108       std::streampos pos = in.tellg();
1109       if (!get_next_line(line, in, LINE_Trim | LINE_AcceptEof))
1110          break;
1111       if (IsSectionStart(line)) {
1112          // rewind to start of line.
1113          in.seekg(pos);
1114          break;
1115       }
1116       // expected format:
1117       //   1 0                                    // center index, ??? (always 0?)
1118       //  s    5  1.00                            // number of primitives, ??? (always 1? number of contractions?)
1119       //   0.1238401694D+04  0.5517365048D-02     // exponents and contraction coefficients.
1120       //   0.1862900499D+03  0.4108882856D-01
1121       //   0.4225117635D+02  0.1822538213D+00     // beware of fortran exponent format!! (D+... instead of E+...)
1122       //   0.1167655793D+02  0.4682845944D+00
1123       //   0.3593050648D+01  0.4457581734D+00
1124       //  s    1  1.00
1125       //   0.4024514736D+00  0.1000000000D+01
1126       //  s    1  1.00
1127       //   0.1309018267D+00  0.1000000000D+01
1128       //  p    3  1.00
1129       //   0.9468097062D+01  0.5688833201D-01
1130       //   0.2010354514D+01  0.3129405934D+00
1131       //   0.5477100471D+00  0.7606501651D+00
1132       //  p    1  1.00
1133       //   0.1526861379D+00  0.1000000000D+01
1134       //  d    1  1.00
1135       //   0.8000000000D+00  0.1000000000D+01
1136       //
1137       int
1138          iCenter, iDunno;
1139       {
1140          std::stringstream
1141             str1(line);
1142          str1 >> iCenter >> iDunno;
1143       }
1144       while (in.good()) {
1145          if (!get_next_line(line, in, LINE_Trim | LINE_KeepEmpty | LINE_AcceptEof))
1146             break;
1147          if (line.empty())
1148             break; // end of basis functions for current center.
1149          FMoldenGaussShell
1150             Fn = FMoldenGaussShell(),
1151             *pFn = &Fn;
1152          {
1153             std::stringstream
1154                str2(line);
1155             std::string
1156                sAngMom, Dummy;
1157             str2 >> sAngMom >> pFn->nExp >> Dummy;
1158             pFn->iAngMom = iAngMomFromStr(sAngMom);
1159 //             if (pFn->iAngMom >= 5 && !m_AllowLargerThanG)
1160 //                throw FMoldenParseError("Molden itself does not support larger-than-g functions. You can ask this program to load them anyway by adding [allow_larger_g] as section to the molden input file.");
1161 //             if (pFn->iAngMom == 4 && m_InputOrbitalsAreSpherical && !m_InputOrbitalsAreSphericalForG)
1162 //                throw FMoldenParseError("Mixed spherical and cartesian functions are not supported. (g was specified as default cartesian, while df were defined as spherical).");
1163          }
1164          pFn->nCo = 1; // <- I don't think this can handle general contractions, or can it?
1165          pFn->Exp.resize(pFn->nExp);
1166          pFn->Co.resize(pFn->nCo * pFn->nExp);
1167          for (uint iExp = 0; iExp < pFn->nExp; ++ iExp) {
1168             get_next_line(line, in, LINE_Trim | LINE_KeepEmpty);
1169             replace_fortran_exponents(line);
1170             std::stringstream str3(line);
1171             str3 >> pFn->Exp[iExp] >> pFn->Co[iExp];
1172          }
1173 
1174          // orca exports contraction coefficients with respect to raw primitive Gaussians,
1175          // not normalized primitive Gaussians. So we need to fix up the coefficients before converting them back.
1176          // WARNING: at this moment we do not yet know if or if not the functions are spherical!
1177          if (m_MoldenSource == MOLDENSOURCE_Orca) {
1178             for (uint iExp = 0; iExp < pFn->nExp; ++ iExp)
1179                pFn->Co[iExp] *= OrcaRenormFactor(pFn->Exp[iExp], int(pFn->iAngMom));
1180          }
1181 
1182          FMoldenAtomSet::iterator
1183             itAt = Atoms.find(iCenter);
1184          if (itAt == Atoms.end())
1185             throw FMoldenParseError("Encountered basis function referring to non-declared center index.");
1186          pFn->iCenter = iCenter;
1187 //          pFn->Spherical = m_InputOrbitalsAreSpherical;
1188 //          pFn->iOffsetIn = iInputBasisOffset;
1189          iInputBasisOffset += pFn->nFn();
1190          Basis.Shells.push_back(*pFn);
1191 //          Basis.Shells.push_back(FGaussShell(pFn, iCenter, itAt->second.vPos));
1192       }
1193    }
1194 
1195  //  Basis.Sort();
1196  // ^- moved to extra function since data on whether basis is spherical or not
1197  //    was may or may not have already been given at this point.
1198 }
1199 
FixBasis()1200 void FMoldenFile::FixBasis()
1201 {
1202    if (Basis.Sorted())
1203       throw FMoldenParseError("called FixBasis more than once. Something went wrong.");
1204 
1205    // make input order and check for spherical vs cartesian functions...
1206    size_t
1207       iInputBasisOffset = 0;
1208 
1209    for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh) {
1210       FMoldenGaussShell
1211          *pFn = &Basis.Shells[iSh];
1212       {
1213          if (pFn->iAngMom >= 5 && !m_AllowLargerThanG)
1214             throw FMoldenParseError("Molden itself does not support larger-than-g functions. You can ask this program to load them anyway by adding [allow_larger_g] as section to the molden input file.");
1215          if (pFn->iAngMom == 4 && m_InputOrbitalsAreSpherical && !m_InputOrbitalsAreSphericalForG)
1216             throw FMoldenParseError("Mixed spherical and cartesian functions are not supported. (g was specified as default cartesian, while df were defined as spherical).");
1217       }
1218 
1219       pFn->Spherical = m_InputOrbitalsAreSpherical;
1220       pFn->iOffsetIn = iInputBasisOffset;
1221       iInputBasisOffset += pFn->nFn();
1222    }
1223 
1224    Basis.Sort();
1225 }
1226 
1227 
operator <(FMoldenGaussShell const & other) const1228 bool FMoldenGaussShell::operator < (FMoldenGaussShell const &other) const
1229 {
1230    if (iCenter < other.iCenter) return true;
1231    if (other.iCenter < iCenter) return false;
1232    if (iAngMom < other.iAngMom) return true;
1233    if (other.iAngMom < iAngMom) return false;
1234    return false;
1235 }
1236 
1237 
Sort()1238 void FMoldenBasis::Sort()
1239 {
1240    if (Sorted())
1241       throw FMoldenParseError("called FMoldenBasis::Sort more than once. Something went wrong.");
1242 
1243    size_t
1244       nAo = nFn(); // may be spherical or cartesian or mixed at this point.
1245    m_InputToOutputOrder.clear();
1246    m_InputToOutputOrder.resize(nAo);
1247 
1248    // sort basis shells such that they are ordered as the atoms are.
1249    std::stable_sort(Shells.begin(), Shells.end());
1250 
1251    size_t
1252       iOffsetOut = 0;
1253    // assign new positions to the basis functions.
1254    for (size_t iSh = 0; iSh < Shells.size(); ++ iSh) {
1255       FMoldenGaussShell
1256          &Sh = Shells[iSh];
1257       size_t
1258          nShFn = Sh.nFn();
1259       for (size_t iShFn = 0; iShFn < nShFn; ++ iShFn)
1260          m_InputToOutputOrder[Sh.iOffsetIn + iShFn] = iOffsetOut + iShFn;
1261       iOffsetOut += nShFn;
1262    }
1263    if (0) {
1264       std::cout << "Basis function order:" << std::endl;
1265       for (size_t i = 0; i < nAo; ++ i) {
1266          std::cout << boost::format("  In[%4i] -> internal [%4i]") % i % m_InputToOutputOrder[i] << std::endl;
1267       }
1268    }
1269    m_IsSorted = true;
1270 }
1271 
DoubleFactR(ptrdiff_t l)1272 static size_t DoubleFactR(ptrdiff_t l) {
1273    if (l <= 1)
1274       return 1;
1275    else
1276       return l * DoubleFactR(l - 2);
1277 }
1278 
1279 
ReadMoSection(std::istream & in)1280 void FMoldenFile::ReadMoSection(std::istream &in)
1281 {
1282    if (!Basis.Sorted())
1283       throw FMoldenParseError("Basis not sorted on entry of ReadMoSection.");
1284 
1285    TArray<size_t>
1286       ls;
1287    ls.reserve(Basis.Shells.size());
1288    // make a list of the angular momenta in the basis (need that for converting MO coeffs
1289    // between cartesian orders and cartesians/sphericals).
1290    for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh)
1291       ls.push_back(size_t(Basis.Shells[iSh].iAngMom));
1292    if (0) {
1293       std::cout << "Output l order:" << std::endl;
1294       for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh) {
1295          std::cout << boost::format("  Sh[%3i] -> l = %i") % iSh % Basis.Shells[iSh].iAngMom << std::endl;
1296       }
1297    }
1298 
1299    // these ones are required for program-specific hacks (note: both this
1300    // and ls are in output order! but functions are NOT yet converted to
1301    // spherical (unless they already were spherical in the input basis)).
1302    TArray<FMoldenGaussShell*>
1303       pShellForBf;
1304    pShellForBf.reserve(Basis.nFn());
1305    for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh)
1306       for (size_t iFn = 0; iFn < Basis.Shells[iSh].nFn(); ++ iFn)
1307          pShellForBf.push_back(&Basis.Shells[iSh]);
1308    assert(pShellForBf.size() == Basis.nFn());
1309 
1310    // On the input format:
1311    // the data format is rather... confusing. There is no discernable separator
1312    // between the data for the different MOs, and orbital data and variables
1313    // occur intermixed. Additionally, data looks sparse: there is no
1314    // strict reasons why there actually *IS* data for a MO.
1315    // Molpro puts a variable section before each individual
1316    // MO, and the variables can be detected by containing an assignment sign or
1317    // having a space as first character. I do not know if this is general,
1318    // however. Here I assume that:
1319    //    - there is one variable section before each MO
1320    //    - each orbital contains at least one data line (otherwise it can't
1321    //      be normalized)
1322    FScalarData
1323       // currently read orbital -- still in cartesian coordinates!
1324       Row;
1325    size_t
1326       nFnCart = Basis.nFnCart(),
1327       nFnSph = Basis.nFnSph();
1328    if (m_InputOrbitalsAreSpherical)
1329       Row.resize(nFnSph);
1330    else
1331       Row.resize(nFnCart);
1332 //    FMoldenOrbInfo
1333 //       CurrentInfo;
1334    std::string
1335       line;
1336    for ( ; ; ) {
1337       std::istream &in_file = in;
1338       std::streampos pos = in_file.tellg();
1339       if (!get_next_line(line, in_file, LINE_Trim | LINE_AcceptEof))
1340          break;
1341       if (IsSectionStart(line)) {
1342          // rewind to start of line.
1343          in_file.seekg(pos);
1344          break;
1345       }
1346       in_file.seekg(pos);
1347 //       std::stringstream in(line);
1348 //       std::cout << boost::format(" ...orb-line: '%s'") % line << std::endl;
1349 
1350       FOrbitalInfoPtr
1351          pInfo = new FOrbitalInfo;
1352 
1353       // clear data row for the current MO.
1354       Row.clear_data();
1355 
1356       if ( !in.good() )
1357          throw FMoldenParseError("Expected variables, but found '?'.");
1358 
1359       // read variables
1360       std::string
1361          Key, Val;
1362       for ( ; ; ) {
1363          // read variable lines
1364          std::streampos
1365             iLast = in.tellg();
1366 //          in >> Key >> Val;
1367          if (!get_next_line(line, in_file, LINE_Trim | LINE_AcceptEof))
1368             break;
1369          size_t
1370             iEquals = line.find('=');
1371 //          std::cout << boost::format("     ...k: '%s'  v: '%s'  ok? %i") % Key % Val % iEquals << std::endl;
1372          if ( !in.good() || iEquals == npos ) {
1373             // that's not a variable line.
1374             in.seekg(iLast, std::ios::beg);
1375             break;
1376          }
1377          Key = line.substr(0, iEquals);
1378          trim(Key);
1379          Val = line.substr(iEquals+1);
1380          trim(Val);
1381 //          std::cout << boost::format("     ...k: '%s'  v: '%s'") % Key % Val << std::endl;
1382 //          if ( CurrentInfo.Map.find(Key) != CurrentInfo.Map.end() )
1383 //             throw FMoldenParseError(str(format("Variable '%s' already assigned while parsing MO %i.") % Key % OrbRows.size()));
1384 //          CurrentInfo.Map[Key] = Val;
1385          str_lower_and_trim(Key);
1386          if (Key == "sym")
1387             pInfo->sDesc = Val;
1388          else if (Key == "ene")
1389             pInfo->fEnergy = read_fortran_float(Val);
1390          else if (Key == "spin") {
1391             pInfo->Spin = ORBSPIN_Unknown;
1392             str_lower_and_trim(Val);
1393             if (Val == "alpha")
1394                // this is also used for spin free orbitals!! Molden does not
1395                // distinguish them (which means that we can't read files with
1396                // all orbitals open-shell alpha, since the file format does not
1397                // allow for separating them from all-orbitals closed-shell)
1398                pInfo->Spin = ORBSPIN_Alpha;
1399 //                pInfo->Spin = ORBSPIN_Alpha; // guess from occupation.
1400             if (Val == "beta")
1401                pInfo->Spin = ORBSPIN_Beta;
1402          } else if (Key == "occup") {
1403             pInfo->fOcc = read_fortran_float(Val);
1404          } else {
1405             // not recognized. Should we warn?
1406 //             std::cout << boost::format("     ...unrecognized: '%s' = '%s' ") % Key % Val << std::endl;
1407          }
1408       }
1409 
1410       if ( !in.good() )
1411          throw FMoldenParseError("Expected data, but found '?' while parsing molden file.");
1412 
1413       for ( ; ; ) {
1414          // read data lines
1415          std::streampos
1416             iLast = in.tellg();
1417          size_t
1418             iBf;
1419          double
1420             fData;
1421          std::string
1422             sData;
1423          in >> iBf >> sData;
1424 
1425          iBf -= 1; // translate 1-based indexing to 0-based
1426          fData = read_fortran_float(sData);
1427          if ( !in.good() ) {
1428             // that's not a data line.
1429 //             in.clear(std::ios::failbit);
1430             // ^- interesting fact: stream::clear(bit) does not *clear* the given bit,
1431             //    but rather *sets* the stream state to the given bit. I guess, given the
1432             //    rest of the C++ stream API, this was to be expected. I guess the real mistake
1433             //    was trying to use it.
1434             in.clear();
1435             in.seekg(iLast, std::ios::beg);
1436 //             bool is_bad = in.bad();
1437 //             bool is_fail = in.fail();
1438 //             bool is_eol = in.eof();
1439 //             bool is_good = in.good();
1440 //             std::string bla;
1441 //             in >> bla;
1442             break;
1443          }
1444 //          std::cout << boost::format("     ...iAo: '%s'  sData: '%s'  fData: %12.6f") % iBf % sData % fData << std::endl;
1445          if ( iBf >= Row.size() )
1446             throw FMoldenParseError(str(format("AO index '%i' too large while parsing MO %i.") % (1+iBf) % OrbInfo.size()));
1447          iBf = Basis.m_InputToOutputOrder[iBf];
1448          if ( Row[iBf] != 0 )
1449             throw FMoldenParseError(str(format("AO component Orb[%i,%i] already assigned before.")  % (1+iBf) % OrbInfo.size()));
1450 
1451          if (m_MoldenSource == MOLDENSOURCE_Turbomole) {
1452             size_t l = pShellForBf[iBf]->iAngMom;
1453             fData *= std::sqrt(DoubleFactR(2*l-1));
1454          }
1455 
1456          Row[iBf] = fData;
1457       }
1458 //             std::cout << "Reading file. Input spherical? " << m_InputOrbitalsAreSpherical << " Input from Orca?" << (m_MoldenSource == MOLDENSOURCE_Orca) << std::endl;
1459       // re-order/convert and store the row
1460       if (m_InputOrbitalsAreSpherical) {
1461          assert(Row.size() == nFnSph);
1462 
1463          if (m_MoldenSource == MOLDENSOURCE_Orca) {
1464             // orca seems to use a definition of solid harmonics which differs by some -1 factors
1465             // for higher m. I am not sure if this is quite right...
1466             double *pOrb = &Row[0];
1467             for (size_t i = 0; i < ls.size(); ++ i) {
1468                int l = ls[i];
1469                assert_rt(l == pShellForBf[pOrb-&Row[0]]->iAngMom);
1470                if (l == 3) { // f
1471                   for (size_t ic = 5; ic < 7; ++ ic)
1472                      pOrb[ic] *= -1.;
1473                }
1474                if (l == 4) { // g        a b b c c d d e e
1475                   bool switch_this[9] = {0,0,0,0,0,1,1,1,1};
1476                   for (size_t ic = 0; ic < 9; ++ ic)
1477                      if (switch_this[ic])
1478                         pOrb[ic] *= -1.;
1479 //                   std::swap(pOrb[8],pOrb[7]);
1480 //                   std::swap(pOrb[6],pOrb[5]);
1481 //                   std::swap(pOrb[4],pOrb[3]);
1482 //                   std::swap(pOrb[2],pOrb[1]);
1483                   // doesn't work :(.
1484                   //   ...yes, apart from the phase thing, there is also a mystery sqrt(3) factor in the g functions (see OrcaRenormFactor())
1485                   //   It is running now. I don't even want to know.
1486                }
1487                pOrb += size_t(2*l + 1);
1488 
1489                if (l >= 5)
1490                   throw FMoldenParseError("Sorry, currently only up to 'g' functions are supported for import from ORCA. This input has a higher harmonic.");
1491             }
1492          }
1493 
1494          ctimp::Vec_ShMolden2ShMolpro(&Row[0], &ls[0], ls.size());
1495          pInfo->Orb = Row;
1496       } else {
1497          assert(Row.size() == nFnCart);
1498          ctimp::Vec_CaMolden2CaMolpro(&Row[0], &ls[0], ls.size());
1499          if (m_MoldenSource == MOLDENSOURCE_Orca || m_MoldenSource == MOLDENSOURCE_Molcas)
1500             // unlike Molpro, which always converts to Cartesian for export, these programs export spherical functions.
1501             // So if we encounter a Cartesian .molden file, it most likely means that the actual computational basis
1502             // was Cartesian.
1503             throw FMoldenParseError("Sorry, this program does not support Cartesian basis functions. Please use a spherical harmonic basis (e.g., def2-* or cc-pVnZ bases)");
1504 
1505 //          if (m_MoldenSource == MOLDENSOURCE_Orca) {
1506 //             std::cout << "RENORMALIZING STUFF!!" << std::endl;
1507 //             ctimp::Vec_Ca2Ca_XSqrtNorm(&Row[0], &ls[0], ls.size());
1508 //          }
1509          pInfo->Orb.resize(nFnSph);
1510 //          ctimp::Vec_Ca2Ca_XSqrtNorm(&Row[0], &ls[0], ls.size());
1511          ctimp::Vec_Ca2Sh(&pInfo->Orb[0], &Row[0], &ls[0], ls.size());
1512       }
1513 
1514       if (m_pLoadOptions && m_pLoadOptions->SkipVirtuals() && pInfo->fOcc < 0.00001)
1515          continue;
1516       OrbInfo.push_back(pInfo);
1517    }
1518 //    std::cout << boost::format(" ...read %i MOs from file.") % OrbInfo.size() << std::endl;
1519 
1520    FixOrbitalTypes();
1521 }
1522 
FixOrbitalTypes()1523 void FMoldenFile::FixOrbitalTypes()
1524 {
1525    FixOrbitalTypes1(OrbInfo, ORBSOURCE_Molden);
1526 }
1527 
1528 // void FMoldenFile::FixOrbitalTypes()
1529 // {
1530 //    size_t
1531 //       nAlpha = 0,
1532 //       nBeta = 0,
1533 //       nClosed = 0,
1534 //       nOther = 0;
1535 //    FOrbInfoList::iterator
1536 //       it;
1537 //    for (it = OrbInfo.begin(); it != OrbInfo.end(); ++ it) {
1538 //       switch((*it)->Spin) {
1539 //          case ORBSPIN_Alpha: nAlpha += 1; continue;
1540 //          case ORBSPIN_Beta: nBeta += 1; continue;
1541 //          case ORBSPIN_SpinFree: nClosed += 1; continue;
1542 //          default: nOther += 1; continue;
1543 //       }
1544 //    }
1545 //
1546 //    // there are *only* Alpha orbitals. Most likely we got something R(o)HFy.
1547 //    // Adjust spins based on occupation numbers.
1548 //    if (nBeta == 0 && nOther == 0 && nClosed == 0) {
1549 //       for (it = OrbInfo.begin(); it != OrbInfo.end(); ++ it) {
1550 //          double fOcc = (*it)->fOcc;
1551 //          if (fOcc == 2. || fOcc == 0.)
1552 //             (*it)->Spin = ORBSPIN_SpinFree;
1553 //       }
1554 //    }
1555 // }
1556 
1557 
1558 
1559 // struct FMoldenGaussShell
1560 // {
1561 //    ct::TArray<double>
1562 //       Co,  // nExp x nCo contraction matrix
1563 //       Exp; // nExp primitive exponents
1564 //    size_t
1565 //       nExp,
1566 //       nCo;
1567 //    int
1568 //       iAngMom,
1569 //       iCenter;
1570 // //    FMoldenGaussShell() {};
1571 // };
1572 
1573 
1574 template <class FSequence>
CmpArrays(FSequence const & A,FSequence const & B)1575 int CmpArrays(FSequence const &A, FSequence const &B)
1576 {
1577    size_t
1578       nA = A.size(),
1579       nB = B.size();
1580    if (nA < nB) return -1;
1581    if (nB < nA) return +1;
1582    typename FSequence::const_iterator
1583       itA = A.begin(),
1584       itB = B.begin();
1585    for ( ; itA != A.end(); ) {
1586       if (*itA < *itB) return -1;
1587       if (*itB < *itA) return +1;
1588       ++ itA;
1589       ++ itB;
1590    }
1591    return 0;
1592 }
1593 
1594 // compare two IR gauss shell objects, in order to find equivalent ones on different atoms.
1595 struct FGaussFnCmp
1596 {
operator ()orbital_file::molden_file::FGaussFnCmp1597    bool operator () (ct::FAtomShellPtr const &pA, ct::FAtomShellPtr const &pB) {
1598       if (pA->AngMom < pB->AngMom) return true;
1599       if (pB->AngMom < pA->AngMom) return false;
1600       int iCmp;
1601       iCmp = CmpArrays(pA->Exponents, pB->Exponents);
1602       if (iCmp < 0) return true;
1603       if (iCmp > 0) return false;
1604       iCmp = CmpArrays(pA->CoMatrix, pB->CoMatrix);
1605       if (iCmp < 0) return true;
1606       if (iCmp > 0) return false;
1607       return false;
1608    }
1609 };
1610 
1611 
Convert()1612 ct::FAtomSetPtr FMoldenAtomSet::Convert()
1613 {
1614    ct::FAtomSetPtr
1615       pAtoms = new ct::FAtomSet();
1616    // it's ordered in iCenterOut order.
1617    for (FMoldenAtomSet::iterator itAtom = this->begin(); itAtom != this->end(); ++ itAtom) {
1618       FMoldenAtom
1619          &At = itAtom->second;
1620       // note: technically we also have element names from input... (both labels and numbers).
1621       // I guess the numbers might be safer? Because programs might use strange pre/suffices or
1622       // spellings of elements...
1623       pAtoms->Atoms.push_back(ct::FAtom(At.vPos, ct::ElementNameFromNumber(At.iElement), "(external)"));
1624    }
1625    return pAtoms;
1626 }
1627 
Convert(FMoldenAtomSet * pMoldenAtomSet)1628 ct::FBasisSetPtr FMoldenBasis::Convert(FMoldenAtomSet *pMoldenAtomSet)
1629 {
1630    typedef std::set<ct::FAtomShellPtr, FGaussFnCmp>
1631       FAtomShellSet;
1632    FAtomShellSet
1633       AtomShells;
1634    std::vector<ct::FBasisShell>
1635       NewShells;
1636    NewShells.reserve(Shells.size());
1637 
1638    // Go through the molden shells. At this point they should already have been ordered
1639    // by center and angular momentum, so we just need to join them and fix the output
1640    // atom indices.
1641    size_t
1642       iShBeg = 0, iShEnd;
1643    for ( ; iShBeg != Shells.size(); iShBeg = iShEnd) {
1644       // find all shells on the same center with the same AngMom
1645       iShEnd = iShBeg + 1;
1646       while (iShEnd != Shells.size()
1647           && Shells[iShEnd].iCenter == Shells[iShBeg].iCenter
1648           && Shells[iShEnd].iAngMom == Shells[iShBeg].iAngMom)
1649          iShEnd += 1;
1650       // make a sorted list of all unique exponents. We here assume that exponents come
1651       // out at the same binary version of IEEE doubles if they are equivalent (should be fine)
1652       typedef std::map<double, size_t>
1653          FExpMap;
1654       FExpMap
1655          // maps negative of exponent to exponent index.
1656          // (negative such that we get the large ones first)
1657          ExpMap;
1658       for (size_t iSh = iShBeg; iSh != iShEnd; ++ iSh)
1659          for (size_t iExp = 0; iExp < Shells[iSh].Exp.size(); ++ iExp)
1660             ExpMap[-Shells[iSh].Exp[iExp]] = 0;
1661       // assign exponent indices and make a single ordered list of exponents
1662       TArray<double>
1663          Exps;
1664       size_t
1665          nExp = ExpMap.size();
1666       Exps.reserve(nExp);
1667       for (FExpMap::iterator it = ExpMap.begin(); it != ExpMap.end(); ++ it) {
1668          it->second = Exps.size();
1669          Exps.push_back(-it->first);
1670       }
1671       // make a constraction matrix.
1672       size_t
1673          nCo = iShEnd - iShBeg;
1674       TArray<double>
1675          Cos;
1676       Cos.resize(nExp * nCo);
1677       Cos.clear_data();
1678       for (size_t iSh = iShBeg; iSh != iShEnd; ++ iSh) {
1679          size_t
1680             iCo = iSh - iShBeg;
1681          FMoldenGaussShell
1682             &Sh = Shells[iSh];
1683          for (size_t iExp = 0; iExp < Sh.Exp.size(); ++ iExp) {
1684             // find the index of the current exponent in the output exponent list.
1685             FExpMap::const_iterator
1686                itExp = ExpMap.find(-Sh.Exp[iExp]);
1687             if (itExp == ExpMap.end())
1688                throw FMoldenParseError("Internal indexing error in re-assembly of imported basis shells.");
1689             // and add the current coefficient to it (technically an exponent might occur multiple
1690             // times, this is why we add instead of replacing)
1691             Cos[nExp * iCo + itExp->second] += Sh.Co[iExp];
1692          }
1693       }
1694 
1695       // make a CT/IR shell object
1696       ct::FAtomShellPtr
1697          pIrSh = new ct::FAtomShell(unsigned(Shells[iShBeg].iAngMom), &Exps[0], unsigned(Exps.size()), &Cos[0], unsigned(nCo), 0);
1698       // check if we had one of those things already before.
1699       std::pair<FAtomShellSet::iterator,bool>
1700          itInsResult = AtomShells.insert(pIrSh);
1701       if (itInsResult.second)
1702          // take the previous one.
1703          pIrSh = *itInsResult.first;
1704 
1705       // now make a single BasisShell object referring to the generally contracted atom shell.
1706       FMoldenAtom
1707          &At = pMoldenAtomSet->Get(Shells[iShBeg].iCenter);
1708       NewShells.push_back(ct::FBasisShell(At.vPos, At.iCenterOut, pIrSh));
1709 
1710    }
1711    return ct::FBasisSetPtr(new ct::FBasisSet(&NewShells[0], NewShells.size(), ct::BASIS_Orbital, "(external)"));
1712 }
1713 
1714 
LoadMoldenFile(std::string const & FileName,FLoadOptions const & LoadOptions)1715 FMolproXmlDataPtr LoadMoldenFile(std::string const &FileName, FLoadOptions const &LoadOptions)
1716 {
1717    FMolproXmlDataPtr
1718       pXmlData = new FMolproXmlData();
1719    std::ifstream
1720       in(FileName.c_str(), std::ifstream::in | std::ifstream::binary);
1721    // ^- @binary: the files are TEXT files, but otherwise we get line ending hazard,
1722    //    such that in windows we cannot read lines which are terminated by lf only, not cr lf.
1723    //    There may be better hacks around this (see
1724    //    http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf),
1725    //    but so far I have not been overly impressed with the suggestions. So hack for now: read as
1726    //    binary and discard any \r's occuring anywhere.
1727    //    I guess one of the savest things to do would be to load the entire file, check if there
1728    //    are any \n's at all, if yes: replace all \r's by nothing, if not: replace all \r's by \n's.
1729    //    But that is also not intensely pretty.
1730    if (!in.good())
1731       throw FMoldenParseError("File load failed.");
1732    FMoldenFile
1733       MoldenData(in, &LoadOptions);
1734 
1735    // convert to output format.
1736    pXmlData->pAtoms = MoldenData.Atoms.Convert();
1737    pXmlData->pBasisOrb = MoldenData.Basis.Convert(&MoldenData.Atoms);
1738    pXmlData->pOrbSet = new FOrbitalSet();
1739    FOrbitalSet::FOrbitalInfoList
1740       &OrbInfos = pXmlData->pOrbSet->OrbInfos;
1741    OrbInfos.reserve(MoldenData.OrbInfo.size());
1742    for (size_t iOrb = 0; iOrb < MoldenData.OrbInfo.size(); ++ iOrb) {
1743       OrbInfos.push_back(MoldenData.OrbInfo[iOrb]);
1744       FOrbitalInfoPtr &oi = OrbInfos.back();
1745       oi->pBasisSet = pXmlData->pBasisOrb;
1746 //       oi->sDesc = boost::str(format("%4i.%1i") % (1+iOrb) % oi.iSym);
1747 
1748    }
1749    return pXmlData;
1750 }
1751 
1752 
1753 } // namespace molden_file
1754 
1755 
1756 
1757 
1758 
1759 
1760 
1761 
1762 
1763 
1764 
1765 
1766 
1767 // int main(int argc, char *argv[])
1768 // {
1769 //    using namespace molpro_xml;
1770 //    FMolproXmlDataPtr pXmlData = LoadMolproXmlFile("test1.xml");
1771 //    TestOrbitals(pXmlData->pBasisOrb, pXmlData->pAtoms, pXmlData->pOrbSet);
1772 // };
1773 
1774 // int main(int argc, char *argv[])
1775 // {
1776 //    using namespace molpro_xml;
1777 //    pugi::xml_document doc;
1778 //    pugi::xml_parse_result result = doc.load_file("test1.xml");
1779 //
1780 //    std::cout << "XML Load result: " << result.description() << std::endl;
1781 //
1782 //
1783 //    pugi::xml_node MoleculeNode = doc.child("molpro").child("molecule");
1784 // //    EnumerateChildren(std::cout, MoleculeNode);
1785 //    pugi::xml_node GeometryNode = MoleculeNode.child("cml:atomArray");
1786 // //    EnumerateChildren(std::cout, GeometryNode);
1787 //    FAtomSetPtr
1788 //       pAtoms = LoadGeometry(GeometryNode);
1789 // //    std::cout << *pAtoms;
1790 //
1791 //    pugi::xml_node OrbBasisNode = MoleculeNode.find_child_by_attribute("basisSet", "id", "ORBITAL");
1792 // //    EnumerateChildren(std::cout, OrbBasisNode);
1793 //
1794 //    ct::FBasisSetPtr
1795 //       pBasisOrb = LoadBasisSet(OrbBasisNode, *pAtoms);
1796 // //    std::cout << *pBasisOrb;
1797 //
1798 //    pugi::xml_node
1799 //       OrbitalsNode = MoleculeNode.child("orbitals");
1800 // //    EnumerateChildren(std::cout, OrbitalsNode);
1801 //    FOrbitalSetPtr
1802 //       pOrbSet = LoadOrbitals(OrbitalsNode, pBasisOrb);
1803 //    // do I still need to normalize the functions?
1804 //
1805 // //    TestOrbitals(pBasisOrb, pAtoms, pOrbSet);
1806 //
1807 //
1808 //
1809 //
1810 // //    std::cout << doc.child("molpro").child("molecule").child("cml:symmetry").value() << std::endl;
1811 // //    for (pugi::xml_node tool = doc.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1812 // //    {
1813 // //       std::cout << "Tool " << tool.attribute("Filename").value();
1814 // //       std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1815 // //       std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1816 // //       std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1817 // //    }
1818 //
1819 //
1820 // //    std::cout << doc.child("mesh").child("molecule").child("cml:symmetry").value() << std::endl;
1821 // //    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1822 // //    {
1823 // //       std::cout << "Tool " << tool.attribute("Filename").value();
1824 // //       std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1825 // //       std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1826 // //       std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1827 // //    }
1828 // //    for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1829 // //    {
1830 // //       std::cout << "Tool " << tool.attribute("Filename").value();
1831 // //       std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1832 // //       std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1833 // //       std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1834 // //    }
1835 //
1836 // //    << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
1837 // }
1838 
1839 
LoadOrbitalFile(std::string const & FileName,FLoadOptions const & LoadOptions)1840    FMolproXmlDataPtr LoadOrbitalFile(std::string const &FileName, FLoadOptions const &LoadOptions)
1841    {
1842       try {
1843          if (ends_with(FileName, ".xml"))
1844             return molpro_xml::LoadMolproXmlFile(FileName, LoadOptions);
1845          std::string
1846             FirstLine;
1847          {
1848             std::ifstream
1849                in(FileName.c_str());
1850             if (!get_next_line(FirstLine, in, LINE_Trim | LINE_ToLower | LINE_AcceptEof))
1851                FirstLine = "!2empty44";
1852          }
1853          if (ends_with(FileName, ".molden") || ends_with(FileName, ".molden.input") || FirstLine == "[molden format]")
1854             return molden_file::LoadMoldenFile(FileName, LoadOptions);
1855 
1856          throw FFileTypeUnrecognizedError("Failed to recognize file extension.");
1857       } catch (FFileTypeUnrecognizedError &e) {
1858          throw FFileTypeUnrecognizedError(e.what(), FileName);
1859       } catch (FFileLoadError &e) {
1860          throw FFileLoadError(e.what(), FileName);
1861       }
1862    }
1863 
1864 
1865 } // namespace orbital_file
1866