1 /* Copyright (c) 2015 Gerald Knizia
2 *
3 * This file is part of the IboView program (see: http://www.iboview.org)
4 *
5 * IboView is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3.
8 *
9 * IboView is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
16 *
17 * Please see IboView documentation in README.txt for:
18 * -- A list of included external software and their licenses. The included
19 * external software's copyright is not touched by this agreement.
20 * -- Notes on re-distribution and contributions to/further development of
21 * the IboView software
22 */
23
24 #include "pugixml.hpp"
25 #include <iostream>
26 #include <fstream>
27 #include <algorithm>
28 #include <set>
29 #include <boost/format.hpp>
30 #include <boost/algorithm/string.hpp>
31 #include <boost/lexical_cast.hpp>
32 #include <sstream>
33 #include <cctype> // for 1-argument std::tolower.
34 #include "CtBasisShell.h"
35 #include "CtMatrix.h"
36 #include "CtConstants.h"
37 #include "IvOrbitalFile.h"
38
39 namespace ctimp {
40 void Vec_Ca2Sh(double *pSh, double const *pCa, size_t *ls, size_t N);
41 void Vec_CaMolden2CaMolpro(double *pOrb, size_t *ls, size_t N);
42 void Vec_ShMolden2ShMolpro(double *pOrb, size_t *ls, size_t N);
43 void Vec_Ca2Ca_XSqrtNorm(double *pOrb, size_t *ls, size_t N);
44 }
45
46 using boost::format;
47 using boost::algorithm::trim;
48 using boost::algorithm::trim_left;
49 using boost::algorithm::trim_right;
50
51
52 namespace orbital_file {
53
54 // class FXmlParseError : public FFileLoadError
55 // {
56 // public:
57 // typedef FFileLoadError
58 // FBase;
59 // // FXmlParseError(std::string const &What, std::string const &Where = "");
60 // };
61
FFileTypeUnrecognizedError(std::string const & What,std::string const & Where)62 FFileTypeUnrecognizedError::FFileTypeUnrecognizedError(std::string const &What, std::string const &Where)
63 : FFileLoadError(What, Where)
64 {}
65
66 class FUnexpectedFormatError : public FFileLoadError
67 {
68 public:
69 typedef FFileLoadError
70 FBase;
FUnexpectedFormatError(std::string const & Message)71 FUnexpectedFormatError(std::string const &Message)
72 : FBase("Unexpected format: '" + Message)
73 {}
74 };
75
76 class FVariableNotFoundError : public FFileLoadError
77 {
78 public:
79 typedef FFileLoadError
80 FBase;
FVariableNotFoundError(std::string const & VarName)81 FVariableNotFoundError( std::string const &VarName )
82 : FBase("Variable not found: '" + VarName + "'")
83 {}
84 };
85
FmtParseError(std::string const & What,std::string const & Where)86 static std::string FmtParseError(std::string const &What, std::string const &Where)
87 {
88 if (Where.empty())
89 return What;
90 else {
91 std::stringstream str;
92 str << boost::format("* ERROR while loading file '%s':\n%s") % Where % What << std::endl;
93 return str.str();
94 }
95 }
96
FFileLoadError(std::string const & What,std::string const & Where)97 FFileLoadError::FFileLoadError(std::string const &What, std::string const &Where)
98 : FBase(FmtParseError(What,Where))
99 {}
100
101
102 // FXmlParseError::FXmlParseError(std::string const &What, std::string const &Where)
103 // : FBase(FmtParseError(What,Where))
104 // {}
105
106
str_tolower(std::string & str)107 static void str_tolower(std::string &str) {
108 // std::transform(str.begin(), str.end(), str.begin(), std::tolower);
109 for (size_t i = 0; i != str.size(); ++ i)
110 str[i] = std::tolower(str[i]);
111 // ...
112 }
113
str_lower_and_trim(std::string & str)114 static void str_lower_and_trim(std::string &str)
115 {
116 str_tolower(str);
117 trim(str);
118 }
119
FOrbitalInfo()120 FOrbitalInfo::FOrbitalInfo()
121 {
122 fEnergy = 0.;
123 fOcc = 0.;
124 iSym = 0;
125 Spin = ORBSPIN_Unknown;
126 }
127
128 enum FLineReadFlags {
129 LINE_Trim = 0x01, // if set, output lines are trimmed on both left and right sides (otherwise only right)
130 LINE_ToLower = 0x02, // if set, output lines are converted to lower case
131 LINE_KeepEmpty = 0x04, // if set, empty lines are not skipped
132 LINE_AcceptEof = 0x08 // if set, EOF will not raise an exception, but just return 'false'.
133 };
134
get_next_line(std::string & line,std::istream & in,unsigned flags=0)135 static bool get_next_line(std::string &line, std::istream &in, unsigned flags=0) {
136 // read first line. skip empty lines.
137 line = "";
138 while (in.good() && line.empty()) {
139 if (!std::getline(in, line)) {
140 if (flags & LINE_AcceptEof)
141 return false;
142 else
143 throw FUnexpectedFormatError("Unexpected end of file.");
144 }
145 if (flags & LINE_ToLower)
146 str_tolower(line);
147 if (flags & LINE_Trim)
148 trim(line);
149 else
150 trim_right(line);
151 if (flags & LINE_KeepEmpty)
152 break;
153 }
154 return true;
155 }
156
ends_with(std::string const & str,std::string const & suffix)157 bool ends_with(std::string const &str, std::string const &suffix)
158 {
159 if (str.size() < suffix.size())
160 return false;
161 return str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
162 }
163
replace_fortran_exponents(std::string & line)164 void replace_fortran_exponents(std::string &line)
165 {
166 // replace fortran exponent format by rest-of-the-world exponent format.
167 boost::algorithm::replace_all(line, "D+", "e+");
168 boost::algorithm::replace_all(line, "D-", "e-");
169 boost::algorithm::replace_all(line, "d+", "e+");
170 boost::algorithm::replace_all(line, "d-", "e-");
171 }
172
try_read_fortran_float(double & f,std::string s)173 bool try_read_fortran_float(double &f, std::string s)
174 {
175 replace_fortran_exponents(s);
176 std::stringstream ss2(s);
177 std::string MaybeFloat;
178
179 // this is indeed the most compact C++ way I could come up with
180 // for checking if something "cleanly" converts to 'double'...
181 // (and even this is not really clean, because strtod uses "errno",
182 // which is not threadsafe).
183 ss2 >> MaybeFloat;
184 char const *pBeg = MaybeFloat.c_str();
185 char *pEnd;
186 f = std::strtod(pBeg, &pEnd);
187 if (pEnd - pBeg == ptrdiff_t(MaybeFloat.size()))
188 return true;
189 return false;
190 }
191
read_fortran_float(std::string const & s)192 double read_fortran_float(std::string const &s)
193 {
194 double f;
195 if (!try_read_fortran_float(f,s))
196 throw FUnexpectedFormatError("Expected float, but found '" + s + "'.");
197 return f;
198 }
199
200
201 enum FOrbitalSource {
202 ORBSOURCE_Xml,
203 ORBSOURCE_Molden
204 };
205
FixOrbitalTypes1(FOrbitalSet::FOrbitalInfoList & OrbList,FOrbitalSource Source)206 static void FixOrbitalTypes1(FOrbitalSet::FOrbitalInfoList &OrbList, FOrbitalSource Source)
207 {
208 size_t
209 nAlpha = 0,
210 nBeta = 0,
211 nClosed = 0,
212 nOther = 0,
213 nNonIntegerOcc = 0;
214 FOrbitalSet::FOrbitalInfoList::iterator
215 it;
216 for (it = OrbList.begin(); it != OrbList.end(); ++ it) {
217 switch((*it)->Spin) {
218 case ORBSPIN_Alpha: nAlpha += 1; continue;
219 case ORBSPIN_Beta: nBeta += 1; continue;
220 case ORBSPIN_SpinFree: nClosed += 1; continue;
221 default: nOther += 1; continue;
222 }
223 if (std::abs(std::modf((*it)->fOcc, 0) > 1e-8))
224 nNonIntegerOcc += 1;
225 }
226
227 bool RebuildWithOccupations = false;
228 if (Source == ORBSOURCE_Molden) {
229 // there are *only* Alpha orbitals. Most likely we got something R(o)HFy.
230 // Adjust spins based on occupation numbers.
231 if (nBeta == 0 && nOther == 0 && nClosed == 0)
232 RebuildWithOccupations = true;
233 } else if (Source == ORBSOURCE_Xml) {
234 // if there are no non-integer occupations, we probably also have a ROHFy wave function.
235 if (nNonIntegerOcc == 0)
236 RebuildWithOccupations = true;
237 }
238
239 if (RebuildWithOccupations) {
240 for (it = OrbList.begin(); it != OrbList.end(); ++ it) {
241 double fOcc = (*it)->fOcc;
242 if (fOcc == 2. || fOcc == 0.)
243 (*it)->Spin = ORBSPIN_SpinFree;
244 if (fOcc == 1.)
245 (*it)->Spin = ORBSPIN_Alpha;
246 }
247 }
248 }
249
250
251 namespace molpro_xml {
252
253 typedef ::orbital_file::FFileLoadError
254 FXmlParseError;
255
256 using boost::format;
257 using boost::str;
258 using ct::TArray;
259
EnumerateChildren(std::ostream & out,pugi::xml_node Node)260 void EnumerateChildren(std::ostream &out, pugi::xml_node Node)
261 {
262 out << "Node '" << Node.name() << "':\n";
263 for (pugi::xml_node_iterator it = Node.begin(); it != Node.end(); ++it)
264 {
265 out << "-> '" << it->name() << "' Attr: [";
266
267 for (pugi::xml_attribute_iterator ait = it->attributes_begin(); ait != it->attributes_end(); ++ait)
268 {
269 if (ait != it->attributes_begin())
270 out << " ";
271 out << ait->name() << "=" << ait->value();
272 }
273 out << "]";
274
275 out << std::endl;
276 };
277 };
278
279 // append value list contained in a xml node to output list.
280 template<class FType>
AppendToList(TArray<FType> & Out,std::string const & s)281 void AppendToList(TArray<FType> &Out, std::string const &s)
282 {
283 // std::cout << "rd list: " << s << std::endl;
284 std::stringstream
285 ss(s);
286 while (1) {
287 FType fValue;
288 ss >> fValue;
289 if (ss.fail())
290 break;
291 Out.push_back(fValue);
292 // std::cout << format(" Val: %10.5f\n") % fValue;
293 }
294 }
295
AppendAssociationList(TArray<int> & Out,std::string sText,std::string const StartHere)296 static void AppendAssociationList(TArray<int> &Out, std::string sText, std::string const StartHere)
297 {
298 // example strings:
299 // #xpointer(//molecule[@id='2AA7']//basisGroup[@id='1' or @id='2' or @id='3'])]
300 // #xpointer(//molecule[@id='2AA7']//atom[@id='1' or @id='3' or @id='5' or @id='7' or @id='9' or @id='11'])]
301 // skip everything until '//basisGroup['. Then erase "@id" and similar things.
302 sText.erase(0, sText.find(StartHere.c_str())+StartHere.size());
303 sText.erase(sText.find("]"));
304 boost::replace_all(sText, "@id='", "");
305 boost::replace_all(sText, " or", "");
306 boost::replace_all(sText, "'", "");
307 boost::replace_all(sText, "a", "");
308 // std::cout << format("sText: '%s'\n") % sText;
309 // should now just be a linear list of integers.
310 AppendToList<int>(Out, sText);
311 }
312
313 template<class T>
PrintArray(std::ostream & out,T const & Array,std::string const & Caption,std::string const & Fmt)314 static std::ostream &PrintArray(std::ostream &out, T const &Array, std::string const &Caption, std::string const &Fmt)
315 {
316 out << Caption << ": ";
317 for (typename T::const_iterator it = Array.begin(); it != Array.end(); ++ it) {
318 if (it != Array.begin())
319 out << " ";
320 out << format(Fmt) % (*it);
321 }
322 return out;
323 }
324
LoadBasisSet(pugi::xml_node BasisNode,ct::FAtomSet const & Atoms)325 ct::FBasisSetPtr LoadBasisSet(pugi::xml_node BasisNode, ct::FAtomSet const &Atoms)
326 {
327 std::string
328 Name = BasisNode.attribute("id").value(),
329 AngularType = BasisNode.attribute("angular").value();
330 // uint
331 // nShells = BasisNode.attribute("groups").as_int();
332 // std::cout << format("Loading basis '%s' with %i groups.\n") % Name % nShells;
333 // gna... need to convert to spherical anyway.
334 // if (AngularType != "spherical")
335 // throw FXmlParseError("Sorry, can only use spherical harmonic basis sets, not cartesians.");
336 // ct::FBasisSetPtr pBasis(new F
337 // pugi::xml_node Node
338
339 // load the shell information: only unique ones are stored.
340 typedef std::map<int, ct::FAtomShellPtr>
341 FIdToShellMap;
342 FIdToShellMap
343 // id -> object.
344 AtomShells;
345 for (pugi::xml_node GroupNode = BasisNode.child("basisGroup"); GroupNode; GroupNode = GroupNode.next_sibling("basisGroup"))
346 {
347 // EnumerateChildren(std::cout, GroupNode);
348 TArray<double>
349 Exp, Co;
350 int
351 AngMom = GroupNode.attribute("minL").as_int(),
352 nExp = GroupNode.attribute("primitives").as_int(),
353 nCo = GroupNode.attribute("contractions").as_int(),
354 iGroup = GroupNode.attribute("id").as_int();
355 if (AngMom != GroupNode.attribute("maxL").as_int())
356 throw FXmlParseError("basis contains minL != maxL shells. Cannot deal with those.");
357 Exp.reserve(nExp);
358 Co.reserve(nExp * nCo);
359 // read exponents.
360 AppendToList<double>(Exp, GroupNode.child_value("basisExponents"));
361 // read contraction coefficients.
362 int iCo = 1;
363 for (pugi::xml_node CoNode = GroupNode.child("basisContraction"); CoNode; CoNode = CoNode.next_sibling("basisContraction"))
364 {
365 // std::cout << "contraction " << iCo << "\n";
366 int iCoId = CoNode.attribute("id").as_int();
367 if (iCoId != iCo)
368 throw FXmlParseError(str(format("error while reading basis group %i:"
369 "expected contraction with id %i, but found %i.") % iGroup % iCo % iCoId));
370 AppendToList<double>(Co, CoNode.child_value());
371 iCo += 1;
372 }
373 if (nExp != (signed)Exp.size() || nExp*nCo != (signed)Co.size())
374 throw FXmlParseError(str(format("contraction %i: exponents or co-coefficient number does not match up.") % iGroup));
375 if (AtomShells.find(iGroup) != AtomShells.end())
376 throw FXmlParseError(str(format("multiple occurrences of group id %i") % iGroup));
377 // that's all we need. make a atom basis object
378 // and remember it.
379 ct::FAtomShellPtr
380 pFn(new ct::FAtomShell(AngMom, &Exp[0], nExp, &Co[0], nCo));
381 AtomShells[iGroup] = pFn;
382 }
383 // std::cout << format("read %i unique atom basis shells.\n") % AtomShells.size();
384
385
386 typedef std::multimap<int, int>
387 FAtomToShellIdMap;
388 typedef std::pair<FAtomToShellIdMap::iterator, FAtomToShellIdMap::iterator>
389 FAtomToShellRangeIt;
390 FAtomToShellIdMap
391 // atom index -> list of shell ids for the atom (stored in AtomShells)
392 AtomShellIds;
393
394 // now this part is... more complicated. we get a list of basis/atom
395 // associations, in the form of 'list of unique shells' to 'list of atoms'.
396 // However, this information is stored the form of cross-referenced xlinks...
397 // EnumerateChildren(std::cout, BasisNode.child("associ ation"));
398 for (pugi::xml_node AssocNode = BasisNode.child("association"); AssocNode; AssocNode = AssocNode.next_sibling("association"))
399 {
400 // I here hope that I can just skip some of this information, and assume
401 // that there is only one molecule and basis sets between different
402 // molecules are not mixed (technically they could).
403 std::string const
404 sBases = AssocNode.child("bases").attribute("xlink:href").value(),
405 sAtoms = AssocNode.child("atoms").attribute("xlink:href").value();
406 // example strings:
407 // #xpointer(//molecule[@id='2AA7']//basisGroup[@id='1' or @id='2' or @id='3'])]
408 // #xpointer(//molecule[@id='2AA7']//atom[@id='1' or @id='3' or @id='5' or @id='7' or @id='9' or @id='11'])]
409 // skip everything until '//basisGroup['. Then erase "@id" and similar things.
410 TArray<int>
411 iShells, iAtoms;
412 AppendAssociationList(iShells, sBases, "//basisGroup[");
413 AppendAssociationList(iAtoms, sAtoms, "//atom[");
414 // PrintArray(std::cout, iShells, "Shell ids", "%i") << " <-> ";
415 // PrintArray(std::cout, iAtoms, "Atom ids", "%i") << "\n";
416
417 // <bases xlink:type="locator" xlink:label="bases"
418 // xlink:href="#xpointer(//molecule[@id='436']//basisGroup[@id='1' or @id='2' or @id='3'])"/>
419 // <atoms xlink:label="atoms"
420 // xlink:href="#xpointer(//molecule[@id='436']//atom[@id='a1' or @id='a3' or @id='a5' or @id='a7' or @id='a9' or @id='a11'])"/>
421
422
423 // std::cout << ""
424 for (uint i = 0; i < iAtoms.size(); ++ i) {
425 for (uint j = 0; j < iShells.size(); ++ j)
426 AtomShellIds.insert(FAtomToShellIdMap::value_type(iAtoms[i], iShells[j]));
427 if (iAtoms[i] > (int)Atoms.size() || iAtoms[i] == 0)
428 throw FXmlParseError("encountered basis shell association for non-exitant atom.");
429 }
430 }
431
432 // in principle we now have all the required information to build the basis set (as long
433 // as all functions come in the order I imagine they do...)
434 // Go through the atoms and make a basis shell for each of its associated atom shells.
435 ct::FBasisSet::FBasisShellArray
436 Shells;
437 for (uint iAt = 0; iAt < Atoms.size(); ++ iAt) {
438 ct::FAtom const
439 &At = Atoms[iAt];
440 FAtomToShellRangeIt itEq = AtomShellIds.equal_range(iAt+1);
441 for (FAtomToShellIdMap::iterator it = itEq.first; it != itEq.second; ++ it) {
442 FIdToShellMap::iterator
443 itSh = AtomShells.find(it->second);
444 if (itSh == AtomShells.end())
445 throw FXmlParseError("encountered a non-extant atom shell assigned to an atom.");
446 ct::FAtomShellPtr
447 pFn = itSh->second;
448 Shells.push_back(ct::FBasisShell(At.vPos, iAt, pFn));
449 // FBasisShell(FVector3 vCenter_, int iCenter_, FAtomShellCptr pAs_)
450
451 }
452 }
453
454 // FBasisSet(FBasisShell const *pShells_, uint nShells_, FBasisContext Context_, std::string const &Name);
455 return ct::FBasisSetPtr(new ct::FBasisSet(&Shells[0], Shells.size(), ct::BASIS_Orbital, "(external)"));
456 }
457
458
LoadGeometry(pugi::xml_node GeometryNode,double Factor=1./ct::ToAng)459 FAtomSetPtr LoadGeometry(pugi::xml_node GeometryNode, double Factor = 1./ct::ToAng)
460 {
461 FAtomSetPtr
462 pAtoms = new ct::FAtomSet();
463 int
464 iAtom = 1; // starts at 1.
465 for (pugi::xml_node_iterator it = GeometryNode.begin(); it != GeometryNode.end(); ++it, ++iAtom)
466 {
467 std::string
468 id = it->attribute("id").value(),
469 element = it->attribute("elementType").value();
470 std::string
471 expected_id = "a" + boost::lexical_cast<std::string>(iAtom);
472 if (id != expected_id)
473 throw FXmlParseError("problem reading atoms: expected atom id '" + expected_id + "', but found '" + id + "'.");
474 double
475 x = boost::lexical_cast<double>(it->attribute("x3").value()),
476 y = boost::lexical_cast<double>(it->attribute("y3").value()),
477 z = boost::lexical_cast<double>(it->attribute("z3").value());
478 pAtoms->Atoms.push_back(ct::FAtom(Factor * ct::FVector3(x,y,z), element, "(external)"));
479
480 // std::cout << format("!AT id = %4s elem = %4s x = %10.5f y = %10.5f z = %10.5f") % id % element % x % y % z;
481 // std::cout << std::endl;
482 }
483 // std::cout << *pAtoms;
484 return pAtoms;
485 }
486
487
488 template<class T>
ReadArrayT(typename ct::TArray<T> & Out,std::istream & str)489 void ReadArrayT(typename ct::TArray<T> &Out, std::istream &str)
490 {
491 for (;;) {
492 T x;
493 str >> x;
494 if (str.good())
495 Out.push_back(x);
496 else
497 break;
498 }
499 }
500
501
LoadOrbitals(pugi::xml_node OrbitalsNode,ct::FBasisSetPtr pBasisOrb,FLoadOptions const & LoadOptions)502 FOrbitalSetPtr LoadOrbitals(pugi::xml_node OrbitalsNode, ct::FBasisSetPtr pBasisOrb, FLoadOptions const &LoadOptions)
503 {
504 FOrbitalSetPtr
505 pOrbSet = new FOrbitalSet();
506 uint
507 iOrb = 0;
508 uint
509 OrbsForSym[8] = {0};
510 TArray<size_t>
511 ls;
512 unsigned
513 nBfSph = pBasisOrb->nFn(),
514 nBfCa = 0; // expected number of cartesian basis functions.
515 bool
516 ConvertToSpherical = std::string(OrbitalsNode.attribute("angular").value()) != std::string("spherical");
517 // ^- that's the default setting...
518 // if (ConvertToSpherical)
519 // std::cout << "Expecting Cartesian orbital input.\n";
520 // else {
521 // std::cout << "Expecting spherical orbital input.\n";
522 // }
523 for (uint iSh = 0; iSh < pBasisOrb->Shells.size(); ++ iSh)
524 for (uint iCo = 0; iCo < pBasisOrb->Shells[iSh].pAs->nCo(); ++ iCo) {
525 unsigned l = pBasisOrb->Shells[iSh].pAs->AngMom;
526 ls.push_back(l);
527 // def nCartY(l): return ((l+1)*(l+2))/2
528 nBfCa += ((l+1)*(l+2))/2;
529 // std::cout << format("iSh = %i l = %i nBfCa = %i\n") % iSh % l % nBfCa;
530 }
531 for (pugi::xml_node OrbNode = OrbitalsNode.child("orbital"); OrbNode; ++iOrb, OrbNode = OrbNode.next_sibling("orbital"))
532 {
533 // EnumerateChildren(std::cout, OrbNode);
534 FOrbitalInfoPtr
535 pInfo = new FOrbitalInfo;
536 pInfo->fEnergy = boost::lexical_cast<double>(OrbNode.attribute("energy").value());
537 pInfo->fOcc = boost::lexical_cast<double>(OrbNode.attribute("occupation").value());
538 pInfo->iSym = boost::lexical_cast<int>(OrbNode.attribute("symmetryID").value());
539 pInfo->pBasisSet = pBasisOrb;
540 pInfo->Spin = ORBSPIN_SpinFree;
541 if (pInfo->fOcc < 0.00001 && LoadOptions.SkipVirtuals())
542 continue;
543 // ^- why not break? in symmetry calculations there may be additional
544 // occupied orbitals later.
545 // std::cout << format("ORB: E[%3i] = %12.6f H Occ = %12.6f Sym = %1i\n") % iOrb % fEnergy % fOcc % iSym;
546 // std::cout << OrbNode.child_value() << std::endl;
547 if (pInfo->iSym <= 8) {
548 OrbsForSym[pInfo->iSym-1] += 1;
549 // pInfo->sDesc = boost::str(format("%4i.%1i [E=%8.4f O= %6.4f]") % OrbsForSym[pInfo->iSym-1] % pInfo->iSym % pInfo->fEnergy % pInfo->fOcc);
550 pInfo->sDesc = boost::str(format("%4i.%1i") % OrbsForSym[pInfo->iSym-1] % pInfo->iSym);
551 } else {
552 // pInfo->sDesc = boost::str(format("%4i.? [E=%8.4f O= %6.4f]") % iOrb % pInfo->fEnergy % pInfo->fOcc);
553 pInfo->sDesc = boost::str(format("%4i.?") % iOrb);
554 }
555
556 std::stringstream
557 str(OrbNode.child_value());
558 TArray<double>
559 OrbCa;
560 ReadArrayT(OrbCa, str);
561 // std::string s = boost::str(boost::format("import orbital %i: expected %i cartesian components, but got %i.") % (iOrb+1) % nBfCa % OrbCa.size());
562 pInfo->Orb.resize(nBfSph, 777.777);
563 if (ConvertToSpherical) {
564 // transform from cartesian back to spherical. (...hopefully...)
565 if (OrbCa.size() != nBfCa)
566 throw FXmlParseError(boost::str(boost::format("import orbital %i: expected %i cartesian components, but got %i.") % (iOrb+1) % nBfCa % OrbCa.size()));
567 ctimp::Vec_Ca2Sh(&pInfo->Orb[0], &OrbCa[0], &ls[0], ls.size());
568 // throw FXmlParseError("Cartesian basis input currently not supported. Use {put,xml,...; keepspherical}");
569 } else {
570 // hopefully already is spherical...
571 if (OrbCa.size() != nBfSph)
572 throw FXmlParseError(boost::str(boost::format("import orbital %i: expected %i spherical components, but got %i.") % (iOrb+1) % nBfSph % OrbCa.size()));
573 pInfo->Orb = OrbCa;
574 }
575 pOrbSet->OrbInfos.push_back(pInfo);
576 // std::cout << pInfo->Desc() << std::endl;
577 // std::cout << format(" nCoeff: %i (of %i)") % pInfo->Orb.size() % pBasisOrb->nFn() << std::endl;
578 }
579 FixOrbitalTypes1(pOrbSet->OrbInfos, ORBSOURCE_Xml);
580 return pOrbSet;
581 }
582
TestOrbitals(ct::FBasisSetPtr pBasis,FAtomSetPtr pAtoms,FOrbitalSetPtr pOrbSet)583 int TestOrbitals(ct::FBasisSetPtr pBasis, FAtomSetPtr pAtoms, FOrbitalSetPtr pOrbSet)
584 {
585 using namespace ct;
586
587 ct::FMemoryStack2 Mem(200000000);
588 uint
589 nAo = pBasis->nFn(),
590 nOrb = pOrbSet->OrbInfos.size();
591 FStackMatrix
592 S(nAo, nAo, &Mem),
593 COrb(nAo, nOrb, &Mem);
594 pAtoms->MakeOverlapMatrix(S, *pBasis, *pBasis, Mem);
595 // S.Print(std::cout, "AO overlap");
596 for (uint iOrb = 0; iOrb < pOrbSet->OrbInfos.size(); ++ iOrb) {
597 FOrbitalInfo
598 &oi = *pOrbSet->OrbInfos[iOrb];
599 assert(nAo == oi.Orb.size());
600 for (uint iBf = 0; iBf < nAo; ++ iBf)
601 COrb(iBf, iOrb) = oi.Orb[iBf];
602 }
603 // COrb.Print(std::cout, "MO coeffs");
604 {
605 double rmsd = 0.;
606 FStackMatrix
607 OrbOvl(nOrb, nOrb, &Mem);
608 ChainMxm(OrbOvl, Transpose(COrb), S, COrb, Mem);
609 for (size_t i = 0; i < OrbOvl.nRows; ++ i)
610 for (size_t j = 0; j < OrbOvl.nCols; ++ j) {
611 double f = OrbOvl(i,j) - ((i==j)? 1. : 0.);
612 rmsd += f*f;
613 }
614 rmsd = std::sqrt(rmsd);
615 OrbOvl.Print(std::cout, "Orbital MO overlap");
616 std::cout << boost::format(" RMSD from orthogonality: %8.2e") % rmsd << std::endl;
617 }
618 return 0;
619 }
620
LoadMolproXmlFile(std::string const & FileName,FLoadOptions const & LoadOptions)621 FMolproXmlDataPtr LoadMolproXmlFile(std::string const &FileName, FLoadOptions const &LoadOptions)
622 {
623 // try {
624 FMolproXmlDataPtr
625 pXmlData = new FMolproXmlData();
626 pugi::xml_document doc;
627 pugi::xml_parse_result result = doc.load_file(FileName.c_str());
628 if (result) {
629 // std::cout << format("* read '%s'.") % FileName << std::endl;
630 } else {
631 throw FXmlParseError(str(format("XML load result: '%s'") % result.description()));
632 // return FMolproXmlDataPtr(0);
633 }
634
635 pugi::xml_node MoleculeNode = doc.child("molpro").child("molecule");
636 pugi::xml_node GeometryNode = MoleculeNode.child("cml:atomArray");
637 if (!GeometryNode) {
638 // see if we have the geometry as a sub node in a <cml:molecule> node instead
639 // of directly under <molecule> as in previous versions of Molpro master
640 pugi::xml_node CmlMoleculeNode = MoleculeNode.child("cml:molecule");
641 if (CmlMoleculeNode)
642 GeometryNode = CmlMoleculeNode.child("cml:atomArray");
643 }
644 if (!GeometryNode)
645 throw FXmlParseError("Failed to find <cml:atomArray> node.");
646 pXmlData->pAtoms = LoadGeometry(GeometryNode);
647
648 pugi::xml_node OrbBasisNode = MoleculeNode.find_child_by_attribute("basisSet", "id", "ORBITAL");
649 if (!OrbBasisNode)
650 throw FXmlParseError("Failed to find orbital basis node (<basisSet id='ORBITAL' ...>).");
651
652 pXmlData->pBasisOrb = LoadBasisSet(OrbBasisNode, *pXmlData->pAtoms);
653 // std::cout << *pBasisOrb;
654
655 pugi::xml_node OrbitalsNode = MoleculeNode.child("orbitals");
656 if (!OrbitalsNode)
657 throw FXmlParseError("Failed to find orbital node (<orbitals ...>).");
658 pXmlData->pOrbSet = LoadOrbitals(OrbitalsNode, pXmlData->pBasisOrb, LoadOptions);
659 return pXmlData;
660 // } catch(FXmlParseError &e) {
661 // throw FXmlParseError(e.what(), FileName);
662 // }
663 return 0;
664 }
665
666
667 } // namespace molpro_xml
668
669
670 namespace molden_file {
671 typedef ::orbital_file::FFileLoadError
672 FMoldenParseError;
673
674 using boost::format;
675 using boost::str;
676 using ct::TArray;
677
678 // the molden files made by different programs look almost the same, but they
679 // are all slightly incompatible due to various different bugs.
680 enum FMoldenSource {
681 MOLDENSOURCE_Molpro,
682 // Molcas thinks the .molden format should have different headings than the Molden spec. E.g. '(AU)' tags instead of 'AU' tags.
683 MOLDENSOURCE_Molcas,
684 // Turbomole uses a different basis function normalization than Molpro and Molcas
685 MOLDENSOURCE_Turbomole,
686 // Orca again uses a different normalization than either Molpro/Molcas or Turbomole
687 MOLDENSOURCE_Orca
688 };
689
690
691 struct FMoldenAtom
692 {
693 ct::FVector3
694 vPos; // stored in atomic units
695 std::string
696 Label; // normally corresponds to element name. But might contain suffices
697 int
698 // I don't quite get it, but center numbers are explicitly stored.
699 // I assume therefore they can be out of order, or with holes, although
700 // in the examples I've seen they never were.
701 iCenter,
702 iElement; // 1: H, 2: He, ...
703 int
704 // index this atom will get in the loaded file. We require in various places
705 // that atoms are numbered continously, starting at 0, and that the basis sets
706 // are given in the same order as the atoms. We thus might have to shovel things
707 // around...
708 iCenterOut;
709 };
710
711 // maps center indices to atom objects. Note comments on iCenter
712 // in FMoldenAtom.
713 struct FMoldenAtomSet : public std::map<int, FMoldenAtom>
714 {
operator []orbital_file::molden_file::FMoldenAtomSet715 FMoldenAtom &operator [] (int iCenter) {
716 return (*this)[iCenter];
717 }
718 FMoldenAtom const &operator [] (int iCenter) const;
719
sizeorbital_file::molden_file::FMoldenAtomSet720 size_t size() const { return (*this).size(); }
721 void AddAtom(FMoldenAtom const &A);
722
723 FMoldenAtom &Get(int iCenter);
724
725 ct::FAtomSetPtr Convert();
726 };
727
728
729 struct FMoldenGaussShell
730 {
731 ct::TArray<double>
732 Co, // nExp x nCo contraction matrix
733 Exp; // nExp primitive exponents
734 size_t
735 nExp,
736 nCo;
737 int
738 iAngMom,
739 iCenter;
740 // FMoldenGaussShell() {};
741 size_t
742 // position of the function in the original basis order. Output basis
743 // will be ordered by center index and angular momentum.
744 iOffsetIn;
745 bool
746 Spherical;
747 inline size_t nFn() const;
748 inline size_t nFnSph() const;
749
750 bool operator < (FMoldenGaussShell const &other) const;
751 };
752
753 struct FMoldenBasis
754 {
755 typedef std::vector<FMoldenGaussShell>
756 FGaussFnList;
757 FGaussFnList
758 Shells;
759 size_t nFn() const; // as declared in actual shell objects
760 size_t nFnSph() const; // as if all shells were declared as spherical
761 size_t nFnCart() const; // as if all shells were declared as cartesian
762
763 // convert Molden's basis format into our own by rebuilding the general contractions and
764 // joining equivalent shell functions into common FBasisFn objects and deferred FBasisShell objects.
765 FBasisSetPtr Convert(FMoldenAtomSet *pMoldenAtomSet);
766
767 TArray<size_t>
768 // iOrderedFn = m_InputToOutputOrder[iInputFn].
769 // Make by this->Sort(). This takes care only of the center/AM re-alignment.
770 // individual function orders are dealt with separately.
771 m_InputToOutputOrder;
772 // sort basis functions by center index and angular momentum.
773 void Sort();
774
Sortedorbital_file::molden_file::FMoldenBasis775 bool Sorted() const { return m_IsSorted; };
FMoldenBasisorbital_file::molden_file::FMoldenBasis776 FMoldenBasis() : m_IsSorted(false) {}
777 private:
778 bool m_IsSorted;
779 };
780
781 // // that's just the same thing... as FVariableSet. Apparently a random list of
782 // // stuff we can assign to various properties associated with an orbital. Not
783 // // sure if they are supposed to be predefined. Molpro stores stuff like 'Ene'
784 // // (orbital energy), 'Occup' (orbital occupancy), or 'Spin' (for unknown reason
785 // // ``Alpha'' for closed orbitals).
786 // struct FMoldenOrbInfo : public FVariableSet
787 // {
788 // FScalarData
789 // Coeffs;
790 // };
791
792
793 struct FMoldenFile
794 {
795 FVariableSet
796 Vars;
797 FMoldenAtomSet
798 Atoms;
799 FMoldenBasis
800 Basis;
801
802 typedef std::vector<FOrbitalInfoPtr>
803 FOrbInfoList;
804 FOrbInfoList
805 // information provided in molden file on the individual molecular orbitals.
806 // There is one object for each MO in Orbs, even if no data is associated with
807 // the orbital.
808 OrbInfo;
809 // size_t
810 // // number of *spherical* basis functions and of molecular orbitals
811 // nAo, nOrb;
812 // ct::TArray<double>
813 // // nAo x nOrb matrix.
814 // Orbs;
815
816 bool
817 m_InputOrbitalsAreSpherical,
818 m_InputOrbitalsAreSphericalForG,
819 m_AllowLargerThanG;
820 FMoldenSource
821 m_MoldenSource;
822 FLoadOptions const
823 *m_pLoadOptions;
824
825 explicit FMoldenFile(std::istream &in, FLoadOptions const *pLoadOptions_);
826 private:
827 void SkipCurrentSection(std::istream &in);
828 void AddAtom(FMoldenAtom const &A);
829 void ReadAtomsSection(std::istream &in, double fCoordFactor);
830 void ReadBasisSection(std::istream &in);
831 void ReadMoSection(std::istream &in);
832 void FixOrbitalTypes();
833 void FixBasis(); // fix spherical declarations and basis function order.
834 };
835
836 // number of cartesians components with angular momentum == l
nCartY(int l)837 static size_t nCartY(int l) { return static_cast<size_t>((l+1)*(l+2)/2); }
838 // number of solid harmonic components with angular momentum == l
nSlmY(int l)839 static size_t nSlmY(int l) { return static_cast<size_t>(2*l+1); }
840
841 static size_t const npos = std::string::npos;
842
nFn() const843 size_t FMoldenGaussShell::nFn() const
844 {
845 if (Spherical)
846 return nSlmY(iAngMom);
847 else
848 return nCartY(iAngMom);
849 }
850
nFnSph() const851 size_t FMoldenGaussShell::nFnSph() const {
852 return nSlmY(iAngMom);
853 }
854
nFnSph() const855 size_t FMoldenBasis::nFnSph() const
856 {
857 size_t
858 r = 0;
859 FGaussFnList::const_iterator
860 it;
861 _for_each(it, Shells)
862 r += nSlmY(it->iAngMom);
863 return r;
864 }
865
nFnCart() const866 size_t FMoldenBasis::nFnCart() const
867 {
868 size_t
869 r = 0;
870 FGaussFnList::const_iterator
871 it;
872 _for_each(it, Shells)
873 r += nCartY(it->iAngMom);
874 return r;
875 }
876
nFn() const877 size_t FMoldenBasis::nFn() const
878 {
879 size_t
880 r = 0;
881 FGaussFnList::const_iterator
882 it;
883 _for_each(it, Shells)
884 r += it->nFn();
885 return r;
886 }
887
Get(int iCenter)888 FMoldenAtom &FMoldenAtomSet::Get(int iCenter) {
889 iterator
890 itAtom = this->find(iCenter);
891 if ( itAtom == this->end() )
892 throw std::runtime_error(str(boost::format("atomic center not found: %i") % iCenter));
893 return itAtom->second;
894 }
895
operator [](int iCenter) const896 FMoldenAtom const &FMoldenAtomSet::operator [] (int iCenter) const {
897 const_iterator
898 itAtom = this->find(iCenter);
899 if ( itAtom == this->end() )
900 throw std::runtime_error(str(boost::format("atomic center not found: %i") % iCenter));
901 return itAtom->second;
902 }
903
FMoldenFile(std::istream & in,FLoadOptions const * pLoadOptions_)904 FMoldenFile::FMoldenFile(std::istream &in, FLoadOptions const *pLoadOptions_)
905 : m_InputOrbitalsAreSpherical(false), // that's the default
906 m_InputOrbitalsAreSphericalForG(false), // that's also a default---independent of the d and f functions.
907 m_AllowLargerThanG(false),
908 m_MoldenSource(MOLDENSOURCE_Molpro),
909 m_pLoadOptions(pLoadOptions_)
910 {
911 std::string
912 line;
913 get_next_line(line, in, LINE_Trim | LINE_ToLower);
914 if (!in.good() || line != "[molden format]")
915 throw FUnexpectedFormatError("expected '[Molden Format]'");
916 for (; in.good(); ) {
917 // read section header
918 if (!get_next_line(line, in, LINE_Trim | LINE_ToLower | LINE_AcceptEof))
919 break; // the end.
920 if (line == "[molpro variables]") {
921 SkipCurrentSection(in);
922 } else if (line == "[title]") {
923 // FIXME: Turbomole molden files have a "[Title]" section and Molpro
924 // files don't. Molcas files also don't. Until recently Molpro files
925 // also "[Molpro variables]", but these were removed at some point in
926 // late 2014 (and, apparently, this was also backported to 2012.1...).
927 // Not good! Need to find a better way of telling the programs apart.
928 std::string
929 title;
930 get_next_line(title, in, LINE_Trim | LINE_ToLower | LINE_KeepEmpty);
931 // files generated by orca_2mkl xxx -molden have a tag saying
932 // "created by orca_2mkl" in the title. Very helpful! So we now apply some heuristics...
933 // if there is a title and it says "Hi, I'm from Orca" we apply orca hacks, otherwise
934 // we apply Turbomole hacks.
935 if (std::string::npos != title.find("created by orca_2mkl")) {
936 m_MoldenSource = MOLDENSOURCE_Orca;
937 // std::cout << "Molden file format: Orca" << std::endl;
938 } else if (std::string::npos != title.find("molden2aim,")) {
939 // dunno... let's go with Molpro? ... yes, that seems to work.
940 m_MoldenSource = MOLDENSOURCE_Molpro;
941 } else {
942 // note: at least in my case the title is empty for files made by turbomole.
943 // but I wouldn't bet on it staying that way.
944 m_MoldenSource = MOLDENSOURCE_Turbomole;
945 // std::cout << "Molden file format: Turbomole (title='" << title << "')" << std::endl;
946 }
947 SkipCurrentSection(in);
948 } else if (line == "[atoms] angs") {
949 // std::cout << "Read atoms: ANGS" << std::endl;
950 ReadAtomsSection(in, 1./ct::ToAng);
951 } else if (line == "[atoms] au" || line == "[atoms] (au)") { // second variant used off-spec by Molcas.
952 // std::cout << "Read atoms: AU" << std::endl;
953 ReadAtomsSection(in, 1.);
954 } else if (line == "[gto] (au)") {
955 // Molcas files can be identified by the parenthesis around "au".
956 // There are not supposed to be any according to spec. Especially not after "[gto]".
957 m_MoldenSource = MOLDENSOURCE_Molcas;
958 ReadBasisSection(in);
959 } else if (line == "[gto]") {
960 ReadBasisSection(in);
961 } else if (line == "[5d]" || line == "[5d7f]" || line == "[7f]") {
962 m_InputOrbitalsAreSpherical = true;
963 // ^- WARNING: Orca emits these AFTER emitting the basis.
964 // That means that we can only fix the basis right before reading the MOs,
965 // not after reading the basis as I'd do otherwise.
966 } else if (line == "[5d10f]]") {
967 throw FMoldenParseError("Mixed cartesian and spherical input functions are not supported. Please use *either* cartesian *or* spherical functions, but not both!");
968 } else if (line == "[9g]") {
969 m_InputOrbitalsAreSphericalForG = true;
970 } else if (line == "[allow_larger_g]") {
971 // Note: This is not an "officially" sactioned tag. But there is little reason
972 // to prevent this if both IboView and the exporting program can do it, only
973 // because Molden can't.
974 // With this we will allow larger-than-g functions to be
975 // specified. For the spherical versions the order is not very
976 // involved and most likely right. For the cartesians we assume
977 // Molpro order (which for d and g functions miraculously seems
978 // to be equal to Molden order).
979 m_AllowLargerThanG = true;
980 } else if (line == "[mo]") {
981 if (m_InputOrbitalsAreSphericalForG && !m_InputOrbitalsAreSpherical)
982 throw FMoldenParseError("Input specifies spherical g functions, but does not specify spherical lower functions. Mixed cartesian and spherical functions are not supported by this loader.");
983 // (^- note: can't check the other error variant here (non-spherical g
984 // and spherical d/f), since the input might not have any g functions,
985 // thus there would not be any need to specify them as spherical)
986 FixBasis();
987 ReadMoSection(in);
988 } else if (line.find('[') == npos) {
989 throw FMoldenParseError("Expected section header, but found '" + line + "'");
990 } else
991 SkipCurrentSection(in);
992 }
993 }
994
995
IsSectionStart(std::string const & line)996 static bool IsSectionStart(std::string const &line)
997 {
998 size_t iopen = line.find('[');
999 if (iopen != npos && line.find(']',iopen+1) != npos)
1000 return true;
1001 return false;
1002 }
1003
1004
SkipCurrentSection(std::istream & in)1005 void FMoldenFile::SkipCurrentSection(std::istream &in)
1006 {
1007 // skipp content of current section. That is, everything until the next section
1008 // header (which should be indicated by a '[..]' in the line)
1009 std::string
1010 line;
1011 while (in.good()) {
1012 std::streampos pos = in.tellg();
1013 if (!get_next_line(line, in, LINE_AcceptEof))
1014 return;
1015 if (IsSectionStart(line)) {
1016 // rewind to start of line.
1017 in.seekg(pos);
1018 return;
1019 }
1020 }
1021 }
1022
AddAtom(FMoldenAtom const & A)1023 void FMoldenFile::AddAtom(FMoldenAtom const &A)
1024 {
1025 FMoldenAtomSet::iterator
1026 itAtom = Atoms.find(A.iCenter);
1027 if ( itAtom != Atoms.end() )
1028 throw FMoldenParseError(str(boost::format("center %i already assigned to another atom") % A.iCenter));
1029 Atoms.insert(itAtom, FMoldenAtomSet::value_type(A.iCenter,A));
1030 }
1031
ReadAtomsSection(std::istream & in,double fCoordFactor)1032 void FMoldenFile::ReadAtomsSection(std::istream &in, double fCoordFactor)
1033 {
1034 std::string
1035 line;
1036 while (in.good()) {
1037 std::streampos pos = in.tellg();
1038 if (!get_next_line(line, in, LINE_Trim | LINE_AcceptEof))
1039 break;
1040 if (IsSectionStart(line)) {
1041 // rewind to start of line.
1042 in.seekg(pos);
1043 break;
1044 }
1045 // expected format:
1046 // C 1 6 0.0000000000 0.0000000000 0.0000000000
1047 // ^- charge?
1048 // ^- center index (1 based)
1049 // ^- label?
1050 FMoldenAtom
1051 At;
1052 std::stringstream ss(line);
1053 ss >> At.Label >> At.iCenter >> At.iElement >> At.vPos[0] >> At.vPos[1] >> At.vPos[2];
1054 At.vPos *= fCoordFactor;
1055 // bool is_bad = ss.bad();
1056 // bool is_fail = ss.fail();
1057 // bool is_eol = ss.eof();
1058 // bool is_good = ss.good();
1059 if (ss.bad() || ss.fail())
1060 throw FMoldenParseError("Failed to understand atom declaration '" + line + "'");
1061 AddAtom(At);
1062 }
1063
1064 // make a new set of indices which is ordered continously and comes without holes.
1065 size_t
1066 iCenterOut = 0;
1067 for (FMoldenAtomSet::iterator itAtom = Atoms.begin(); itAtom != Atoms.end(); ++ itAtom) {
1068 itAtom->second.iCenterOut = iCenterOut;
1069 ++ iCenterOut;
1070 }
1071 }
1072
iAngMomFromStr(std::string const & s_)1073 int iAngMomFromStr(std::string const &s_) {
1074 std::string s(s_);
1075 str_tolower(s);
1076 if (s == "sp")
1077 throw FMoldenParseError("This parser does not supported mixed 'sp' shells. Please use a different basis set (e.g., def2 sets).");
1078 if (s.size() != 1)
1079 throw FMoldenParseError("Expected angular momentum, but found '" + s + "'.");
1080 static char const *pAngMomNames = "spdfghikl";
1081 for (int i = 0; size_t(i) < sizeof(pAngMomNames)/sizeof(pAngMomNames[0]); ++ i)
1082 if (pAngMomNames[i] == s[0])
1083 return i;
1084 throw FMoldenParseError("'" + s + "' is not a recognized angular momentum.");
1085 }
1086
OrcaRenormFactor(double fExp,unsigned l)1087 static double OrcaRenormFactor(double fExp, unsigned l)
1088 {
1089 // note: this differs from RawGaussNorm(fExp,l) by a missing DoubleFactR(2*l-1) inside the square root.
1090 // I guess they just use a different solid harmonics definition.
1091 // return pow(M_PI/(2*fExp),.75) * sqrt(1./pow(4.*fExp,l));
1092
1093 double f = pow(M_PI/(2*fExp),.75) * sqrt(1./pow(4.*fExp,l));
1094 if (l == 4) return f * std::sqrt(3); // .oO(what.)
1095 return f;
1096 }
1097
1098
ReadBasisSection(std::istream & in)1099 void FMoldenFile::ReadBasisSection(std::istream &in)
1100 {
1101 Basis.Shells.clear();
1102
1103 size_t
1104 iInputBasisOffset = 0;
1105 std::string
1106 line;
1107 while (in.good()) {
1108 std::streampos pos = in.tellg();
1109 if (!get_next_line(line, in, LINE_Trim | LINE_AcceptEof))
1110 break;
1111 if (IsSectionStart(line)) {
1112 // rewind to start of line.
1113 in.seekg(pos);
1114 break;
1115 }
1116 // expected format:
1117 // 1 0 // center index, ??? (always 0?)
1118 // s 5 1.00 // number of primitives, ??? (always 1? number of contractions?)
1119 // 0.1238401694D+04 0.5517365048D-02 // exponents and contraction coefficients.
1120 // 0.1862900499D+03 0.4108882856D-01
1121 // 0.4225117635D+02 0.1822538213D+00 // beware of fortran exponent format!! (D+... instead of E+...)
1122 // 0.1167655793D+02 0.4682845944D+00
1123 // 0.3593050648D+01 0.4457581734D+00
1124 // s 1 1.00
1125 // 0.4024514736D+00 0.1000000000D+01
1126 // s 1 1.00
1127 // 0.1309018267D+00 0.1000000000D+01
1128 // p 3 1.00
1129 // 0.9468097062D+01 0.5688833201D-01
1130 // 0.2010354514D+01 0.3129405934D+00
1131 // 0.5477100471D+00 0.7606501651D+00
1132 // p 1 1.00
1133 // 0.1526861379D+00 0.1000000000D+01
1134 // d 1 1.00
1135 // 0.8000000000D+00 0.1000000000D+01
1136 //
1137 int
1138 iCenter, iDunno;
1139 {
1140 std::stringstream
1141 str1(line);
1142 str1 >> iCenter >> iDunno;
1143 }
1144 while (in.good()) {
1145 if (!get_next_line(line, in, LINE_Trim | LINE_KeepEmpty | LINE_AcceptEof))
1146 break;
1147 if (line.empty())
1148 break; // end of basis functions for current center.
1149 FMoldenGaussShell
1150 Fn = FMoldenGaussShell(),
1151 *pFn = &Fn;
1152 {
1153 std::stringstream
1154 str2(line);
1155 std::string
1156 sAngMom, Dummy;
1157 str2 >> sAngMom >> pFn->nExp >> Dummy;
1158 pFn->iAngMom = iAngMomFromStr(sAngMom);
1159 // if (pFn->iAngMom >= 5 && !m_AllowLargerThanG)
1160 // throw FMoldenParseError("Molden itself does not support larger-than-g functions. You can ask this program to load them anyway by adding [allow_larger_g] as section to the molden input file.");
1161 // if (pFn->iAngMom == 4 && m_InputOrbitalsAreSpherical && !m_InputOrbitalsAreSphericalForG)
1162 // throw FMoldenParseError("Mixed spherical and cartesian functions are not supported. (g was specified as default cartesian, while df were defined as spherical).");
1163 }
1164 pFn->nCo = 1; // <- I don't think this can handle general contractions, or can it?
1165 pFn->Exp.resize(pFn->nExp);
1166 pFn->Co.resize(pFn->nCo * pFn->nExp);
1167 for (uint iExp = 0; iExp < pFn->nExp; ++ iExp) {
1168 get_next_line(line, in, LINE_Trim | LINE_KeepEmpty);
1169 replace_fortran_exponents(line);
1170 std::stringstream str3(line);
1171 str3 >> pFn->Exp[iExp] >> pFn->Co[iExp];
1172 }
1173
1174 // orca exports contraction coefficients with respect to raw primitive Gaussians,
1175 // not normalized primitive Gaussians. So we need to fix up the coefficients before converting them back.
1176 // WARNING: at this moment we do not yet know if or if not the functions are spherical!
1177 if (m_MoldenSource == MOLDENSOURCE_Orca) {
1178 for (uint iExp = 0; iExp < pFn->nExp; ++ iExp)
1179 pFn->Co[iExp] *= OrcaRenormFactor(pFn->Exp[iExp], int(pFn->iAngMom));
1180 }
1181
1182 FMoldenAtomSet::iterator
1183 itAt = Atoms.find(iCenter);
1184 if (itAt == Atoms.end())
1185 throw FMoldenParseError("Encountered basis function referring to non-declared center index.");
1186 pFn->iCenter = iCenter;
1187 // pFn->Spherical = m_InputOrbitalsAreSpherical;
1188 // pFn->iOffsetIn = iInputBasisOffset;
1189 iInputBasisOffset += pFn->nFn();
1190 Basis.Shells.push_back(*pFn);
1191 // Basis.Shells.push_back(FGaussShell(pFn, iCenter, itAt->second.vPos));
1192 }
1193 }
1194
1195 // Basis.Sort();
1196 // ^- moved to extra function since data on whether basis is spherical or not
1197 // was may or may not have already been given at this point.
1198 }
1199
FixBasis()1200 void FMoldenFile::FixBasis()
1201 {
1202 if (Basis.Sorted())
1203 throw FMoldenParseError("called FixBasis more than once. Something went wrong.");
1204
1205 // make input order and check for spherical vs cartesian functions...
1206 size_t
1207 iInputBasisOffset = 0;
1208
1209 for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh) {
1210 FMoldenGaussShell
1211 *pFn = &Basis.Shells[iSh];
1212 {
1213 if (pFn->iAngMom >= 5 && !m_AllowLargerThanG)
1214 throw FMoldenParseError("Molden itself does not support larger-than-g functions. You can ask this program to load them anyway by adding [allow_larger_g] as section to the molden input file.");
1215 if (pFn->iAngMom == 4 && m_InputOrbitalsAreSpherical && !m_InputOrbitalsAreSphericalForG)
1216 throw FMoldenParseError("Mixed spherical and cartesian functions are not supported. (g was specified as default cartesian, while df were defined as spherical).");
1217 }
1218
1219 pFn->Spherical = m_InputOrbitalsAreSpherical;
1220 pFn->iOffsetIn = iInputBasisOffset;
1221 iInputBasisOffset += pFn->nFn();
1222 }
1223
1224 Basis.Sort();
1225 }
1226
1227
operator <(FMoldenGaussShell const & other) const1228 bool FMoldenGaussShell::operator < (FMoldenGaussShell const &other) const
1229 {
1230 if (iCenter < other.iCenter) return true;
1231 if (other.iCenter < iCenter) return false;
1232 if (iAngMom < other.iAngMom) return true;
1233 if (other.iAngMom < iAngMom) return false;
1234 return false;
1235 }
1236
1237
Sort()1238 void FMoldenBasis::Sort()
1239 {
1240 if (Sorted())
1241 throw FMoldenParseError("called FMoldenBasis::Sort more than once. Something went wrong.");
1242
1243 size_t
1244 nAo = nFn(); // may be spherical or cartesian or mixed at this point.
1245 m_InputToOutputOrder.clear();
1246 m_InputToOutputOrder.resize(nAo);
1247
1248 // sort basis shells such that they are ordered as the atoms are.
1249 std::stable_sort(Shells.begin(), Shells.end());
1250
1251 size_t
1252 iOffsetOut = 0;
1253 // assign new positions to the basis functions.
1254 for (size_t iSh = 0; iSh < Shells.size(); ++ iSh) {
1255 FMoldenGaussShell
1256 &Sh = Shells[iSh];
1257 size_t
1258 nShFn = Sh.nFn();
1259 for (size_t iShFn = 0; iShFn < nShFn; ++ iShFn)
1260 m_InputToOutputOrder[Sh.iOffsetIn + iShFn] = iOffsetOut + iShFn;
1261 iOffsetOut += nShFn;
1262 }
1263 if (0) {
1264 std::cout << "Basis function order:" << std::endl;
1265 for (size_t i = 0; i < nAo; ++ i) {
1266 std::cout << boost::format(" In[%4i] -> internal [%4i]") % i % m_InputToOutputOrder[i] << std::endl;
1267 }
1268 }
1269 m_IsSorted = true;
1270 }
1271
DoubleFactR(ptrdiff_t l)1272 static size_t DoubleFactR(ptrdiff_t l) {
1273 if (l <= 1)
1274 return 1;
1275 else
1276 return l * DoubleFactR(l - 2);
1277 }
1278
1279
ReadMoSection(std::istream & in)1280 void FMoldenFile::ReadMoSection(std::istream &in)
1281 {
1282 if (!Basis.Sorted())
1283 throw FMoldenParseError("Basis not sorted on entry of ReadMoSection.");
1284
1285 TArray<size_t>
1286 ls;
1287 ls.reserve(Basis.Shells.size());
1288 // make a list of the angular momenta in the basis (need that for converting MO coeffs
1289 // between cartesian orders and cartesians/sphericals).
1290 for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh)
1291 ls.push_back(size_t(Basis.Shells[iSh].iAngMom));
1292 if (0) {
1293 std::cout << "Output l order:" << std::endl;
1294 for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh) {
1295 std::cout << boost::format(" Sh[%3i] -> l = %i") % iSh % Basis.Shells[iSh].iAngMom << std::endl;
1296 }
1297 }
1298
1299 // these ones are required for program-specific hacks (note: both this
1300 // and ls are in output order! but functions are NOT yet converted to
1301 // spherical (unless they already were spherical in the input basis)).
1302 TArray<FMoldenGaussShell*>
1303 pShellForBf;
1304 pShellForBf.reserve(Basis.nFn());
1305 for (size_t iSh = 0; iSh < Basis.Shells.size(); ++ iSh)
1306 for (size_t iFn = 0; iFn < Basis.Shells[iSh].nFn(); ++ iFn)
1307 pShellForBf.push_back(&Basis.Shells[iSh]);
1308 assert(pShellForBf.size() == Basis.nFn());
1309
1310 // On the input format:
1311 // the data format is rather... confusing. There is no discernable separator
1312 // between the data for the different MOs, and orbital data and variables
1313 // occur intermixed. Additionally, data looks sparse: there is no
1314 // strict reasons why there actually *IS* data for a MO.
1315 // Molpro puts a variable section before each individual
1316 // MO, and the variables can be detected by containing an assignment sign or
1317 // having a space as first character. I do not know if this is general,
1318 // however. Here I assume that:
1319 // - there is one variable section before each MO
1320 // - each orbital contains at least one data line (otherwise it can't
1321 // be normalized)
1322 FScalarData
1323 // currently read orbital -- still in cartesian coordinates!
1324 Row;
1325 size_t
1326 nFnCart = Basis.nFnCart(),
1327 nFnSph = Basis.nFnSph();
1328 if (m_InputOrbitalsAreSpherical)
1329 Row.resize(nFnSph);
1330 else
1331 Row.resize(nFnCart);
1332 // FMoldenOrbInfo
1333 // CurrentInfo;
1334 std::string
1335 line;
1336 for ( ; ; ) {
1337 std::istream &in_file = in;
1338 std::streampos pos = in_file.tellg();
1339 if (!get_next_line(line, in_file, LINE_Trim | LINE_AcceptEof))
1340 break;
1341 if (IsSectionStart(line)) {
1342 // rewind to start of line.
1343 in_file.seekg(pos);
1344 break;
1345 }
1346 in_file.seekg(pos);
1347 // std::stringstream in(line);
1348 // std::cout << boost::format(" ...orb-line: '%s'") % line << std::endl;
1349
1350 FOrbitalInfoPtr
1351 pInfo = new FOrbitalInfo;
1352
1353 // clear data row for the current MO.
1354 Row.clear_data();
1355
1356 if ( !in.good() )
1357 throw FMoldenParseError("Expected variables, but found '?'.");
1358
1359 // read variables
1360 std::string
1361 Key, Val;
1362 for ( ; ; ) {
1363 // read variable lines
1364 std::streampos
1365 iLast = in.tellg();
1366 // in >> Key >> Val;
1367 if (!get_next_line(line, in_file, LINE_Trim | LINE_AcceptEof))
1368 break;
1369 size_t
1370 iEquals = line.find('=');
1371 // std::cout << boost::format(" ...k: '%s' v: '%s' ok? %i") % Key % Val % iEquals << std::endl;
1372 if ( !in.good() || iEquals == npos ) {
1373 // that's not a variable line.
1374 in.seekg(iLast, std::ios::beg);
1375 break;
1376 }
1377 Key = line.substr(0, iEquals);
1378 trim(Key);
1379 Val = line.substr(iEquals+1);
1380 trim(Val);
1381 // std::cout << boost::format(" ...k: '%s' v: '%s'") % Key % Val << std::endl;
1382 // if ( CurrentInfo.Map.find(Key) != CurrentInfo.Map.end() )
1383 // throw FMoldenParseError(str(format("Variable '%s' already assigned while parsing MO %i.") % Key % OrbRows.size()));
1384 // CurrentInfo.Map[Key] = Val;
1385 str_lower_and_trim(Key);
1386 if (Key == "sym")
1387 pInfo->sDesc = Val;
1388 else if (Key == "ene")
1389 pInfo->fEnergy = read_fortran_float(Val);
1390 else if (Key == "spin") {
1391 pInfo->Spin = ORBSPIN_Unknown;
1392 str_lower_and_trim(Val);
1393 if (Val == "alpha")
1394 // this is also used for spin free orbitals!! Molden does not
1395 // distinguish them (which means that we can't read files with
1396 // all orbitals open-shell alpha, since the file format does not
1397 // allow for separating them from all-orbitals closed-shell)
1398 pInfo->Spin = ORBSPIN_Alpha;
1399 // pInfo->Spin = ORBSPIN_Alpha; // guess from occupation.
1400 if (Val == "beta")
1401 pInfo->Spin = ORBSPIN_Beta;
1402 } else if (Key == "occup") {
1403 pInfo->fOcc = read_fortran_float(Val);
1404 } else {
1405 // not recognized. Should we warn?
1406 // std::cout << boost::format(" ...unrecognized: '%s' = '%s' ") % Key % Val << std::endl;
1407 }
1408 }
1409
1410 if ( !in.good() )
1411 throw FMoldenParseError("Expected data, but found '?' while parsing molden file.");
1412
1413 for ( ; ; ) {
1414 // read data lines
1415 std::streampos
1416 iLast = in.tellg();
1417 size_t
1418 iBf;
1419 double
1420 fData;
1421 std::string
1422 sData;
1423 in >> iBf >> sData;
1424
1425 iBf -= 1; // translate 1-based indexing to 0-based
1426 fData = read_fortran_float(sData);
1427 if ( !in.good() ) {
1428 // that's not a data line.
1429 // in.clear(std::ios::failbit);
1430 // ^- interesting fact: stream::clear(bit) does not *clear* the given bit,
1431 // but rather *sets* the stream state to the given bit. I guess, given the
1432 // rest of the C++ stream API, this was to be expected. I guess the real mistake
1433 // was trying to use it.
1434 in.clear();
1435 in.seekg(iLast, std::ios::beg);
1436 // bool is_bad = in.bad();
1437 // bool is_fail = in.fail();
1438 // bool is_eol = in.eof();
1439 // bool is_good = in.good();
1440 // std::string bla;
1441 // in >> bla;
1442 break;
1443 }
1444 // std::cout << boost::format(" ...iAo: '%s' sData: '%s' fData: %12.6f") % iBf % sData % fData << std::endl;
1445 if ( iBf >= Row.size() )
1446 throw FMoldenParseError(str(format("AO index '%i' too large while parsing MO %i.") % (1+iBf) % OrbInfo.size()));
1447 iBf = Basis.m_InputToOutputOrder[iBf];
1448 if ( Row[iBf] != 0 )
1449 throw FMoldenParseError(str(format("AO component Orb[%i,%i] already assigned before.") % (1+iBf) % OrbInfo.size()));
1450
1451 if (m_MoldenSource == MOLDENSOURCE_Turbomole) {
1452 size_t l = pShellForBf[iBf]->iAngMom;
1453 fData *= std::sqrt(DoubleFactR(2*l-1));
1454 }
1455
1456 Row[iBf] = fData;
1457 }
1458 // std::cout << "Reading file. Input spherical? " << m_InputOrbitalsAreSpherical << " Input from Orca?" << (m_MoldenSource == MOLDENSOURCE_Orca) << std::endl;
1459 // re-order/convert and store the row
1460 if (m_InputOrbitalsAreSpherical) {
1461 assert(Row.size() == nFnSph);
1462
1463 if (m_MoldenSource == MOLDENSOURCE_Orca) {
1464 // orca seems to use a definition of solid harmonics which differs by some -1 factors
1465 // for higher m. I am not sure if this is quite right...
1466 double *pOrb = &Row[0];
1467 for (size_t i = 0; i < ls.size(); ++ i) {
1468 int l = ls[i];
1469 assert_rt(l == pShellForBf[pOrb-&Row[0]]->iAngMom);
1470 if (l == 3) { // f
1471 for (size_t ic = 5; ic < 7; ++ ic)
1472 pOrb[ic] *= -1.;
1473 }
1474 if (l == 4) { // g a b b c c d d e e
1475 bool switch_this[9] = {0,0,0,0,0,1,1,1,1};
1476 for (size_t ic = 0; ic < 9; ++ ic)
1477 if (switch_this[ic])
1478 pOrb[ic] *= -1.;
1479 // std::swap(pOrb[8],pOrb[7]);
1480 // std::swap(pOrb[6],pOrb[5]);
1481 // std::swap(pOrb[4],pOrb[3]);
1482 // std::swap(pOrb[2],pOrb[1]);
1483 // doesn't work :(.
1484 // ...yes, apart from the phase thing, there is also a mystery sqrt(3) factor in the g functions (see OrcaRenormFactor())
1485 // It is running now. I don't even want to know.
1486 }
1487 pOrb += size_t(2*l + 1);
1488
1489 if (l >= 5)
1490 throw FMoldenParseError("Sorry, currently only up to 'g' functions are supported for import from ORCA. This input has a higher harmonic.");
1491 }
1492 }
1493
1494 ctimp::Vec_ShMolden2ShMolpro(&Row[0], &ls[0], ls.size());
1495 pInfo->Orb = Row;
1496 } else {
1497 assert(Row.size() == nFnCart);
1498 ctimp::Vec_CaMolden2CaMolpro(&Row[0], &ls[0], ls.size());
1499 if (m_MoldenSource == MOLDENSOURCE_Orca || m_MoldenSource == MOLDENSOURCE_Molcas)
1500 // unlike Molpro, which always converts to Cartesian for export, these programs export spherical functions.
1501 // So if we encounter a Cartesian .molden file, it most likely means that the actual computational basis
1502 // was Cartesian.
1503 throw FMoldenParseError("Sorry, this program does not support Cartesian basis functions. Please use a spherical harmonic basis (e.g., def2-* or cc-pVnZ bases)");
1504
1505 // if (m_MoldenSource == MOLDENSOURCE_Orca) {
1506 // std::cout << "RENORMALIZING STUFF!!" << std::endl;
1507 // ctimp::Vec_Ca2Ca_XSqrtNorm(&Row[0], &ls[0], ls.size());
1508 // }
1509 pInfo->Orb.resize(nFnSph);
1510 // ctimp::Vec_Ca2Ca_XSqrtNorm(&Row[0], &ls[0], ls.size());
1511 ctimp::Vec_Ca2Sh(&pInfo->Orb[0], &Row[0], &ls[0], ls.size());
1512 }
1513
1514 if (m_pLoadOptions && m_pLoadOptions->SkipVirtuals() && pInfo->fOcc < 0.00001)
1515 continue;
1516 OrbInfo.push_back(pInfo);
1517 }
1518 // std::cout << boost::format(" ...read %i MOs from file.") % OrbInfo.size() << std::endl;
1519
1520 FixOrbitalTypes();
1521 }
1522
FixOrbitalTypes()1523 void FMoldenFile::FixOrbitalTypes()
1524 {
1525 FixOrbitalTypes1(OrbInfo, ORBSOURCE_Molden);
1526 }
1527
1528 // void FMoldenFile::FixOrbitalTypes()
1529 // {
1530 // size_t
1531 // nAlpha = 0,
1532 // nBeta = 0,
1533 // nClosed = 0,
1534 // nOther = 0;
1535 // FOrbInfoList::iterator
1536 // it;
1537 // for (it = OrbInfo.begin(); it != OrbInfo.end(); ++ it) {
1538 // switch((*it)->Spin) {
1539 // case ORBSPIN_Alpha: nAlpha += 1; continue;
1540 // case ORBSPIN_Beta: nBeta += 1; continue;
1541 // case ORBSPIN_SpinFree: nClosed += 1; continue;
1542 // default: nOther += 1; continue;
1543 // }
1544 // }
1545 //
1546 // // there are *only* Alpha orbitals. Most likely we got something R(o)HFy.
1547 // // Adjust spins based on occupation numbers.
1548 // if (nBeta == 0 && nOther == 0 && nClosed == 0) {
1549 // for (it = OrbInfo.begin(); it != OrbInfo.end(); ++ it) {
1550 // double fOcc = (*it)->fOcc;
1551 // if (fOcc == 2. || fOcc == 0.)
1552 // (*it)->Spin = ORBSPIN_SpinFree;
1553 // }
1554 // }
1555 // }
1556
1557
1558
1559 // struct FMoldenGaussShell
1560 // {
1561 // ct::TArray<double>
1562 // Co, // nExp x nCo contraction matrix
1563 // Exp; // nExp primitive exponents
1564 // size_t
1565 // nExp,
1566 // nCo;
1567 // int
1568 // iAngMom,
1569 // iCenter;
1570 // // FMoldenGaussShell() {};
1571 // };
1572
1573
1574 template <class FSequence>
CmpArrays(FSequence const & A,FSequence const & B)1575 int CmpArrays(FSequence const &A, FSequence const &B)
1576 {
1577 size_t
1578 nA = A.size(),
1579 nB = B.size();
1580 if (nA < nB) return -1;
1581 if (nB < nA) return +1;
1582 typename FSequence::const_iterator
1583 itA = A.begin(),
1584 itB = B.begin();
1585 for ( ; itA != A.end(); ) {
1586 if (*itA < *itB) return -1;
1587 if (*itB < *itA) return +1;
1588 ++ itA;
1589 ++ itB;
1590 }
1591 return 0;
1592 }
1593
1594 // compare two IR gauss shell objects, in order to find equivalent ones on different atoms.
1595 struct FGaussFnCmp
1596 {
operator ()orbital_file::molden_file::FGaussFnCmp1597 bool operator () (ct::FAtomShellPtr const &pA, ct::FAtomShellPtr const &pB) {
1598 if (pA->AngMom < pB->AngMom) return true;
1599 if (pB->AngMom < pA->AngMom) return false;
1600 int iCmp;
1601 iCmp = CmpArrays(pA->Exponents, pB->Exponents);
1602 if (iCmp < 0) return true;
1603 if (iCmp > 0) return false;
1604 iCmp = CmpArrays(pA->CoMatrix, pB->CoMatrix);
1605 if (iCmp < 0) return true;
1606 if (iCmp > 0) return false;
1607 return false;
1608 }
1609 };
1610
1611
Convert()1612 ct::FAtomSetPtr FMoldenAtomSet::Convert()
1613 {
1614 ct::FAtomSetPtr
1615 pAtoms = new ct::FAtomSet();
1616 // it's ordered in iCenterOut order.
1617 for (FMoldenAtomSet::iterator itAtom = this->begin(); itAtom != this->end(); ++ itAtom) {
1618 FMoldenAtom
1619 &At = itAtom->second;
1620 // note: technically we also have element names from input... (both labels and numbers).
1621 // I guess the numbers might be safer? Because programs might use strange pre/suffices or
1622 // spellings of elements...
1623 pAtoms->Atoms.push_back(ct::FAtom(At.vPos, ct::ElementNameFromNumber(At.iElement), "(external)"));
1624 }
1625 return pAtoms;
1626 }
1627
Convert(FMoldenAtomSet * pMoldenAtomSet)1628 ct::FBasisSetPtr FMoldenBasis::Convert(FMoldenAtomSet *pMoldenAtomSet)
1629 {
1630 typedef std::set<ct::FAtomShellPtr, FGaussFnCmp>
1631 FAtomShellSet;
1632 FAtomShellSet
1633 AtomShells;
1634 std::vector<ct::FBasisShell>
1635 NewShells;
1636 NewShells.reserve(Shells.size());
1637
1638 // Go through the molden shells. At this point they should already have been ordered
1639 // by center and angular momentum, so we just need to join them and fix the output
1640 // atom indices.
1641 size_t
1642 iShBeg = 0, iShEnd;
1643 for ( ; iShBeg != Shells.size(); iShBeg = iShEnd) {
1644 // find all shells on the same center with the same AngMom
1645 iShEnd = iShBeg + 1;
1646 while (iShEnd != Shells.size()
1647 && Shells[iShEnd].iCenter == Shells[iShBeg].iCenter
1648 && Shells[iShEnd].iAngMom == Shells[iShBeg].iAngMom)
1649 iShEnd += 1;
1650 // make a sorted list of all unique exponents. We here assume that exponents come
1651 // out at the same binary version of IEEE doubles if they are equivalent (should be fine)
1652 typedef std::map<double, size_t>
1653 FExpMap;
1654 FExpMap
1655 // maps negative of exponent to exponent index.
1656 // (negative such that we get the large ones first)
1657 ExpMap;
1658 for (size_t iSh = iShBeg; iSh != iShEnd; ++ iSh)
1659 for (size_t iExp = 0; iExp < Shells[iSh].Exp.size(); ++ iExp)
1660 ExpMap[-Shells[iSh].Exp[iExp]] = 0;
1661 // assign exponent indices and make a single ordered list of exponents
1662 TArray<double>
1663 Exps;
1664 size_t
1665 nExp = ExpMap.size();
1666 Exps.reserve(nExp);
1667 for (FExpMap::iterator it = ExpMap.begin(); it != ExpMap.end(); ++ it) {
1668 it->second = Exps.size();
1669 Exps.push_back(-it->first);
1670 }
1671 // make a constraction matrix.
1672 size_t
1673 nCo = iShEnd - iShBeg;
1674 TArray<double>
1675 Cos;
1676 Cos.resize(nExp * nCo);
1677 Cos.clear_data();
1678 for (size_t iSh = iShBeg; iSh != iShEnd; ++ iSh) {
1679 size_t
1680 iCo = iSh - iShBeg;
1681 FMoldenGaussShell
1682 &Sh = Shells[iSh];
1683 for (size_t iExp = 0; iExp < Sh.Exp.size(); ++ iExp) {
1684 // find the index of the current exponent in the output exponent list.
1685 FExpMap::const_iterator
1686 itExp = ExpMap.find(-Sh.Exp[iExp]);
1687 if (itExp == ExpMap.end())
1688 throw FMoldenParseError("Internal indexing error in re-assembly of imported basis shells.");
1689 // and add the current coefficient to it (technically an exponent might occur multiple
1690 // times, this is why we add instead of replacing)
1691 Cos[nExp * iCo + itExp->second] += Sh.Co[iExp];
1692 }
1693 }
1694
1695 // make a CT/IR shell object
1696 ct::FAtomShellPtr
1697 pIrSh = new ct::FAtomShell(unsigned(Shells[iShBeg].iAngMom), &Exps[0], unsigned(Exps.size()), &Cos[0], unsigned(nCo), 0);
1698 // check if we had one of those things already before.
1699 std::pair<FAtomShellSet::iterator,bool>
1700 itInsResult = AtomShells.insert(pIrSh);
1701 if (itInsResult.second)
1702 // take the previous one.
1703 pIrSh = *itInsResult.first;
1704
1705 // now make a single BasisShell object referring to the generally contracted atom shell.
1706 FMoldenAtom
1707 &At = pMoldenAtomSet->Get(Shells[iShBeg].iCenter);
1708 NewShells.push_back(ct::FBasisShell(At.vPos, At.iCenterOut, pIrSh));
1709
1710 }
1711 return ct::FBasisSetPtr(new ct::FBasisSet(&NewShells[0], NewShells.size(), ct::BASIS_Orbital, "(external)"));
1712 }
1713
1714
LoadMoldenFile(std::string const & FileName,FLoadOptions const & LoadOptions)1715 FMolproXmlDataPtr LoadMoldenFile(std::string const &FileName, FLoadOptions const &LoadOptions)
1716 {
1717 FMolproXmlDataPtr
1718 pXmlData = new FMolproXmlData();
1719 std::ifstream
1720 in(FileName.c_str(), std::ifstream::in | std::ifstream::binary);
1721 // ^- @binary: the files are TEXT files, but otherwise we get line ending hazard,
1722 // such that in windows we cannot read lines which are terminated by lf only, not cr lf.
1723 // There may be better hacks around this (see
1724 // http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf),
1725 // but so far I have not been overly impressed with the suggestions. So hack for now: read as
1726 // binary and discard any \r's occuring anywhere.
1727 // I guess one of the savest things to do would be to load the entire file, check if there
1728 // are any \n's at all, if yes: replace all \r's by nothing, if not: replace all \r's by \n's.
1729 // But that is also not intensely pretty.
1730 if (!in.good())
1731 throw FMoldenParseError("File load failed.");
1732 FMoldenFile
1733 MoldenData(in, &LoadOptions);
1734
1735 // convert to output format.
1736 pXmlData->pAtoms = MoldenData.Atoms.Convert();
1737 pXmlData->pBasisOrb = MoldenData.Basis.Convert(&MoldenData.Atoms);
1738 pXmlData->pOrbSet = new FOrbitalSet();
1739 FOrbitalSet::FOrbitalInfoList
1740 &OrbInfos = pXmlData->pOrbSet->OrbInfos;
1741 OrbInfos.reserve(MoldenData.OrbInfo.size());
1742 for (size_t iOrb = 0; iOrb < MoldenData.OrbInfo.size(); ++ iOrb) {
1743 OrbInfos.push_back(MoldenData.OrbInfo[iOrb]);
1744 FOrbitalInfoPtr &oi = OrbInfos.back();
1745 oi->pBasisSet = pXmlData->pBasisOrb;
1746 // oi->sDesc = boost::str(format("%4i.%1i") % (1+iOrb) % oi.iSym);
1747
1748 }
1749 return pXmlData;
1750 }
1751
1752
1753 } // namespace molden_file
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767 // int main(int argc, char *argv[])
1768 // {
1769 // using namespace molpro_xml;
1770 // FMolproXmlDataPtr pXmlData = LoadMolproXmlFile("test1.xml");
1771 // TestOrbitals(pXmlData->pBasisOrb, pXmlData->pAtoms, pXmlData->pOrbSet);
1772 // };
1773
1774 // int main(int argc, char *argv[])
1775 // {
1776 // using namespace molpro_xml;
1777 // pugi::xml_document doc;
1778 // pugi::xml_parse_result result = doc.load_file("test1.xml");
1779 //
1780 // std::cout << "XML Load result: " << result.description() << std::endl;
1781 //
1782 //
1783 // pugi::xml_node MoleculeNode = doc.child("molpro").child("molecule");
1784 // // EnumerateChildren(std::cout, MoleculeNode);
1785 // pugi::xml_node GeometryNode = MoleculeNode.child("cml:atomArray");
1786 // // EnumerateChildren(std::cout, GeometryNode);
1787 // FAtomSetPtr
1788 // pAtoms = LoadGeometry(GeometryNode);
1789 // // std::cout << *pAtoms;
1790 //
1791 // pugi::xml_node OrbBasisNode = MoleculeNode.find_child_by_attribute("basisSet", "id", "ORBITAL");
1792 // // EnumerateChildren(std::cout, OrbBasisNode);
1793 //
1794 // ct::FBasisSetPtr
1795 // pBasisOrb = LoadBasisSet(OrbBasisNode, *pAtoms);
1796 // // std::cout << *pBasisOrb;
1797 //
1798 // pugi::xml_node
1799 // OrbitalsNode = MoleculeNode.child("orbitals");
1800 // // EnumerateChildren(std::cout, OrbitalsNode);
1801 // FOrbitalSetPtr
1802 // pOrbSet = LoadOrbitals(OrbitalsNode, pBasisOrb);
1803 // // do I still need to normalize the functions?
1804 //
1805 // // TestOrbitals(pBasisOrb, pAtoms, pOrbSet);
1806 //
1807 //
1808 //
1809 //
1810 // // std::cout << doc.child("molpro").child("molecule").child("cml:symmetry").value() << std::endl;
1811 // // for (pugi::xml_node tool = doc.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1812 // // {
1813 // // std::cout << "Tool " << tool.attribute("Filename").value();
1814 // // std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1815 // // std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1816 // // std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1817 // // }
1818 //
1819 //
1820 // // std::cout << doc.child("mesh").child("molecule").child("cml:symmetry").value() << std::endl;
1821 // // for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1822 // // {
1823 // // std::cout << "Tool " << tool.attribute("Filename").value();
1824 // // std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1825 // // std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1826 // // std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1827 // // }
1828 // // for (pugi::xml_node tool = tools.child("Tool"); tool; tool = tool.next_sibling("Tool"))
1829 // // {
1830 // // std::cout << "Tool " << tool.attribute("Filename").value();
1831 // // std::cout << ": AllowRemote " << tool.attribute("AllowRemote").as_bool();
1832 // // std::cout << ", Timeout " << tool.attribute("Timeout").as_int();
1833 // // std::cout << ", Description '" << tool.child_value("Description") << "'\n";
1834 // // }
1835 //
1836 // // << ", mesh name: " << doc.child("mesh").attribute("name").value() << std::endl;
1837 // }
1838
1839
LoadOrbitalFile(std::string const & FileName,FLoadOptions const & LoadOptions)1840 FMolproXmlDataPtr LoadOrbitalFile(std::string const &FileName, FLoadOptions const &LoadOptions)
1841 {
1842 try {
1843 if (ends_with(FileName, ".xml"))
1844 return molpro_xml::LoadMolproXmlFile(FileName, LoadOptions);
1845 std::string
1846 FirstLine;
1847 {
1848 std::ifstream
1849 in(FileName.c_str());
1850 if (!get_next_line(FirstLine, in, LINE_Trim | LINE_ToLower | LINE_AcceptEof))
1851 FirstLine = "!2empty44";
1852 }
1853 if (ends_with(FileName, ".molden") || ends_with(FileName, ".molden.input") || FirstLine == "[molden format]")
1854 return molden_file::LoadMoldenFile(FileName, LoadOptions);
1855
1856 throw FFileTypeUnrecognizedError("Failed to recognize file extension.");
1857 } catch (FFileTypeUnrecognizedError &e) {
1858 throw FFileTypeUnrecognizedError(e.what(), FileName);
1859 } catch (FFileLoadError &e) {
1860 throw FFileLoadError(e.what(), FileName);
1861 }
1862 }
1863
1864
1865 } // namespace orbital_file
1866