1/*
2 *  Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 *  This file is part of Libint.
5 *
6 *  Libint is free software: you can redistribute it and/or modify
7 *  it under the terms of the GNU Lesser General Public License as published by
8 *  the Free Software Foundation, either version 3 of the License, or
9 *  (at your option) any later version.
10 *
11 *  Libint is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU Lesser General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Lesser General Public License
17 *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef _libint2_src_lib_libint_basis_h_
22#define _libint2_src_lib_libint_basis_h_
23
24#include <libint2/util/cxxstd.h>
25#if LIBINT2_CPLUSPLUS_STD < 2011
26# error "libint2/basis.h requires C++11 support"
27#endif
28#include <libint2/cxxapi.h>
29
30#include <cerrno>
31#include <iostream>
32#include <fstream>
33#include <locale>
34#include <vector>
35#include <stdexcept>
36
37#include <sys/types.h>
38#include <sys/stat.h>
39#include <unistd.h>
40
41#include <libint2.h>
42#include <libint2/shell.h>
43#include <libint2/atom.h>
44
45namespace libint2 {
46
47  /// Computes the number of basis functions in a range of shells
48  /// @tparam a range type
49  /// @param[in] shells a sequence of shells
50  /// @return the number of basis functions
51  template <typename ShellRange> size_t nbf(ShellRange && shells) {
52    size_t n = 0;
53    for (const auto& shell: std::forward<ShellRange>(shells))
54      n += shell.size();
55    return n;
56  }
57
58  /// Computes the maximum number of primitives in any Shell among a range of shells
59  /// @tparam a range type
60  /// @param[in] shells a sequence of shells
61  /// @return the maximum number of primitives
62  template <typename ShellRange> size_t max_nprim(ShellRange && shells) {
63    size_t n = 0;
64    for (auto shell: std::forward<ShellRange>(shells))
65      n = std::max(shell.nprim(), n);
66    return n;
67  }
68
69  /// Computes the maximum angular momentum quantum number @c l in any Shell among a range of shells
70  /// @tparam a range type
71  /// @param[in] shells a sequence of shells
72  /// @return the maximum angular momentum
73  template <typename ShellRange> int max_l(ShellRange && shells) {
74    int l = 0;
75    for (auto shell: std::forward<ShellRange>(shells))
76      for (auto c: shell.contr)
77        l = std::max(c.l, l);
78    return l;
79  }
80
81  /// BasisSet is a slightly decorated \c std::vector of \c libint2::Shell objects.
82  class BasisSet : public std::vector<libint2::Shell> {
83    public:
84      BasisSet() : name_(""), nbf_(-1), max_nprim_(0), max_l_(-1) {}
85      BasisSet(const BasisSet&) = default;
86      BasisSet(BasisSet&& other) :
87        std::vector<libint2::Shell>(std::move(other)),
88        name_(std::move(other.name_)),
89        nbf_(other.nbf_),
90        max_nprim_(other.max_nprim_),
91        max_l_(other.max_l_),
92        shell2bf_(std::move(other.shell2bf_))
93      {
94      }
95      ~BasisSet() = default;
96      BasisSet& operator=(const BasisSet&) = default;
97
98      /// @brief Construct from the basis set name and a vector of atoms.
99
100      /**
101       * @param[in] name the basis set name
102       * @param[in] atoms \c std::vector of Atom objects
103       * @param[in] throw_if_no_match If true, and the basis is not found for this atomic number, throw a std::logic_error.
104       *       Otherwise omit the basis quietly.
105       * @throw std::logic_error if throw_if_no_match is true and for at least one atom no matching basis is found.
106       * @throw std::ios_base::failure if throw_if_no_match is true and the basis file could not be read
107       * \note All instances of the same chemical element receive the same basis set.
108       * \note \c name will be "canonicalized" using BasisSet::canonicalize(name) to
109       *       produce the file name where the basis will be sought. This file needs to contain
110       *       the basis set definition in Gaussian94 format (see \c lib/basis directory for examples).
111       *       The expected location of the file is determined by BasisSet::data_path as follows:
112       *       <ol>
113       *         <li> specified by LIBINT_DATA_PATH environmental variable, if defined </li>
114       *         <li> specified by DATADIR macro variable, if defined </li>
115       *         <li> specified by SRCDATADIR macro variable, if defined </li>
116       *         <li> hardwired to directory \c @DATADIR_ABSOLUTE@/basis </li>
117       *       </ol>
118       */
119      BasisSet(std::string name,
120               const std::vector<Atom>& atoms,
121               const bool throw_if_no_match = false) : name_(std::move(name)) {
122
123        // read in the library file contents
124        std::string basis_lib_path = data_path();
125        auto canonical_name = canonicalize_name(name_);
126        // some basis sets use cartesian d shells by convention, the convention is taken from Gaussian09
127        auto force_cartesian_d = gaussian_cartesian_d_convention(canonical_name);
128
129        // parse the name into components
130        std::vector<std::string> basis_component_names = decompose_name_into_components(canonical_name);
131
132        // ref_shells[component_idx][Z] => vector of Shells
133        std::vector<std::vector<std::vector<libint2::Shell>>> component_basis_sets;
134        component_basis_sets.reserve(basis_component_names.size());
135
136        // read in ALL basis set components
137        for(const auto& basis_component_name: basis_component_names) {
138          auto file_dot_g94 = basis_lib_path + "/" + basis_component_name + ".g94";
139
140          // use same cartesian_d convention for all components!
141          component_basis_sets.emplace_back(read_g94_basis_library(file_dot_g94, force_cartesian_d, throw_if_no_match));
142        }
143
144        // for each atom find the corresponding basis components
145        for(auto a=0ul; a<atoms.size(); ++a) {
146
147          const std::size_t Z = atoms[a].atomic_number;
148
149          // add each component in order
150          for(auto comp_idx=0ul; comp_idx!=component_basis_sets.size(); ++comp_idx) {
151            const auto& component_basis_set = component_basis_sets[comp_idx];
152            if (!component_basis_set.at(Z).empty()) {  // found? add shells in order
153              for(auto s: component_basis_set.at(Z)) {
154                this->push_back(std::move(s));
155                this->back().move({{atoms[a].x, atoms[a].y, atoms[a].z}});
156              } // shell loop
157            }
158            else if (throw_if_no_match) {  // not found? throw, if needed
159              std::string errmsg(std::string("did not find the basis for this Z in ") +
160                      basis_lib_path + "/" + basis_component_names[comp_idx] + ".g94");
161              throw std::logic_error(errmsg);
162            }
163          } // basis component loop
164        } // atom loop
165
166        init();
167      }
168
169      /// @brief Construct from a vector of atoms and the per-element basis set specification
170
171      /**
172       * @param[in] atoms \c std::vector of Atom objects
173       * @param[in] element_bases vector of shell sequences for each element; if @c throw_if_no_match is false,
174       *            ok for @c element_bases[Z] to be nonempty or
175       *            not exist (e.g. if @c element_bases.size()<=Z )
176       * @param[in] name the basis set name
177       * @param[in] throw_if_no_match If true, and the basis is not found for this atomic number, throw a std::logic_error.
178       *       Otherwise omit the basis quietly.
179       * @throw std::logic_error if throw_if_no_match is true and for at least one atom with no matching (or empty) basis is found.
180       * \note All instances of the same chemical element receive the same basis set.
181       */
182      BasisSet(const std::vector<Atom>& atoms,
183               const std::vector<std::vector<Shell>>& element_bases,
184               std::string name = "",
185               const bool throw_if_no_match = false) : name_(std::move(name)) {
186        // for each atom find the corresponding basis components
187        for(auto a=0ul; a<atoms.size(); ++a) {
188
189          auto Z = atoms[a].atomic_number;
190
191          if (decltype(Z)(element_bases.size()) > Z && !element_bases.at(Z).empty()) {  // found? add shells in order
192            for(auto s: element_bases.at(Z)) {
193              this->push_back(std::move(s));
194              this->back().move({{atoms[a].x, atoms[a].y, atoms[a].z}});
195            } // shell loop
196          }
197          else if (throw_if_no_match) {  // not found? throw, if needed
198            throw std::logic_error(std::string("did not find the basis for Z=") + std::to_string(Z) + " in the element_bases");
199          }
200        } // atom loop
201
202        init();
203      }
204
205      /// forces solid harmonics/Cartesian Gaussians
206      /// @param solid if true, force all shells with L>1 to be solid harmonics, otherwise force all shells to Cartesian
207      void set_pure(bool solid) {
208        for(auto& s: *this) {
209          s.contr[0].pure = solid;
210        }
211        init();
212      }
213
214      /// @return the number of basis functions in the basis; -1 if uninitialized
215      long nbf() const {
216        return nbf_;
217      }
218      /// @return the maximum number of primitives in a contracted Shell, i.e. maximum contraction length; 0 if uninitialized
219      size_t max_nprim() const {
220        return max_nprim_;
221      }
222      /// @return the maximum angular momentum of a contraction; -1 if uninitialized
223      long max_l() const {
224        return max_l_;
225      }
226      /// @return the map from shell index to index of the first basis function from this shell
227      /// \note basis functions are ordered as shells, i.e. shell2bf[i] >= shell2bf[j] iff i >= j
228      const std::vector<size_t>& shell2bf() const {
229        return shell2bf_;
230      }
231      /// Computes the map from this object's shells to the corresponding atoms in \c atoms. If no atom matches the origin of a shell, it is mapped to -1.
232      /// @note coordinates must match \em exactly , i.e. shell2atom[k] == l iff atoms[l].x == *this[k].O[0] && atoms[l].y == *this[k].O[1] &&  atoms[l].z == *this[k].O[2]
233      /// @return the map from shell index to the atom in the list \c atoms that coincides with its origin;
234      std::vector<long> shell2atom(const std::vector<Atom>& atoms) const {
235        return shell2atom(*this, atoms, false);
236      }
237      /// Computes the map from \c atoms to the corresponding shells in this object. Coordinates are compared bit-wise (@sa BasisSet::shell2atom() )
238      /// @return the map from atom index to the vector of shell indices whose origins conincide with the atom;
239      /// @note this does not assume that \c shells are ordered in the order of atoms, as does BasisSet
240      std::vector<std::vector<long>> atom2shell(const std::vector<Atom>& atoms) const {
241        return atom2shell(atoms, *this);
242      }
243
244      /// Computes the map from \c shells to the corresponding atoms in \c atoms. Coordinates are compared bit-wise, i.e.
245      /// shell2atom[k] == l iff atoms[l].x == *this[k].O[0] && atoms[l].y == *this[k].O[1] &&  atoms[l].z == *this[k].O[2]
246      /// @param throw_if_no_match If true, and no atom matches the origin of a shell, throw a std::logic_error.
247      ///        Otherwise such shells will be mapped to -1.
248      /// @return the map from shell index to the atom in the list \c atoms that coincides with its origin;
249      /// @throw std::logic_error if throw_if_no_match is true and for at least one shell no matching atom is found.
250      static std::vector<long> shell2atom(const std::vector<Shell>& shells, const std::vector<Atom>& atoms, bool throw_if_no_match = false) {
251        std::vector<long> result;
252        result.reserve(shells.size());
253        for(const auto& s: shells) {
254          auto a = std::find_if(atoms.begin(), atoms.end(), [&s](const Atom& a){ return s.O[0] == a.x && s.O[1] == a.y && s.O[2] == a.z; } );
255          const auto found_match = (a != atoms.end());
256          if (throw_if_no_match && !found_match)
257            throw std::logic_error("shell2atom: no matching atom found");
258          result.push_back( found_match ? a - atoms.begin() : -1);
259        }
260        return result;
261      }
262      /// Computes the map from \c atoms to the corresponding shells in \c shells. Coordinates are compared bit-wise (@sa BasisSet::shell2atom() )
263      /// @return the map from atom index to the vector of shell indices whose origins conincide with the atom;
264      /// @note this does not assume that \c shells are ordered in the order of atoms, as does BasisSet
265      static std::vector<std::vector<long>> atom2shell(const std::vector<Atom>& atoms, const std::vector<Shell>& shells) {
266        std::vector<std::vector<long>> result;
267        result.resize(atoms.size());
268        size_t iatom = 0;
269        for(const auto& a: atoms) {
270          auto s = shells.begin();
271          while (s != shells.end()) {
272            s = std::find_if(s, shells.end(), [&a](const Shell& s){ return s.O[0] == a.x && s.O[1] == a.y && s.O[2] == a.z; } );
273            if (s != shells.end()) {
274              result[iatom].push_back( s - shells.begin());
275              ++s;
276            }
277          }
278          ++iatom;
279        }
280        return result;
281      }
282
283    private:
284      std::string name_;
285      long nbf_;
286      size_t max_nprim_;
287      int max_l_;
288      std::vector<size_t> shell2bf_;
289
290      void init() {
291        nbf_ = libint2::nbf(*this);
292        max_nprim_ = libint2::max_nprim(*this);
293        max_l_ = libint2::max_l(*this);
294        shell2bf_ = compute_shell2bf(*this);
295      }
296
297      struct canonicalizer {
298          char operator()(char c) {
299            char cc = ::tolower(c);
300            switch (cc) {
301              case '/': cc = 'I'; break;
302            }
303            return cc;
304          }
305      };
306
307      static std::string canonicalize_name(const std::string& name) {
308        auto result = name;
309        std::transform(name.begin(), name.end(),
310                       result.begin(), BasisSet::canonicalizer());
311        return result;
312      }
313
314      // see http://gaussian.com/basissets/
315      bool gaussian_cartesian_d_convention(const std::string& canonical_name) {
316        // 3-21??g??, 4-31g??
317        if (canonical_name.find("3-21")    == 0 ||
318            canonical_name.find("4-31g")   == 0)
319          return true;
320        // 6-31??g?? but not 6-311 OR 6-31g()
321        if (canonical_name.find("6-31") == 0 && canonical_name[4] != '1') {
322          // to exclude 6-31??g() find the g, then check the next character
323          auto g_pos = canonical_name.find('g');
324          if (g_pos == std::string::npos) // wtf, I don't even know what this is, assume spherical d is OK
325            return false;
326          if (g_pos+1 == canonical_name.size()) // 6-31??g uses cartesian d
327            return true;
328          if (canonical_name[g_pos+1] == '*') // 6-31??g*? uses cartesian d
329            return true;
330        }
331        return false;
332      }
333
334      /// decompose basis set name into components
335      std::vector<std::string> decompose_name_into_components(std::string name) {
336        std::vector<std::string> component_names;
337        // aug-cc-pvxz* = cc-pvxz* + augmentation-... , except aug-cc-pvxz-cabs
338        if ( (name.find("aug-cc-pv") == 0) && (name.find("cabs")==std::string::npos)  ) {
339          std::string base_name = name.substr(4);
340          component_names.push_back(base_name);
341          component_names.push_back(std::string("augmentation-") + base_name);
342        }
343        else
344          component_names.push_back(name);
345
346        return component_names;
347      }
348
349      /** determines the path to the data directory, as follows:
350       *       <ol>
351       *         <li> specified by LIBINT_DATA_PATH environmental variable, if defined </li>
352       *         <li> specified by DATADIR macro variable, if defined </li>
353       *         <li> specified by SRCDATADIR macro variable, if defined </li>
354       *         <li> hardwired to directory \c @DATADIR_ABSOLUTE@/basis </li>
355       *       </ol>
356       *  @throw std::system_error if the path is not valid, or cannot be determined
357       *  @return valid path to the data directory
358       */
359      static std::string data_path() {
360        std::string path;
361        const char* data_path_env = getenv("LIBINT_DATA_PATH");
362        if (data_path_env) {
363          path = data_path_env;
364        }
365        else {
366#if defined(DATADIR)
367          path = std::string{DATADIR};
368#elif defined(SRCDATADIR)
369          path = std::string{SRCDATADIR};
370#else
371          path = std::string("@DATADIR_ABSOLUTE@");
372#endif
373        }
374        // validate basis_path = path + "/basis"
375        std::string basis_path = path + std::string("/basis");
376        bool error = true;
377        std::error_code ec;
378        auto validate_basis_path = [&basis_path, &error, &ec]() -> void {
379          if (not basis_path.empty()) {
380            struct stat sb;
381            error = (::stat(basis_path.c_str(), &sb) == -1);
382            error = error || not S_ISDIR(sb.st_mode);
383            if (error)
384              ec = std::error_code(errno, std::generic_category());
385          }
386        };
387        validate_basis_path();
388        if (error) { // try without "/basis"
389          basis_path = path;
390          validate_basis_path();
391        }
392        if (error) {
393          std::ostringstream oss; oss << "BasisSet::data_path(): path \"" << path << "{/basis}\" is not valid";
394          throw std::system_error(ec, oss.str());
395        }
396        return basis_path;
397      }
398
399      /// converts fortran scientific-notation floats that use d/D instead of e/E in \c str
400      /// @param[in,out] str string in which chars 'd' and 'D' are replaced with 'e' and 'E',
401      ///                respectively
402      static void fortran_dfloats_to_efloats(std::string& str) {
403        for(auto& ch: str) {
404          if (ch == 'd') ch = 'e';
405          if (ch == 'D') ch = 'E';
406        }
407      }
408
409    public:
410
411      /** reads in all basis sets from a Gaussian94-formatted basis set file (see https://bse.pnl.gov/bse/portal)
412       *  @param[in] file_dot_g94 file name
413       *  @param[in] force_cartesian_d force use of Cartesian d shells, if true
414       *  @param[in] locale_name specifies the locale to use
415       *  @throw std::ios_base::failure if the path is not valid, or cannot be determined
416       *  @throw std::logic_error if the contents of the file cannot be interpreted
417       *  @return vector of basis sets for each element
418       *  @warning the included library basis sets should be parsed using POSIX locale
419       */
420      static std::vector<std::vector<libint2::Shell>> read_g94_basis_library(std::string file_dot_g94,
421                                                                             bool force_cartesian_d = false,
422                                                                             bool throw_if_missing = true,
423                                                                             std::string locale_name = std::string("POSIX")) {
424
425        std::locale locale(locale_name.c_str());  // TODO omit c_str() with up-to-date stdlib
426        std::vector<std::vector<libint2::Shell>> ref_shells(118); // 118 = number of chemical elements
427        std::ifstream is(file_dot_g94);
428        is.imbue(locale);
429
430        if (is.good()) {
431          if (libint2::verbose())
432            libint2::verbose_stream() << "Will read basis set from " << file_dot_g94 << std::endl;
433
434          std::string line, rest;
435
436          auto LIBINT2_LINE_TO_STRINGSTREAM = [&](const std::string& line) {
437            std::istringstream iss{line};
438            iss.imbue(locale);
439            return iss;
440          };
441
442          size_t Z;
443          auto nextbasis = true, nextshell = false;
444          bool first_element = true;
445          // read lines till end
446          do {
447            // skipping empties and starting with '!' (the comment delimiter)
448            if (line.empty() || line[0] == '!') continue;
449            if (line == "****") {
450              // old (EMSL) basis set exchange g94 format marks the beginning of data by ****
451              // new (MolSSI) basis set exchange g94 format does not start with ****
452              // so if found **** and still waiting for the first element, this is new g94 format, skip to next line
453              if (first_element)
454                continue;
455              nextbasis = true;
456              nextshell = false;
457              continue;
458            }
459            if (nextbasis) {
460              nextbasis = false;
461              first_element = false;
462              auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line);
463              std::string elemsymbol;
464              iss >> elemsymbol >> rest;
465
466              bool found = false;
467              for (const auto &e: libint2::chemistry::get_element_info()) {
468                if (strcaseequal(e.symbol, elemsymbol)) {
469                  Z = e.Z;
470                  found = true;
471                  break;
472                }
473              }
474              if (not found) {
475                std::ostringstream oss;
476                oss << "in file " << file_dot_g94
477                    << " found G94 basis set for element symbol \""
478                    << elemsymbol << "\", not found in Periodic Table.";
479                throw std::logic_error(oss.str());
480              }
481
482              nextshell = true;
483              continue;
484            }
485            if (nextshell) {
486              auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line);
487              std::string amlabel;
488              std::size_t nprim;
489              iss >> amlabel >> nprim >> rest;
490              if (amlabel != "SP" && amlabel != "sp") {
491                assert(amlabel.size() == 1);
492                auto l = Shell::am_symbol_to_l(amlabel[0]);
493                svector<double> exps;
494                svector<double> coeffs;
495                for (decltype(nprim) p = 0; p != nprim; ++p) {
496                  while (std::getline(is, line) && (line.empty() || line[0] == '!')) {}
497                  fortran_dfloats_to_efloats(line);
498                  auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line);
499                  double e, c;
500                  iss >> e >> c;
501                  exps.emplace_back(e);
502                  coeffs.emplace_back(c);
503                }
504                auto pure = force_cartesian_d ? (l > 2) : (l > 1);
505                ref_shells.at(Z).push_back(
506                    libint2::Shell{
507                        std::move(exps),
508                        {
509                            {l, pure, std::move(coeffs)}
510                        },
511                        {{0, 0, 0}}
512                    }
513                );
514              } else { // split the SP shells
515                svector<double> exps;
516                svector<double> coeffs_s, coeffs_p;
517                for (decltype(nprim) p = 0; p != nprim; ++p) {
518                  while (std::getline(is, line) && (line.empty() || line[0] == '!')) {}
519                  fortran_dfloats_to_efloats(line);
520                  auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line);
521                  double e, c1, c2;
522                  iss >> e >> c1 >> c2;
523                  exps.emplace_back(e);
524                  coeffs_s.emplace_back(c1);
525                  coeffs_p.emplace_back(c2);
526                }
527                ref_shells.at(Z).push_back(
528                    libint2::Shell{exps,
529                                   {
530                                       {0, false, coeffs_s}
531                                   },
532                                   {{0, 0, 0}}
533                    }
534                );
535                ref_shells.at(Z).push_back(
536                    libint2::Shell{std::move(exps),
537                                   {
538                                       {1, false, std::move(coeffs_p)}
539                                   },
540                                   {{0, 0, 0}}
541                    }
542                );
543              }
544            }
545          } while (std::getline(is, line));
546
547        }
548        else {  // !is.good()
549          if (throw_if_missing) {
550            std::ostringstream oss;
551            oss << "BasisSet::read_g94_basis_library(): could not open \"" << file_dot_g94 << "\"" << std::endl;
552            throw std::ios_base::failure(oss.str());
553          }
554        }
555
556        return ref_shells;
557      }
558
559      DEPRECATED static size_t nbf(const std::vector<libint2::Shell>& shells) {
560        return libint2::nbf(shells);
561      }
562
563      DEPRECATED static size_t max_nprim(const std::vector<libint2::Shell>& shells) {
564        return libint2::max_nprim(shells);
565      }
566
567      DEPRECATED static int max_l(const std::vector<libint2::Shell>& shells) {
568        return libint2::max_l(shells);
569      }
570
571      static std::vector<size_t> compute_shell2bf(const std::vector<libint2::Shell>& shells) {
572        std::vector<size_t> result;
573        result.reserve(shells.size());
574
575        size_t n = 0;
576        for (auto shell: shells) {
577          result.push_back(n);
578          n += shell.size();
579        }
580
581        return result;
582      }
583
584  }; // BasisSet
585
586} // namespace libint2
587
588#endif /* _libint2_src_lib_libint_basis_h_ */
589