1/* 2 * Copyright (C) 2004-2021 Edward F. Valeev 3 * 4 * This file is part of Libint. 5 * 6 * Libint is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * Libint is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public License 17 * along with Libint. If not, see <http://www.gnu.org/licenses/>. 18 * 19 */ 20 21#ifndef _libint2_src_lib_libint_basis_h_ 22#define _libint2_src_lib_libint_basis_h_ 23 24#include <libint2/util/cxxstd.h> 25#if LIBINT2_CPLUSPLUS_STD < 2011 26# error "libint2/basis.h requires C++11 support" 27#endif 28#include <libint2/cxxapi.h> 29 30#include <cerrno> 31#include <iostream> 32#include <fstream> 33#include <locale> 34#include <vector> 35#include <stdexcept> 36 37#include <sys/types.h> 38#include <sys/stat.h> 39#include <unistd.h> 40 41#include <libint2.h> 42#include <libint2/shell.h> 43#include <libint2/atom.h> 44 45namespace libint2 { 46 47 /// Computes the number of basis functions in a range of shells 48 /// @tparam a range type 49 /// @param[in] shells a sequence of shells 50 /// @return the number of basis functions 51 template <typename ShellRange> size_t nbf(ShellRange && shells) { 52 size_t n = 0; 53 for (const auto& shell: std::forward<ShellRange>(shells)) 54 n += shell.size(); 55 return n; 56 } 57 58 /// Computes the maximum number of primitives in any Shell among a range of shells 59 /// @tparam a range type 60 /// @param[in] shells a sequence of shells 61 /// @return the maximum number of primitives 62 template <typename ShellRange> size_t max_nprim(ShellRange && shells) { 63 size_t n = 0; 64 for (auto shell: std::forward<ShellRange>(shells)) 65 n = std::max(shell.nprim(), n); 66 return n; 67 } 68 69 /// Computes the maximum angular momentum quantum number @c l in any Shell among a range of shells 70 /// @tparam a range type 71 /// @param[in] shells a sequence of shells 72 /// @return the maximum angular momentum 73 template <typename ShellRange> int max_l(ShellRange && shells) { 74 int l = 0; 75 for (auto shell: std::forward<ShellRange>(shells)) 76 for (auto c: shell.contr) 77 l = std::max(c.l, l); 78 return l; 79 } 80 81 /// BasisSet is a slightly decorated \c std::vector of \c libint2::Shell objects. 82 class BasisSet : public std::vector<libint2::Shell> { 83 public: 84 BasisSet() : name_(""), nbf_(-1), max_nprim_(0), max_l_(-1) {} 85 BasisSet(const BasisSet&) = default; 86 BasisSet(BasisSet&& other) : 87 std::vector<libint2::Shell>(std::move(other)), 88 name_(std::move(other.name_)), 89 nbf_(other.nbf_), 90 max_nprim_(other.max_nprim_), 91 max_l_(other.max_l_), 92 shell2bf_(std::move(other.shell2bf_)) 93 { 94 } 95 ~BasisSet() = default; 96 BasisSet& operator=(const BasisSet&) = default; 97 98 /// @brief Construct from the basis set name and a vector of atoms. 99 100 /** 101 * @param[in] name the basis set name 102 * @param[in] atoms \c std::vector of Atom objects 103 * @param[in] throw_if_no_match If true, and the basis is not found for this atomic number, throw a std::logic_error. 104 * Otherwise omit the basis quietly. 105 * @throw std::logic_error if throw_if_no_match is true and for at least one atom no matching basis is found. 106 * @throw std::ios_base::failure if throw_if_no_match is true and the basis file could not be read 107 * \note All instances of the same chemical element receive the same basis set. 108 * \note \c name will be "canonicalized" using BasisSet::canonicalize(name) to 109 * produce the file name where the basis will be sought. This file needs to contain 110 * the basis set definition in Gaussian94 format (see \c lib/basis directory for examples). 111 * The expected location of the file is determined by BasisSet::data_path as follows: 112 * <ol> 113 * <li> specified by LIBINT_DATA_PATH environmental variable, if defined </li> 114 * <li> specified by DATADIR macro variable, if defined </li> 115 * <li> specified by SRCDATADIR macro variable, if defined </li> 116 * <li> hardwired to directory \c @DATADIR_ABSOLUTE@/basis </li> 117 * </ol> 118 */ 119 BasisSet(std::string name, 120 const std::vector<Atom>& atoms, 121 const bool throw_if_no_match = false) : name_(std::move(name)) { 122 123 // read in the library file contents 124 std::string basis_lib_path = data_path(); 125 auto canonical_name = canonicalize_name(name_); 126 // some basis sets use cartesian d shells by convention, the convention is taken from Gaussian09 127 auto force_cartesian_d = gaussian_cartesian_d_convention(canonical_name); 128 129 // parse the name into components 130 std::vector<std::string> basis_component_names = decompose_name_into_components(canonical_name); 131 132 // ref_shells[component_idx][Z] => vector of Shells 133 std::vector<std::vector<std::vector<libint2::Shell>>> component_basis_sets; 134 component_basis_sets.reserve(basis_component_names.size()); 135 136 // read in ALL basis set components 137 for(const auto& basis_component_name: basis_component_names) { 138 auto file_dot_g94 = basis_lib_path + "/" + basis_component_name + ".g94"; 139 140 // use same cartesian_d convention for all components! 141 component_basis_sets.emplace_back(read_g94_basis_library(file_dot_g94, force_cartesian_d, throw_if_no_match)); 142 } 143 144 // for each atom find the corresponding basis components 145 for(auto a=0ul; a<atoms.size(); ++a) { 146 147 const std::size_t Z = atoms[a].atomic_number; 148 149 // add each component in order 150 for(auto comp_idx=0ul; comp_idx!=component_basis_sets.size(); ++comp_idx) { 151 const auto& component_basis_set = component_basis_sets[comp_idx]; 152 if (!component_basis_set.at(Z).empty()) { // found? add shells in order 153 for(auto s: component_basis_set.at(Z)) { 154 this->push_back(std::move(s)); 155 this->back().move({{atoms[a].x, atoms[a].y, atoms[a].z}}); 156 } // shell loop 157 } 158 else if (throw_if_no_match) { // not found? throw, if needed 159 std::string errmsg(std::string("did not find the basis for this Z in ") + 160 basis_lib_path + "/" + basis_component_names[comp_idx] + ".g94"); 161 throw std::logic_error(errmsg); 162 } 163 } // basis component loop 164 } // atom loop 165 166 init(); 167 } 168 169 /// @brief Construct from a vector of atoms and the per-element basis set specification 170 171 /** 172 * @param[in] atoms \c std::vector of Atom objects 173 * @param[in] element_bases vector of shell sequences for each element; if @c throw_if_no_match is false, 174 * ok for @c element_bases[Z] to be nonempty or 175 * not exist (e.g. if @c element_bases.size()<=Z ) 176 * @param[in] name the basis set name 177 * @param[in] throw_if_no_match If true, and the basis is not found for this atomic number, throw a std::logic_error. 178 * Otherwise omit the basis quietly. 179 * @throw std::logic_error if throw_if_no_match is true and for at least one atom with no matching (or empty) basis is found. 180 * \note All instances of the same chemical element receive the same basis set. 181 */ 182 BasisSet(const std::vector<Atom>& atoms, 183 const std::vector<std::vector<Shell>>& element_bases, 184 std::string name = "", 185 const bool throw_if_no_match = false) : name_(std::move(name)) { 186 // for each atom find the corresponding basis components 187 for(auto a=0ul; a<atoms.size(); ++a) { 188 189 auto Z = atoms[a].atomic_number; 190 191 if (decltype(Z)(element_bases.size()) > Z && !element_bases.at(Z).empty()) { // found? add shells in order 192 for(auto s: element_bases.at(Z)) { 193 this->push_back(std::move(s)); 194 this->back().move({{atoms[a].x, atoms[a].y, atoms[a].z}}); 195 } // shell loop 196 } 197 else if (throw_if_no_match) { // not found? throw, if needed 198 throw std::logic_error(std::string("did not find the basis for Z=") + std::to_string(Z) + " in the element_bases"); 199 } 200 } // atom loop 201 202 init(); 203 } 204 205 /// forces solid harmonics/Cartesian Gaussians 206 /// @param solid if true, force all shells with L>1 to be solid harmonics, otherwise force all shells to Cartesian 207 void set_pure(bool solid) { 208 for(auto& s: *this) { 209 s.contr[0].pure = solid; 210 } 211 init(); 212 } 213 214 /// @return the number of basis functions in the basis; -1 if uninitialized 215 long nbf() const { 216 return nbf_; 217 } 218 /// @return the maximum number of primitives in a contracted Shell, i.e. maximum contraction length; 0 if uninitialized 219 size_t max_nprim() const { 220 return max_nprim_; 221 } 222 /// @return the maximum angular momentum of a contraction; -1 if uninitialized 223 long max_l() const { 224 return max_l_; 225 } 226 /// @return the map from shell index to index of the first basis function from this shell 227 /// \note basis functions are ordered as shells, i.e. shell2bf[i] >= shell2bf[j] iff i >= j 228 const std::vector<size_t>& shell2bf() const { 229 return shell2bf_; 230 } 231 /// Computes the map from this object's shells to the corresponding atoms in \c atoms. If no atom matches the origin of a shell, it is mapped to -1. 232 /// @note coordinates must match \em exactly , i.e. shell2atom[k] == l iff atoms[l].x == *this[k].O[0] && atoms[l].y == *this[k].O[1] && atoms[l].z == *this[k].O[2] 233 /// @return the map from shell index to the atom in the list \c atoms that coincides with its origin; 234 std::vector<long> shell2atom(const std::vector<Atom>& atoms) const { 235 return shell2atom(*this, atoms, false); 236 } 237 /// Computes the map from \c atoms to the corresponding shells in this object. Coordinates are compared bit-wise (@sa BasisSet::shell2atom() ) 238 /// @return the map from atom index to the vector of shell indices whose origins conincide with the atom; 239 /// @note this does not assume that \c shells are ordered in the order of atoms, as does BasisSet 240 std::vector<std::vector<long>> atom2shell(const std::vector<Atom>& atoms) const { 241 return atom2shell(atoms, *this); 242 } 243 244 /// Computes the map from \c shells to the corresponding atoms in \c atoms. Coordinates are compared bit-wise, i.e. 245 /// shell2atom[k] == l iff atoms[l].x == *this[k].O[0] && atoms[l].y == *this[k].O[1] && atoms[l].z == *this[k].O[2] 246 /// @param throw_if_no_match If true, and no atom matches the origin of a shell, throw a std::logic_error. 247 /// Otherwise such shells will be mapped to -1. 248 /// @return the map from shell index to the atom in the list \c atoms that coincides with its origin; 249 /// @throw std::logic_error if throw_if_no_match is true and for at least one shell no matching atom is found. 250 static std::vector<long> shell2atom(const std::vector<Shell>& shells, const std::vector<Atom>& atoms, bool throw_if_no_match = false) { 251 std::vector<long> result; 252 result.reserve(shells.size()); 253 for(const auto& s: shells) { 254 auto a = std::find_if(atoms.begin(), atoms.end(), [&s](const Atom& a){ return s.O[0] == a.x && s.O[1] == a.y && s.O[2] == a.z; } ); 255 const auto found_match = (a != atoms.end()); 256 if (throw_if_no_match && !found_match) 257 throw std::logic_error("shell2atom: no matching atom found"); 258 result.push_back( found_match ? a - atoms.begin() : -1); 259 } 260 return result; 261 } 262 /// Computes the map from \c atoms to the corresponding shells in \c shells. Coordinates are compared bit-wise (@sa BasisSet::shell2atom() ) 263 /// @return the map from atom index to the vector of shell indices whose origins conincide with the atom; 264 /// @note this does not assume that \c shells are ordered in the order of atoms, as does BasisSet 265 static std::vector<std::vector<long>> atom2shell(const std::vector<Atom>& atoms, const std::vector<Shell>& shells) { 266 std::vector<std::vector<long>> result; 267 result.resize(atoms.size()); 268 size_t iatom = 0; 269 for(const auto& a: atoms) { 270 auto s = shells.begin(); 271 while (s != shells.end()) { 272 s = std::find_if(s, shells.end(), [&a](const Shell& s){ return s.O[0] == a.x && s.O[1] == a.y && s.O[2] == a.z; } ); 273 if (s != shells.end()) { 274 result[iatom].push_back( s - shells.begin()); 275 ++s; 276 } 277 } 278 ++iatom; 279 } 280 return result; 281 } 282 283 private: 284 std::string name_; 285 long nbf_; 286 size_t max_nprim_; 287 int max_l_; 288 std::vector<size_t> shell2bf_; 289 290 void init() { 291 nbf_ = libint2::nbf(*this); 292 max_nprim_ = libint2::max_nprim(*this); 293 max_l_ = libint2::max_l(*this); 294 shell2bf_ = compute_shell2bf(*this); 295 } 296 297 struct canonicalizer { 298 char operator()(char c) { 299 char cc = ::tolower(c); 300 switch (cc) { 301 case '/': cc = 'I'; break; 302 } 303 return cc; 304 } 305 }; 306 307 static std::string canonicalize_name(const std::string& name) { 308 auto result = name; 309 std::transform(name.begin(), name.end(), 310 result.begin(), BasisSet::canonicalizer()); 311 return result; 312 } 313 314 // see http://gaussian.com/basissets/ 315 bool gaussian_cartesian_d_convention(const std::string& canonical_name) { 316 // 3-21??g??, 4-31g?? 317 if (canonical_name.find("3-21") == 0 || 318 canonical_name.find("4-31g") == 0) 319 return true; 320 // 6-31??g?? but not 6-311 OR 6-31g() 321 if (canonical_name.find("6-31") == 0 && canonical_name[4] != '1') { 322 // to exclude 6-31??g() find the g, then check the next character 323 auto g_pos = canonical_name.find('g'); 324 if (g_pos == std::string::npos) // wtf, I don't even know what this is, assume spherical d is OK 325 return false; 326 if (g_pos+1 == canonical_name.size()) // 6-31??g uses cartesian d 327 return true; 328 if (canonical_name[g_pos+1] == '*') // 6-31??g*? uses cartesian d 329 return true; 330 } 331 return false; 332 } 333 334 /// decompose basis set name into components 335 std::vector<std::string> decompose_name_into_components(std::string name) { 336 std::vector<std::string> component_names; 337 // aug-cc-pvxz* = cc-pvxz* + augmentation-... , except aug-cc-pvxz-cabs 338 if ( (name.find("aug-cc-pv") == 0) && (name.find("cabs")==std::string::npos) ) { 339 std::string base_name = name.substr(4); 340 component_names.push_back(base_name); 341 component_names.push_back(std::string("augmentation-") + base_name); 342 } 343 else 344 component_names.push_back(name); 345 346 return component_names; 347 } 348 349 /** determines the path to the data directory, as follows: 350 * <ol> 351 * <li> specified by LIBINT_DATA_PATH environmental variable, if defined </li> 352 * <li> specified by DATADIR macro variable, if defined </li> 353 * <li> specified by SRCDATADIR macro variable, if defined </li> 354 * <li> hardwired to directory \c @DATADIR_ABSOLUTE@/basis </li> 355 * </ol> 356 * @throw std::system_error if the path is not valid, or cannot be determined 357 * @return valid path to the data directory 358 */ 359 static std::string data_path() { 360 std::string path; 361 const char* data_path_env = getenv("LIBINT_DATA_PATH"); 362 if (data_path_env) { 363 path = data_path_env; 364 } 365 else { 366#if defined(DATADIR) 367 path = std::string{DATADIR}; 368#elif defined(SRCDATADIR) 369 path = std::string{SRCDATADIR}; 370#else 371 path = std::string("@DATADIR_ABSOLUTE@"); 372#endif 373 } 374 // validate basis_path = path + "/basis" 375 std::string basis_path = path + std::string("/basis"); 376 bool error = true; 377 std::error_code ec; 378 auto validate_basis_path = [&basis_path, &error, &ec]() -> void { 379 if (not basis_path.empty()) { 380 struct stat sb; 381 error = (::stat(basis_path.c_str(), &sb) == -1); 382 error = error || not S_ISDIR(sb.st_mode); 383 if (error) 384 ec = std::error_code(errno, std::generic_category()); 385 } 386 }; 387 validate_basis_path(); 388 if (error) { // try without "/basis" 389 basis_path = path; 390 validate_basis_path(); 391 } 392 if (error) { 393 std::ostringstream oss; oss << "BasisSet::data_path(): path \"" << path << "{/basis}\" is not valid"; 394 throw std::system_error(ec, oss.str()); 395 } 396 return basis_path; 397 } 398 399 /// converts fortran scientific-notation floats that use d/D instead of e/E in \c str 400 /// @param[in,out] str string in which chars 'd' and 'D' are replaced with 'e' and 'E', 401 /// respectively 402 static void fortran_dfloats_to_efloats(std::string& str) { 403 for(auto& ch: str) { 404 if (ch == 'd') ch = 'e'; 405 if (ch == 'D') ch = 'E'; 406 } 407 } 408 409 public: 410 411 /** reads in all basis sets from a Gaussian94-formatted basis set file (see https://bse.pnl.gov/bse/portal) 412 * @param[in] file_dot_g94 file name 413 * @param[in] force_cartesian_d force use of Cartesian d shells, if true 414 * @param[in] locale_name specifies the locale to use 415 * @throw std::ios_base::failure if the path is not valid, or cannot be determined 416 * @throw std::logic_error if the contents of the file cannot be interpreted 417 * @return vector of basis sets for each element 418 * @warning the included library basis sets should be parsed using POSIX locale 419 */ 420 static std::vector<std::vector<libint2::Shell>> read_g94_basis_library(std::string file_dot_g94, 421 bool force_cartesian_d = false, 422 bool throw_if_missing = true, 423 std::string locale_name = std::string("POSIX")) { 424 425 std::locale locale(locale_name.c_str()); // TODO omit c_str() with up-to-date stdlib 426 std::vector<std::vector<libint2::Shell>> ref_shells(118); // 118 = number of chemical elements 427 std::ifstream is(file_dot_g94); 428 is.imbue(locale); 429 430 if (is.good()) { 431 if (libint2::verbose()) 432 libint2::verbose_stream() << "Will read basis set from " << file_dot_g94 << std::endl; 433 434 std::string line, rest; 435 436 auto LIBINT2_LINE_TO_STRINGSTREAM = [&](const std::string& line) { 437 std::istringstream iss{line}; 438 iss.imbue(locale); 439 return iss; 440 }; 441 442 size_t Z; 443 auto nextbasis = true, nextshell = false; 444 bool first_element = true; 445 // read lines till end 446 do { 447 // skipping empties and starting with '!' (the comment delimiter) 448 if (line.empty() || line[0] == '!') continue; 449 if (line == "****") { 450 // old (EMSL) basis set exchange g94 format marks the beginning of data by **** 451 // new (MolSSI) basis set exchange g94 format does not start with **** 452 // so if found **** and still waiting for the first element, this is new g94 format, skip to next line 453 if (first_element) 454 continue; 455 nextbasis = true; 456 nextshell = false; 457 continue; 458 } 459 if (nextbasis) { 460 nextbasis = false; 461 first_element = false; 462 auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line); 463 std::string elemsymbol; 464 iss >> elemsymbol >> rest; 465 466 bool found = false; 467 for (const auto &e: libint2::chemistry::get_element_info()) { 468 if (strcaseequal(e.symbol, elemsymbol)) { 469 Z = e.Z; 470 found = true; 471 break; 472 } 473 } 474 if (not found) { 475 std::ostringstream oss; 476 oss << "in file " << file_dot_g94 477 << " found G94 basis set for element symbol \"" 478 << elemsymbol << "\", not found in Periodic Table."; 479 throw std::logic_error(oss.str()); 480 } 481 482 nextshell = true; 483 continue; 484 } 485 if (nextshell) { 486 auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line); 487 std::string amlabel; 488 std::size_t nprim; 489 iss >> amlabel >> nprim >> rest; 490 if (amlabel != "SP" && amlabel != "sp") { 491 assert(amlabel.size() == 1); 492 auto l = Shell::am_symbol_to_l(amlabel[0]); 493 svector<double> exps; 494 svector<double> coeffs; 495 for (decltype(nprim) p = 0; p != nprim; ++p) { 496 while (std::getline(is, line) && (line.empty() || line[0] == '!')) {} 497 fortran_dfloats_to_efloats(line); 498 auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line); 499 double e, c; 500 iss >> e >> c; 501 exps.emplace_back(e); 502 coeffs.emplace_back(c); 503 } 504 auto pure = force_cartesian_d ? (l > 2) : (l > 1); 505 ref_shells.at(Z).push_back( 506 libint2::Shell{ 507 std::move(exps), 508 { 509 {l, pure, std::move(coeffs)} 510 }, 511 {{0, 0, 0}} 512 } 513 ); 514 } else { // split the SP shells 515 svector<double> exps; 516 svector<double> coeffs_s, coeffs_p; 517 for (decltype(nprim) p = 0; p != nprim; ++p) { 518 while (std::getline(is, line) && (line.empty() || line[0] == '!')) {} 519 fortran_dfloats_to_efloats(line); 520 auto iss = LIBINT2_LINE_TO_STRINGSTREAM(line); 521 double e, c1, c2; 522 iss >> e >> c1 >> c2; 523 exps.emplace_back(e); 524 coeffs_s.emplace_back(c1); 525 coeffs_p.emplace_back(c2); 526 } 527 ref_shells.at(Z).push_back( 528 libint2::Shell{exps, 529 { 530 {0, false, coeffs_s} 531 }, 532 {{0, 0, 0}} 533 } 534 ); 535 ref_shells.at(Z).push_back( 536 libint2::Shell{std::move(exps), 537 { 538 {1, false, std::move(coeffs_p)} 539 }, 540 {{0, 0, 0}} 541 } 542 ); 543 } 544 } 545 } while (std::getline(is, line)); 546 547 } 548 else { // !is.good() 549 if (throw_if_missing) { 550 std::ostringstream oss; 551 oss << "BasisSet::read_g94_basis_library(): could not open \"" << file_dot_g94 << "\"" << std::endl; 552 throw std::ios_base::failure(oss.str()); 553 } 554 } 555 556 return ref_shells; 557 } 558 559 DEPRECATED static size_t nbf(const std::vector<libint2::Shell>& shells) { 560 return libint2::nbf(shells); 561 } 562 563 DEPRECATED static size_t max_nprim(const std::vector<libint2::Shell>& shells) { 564 return libint2::max_nprim(shells); 565 } 566 567 DEPRECATED static int max_l(const std::vector<libint2::Shell>& shells) { 568 return libint2::max_l(shells); 569 } 570 571 static std::vector<size_t> compute_shell2bf(const std::vector<libint2::Shell>& shells) { 572 std::vector<size_t> result; 573 result.reserve(shells.size()); 574 575 size_t n = 0; 576 for (auto shell: shells) { 577 result.push_back(n); 578 n += shell.size(); 579 } 580 581 return result; 582 } 583 584 }; // BasisSet 585 586} // namespace libint2 587 588#endif /* _libint2_src_lib_libint_basis_h_ */ 589