/* Ergo, version 3.8, a program for linear scaling electronic structure
* calculations.
* Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
* and Anastasia Kruchinina.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
* Primary academic reference:
* Ergo: An open-source program for linear-scaling electronic structure
* calculations,
* Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
* Kruchinina,
* SoftwareX 7, 107 (2018),
*
*
* For further information about Ergo, see .
*/
/** @file convert_basis_set_file.cc
\brief Program that can be used to convert a file downloaded from
the EMSL Basis Set Library (in SuperMolecule format) to the format
expected by the Ergo program.
@author: Elias Rudberg responsible
*/
#include
#include
#include
#include
#include
#include
static int getChargeForAtomName(const std::string & atomName) {
if(atomName == "HYDROGEN") return 1;
if(atomName == "HELIUM") return 2;
if(atomName == "LITHIUM") return 3;
if(atomName == "BERYLLIUM") return 4;
if(atomName == "BORON") return 5;
if(atomName == "CARBON") return 6;
if(atomName == "NITROGEN") return 7;
if(atomName == "OXYGEN") return 8;
if(atomName == "FLUORINE") return 9;
if(atomName == "NEON") return 10;
if(atomName == "SODIUM") return 11;
if(atomName == "MAGNESIUM") return 12;
if(atomName == "ALUMINUM") return 13;
if(atomName == "SILICON") return 14;
if(atomName == "PHOSPHOROUS") return 15;
if(atomName == "SULFUR") return 16;
if(atomName == "CHLORINE") return 17;
if(atomName == "ARGON") return 18;
if(atomName == "POTASSIUM") return 19;
if(atomName == "CALCIUM") return 20;
if(atomName == "SCANDIUM") return 21;
if(atomName == "TITANIUM") return 22;
if(atomName == "VANADIUM") return 23;
if(atomName == "CHROMIUM") return 24;
if(atomName == "MANGANESE") return 25;
if(atomName == "IRON") return 26;
if(atomName == "COBALT") return 27;
if(atomName == "NICKEL") return 28;
if(atomName == "COPPER") return 29;
if(atomName == "ZINC") return 30;
if(atomName == "GALLIUM") return 31;
if(atomName == "GERMANIUM") return 32;
if(atomName == "ARSENIC") return 33;
if(atomName == "SELENIUM") return 34;
if(atomName == "BROMINE") return 35;
if(atomName == "KRYPTON") return 36;
return -1;
}
static std::string getSecondWordFromLine(const std::string & str) {
int len = str.length();
char s[len+1];
strcpy(s, str.c_str());
assert(len > 3);
assert(s[0] == '$');
assert(s[1] == ' ');
int idx = 2;
assert(s[idx] != ' ');
while(idx < len) {
if(s[idx] == ' ')
break;
idx++;
}
int nChars = idx - 2;
char ss[nChars+1];
memcpy(ss, &s[2], nChars);
ss[nChars] = '\0';
std::string resultStr = ss;
return resultStr;
}
static bool is_digit(char c) {
if(c >= '0' && c <= '9')
return true;
return false;
}
static bool checkIfLineHasThreeNumbers(const std::string & str) {
int len = str.length();
char s[len+1];
strcpy(s, str.c_str());
int nDigitsFound = 0;
int idx = 0;
while(idx < len) {
if(s[idx] == ' ') {
idx++;
continue;
}
if(is_digit(s[idx])) {
// Digit found. Check how many digits follow.
int nDigits = 1;
for(int k = 1; k < len; k++) {
if(is_digit(s[idx+k]))
nDigits++;
else
break;
}
idx += nDigits;
nDigitsFound++;
}
else
return false;
}
if(nDigitsFound == 3)
return true;
return false;
}
int main(int argc, char* argv[])
{
printf("convert_basis_set_file 1.0\n");
printf("Written by Elias Rudberg\n");
printf("Source modified on Mon 9 Nov 13:22:39 CET 2015\n");
if(argc != 3) {
printf("usage: convert_basis_set_file infile outfile\n");
return -1;
}
const char* inFileName = argv[1];
const char* outFileName = argv[2];
printf("inFileName = '%s', outFileName = '%s'\n", inFileName, outFileName);
FILE* inFile = fopen(inFileName, "rb");
if(!inFile) {
printf("Error opening inFile '%s' for reading.\n", inFileName);
return -1;
}
FILE* outFile = fopen(outFileName, "wb");
if(!outFile) {
printf("Error opening outFile '%s' for writing.\n", outFileName);
return -1;
}
const int MAXFILESIZE = 8888888;
std::vector buf(MAXFILESIZE);
memset(&buf[0], 0x00, MAXFILESIZE);
if(fread(&buf[0], 1, MAXFILESIZE, inFile) <= 0) {
printf("Error reading inFile\n");
return -1;
}
if(buf[MAXFILESIZE-1] != '\0') {
printf("Error: zero not found at end of buffer. File too large?\n");
return -1;
}
// Count number of lines in file
int nLines = 0;
for(int i = 0; i < MAXFILESIZE; i++) {
if(buf[i] == '\n')
nLines++;
}
nLines++;
std::vector lines(nLines);
const char* p = &buf[0];
int lineCount = 0;
while(*p != '\0') {
// Find end of line
const char* q = p;
while(*q != '\n' && *q != '\0')
q++;
int nChars = q - p;
char lineStr[nChars+1];
memcpy(lineStr, p, nChars);
lineStr[nChars] = '\0';
lines[lineCount] = lineStr;
lineCount++;
p = q;
if(*q == '\n')
p++;
}
printf("lineCount = %d\n", lineCount);
const std::string str_s = "$ S-TYPE FUNCTIONS";
const std::string str_p = "$ P-TYPE FUNCTIONS";
const std::string str_d = "$ D-TYPE FUNCTIONS";
const std::string str_f = "$ F-TYPE FUNCTIONS";
const std::string str_g = "$ G-TYPE FUNCTIONS";
const std::string str_h = "$ H-TYPE FUNCTIONS";
const std::string str_i = "$ I-TYPE FUNCTIONS";
// Check how many atom types there are
int nAtomTypes = 0;
for(int i = 0; i < lineCount; i++) {
std::string & currLine = lines[i];
if(currLine == str_s)
nAtomTypes++;
}
printf("nAtomTypes = %d\n", nAtomTypes);
assert(nAtomTypes >= 1);
std::vector linesToInsert(nAtomTypes);
int linesToInsertCount = 0;
// OK, now we have extracted the lines.
// Look for lines containing three integer numbers.
int lineIdx = 0;
while(lineIdx < lineCount) {
std::string & currLine = lines[lineIdx];
if(checkIfLineHasThreeNumbers(currLine)) {
// Now previous line must be one of the following strings:
assert(lineIdx > 5);
std::string & prevLine = lines[lineIdx-1];
if(prevLine != str_s &&
prevLine != str_p &&
prevLine != str_d &&
prevLine != str_f &&
prevLine != str_g &&
prevLine != str_h &&
prevLine != str_i) {
printf("ERROR: string like 'X-TYPE FUNCTIONS' not found where expected.\n");
return -1;
}
if(prevLine == str_s) {
// Now we found a place where info about nuclear charge should be inserted.
std::string & prevLine2 = lines[lineIdx-2];
std::string atomName = getSecondWordFromLine(prevLine2);
int charge = getChargeForAtomName(atomName);
if(charge <= 0) {
printf("ERROR: getChargeForAtomName failed for atomName = '%s'\n", atomName.c_str());
return -1;
}
printf("atomName = '%s', charge = %d\n", atomName.c_str(), charge);
char s[88];
sprintf(s, "a %d", charge);
std::string lineToInsert = s;
linesToInsert[linesToInsertCount] = lineToInsert;
linesToInsertCount++;
}
}
lineIdx++;
}
assert(linesToInsertCount == nAtomTypes);
int nLinesFinal = nLines + nAtomTypes;
std::vector linesFinal(nLinesFinal);
// No go through all lines again, creating linesFinal.
lineIdx = 0;
int lineIdx2 = 0;
int atomTypeCounter = 0;
while(lineIdx < lineCount) {
// Find next str_s line
int foundIdx = -1;
for(int idxTmp = lineIdx; idxTmp < lineCount; idxTmp++) {
if(lines[idxTmp] == str_s) {
foundIdx = idxTmp;
break;
}
}
assert(foundIdx >= 0);
while(lineIdx < foundIdx)
linesFinal[lineIdx2++] = lines[lineIdx++];
linesFinal[lineIdx2++] = linesToInsert[atomTypeCounter++];
linesFinal[lineIdx2++] = lines[lineIdx++];
if(atomTypeCounter == nAtomTypes) {
while(lineIdx < lineCount)
linesFinal[lineIdx2++] = lines[lineIdx++];
}
}
for(int i = 0; i < nLinesFinal; i++)
fprintf(outFile, "%s\n", linesFinal[i].c_str());
fclose(inFile);
fclose(outFile);
printf("Done, file '%s' created OK, nLinesFinal = %d.\n", outFileName, nLinesFinal);
return 0;
}