1 /********************************************************************** 2 (C) 2008-2010 by Jens Thomas 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation version 2 of the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 ***********************************************************************/ 13 #include <openbabel/babelconfig.h> 14 #include <openbabel/obmolecformat.h> 15 #include <openbabel/mol.h> 16 #include <openbabel/atom.h> 17 #include <openbabel/bond.h> 18 #include <openbabel/obiter.h> 19 #include <openbabel/elements.h> 20 #include <openbabel/generic.h> 21 #include <openbabel/internalcoord.h> 22 23 24 #include <algorithm> 25 #include <cmath> 26 27 #ifdef _MSC_VER 28 #include <regex> 29 #else 30 #include <regex.h> 31 #endif 32 33 using namespace std; 34 35 namespace OpenBabel 36 { 37 #define BOHR_TO_ANGSTROM 0.529177249 38 #define ANGSTROM_TO_BOHR 1.889725989 39 40 class GAMESSUKFormat 41 { 42 /* Base class for GAMESS-UK readers with various utility functions that are used by 43 * both input and output readers. 44 * 45 * The most important is ReadGeometry, which takes a list of strings defining the 46 * geometry (in the style of a GAMESS-UK input deck) and creates the OBMol from it. 47 * This routine supports both Zmatrix and Cartesian formats, although currently not 48 * mixed decks. 49 */ 50 51 public: 52 bool ReadGeometry(OBMol &mol, vector<string> &geomList); 53 bool ReadVariables(istream &ifs, double factor, string stopstr); 54 bool ReadLineCartesian(OBAtom *atom, vector<string> &tokens, double factor); 55 bool ReadLineZmatrix(OBMol &mol, OBAtom *atom, vector<string> &tokens, double factor, int *zmatLineCount); 56 double Rescale(string text); 57 bool IsUnits(string text); 58 /** 59 * Converts a string to a numerical type 60 * This purloined from: http://www.codeguru.com/forum/showthread.php?t=231054 61 */ 62 template <class T> from_string(T & t,const std::string & s,std::ios_base & (* f)(std::ios_base &))63 bool from_string(T& t, const std::string& s, 64 std::ios_base& (*f)(std::ios_base&)) 65 { 66 std::istringstream iss(s); 67 return !(iss >> f >> t).fail(); 68 } 69 70 // Variables 71 enum ReadMode_t {CARTESIAN, ZMATRIX, VARIABLES, CONSTANTS, SKIP}; 72 ReadMode_t ReadMode; 73 char buffer[BUFF_SIZE]; 74 stringstream errorMsg; 75 76 private: 77 map<string, double> variables; // map from variable name to value 78 vector<OBInternalCoord*> vic; // Holds lists of internal coordinates 79 int LabelToAtomicNumber(string label); 80 }; 81 82 ReadGeometry(OBMol & mol,vector<string> & geomList)83 bool GAMESSUKFormat::ReadGeometry(OBMol &mol, vector<string> &geomList) 84 { 85 86 /* Read a geometry from a list. Any variables that appear in the geometry need 87 * to be in the variables map that should have been populated before this is called. 88 */ 89 90 if (geomList.size()==0){ 91 obErrorLog.ThrowError(__FUNCTION__, 92 "Problems reading a GAMESS-UK Input file: ReadGeometry got empty list", 93 obWarning); 94 return false; 95 } 96 97 vector<string> tokens; // list of lines and list of tokens on a line 98 string line; // For convenience so we can refer to lines from the iterator as 'line' 99 double factor=BOHR_TO_ANGSTROM; // The coordinate conversion factor for handling bohr/angstrom issues 100 101 mol.BeginModify(); 102 // Clear out any existing information 103 mol.Clear(); 104 vic.clear(); 105 106 ReadMode=SKIP; 107 bool ContainsZmatrix=false; 108 int zmatLineCount=0; 109 110 /* 111 cerr << "ReadGeometry got geometry list: \n"; 112 for (vector<string>::iterator i=geomList.begin(); i !=geomList.end(); i++) { 113 114 // Alias the line 115 line = *i; 116 cerr << "line: " << line << endl; 117 } 118 */ 119 120 for (vector<string>::iterator i=geomList.begin(); i !=geomList.end(); ++i) { 121 122 // Alias the line 123 line = *i; 124 125 //cerr << "ReadGeometry line is: " << line << endl; 126 127 // Check for commas & split with that as the separator if necessary 128 if (line.find(',')!=string::npos) { 129 tokenize(tokens, line, ","); 130 } else { 131 tokenize(tokens, line, " \t\n"); 132 } 133 134 135 // Set the mode 136 if (line.compare(0, 4, "zmat")==0 || line.compare(0, 4, "inte")==0) { 137 ReadMode=ZMATRIX; 138 //cout << "ZMATRIX mode " << ReadMode << endl; 139 //cout << "tokens.size()" << tokens.size() << endl; 140 if (tokens.size()>1) if (IsUnits(tokens[1])) factor=Rescale(tokens[1]); 141 ContainsZmatrix=true; 142 vic.push_back(nullptr); // OBMol indexed from 1 -- potential atom index problem 143 } else if (line.compare(0, 4, "coor")==0 || line.compare(0, 4, "cart")==0 ||line.compare(0, 4, "geom")==0) { 144 ReadMode=CARTESIAN; 145 //cout << "CARTESIAN mode " << ReadMode << endl; 146 if (tokens.size()>1) if (IsUnits(tokens[1])) factor=Rescale(tokens[1]); 147 148 /* 149 We need to have read the variables first 150 } else if (line.compare(0, 4, "vari")==0) { 151 ReadMode=VARIABLES; 152 //cout << "VARIABLES mode "<< ReadMode << endl; 153 if (tokens.size() == 2) factor=Rescale(tokens[1]); 154 //cout << "Factor now " << factor << endl; 155 } else if (line.compare(0, 4, "cons")==0) { 156 ReadMode=CONSTANTS; 157 //cout << "CONSTANTS mode\n"; 158 if (tokens.size() == 2) 159 factor=Rescale(tokens[1]); 160 //cout << "Factor now " << factor << endl; 161 */ 162 163 } else if (line.compare(0, 3, "end")==0) { 164 ReadMode=SKIP; 165 //cout << "SKIP mode " << ReadMode << endl; 166 } else { 167 if (ReadMode==SKIP) continue; 168 if (ReadMode==ZMATRIX) { 169 // Create an atom 170 OBAtom *atom = mol.NewAtom(); 171 // Read the ZMatrix definition line 172 if (! ReadLineZmatrix(mol,atom,tokens,factor,&zmatLineCount) ) 173 { 174 errorMsg << "Problems reading a GAMESS-UK Input file: " 175 << "Could not read zmat line: " << line; 176 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , 177 obWarning); 178 return (false); 179 } 180 181 } // End ReadMode ZMATRIX 182 183 if (ReadMode==CARTESIAN) { 184 OBAtom *atom = mol.NewAtom(); 185 if (! ReadLineCartesian(atom,tokens,factor) ) 186 { 187 errorMsg << "Problems reading a GAMESS-UK Input file: " 188 << "Could not read xyz line: " << line; 189 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , 190 obWarning); 191 return (false); 192 } 193 194 } // End ReadMode CARTESIAN 195 196 197 } // End Test for first chars on line 198 } // End loop over lines 199 200 201 if (ContainsZmatrix)InternalToCartesian(vic,mol); 202 mol.EndModify(); 203 204 return true; 205 } // End Read Geometry 206 IsUnits(string text)207 bool GAMESSUKFormat::IsUnits(string text) 208 { 209 /* See if the supplied string specifies a unit */ 210 211 if ( text.compare(0, 4, "angs")==0 || 212 text.compare(0, 4, "bohr")==0 || 213 text.compare(0, 4, "a.u.")==0 || 214 text.compare(0, 2, "au")==0) { 215 return true; 216 } else { 217 return false; 218 } 219 } 220 Rescale(string text)221 double GAMESSUKFormat::Rescale(string text) 222 { 223 /* Return the correct scale factor given a string identifying the units */ 224 225 if (! IsUnits(text) ){ 226 errorMsg << "Problems reading GUK input - bad scale factor: " << text; 227 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); 228 return -1.0; 229 } 230 231 if (text.compare(0, 4, "angs")==0) { 232 return 1.0; 233 } else if (text.compare(0, 4, "bohr")==0||text.compare(0, 4, "a.u.")==0 234 ||text.compare(0, 2, "au")==0) { 235 return BOHR_TO_ANGSTROM; 236 } else { 237 return -1.0; 238 } 239 } 240 LabelToAtomicNumber(string label)241 int GAMESSUKFormat::LabelToAtomicNumber(string label) 242 { 243 /* 244 * Given a string with the label for an atom return the atomic number 245 * As we are using the GetAtomicNum function case is not important 246 */ 247 248 // See if the first 2 characters give us a valid atomic # 249 int Z=OBElements::GetAtomicNum(label.substr(0,2).c_str()); 250 251 // If not try the first one 252 if (Z==0) Z=OBElements::GetAtomicNum(label.substr(0,1).c_str()); 253 254 if (Z==0){ 255 // Check if it's an x (dummy) atom 256 if( label.substr(0,1) != "x" && label.substr(0,1) != "X" ) 257 { 258 // Houston... 259 errorMsg << "LabelToAtomicNumber got bad Label: " << label << std::endl; 260 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); 261 } 262 } 263 return Z; 264 } 265 ReadLineCartesian(OBAtom * atom,vector<string> & tokens,double factor)266 bool GAMESSUKFormat::ReadLineCartesian(OBAtom *atom, vector<string> &tokens, double factor) 267 { 268 269 /* Read a line defining the Cartesian coordinates for an atom 270 * This assumes the line is formatted in GAMESS-UK input style as: 271 * x y z AtomicNumber Label 272 */ 273 274 bool ok=false; 275 int Z; 276 double x,y,z; 277 278 // 4th field is the atomic number 279 ok = from_string<int>(Z, tokens.at(3), std::dec); 280 atom->SetAtomicNum(Z); 281 282 // Read the atom coordinates 283 ok = from_string<double>(x, tokens.at(0), std::dec); 284 if ( ! ok) 285 { 286 // Can't convert to double so see if it's in the variables 287 if (variables.find(tokens[0])==variables.end()) return false; 288 x = variables[tokens[0]]; 289 } 290 291 ok = from_string<double>(y, tokens.at(1), std::dec); 292 if ( ! ok) 293 { 294 // Can't convert to double so see if it's in the variables 295 if (variables.find(tokens[1])==variables.end()) return false; 296 y = variables[tokens[1]]; 297 } 298 299 ok = from_string<double>(z, tokens.at(2), std::dec); 300 if ( ! ok) 301 { 302 // Can't convert to double so see if it's in the variables 303 if (variables.find(tokens[2])==variables.end()) return false; 304 z = variables[tokens[2]]; 305 } 306 307 // Convert to Angstroms 308 x=x*factor; 309 y=y*factor; 310 z=z*factor; 311 atom->SetVector(x, y, z); //set coordinates 312 return true; 313 } 314 ReadLineZmatrix(OBMol & mol,OBAtom * atom,vector<string> & tokens,double factor,int * zmatLineCount)315 bool GAMESSUKFormat::ReadLineZmatrix(OBMol &mol, OBAtom *atom, vector<string> &tokens, double factor, int *zmatLineCount) 316 { 317 /* 318 * Read a line from a GAMESS-UK input defining an atom in Inernal Coordinates. 319 * We create a list of OBInternalCoords that match the list of atoms in the molecule 320 * that are defined using internal coordinates 321 */ 322 323 double var; 324 bool ok=false; 325 int n; 326 327 vic.push_back(new OBInternalCoord); 328 atom->SetAtomicNum(LabelToAtomicNumber(tokens[0])); 329 330 switch (*zmatLineCount) { 331 case 0: 332 break; 333 334 case 1: 335 if (tokens.size() < 3) {return false;} 336 337 // Specify the atom that defines the distance to this one 338 ok = from_string<int>(n, tokens.at(1), std::dec); 339 vic[*zmatLineCount]->_a = mol.GetAtom(n); 340 341 // Get the distance 342 ok = from_string<double>(var, tokens.at(2), std::dec); 343 if ( !ok ) 344 { 345 // Can't convert to double so see if it's in the variables 346 if (variables.find(tokens[2])==variables.end()) return false; 347 var = variables[tokens[2]]; 348 } 349 vic[*zmatLineCount]->_dst = var; 350 break; 351 352 case 2: 353 if (tokens.size() < 5) {return false;} 354 355 // Specify the atom that defines the distance to this one 356 ok = from_string<int>(n, tokens.at(1), std::dec); 357 vic[*zmatLineCount]->_a = mol.GetAtom(n); 358 359 // Get the distance 360 ok = from_string<double>(var, tokens.at(2), std::dec); 361 if ( !ok ) 362 { 363 // Can't convert to double so see if it's in the variables 364 if (variables.find(tokens[2])==variables.end()) return false; 365 var = variables[tokens[2]]; 366 } 367 vic[*zmatLineCount]->_dst = var; 368 369 // Specify atom defining angle 370 ok = from_string<int>(n, tokens.at(3), std::dec); 371 vic[*zmatLineCount]->_b = mol.GetAtom(n); 372 // Get the angle 373 ok = from_string<double>(var, tokens.at(4), std::dec); 374 if ( !ok ) 375 { 376 // Can't convert to double so see if it's in the variables 377 if (variables.find(tokens[4])==variables.end()) return false; 378 var = variables[tokens[4]]; 379 } 380 vic[*zmatLineCount]->_ang = var; 381 break; 382 383 default: 384 if (tokens.size() < 7) {return false;} 385 386 ok = from_string<int>(n, tokens.at(1), std::dec); 387 vic[*zmatLineCount]->_a = mol.GetAtom(n); 388 // Get the distance 389 ok = from_string<double>(var, tokens.at(2), std::dec); 390 if ( !ok ) 391 { 392 // Can't convert to double so see if it's in the variables 393 if (variables.find(tokens[2])==variables.end()) return false; 394 var = variables[tokens[2]]; 395 } 396 vic[*zmatLineCount]->_dst = var; 397 398 ok = from_string<int>(n, tokens.at(3), std::dec); 399 vic[*zmatLineCount]->_b = mol.GetAtom(n); 400 // Get the angle 401 ok = from_string<double>(var, tokens.at(4), std::dec); 402 if ( !ok ) 403 { 404 // Can't convert to double so see if it's in the variables 405 if (variables.find(tokens[4])==variables.end()) return false; 406 var = variables[tokens[4]]; 407 } 408 vic[*zmatLineCount]->_ang = var; 409 410 ok = from_string<int>(n, tokens.at(5), std::dec); 411 vic[*zmatLineCount]->_c = mol.GetAtom(n); 412 // Get the torsion angle 413 ok = from_string<double>(var, tokens.at(6), std::dec); 414 if ( !ok ) 415 { 416 // Can't convert to double so see if it's in the variables 417 if (variables.find(tokens[6])==variables.end()) return false; 418 var = variables[tokens[6]]; 419 } 420 vic[*zmatLineCount]->_tor = var; 421 } 422 423 (*zmatLineCount)++; 424 return true; 425 } 426 ReadVariables(istream & ifs,double factor,string stopstr)427 bool GAMESSUKFormat::ReadVariables(istream &ifs, double factor, string stopstr) 428 { 429 /* 430 * This takes an input stream that is positioned where the list of variables 431 * starts and the reads the variables into the supplied map 432 * 433 * This is different to ReadGeometry (which takes a vector of strings as input) because 434 * currently the variables always need to be read after the geometry, so we need to save the 435 * geometry and then read the variables. However this means that we can parse the variables 436 * directly into a map and don't need to keep a copy of the specifcation as strings. 437 * 438 * stopstr is a string that defines when we stop reading 439 */ 440 441 string line; 442 vector<string> tokens; 443 bool ok=false; 444 double var; 445 446 // Now read in all the varibles 447 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) { 448 449 // Skip commnents 450 if (EQn(buffer, "#", 1) || EQn(buffer, "?", 1)) 451 continue; 452 453 // Copy line to a C++ string and convert to lower case 454 // & remove leading and trailing spaces 455 line = buffer; 456 // transform(method.begin(), method.end(), method.begin(), ::tolower); 457 ToLower(line); 458 Trim(line); 459 460 // Check for end of variables 461 if (line.length()==0 && stopstr.length()==0) break; 462 if (stopstr.length()>0 && line.compare(0, stopstr.length(), stopstr)==0) break; 463 464 // Check for commas & split with that as the separator if necessary 465 if (line.find(',')!=string::npos) { 466 tokenize(tokens, line, ","); 467 } else { 468 tokenize(tokens, line, " \t\n"); 469 } 470 471 ok = from_string<double>(var, tokens.at(3), std::dec); 472 if ( !ok ) 473 { 474 errorMsg << "Problems reading a GAMESS-UK file: " 475 << "Could not read variable line: " << line; 476 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); 477 return false; 478 } 479 // Add to list of variables 480 variables[tokens[0]]=var*factor; 481 } 482 483 /* 484 cerr << "Got list of variables: " << endl; 485 for (map<string,double>::iterator i=variables.begin(); i 486 != variables.end(); i++) { 487 cerr << "Name: " << i->first << " Value: " << i->second << endl; 488 } 489 */ 490 491 return true; 492 493 } // end Read Variables 494 495 496 class GAMESSUKInputFormat : public OBMoleculeFormat, public GAMESSUKFormat 497 { 498 public: 499 //Register this format type ID GAMESSUKInputFormat()500 GAMESSUKInputFormat() 501 { 502 OBConversion::RegisterFormat("gukin",this, "chemical/x-gamess-input"); 503 // Command-line keywords 504 //OBConversion::RegisterOptionParam("k", NULL, 1, OBConversion::OUTOPTIONS); 505 // Command-line keyword file 506 //OBConversion::RegisterOptionParam("f", NULL, 1, OBConversion::OUTOPTIONS); 507 } 508 509 Description()510 virtual const char* Description() //required 511 { 512 return 513 "GAMESS-UK Input\n"; 514 }; 515 SpecificationURL()516 virtual const char* SpecificationURL() 517 {return "http://www.cfs.dl.ac.uk";}; //optional 518 GetMIMEType()519 virtual const char* GetMIMEType() 520 { return "chemical/x-gamessuk-input"; }; 521 522 //Flags() can return be any the following combined by | or be omitted if none apply 523 // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY Flags()524 virtual unsigned int Flags() 525 { 526 return READONEONLY; // | NOTREADABLE; 527 }; 528 529 //////////////////////////////////////////////////// 530 /// The "API" interface functions 531 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); 532 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); 533 }; 534 535 //Make an instance of the format class 536 GAMESSUKInputFormat theGAMESSUKInputFormat; 537 538 ReadMolecule(OBBase * pOb,OBConversion * pConv)539 bool GAMESSUKInputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) 540 541 { 542 /* 543 * Stuff to think about: 544 * - At outset check whether we are in zmatrix, cartesian or nw-chem format 545 * (we currently only suppot homogeneous formats - not mixed). 546 * 547 * For each line need to check: 548 * - Is this a comment (?,#) 549 * - Are the tokens separated by commas, if so use tokenize to split at commas 550 * - Is there an 'end' token on the line 551 * 552 * For each line we want to check that we haven't hit a change from c to zm or vv 553 * 554 */ 555 556 OBMol* pmol = dynamic_cast<OBMol*>(pOb); 557 if (pmol == nullptr) 558 return false; 559 560 //Define some references so we can use the old parameter names 561 istream& ifs = *pConv->GetInStream(); 562 OBMol &mol = *pmol; 563 564 // Get a default title as the filename 565 const char* title = pConv->GetTitle(); 566 mol.BeginModify(); 567 mol.SetTitle(title); 568 mol.EndModify(); 569 570 vector<string> geomList, tokens; // list of lines and list of tokens on a line 571 string line; // For convenience so we can refer to lines from the iterator as 'line' 572 ReadMode_t ReadMode=SKIP; 573 double factor=BOHR_TO_ANGSTROM; 574 575 // Read File and copy geometry specification into geomList 576 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) { 577 578 // Skip commnents 579 if (EQn(buffer, "#", 1) || EQn(buffer, "?", 1)) continue; 580 581 // Copy line to a C++ string and convert to lower case 582 // & remove leading and trailing spaces 583 line = buffer; 584 // transform(method.begin(), method.end(), method.begin(), ::tolower); 585 ToLower(line); 586 Trim(line); 587 588 // Start of coordinate specifiation 589 if (line.compare(0, 4, "zmat")==0) 590 { 591 ReadMode=ZMATRIX; 592 geomList.push_back(line); 593 continue; 594 } 595 else if (line.compare(0, 4, "geom")==0) 596 { 597 ReadMode=CARTESIAN; 598 geomList.push_back(line); 599 continue; 600 } 601 602 // Reading the coordinate specification into the list 603 if (ReadMode==ZMATRIX || ReadMode==CARTESIAN) 604 { 605 606 // Variables specification - process directly from filestream 607 // and then remove from the geometry specification 608 if (line.compare(0, 4, "vari")==0 || line.compare(0, 4, "const")==0) 609 { 610 611 // Check for commas & split with that as the separator if necessary 612 if (line.find(',')!=string::npos) 613 tokenize(tokens, line, ","); 614 else 615 tokenize(tokens, line, " \t\n"); 616 617 // See if we need to rescale 618 if (IsUnits(tokens[1])) factor=Rescale(tokens[1]); 619 620 if (! ReadVariables(ifs, factor, "end")) return false; 621 622 ReadMode=SKIP; 623 geomList.push_back("end\n"); 624 continue; 625 } 626 627 if (line.compare(0, 3, "end")==0) ReadMode=SKIP; 628 629 geomList.push_back(line); 630 } 631 632 }// End while reading loop 633 634 // Now go and process the coordinate specification if we got any 635 bool ok = ReadGeometry(mol, geomList); 636 637 if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209 638 mol.EndModify(); 639 return false; 640 } else { 641 if (!pConv->IsOption("b",OBConversion::INOPTIONS)) 642 mol.ConnectTheDots(); 643 if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) 644 mol.PerceiveBondOrders(); 645 return ok; 646 } 647 648 } // End ReadMolecule 649 650 //////////////////////////////////////////////////////////////// 651 WriteMolecule(OBBase * pOb,OBConversion * pConv)652 bool GAMESSUKInputFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) 653 { 654 OBMol* pmol = dynamic_cast<OBMol*>(pOb); 655 if (pmol == nullptr) 656 return false; 657 658 //Define some references so we can use the old parameter names 659 ostream &ofs = *pConv->GetOutStream(); 660 OBMol &mol = *pmol; 661 662 char buffer[BUFF_SIZE]; 663 664 ofs << "title" << endl; 665 ofs << mol.GetTitle() << endl << endl; 666 667 ofs << "#" << endl; 668 ofs << "# NB: Class I directives (e.g. memory, multiplicity, charge etc) go here" << endl; 669 ofs << "#" << endl; 670 ofs << "# For more information see: http://www.cfs.dl.ac.uk/docs/index.shtml" << endl; 671 ofs << "#" << endl; 672 ofs << endl; 673 674 ofs << "geometry angstrom" << endl; 675 FOR_ATOMS_OF_MOL(atom, mol) 676 { 677 snprintf(buffer, BUFF_SIZE, "%15.8f %15.8f %15.8f %3d %3s\n", 678 atom->GetX(), 679 atom->GetY(), 680 atom->GetZ(), 681 atom->GetAtomicNum(), 682 OBElements::GetSymbol(atom->GetAtomicNum()) 683 ); 684 ofs << buffer; 685 } 686 ofs << "end" << endl << endl; 687 688 ofs << endl; 689 ofs << "basis 6-31G" << endl; 690 ofs << endl; 691 692 ofs << "#" << endl; 693 ofs << "# NB: Class II directives go here" << endl; 694 ofs << "#" << endl; 695 ofs << "# To perform a dft calculation with b3lyp and medium quadrature uncomment the below" << endl; 696 ofs << "# dft b3lyp" << endl; 697 ofs << "# dft quadrature medium" << endl; 698 ofs << "#" << endl; 699 ofs << endl; 700 701 ofs << "runtype scf" << endl; 702 ofs << endl; 703 ofs << "enter" << endl; 704 705 return(true); 706 } //End WriteMolecule 707 708 709 class GAMESSUKOutputFormat : public OBMoleculeFormat, public GAMESSUKFormat 710 { 711 public: 712 //Register this format type ID GAMESSUKOutputFormat()713 GAMESSUKOutputFormat() 714 { OBConversion::RegisterFormat("gukout",this, "chemical/x-gamess-output"); } 715 Description()716 virtual const char* Description() //required 717 { return "GAMESS-UK Output\n"; }; 718 SpecificationURL()719 virtual const char* SpecificationURL() 720 {return "http://www.cfs.dl.ac.uk";}; //optional 721 GetMIMEType()722 virtual const char* GetMIMEType() 723 { return "chemical/x-gamessuk-output"; }; 724 725 //////////////////////////////////////////////////// 726 /// The "API" interface functions 727 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); 728 729 private: 730 enum RunType_t { UNKNOWN, SINGLEPOINT, OPTXYZ, OPTZMAT, SADDLE, FREQUENCIES }; 731 vector<string> tokens, geomList; // list of lines and list of tokens on a line 732 string line; // For convenience so we can refer to lines from the iterator as 'line' 733 bool ReadInputZmatrix( OBMol &mol, std::istream &ifs ); 734 bool ReadInitialCartesian( OBMol &mol, std::istream &ifs ); 735 bool ReadOptGeomXyz1( OBMol &mol, std::istream &ifs ); 736 bool ReadOptGeomXyz2( OBMol &mol, std::istream &ifs ); 737 bool ReadNormalModesHessian( OBMol &mol, std::istream &ifs); 738 bool ReadNormalModesForce( OBMol &mol, std::istream &ifs); 739 }; 740 741 //Make an instance of the format class 742 GAMESSUKOutputFormat theGAMESSUKOutputFormat; 743 ReadInputZmatrix(OBMol & mol,std::istream & ifs)744 bool GAMESSUKOutputFormat::ReadInputZmatrix( OBMol &mol, std::istream &ifs ) 745 { 746 /* The zmatrix entered by the user 747 * REM: need to add stuff for "automatic z-matrix generation" as we currently 748 * ignore the zmatrix & just read the cartesian coordinates 749 */ 750 geomList.clear(); 751 752 // skip 2 lines 753 ifs.getline(buffer, BUFF_SIZE) && ifs.getline(buffer, BUFF_SIZE); 754 755 // Stick a header line first 756 geomList.push_back("zmatrix bohr"); 757 758 // Read zmatrix into list until blank line 759 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE) && strlen(buffer) != 0) 760 { 761 line = buffer; 762 // transform(method.begin(), method.end(), method.begin(), ::tolower); 763 ToLower(line); 764 Trim(line); 765 geomList.push_back(line); 766 } 767 768 // Skip 2 lines 769 ifs.getline(buffer, BUFF_SIZE); 770 ifs.getline(buffer, BUFF_SIZE); 771 772 // Check if line is variables line 773 if (strstr(buffer,"name input type hessian minima") != nullptr) 774 { 775 // Skip additional line to be where variables are printed 776 ifs.getline(buffer, BUFF_SIZE); 777 // Read in the variables till we hit blank line 778 if (! ReadVariables(ifs, 1.0, "")) return false; 779 } 780 781 // Now go and process the geometry 782 return ReadGeometry(mol, geomList); 783 } // ReadInputZmatrix 784 ReadInitialCartesian(OBMol & mol,std::istream & ifs)785 bool GAMESSUKOutputFormat::ReadInitialCartesian( OBMol &mol, std::istream &ifs ) 786 { 787 bool ok=false; 788 double x,y,z; 789 int n; 790 791 // Skip 3 lines 792 ifs.getline(buffer, BUFF_SIZE) && 793 ifs.getline(buffer, BUFF_SIZE) && 794 ifs.getline(buffer, BUFF_SIZE); 795 796 // Create regex for the coords 797 // ------label-------- -------charge-------- < seems enough for a match 798 string pattern(" *\\* *[a-zA-Z]{1,2}[0-9]* *[0-9]{1,3}\\.[0-9]{1}"); 799 bool iok; 800 #ifdef _MSC_VER 801 std::tr1::regex myregex; 802 try { 803 myregex.assign(pattern, 804 std::tr1::regex_constants::extended | 805 std::tr1::regex_constants::nosubs); 806 iok = true; 807 } catch (std::tr1::regex_error ex) { 808 iok = false; 809 } 810 #else 811 regex_t *myregex = new regex_t; 812 iok = regcomp(myregex, pattern.c_str(), REG_EXTENDED | REG_NOSUB)==0; 813 #endif 814 if (!iok) cerr << "Error compiling regex in GUK OUTPUT!\n"; 815 816 // Read in the coordinates - we process them directly rather 817 // then use ReadGeometry as we probably should do... 818 mol.BeginModify(); 819 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){ 820 821 // End of geometry block 822 if (strstr(buffer, "*************************") != nullptr) break; 823 #ifdef _MSC_VER 824 if (std::tr1::regex_search(buffer, myregex)) { 825 #else 826 if (regexec(myregex, buffer, 0, nullptr, 0) == 0) { 827 #endif 828 //cerr << "Got Coord line: " << buffer << endl; 829 OBAtom *atom = mol.NewAtom(); 830 tokenize(tokens,buffer," "); 831 832 ok = from_string<int>(n, tokens.at(2), std::dec); 833 atom->SetAtomicNum(n); 834 ok = from_string<double>(x, tokens.at(3), std::dec); 835 x=x*BOHR_TO_ANGSTROM; 836 ok = from_string<double>(y, tokens.at(4), std::dec); 837 y=y*BOHR_TO_ANGSTROM; 838 ok = from_string<double>(z, tokens.at(5), std::dec); 839 z=z*BOHR_TO_ANGSTROM; 840 atom->SetVector(x, y, z); 841 } 842 } 843 mol.EndModify(); 844 #ifndef _MSC_VER 845 regfree(myregex); 846 #endif 847 return true; 848 } // End ReadInitalCartesian 849 850 851 bool GAMESSUKOutputFormat::ReadOptGeomXyz1( OBMol &mol, std::istream &ifs ) 852 { 853 bool ok=false; 854 double x,y,z; 855 int n; 856 857 // Clear the Molecule as we're going to start from scratch again. 858 mol.BeginModify(); 859 mol.Clear(); 860 861 // FF to start of coordinate specification 862 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) { 863 if (strstr(buffer, 864 "atom znuc x y z") != nullptr) break; 865 } 866 867 // Skip 2 lines - should then be at the coordinates 868 ifs.getline(buffer, BUFF_SIZE) && 869 ifs.getline(buffer, BUFF_SIZE); 870 871 // Read in the coordinates - we process them directly rather 872 // then use ReadGeometry as we probably should do... 873 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){ 874 875 // End of geometry block 876 if (strstr(buffer, "*************************") != nullptr) break; 877 878 //cerr << "Got Coord line: " << buffer << endl; 879 OBAtom *atom = mol.NewAtom(); 880 tokenize(tokens,buffer," "); 881 882 ok = from_string<int>(n, tokens.at(2), std::dec); 883 atom->SetAtomicNum(n); 884 ok = from_string<double>(x, tokens.at(3), std::dec); 885 x=x*BOHR_TO_ANGSTROM; 886 ok = from_string<double>(y, tokens.at(4), std::dec); 887 y=y*BOHR_TO_ANGSTROM; 888 ok = from_string<double>(z, tokens.at(5), std::dec); 889 z=z*BOHR_TO_ANGSTROM; 890 atom->SetVector(x, y, z); 891 892 } 893 894 mol.EndModify(); 895 return true; 896 } // End ReadOptGeomXyz 897 898 bool GAMESSUKOutputFormat::ReadOptGeomXyz2( OBMol &mol, std::istream &ifs ) 899 { 900 bool ok=false; 901 double x,y,z; 902 int n; 903 904 // Clear the Molecule as we're going to start from scratch again. 905 mol.BeginModify(); 906 mol.Clear(); 907 908 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) { 909 if (strstr(buffer, 910 " x y z chg tag") != nullptr) break; 911 } 912 913 // Skip 1 line - should then be at the coordinates 914 ifs.getline(buffer, BUFF_SIZE); 915 916 // Read in the coordinates - we process them directly rather 917 // then use ReadGeometry as we probably should do... 918 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){ 919 920 // End of geometry block 921 if (strstr(buffer, "============================================================") != nullptr) break; 922 923 //cerr << "Got Coord line: " << buffer << endl; 924 OBAtom *atom = mol.NewAtom(); 925 tokenize(tokens,buffer," "); 926 927 ok = from_string<int>(n, tokens.at(3), std::dec); 928 atom->SetAtomicNum(n); 929 ok = from_string<double>(x, tokens.at(0), std::dec); 930 x=x*BOHR_TO_ANGSTROM; 931 ok = from_string<double>(y, tokens.at(1), std::dec); 932 y=y*BOHR_TO_ANGSTROM; 933 ok = from_string<double>(z, tokens.at(2), std::dec); 934 z=z*BOHR_TO_ANGSTROM; 935 atom->SetVector(x, y, z); 936 } 937 938 mol.EndModify(); 939 return true; 940 941 } // End ReadOptGeomZmat 942 943 bool GAMESSUKOutputFormat::ReadNormalModesHessian( OBMol &mol, std::istream &ifs) 944 { 945 946 bool ok=false; 947 double dtmp,dtmp2; 948 949 int ncols = 8; // Think this is always the case 950 int natoms = mol.NumAtoms(); 951 int maxroot = natoms*3; 952 953 // Create data structures 954 std::vector< double > frequencies, intensities; 955 std::vector< std::vector< vector3 > > Lx; 956 957 // Set up data structures with null data 958 for( int i=0; i<maxroot; i++ ) 959 { 960 std::vector< vector3 > atoml; 961 for( int j=0; j < natoms; j++ ) 962 { 963 atoml.push_back( vector3(0.0,0.0,0.0) ); 964 } 965 Lx.push_back( atoml ); 966 } 967 968 ifs.getline(buffer, BUFF_SIZE); // skip "===============" line 969 970 int root7; 971 for ( int root1=0; root1 < maxroot; root1+=ncols ) 972 { 973 root7 = root1 + ncols; 974 root7 = min(maxroot,root7); 975 976 //Skip 6 lines to col header with frequencies 977 for( int j=0; j < 6; j++ ) 978 ifs.getline(buffer, BUFF_SIZE); 979 980 tokenize(tokens,buffer," \t\n"); 981 for( std::size_t si=0; si < tokens.size(); si++ ) 982 { 983 ok = from_string<double>(dtmp, tokens.at(si), std::dec); 984 frequencies.push_back(dtmp); 985 intensities.push_back( 0.0 ); // Add placeholder data 986 } 987 988 // Skip 2 lines to where data matrix starts 989 ifs.getline(buffer, BUFF_SIZE); 990 ifs.getline(buffer, BUFF_SIZE); 991 992 int mycols=root7-root1; 993 // Loop over atoms & the x,y,z 994 int atomcount=0; 995 for ( int i=0; i<maxroot; i+=3 ) 996 { 997 for ( int j=0; j<3; j++ ) 998 { 999 ifs.getline(buffer, BUFF_SIZE); 1000 //std::cout << "GOT LINE:" << buffer <<std::endl; 1001 tokenize(tokens,buffer," \t\n"); 1002 int start=1; 1003 if ( j == 0 ) 1004 start=3; 1005 for ( int k=0; k<mycols; k++ ) 1006 { 1007 //std::cout << "Lx[ " << root1+k << " ]" << 1008 // "][ " << atomcount << " ] [ " << j << " ] = " << tokens.at(start+k) << std::endl; 1009 ok = from_string<double>(dtmp, tokens.at(start+k), std::dec); 1010 if ( j==0) 1011 Lx[ root1+k ][ atomcount ].SetX( dtmp ); 1012 else if ( j==1 ) 1013 Lx[ root1+k ][ atomcount ].SetY( dtmp ); 1014 else if ( j==2 ) 1015 Lx[ root1+k ][ atomcount ].SetZ( dtmp ); 1016 } 1017 } // End j loop 1018 atomcount+=1; 1019 } // End loop over atoms 1020 } // loop over root1 1021 1022 // Now skip down to read in intensities 1023 for( int i=0; i<7; i++ ) 1024 { 1025 ifs.getline(buffer, BUFF_SIZE); 1026 } 1027 1028 // loop until we've read them all in 1029 for( std::size_t si=0; si<frequencies.size(); si++ ) 1030 { 1031 ifs.getline(buffer, BUFF_SIZE); 1032 // End of info 1033 if (strstr(buffer, "============") != nullptr) break; 1034 tokenize(tokens,buffer," \t\n"); 1035 1036 ok = from_string<double>(dtmp, tokens.at(1), std::dec); 1037 ok = from_string<double>(dtmp2, tokens.at(6), std::dec); 1038 // Now match them up 1039 for( std::size_t sj=0; sj<frequencies.size(); sj++ ) 1040 { 1041 if ( std::abs( frequencies.at(sj) - dtmp ) < 0.01 ) 1042 { 1043 intensities[sj]= dtmp2; 1044 continue; 1045 } 1046 } 1047 } 1048 1049 //for (int i=0; i< frequencies.size(); i++ ) 1050 // std::cout << "Frequency: " << frequencies.at(i) << " : " << intensities.at(i) << std::endl; 1051 1052 if(frequencies.size()>0) 1053 { 1054 OBVibrationData* vd = new OBVibrationData; 1055 vd->SetData(Lx, frequencies, intensities); 1056 vd->SetOrigin(fileformatInput); 1057 mol.SetData(vd); 1058 } 1059 return ok; 1060 } // End ReadNormalModesHessian 1061 1062 bool GAMESSUKOutputFormat::ReadNormalModesForce( OBMol &mol, std::istream &ifs) 1063 { 1064 1065 bool ok=false; 1066 double dtmp; 1067 1068 int ncols = 9; // Think this is always the case 1069 int natoms = mol.NumAtoms(); 1070 int maxroot = natoms*3; 1071 int start,mycols; 1072 1073 // Create data structures 1074 std::vector< double > frequencies, intensities; 1075 std::vector< std::vector< vector3 > > Lx; 1076 1077 // Set up data structures with null data 1078 for( int i=0; i<maxroot; i++ ) 1079 { 1080 std::vector< vector3 > atoml; 1081 for( int j=0; j < natoms; j++ ) 1082 { 1083 atoml.push_back( vector3(0.0,0.0,0.0) ); 1084 } 1085 Lx.push_back( atoml ); 1086 } 1087 1088 ifs.getline(buffer, BUFF_SIZE); // skip "===============" line 1089 1090 int root7; 1091 for ( int root1=0; root1 < maxroot; root1+=ncols ) 1092 { 1093 root7 = root1 + ncols; 1094 root7 = min(maxroot,root7); 1095 mycols=root7-root1; 1096 1097 //Skip 6 lines to col header with frequencies 1098 for( int j=0; j < 6; j++ ) 1099 ifs.getline(buffer, BUFF_SIZE); 1100 1101 line=buffer; 1102 // Need to manually chop up line 1103 start=20; // Numbers start at col 20 & are 11 characters long 1104 for( int i=0; i<mycols; i++) 1105 { 1106 ok = from_string<double>(dtmp, line.substr(start,12), std::dec); 1107 frequencies.push_back(dtmp); 1108 intensities.push_back( 10.0 ); // Intensities aren't printed so just use 10 1109 start+=12; 1110 } 1111 1112 // Skip 2 lines to where data matrix starts 1113 ifs.getline(buffer, BUFF_SIZE); 1114 ifs.getline(buffer, BUFF_SIZE); 1115 1116 // Loop over atoms & the x,y,z 1117 int atomcount=0; 1118 for ( int i=0; i<maxroot; i+=3 ) 1119 { 1120 //for j in range(3): 1121 for ( int j=0; j<3; j++ ) 1122 { 1123 ifs.getline(buffer, BUFF_SIZE); 1124 //std::cout << "GOT LINE:" << buffer <<std::endl; 1125 tokenize(tokens,buffer," \t\n"); 1126 start=1; 1127 if ( j == 0 ) 1128 start=3; 1129 for ( int k=0; k<mycols; k++ ) 1130 { 1131 // std::cout << "Lx[ " << root1+k << " ]" << 1132 // "][ " << atomcount << " ] [ " << j << " ] = " << tokens.at(start+k) << std::endl; 1133 ok = from_string<double>(dtmp, tokens.at(start+k), std::dec); 1134 if ( j==0) 1135 Lx[ root1+k ][ atomcount ].SetX( dtmp ); 1136 else if ( j==1 ) 1137 Lx[ root1+k ][ atomcount ].SetY( dtmp ); 1138 else if ( j==2 ) 1139 Lx[ root1+k ][ atomcount ].SetZ( dtmp ); 1140 } 1141 } // End j loop 1142 atomcount+=1; 1143 } // End loop over atoms 1144 } // loop over root1 1145 1146 1147 //for (int i=0; i< frequencies.size(); i++ ) 1148 // std::cout << "Frequency: " << frequencies.at(i) << " : " << intensities.at(i) << std::endl; 1149 1150 if(frequencies.size()>0) 1151 { 1152 OBVibrationData* vd = new OBVibrationData; 1153 vd->SetData(Lx, frequencies, intensities); 1154 vd->SetOrigin(fileformatInput); 1155 mol.SetData(vd); 1156 } 1157 return ok; 1158 } // End ReadNormalModesForce 1159 1160 /* 1161 bool GAMESSUKOutputFormat::ReadOptGeomZmat( OBMol &mol, std::istream &ifs ) 1162 { 1163 1164 //Below was for reading in zmatricies - ignore for the time being 1165 1166 // Original geometry specification should still be in geomList 1167 // So just update the variables 1168 //cerr << "Got converged for OPTZMAT\n"; 1169 1170 // FF to variable specification 1171 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) { 1172 if (strstr(buffer, 1173 " variable value hessian") != NULL) break; 1174 } 1175 // Skip a line - should then be at variable specification 1176 ifs.getline(buffer, BUFF_SIZE); 1177 1178 // Process them 1179 if (! ReadVariables(ifs, BOHR_TO_ANGSTROM, 1180 "===============================================")) return false; 1181 1182 // Now go and process with the geometry we read before 1183 return ReadGeometry(mol, geomList); 1184 1185 } //ReadOptGeomZmat 1186 */ 1187 1188 bool GAMESSUKOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) { 1189 1190 /* 1191 Read a GAMESS-UK output file. The reader is currently set up to only return one molecule, i.e 1192 if the run is some sort of optimisation run, then only the optimised geometry is returned. 1193 1194 Previously we parsed in the z-matrix - and the code to do that is still here and should work. 1195 However, the zmatrix is not actually used anywhere in OpenBabel currently - it's just converted 1196 to Cartesians, and this code appears to be buggy, so for the time being we just stick to reading 1197 Cartesians. 1198 */ 1199 1200 OBMol *pmol = dynamic_cast<OBMol*>(pOb); 1201 if (pmol == nullptr) 1202 return false; 1203 1204 //Define some references so we can use the old parameter names 1205 istream& ifs = *pConv->GetInStream(); 1206 OBMol &mol = *pmol; 1207 1208 // Get a default title as the filename 1209 const char* title = pConv->GetTitle(); 1210 mol.BeginModify(); 1211 mol.SetTitle(title); 1212 mol.EndModify(); 1213 1214 RunType_t RunType=UNKNOWN; 1215 bool ok; 1216 std::string runt; 1217 1218 while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) 1219 { 1220 1221 if (strstr(buffer, " input z-matrix") != nullptr) 1222 { 1223 /* OpenBabel's handling of zmatricies is currently too buggy and the zmatrix 1224 * read in isn't currently used - it's just converted to cartesians, so we 1225 * can skip this for the time being 1226 */ 1227 continue; 1228 /* 1229 ok = ReadInputZmatrix( mol, ifs ); 1230 // Set Runtype to SINGLEPOINT so we don't read in the cartesians 1231 RunType=SINGLEPOINT; 1232 */ 1233 } // End Reading user z-matrix 1234 1235 // Read the cartesian coordinates if we've not read in the ZMATRIX 1236 if (strstr(buffer, "* charge x y z shells") != nullptr && 1237 RunType==UNKNOWN) 1238 ok = ReadInitialCartesian( mol, ifs ); 1239 1240 // Determine the RunType - affects how we move on from here. 1241 if (strstr(buffer, " * RUN TYPE") != nullptr) 1242 { 1243 tokenize(tokens,buffer," \t\n"); 1244 runt=tokens[3].substr(0,5); 1245 if(runt=="optxy") RunType=OPTXYZ; 1246 else if (runt=="optim") RunType=OPTZMAT; 1247 else if (runt=="saddl") RunType=SADDLE; 1248 continue; 1249 } // End RUNTYPE 1250 1251 // Read the optimised geometry 1252 if (strstr(buffer, "optimization converged") != nullptr) 1253 { 1254 if (RunType==OPTXYZ) 1255 ok = ReadOptGeomXyz1( mol, ifs ); 1256 else if (RunType==OPTZMAT || RunType==SADDLE) 1257 ok = ReadOptGeomXyz2( mol, ifs ); 1258 } // End read optimised geometry 1259 1260 // Frequencies for runtype hessian 1261 if (strstr(buffer, "cartesians to normal") != nullptr) 1262 ok = ReadNormalModesHessian( mol, ifs); 1263 1264 // Frequencies for runtype force 1265 if (strstr(buffer, "eigenvectors of cartesian") != nullptr) 1266 ok = ReadNormalModesForce( mol, ifs); 1267 1268 } // End Reading loop 1269 1270 if (mol.NumAtoms() == 0) { // Something went wrong 1271 mol.EndModify(); 1272 return false; 1273 } else { 1274 mol.BeginModify(); 1275 if (!pConv->IsOption("b",OBConversion::INOPTIONS)) 1276 mol.ConnectTheDots(); 1277 if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS)) 1278 mol.PerceiveBondOrders(); 1279 mol.EndModify(); 1280 return true; 1281 } 1282 1283 } // End GAMESSUKOutputFormat::ReadMolecule 1284 1285 1286 } //namespace OpenBabel 1287