1 /**********************************************************************
2   (C) 2008-2010 by Jens Thomas
3 
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation version 2 of the License.
7 
8   This program is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   GNU General Public License for more details.
12 ***********************************************************************/
13 #include <openbabel/babelconfig.h>
14 #include <openbabel/obmolecformat.h>
15 #include <openbabel/mol.h>
16 #include <openbabel/atom.h>
17 #include <openbabel/bond.h>
18 #include <openbabel/obiter.h>
19 #include <openbabel/elements.h>
20 #include <openbabel/generic.h>
21 #include <openbabel/internalcoord.h>
22 
23 
24 #include <algorithm>
25 #include <cmath>
26 
27 #ifdef _MSC_VER
28 #include <regex>
29 #else
30 #include <regex.h>
31 #endif
32 
33 using namespace std;
34 
35 namespace OpenBabel
36 {
37 #define BOHR_TO_ANGSTROM 0.529177249
38 #define ANGSTROM_TO_BOHR 1.889725989
39 
40   class GAMESSUKFormat
41   {
42     /* Base class for GAMESS-UK readers with various utility functions that are used by
43      * both input and output readers.
44      *
45      * The most important is ReadGeometry, which takes a list of strings defining the
46      * geometry (in the style of a GAMESS-UK input deck) and creates the OBMol from it.
47      * This routine supports both Zmatrix and Cartesian formats, although currently not
48      * mixed decks.
49      */
50 
51   public:
52     bool ReadGeometry(OBMol &mol, vector<string> &geomList);
53     bool ReadVariables(istream &ifs, double factor, string stopstr);
54     bool ReadLineCartesian(OBAtom *atom, vector<string> &tokens, double factor);
55     bool ReadLineZmatrix(OBMol &mol, OBAtom *atom, vector<string> &tokens, double factor, int *zmatLineCount);
56     double Rescale(string text);
57     bool IsUnits(string text);
58     /**
59      * Converts a string to a numerical type
60      * This purloined from: http://www.codeguru.com/forum/showthread.php?t=231054
61      */
62     template <class T>
from_string(T & t,const std::string & s,std::ios_base & (* f)(std::ios_base &))63     bool from_string(T& t, const std::string& s,
64                      std::ios_base& (*f)(std::ios_base&))
65     {
66       std::istringstream iss(s);
67       return !(iss >> f >> t).fail();
68     }
69 
70     // Variables
71     enum ReadMode_t {CARTESIAN, ZMATRIX, VARIABLES, CONSTANTS, SKIP};
72     ReadMode_t ReadMode;
73     char buffer[BUFF_SIZE];
74     stringstream errorMsg;
75 
76   private:
77     map<string, double> variables; // map from variable name to value
78     vector<OBInternalCoord*> vic; // Holds lists of internal coordinates
79     int LabelToAtomicNumber(string label);
80   };
81 
82 
ReadGeometry(OBMol & mol,vector<string> & geomList)83   bool GAMESSUKFormat::ReadGeometry(OBMol &mol, vector<string> &geomList)
84   {
85 
86     /* Read a geometry from a list. Any variables that appear in the geometry need
87      * to be in the variables map that should have been populated before this is called.
88      */
89 
90     if (geomList.size()==0){
91       obErrorLog.ThrowError(__FUNCTION__,
92                             "Problems reading a GAMESS-UK Input file: ReadGeometry got empty list",
93                             obWarning);
94       return false;
95     }
96 
97     vector<string> tokens; // list of lines and list of tokens on a line
98     string line; // For convenience so we can refer to lines from the iterator as 'line'
99     double factor=BOHR_TO_ANGSTROM; // The coordinate conversion factor for handling bohr/angstrom issues
100 
101     mol.BeginModify();
102     // Clear out any existing information
103     mol.Clear();
104     vic.clear();
105 
106     ReadMode=SKIP;
107     bool ContainsZmatrix=false;
108     int zmatLineCount=0;
109 
110     /*
111       cerr << "ReadGeometry got geometry list: \n";
112       for (vector<string>::iterator i=geomList.begin(); i !=geomList.end(); i++) {
113 
114       // Alias the line
115       line = *i;
116       cerr << "line: " << line << endl;
117       }
118     */
119 
120     for (vector<string>::iterator i=geomList.begin(); i !=geomList.end(); ++i) {
121 
122       // Alias the line
123       line = *i;
124 
125       //cerr << "ReadGeometry line is: " << line << endl;
126 
127       // Check for commas & split with that as the separator if necessary
128       if (line.find(',')!=string::npos) {
129         tokenize(tokens, line, ",");
130       } else {
131         tokenize(tokens, line, " \t\n");
132       }
133 
134 
135       // Set the mode
136       if (line.compare(0, 4, "zmat")==0 || line.compare(0, 4, "inte")==0) {
137         ReadMode=ZMATRIX;
138         //cout << "ZMATRIX mode " << ReadMode << endl;
139         //cout << "tokens.size()" << tokens.size() << endl;
140         if (tokens.size()>1) if (IsUnits(tokens[1])) factor=Rescale(tokens[1]);
141         ContainsZmatrix=true;
142         vic.push_back(nullptr); // OBMol indexed from 1 -- potential atom index problem
143       } else if (line.compare(0, 4, "coor")==0 || line.compare(0, 4, "cart")==0 ||line.compare(0, 4, "geom")==0) {
144         ReadMode=CARTESIAN;
145         //cout << "CARTESIAN mode " << ReadMode << endl;
146         if (tokens.size()>1) if (IsUnits(tokens[1])) factor=Rescale(tokens[1]);
147 
148         /*
149           We need to have read the variables first
150           } else if (line.compare(0, 4, "vari")==0) {
151           ReadMode=VARIABLES;
152           //cout << "VARIABLES mode "<< ReadMode << endl;
153           if (tokens.size() == 2) factor=Rescale(tokens[1]);
154           //cout << "Factor now " << factor << endl;
155           } else if (line.compare(0, 4, "cons")==0) {
156           ReadMode=CONSTANTS;
157           //cout << "CONSTANTS mode\n";
158           if (tokens.size() == 2)
159           factor=Rescale(tokens[1]);
160           //cout << "Factor now " << factor << endl;
161           */
162 
163       } else if (line.compare(0, 3, "end")==0) {
164         ReadMode=SKIP;
165         //cout << "SKIP mode " << ReadMode << endl;
166       } else {
167         if (ReadMode==SKIP) continue;
168         if (ReadMode==ZMATRIX) {
169           // Create an atom
170           OBAtom *atom = mol.NewAtom();
171           // Read the ZMatrix definition line
172           if (! ReadLineZmatrix(mol,atom,tokens,factor,&zmatLineCount) )
173             {
174               errorMsg << "Problems reading a GAMESS-UK Input file: "
175                        << "Could not read zmat line: " << line;
176               obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() ,
177                                     obWarning);
178               return (false);
179             }
180 
181         } // End ReadMode ZMATRIX
182 
183         if (ReadMode==CARTESIAN) {
184           OBAtom *atom = mol.NewAtom();
185           if (! ReadLineCartesian(atom,tokens,factor) )
186             {
187               errorMsg << "Problems reading a GAMESS-UK Input file: "
188                        << "Could not read xyz line: " << line;
189               obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() ,
190                                     obWarning);
191               return (false);
192             }
193 
194         } // End ReadMode CARTESIAN
195 
196 
197       } // End Test for first chars on line
198     } // End loop over lines
199 
200 
201     if (ContainsZmatrix)InternalToCartesian(vic,mol);
202     mol.EndModify();
203 
204     return true;
205   } // End Read Geometry
206 
IsUnits(string text)207   bool GAMESSUKFormat::IsUnits(string text)
208   {
209     /* See if the supplied string specifies a unit */
210 
211     if ( text.compare(0, 4, "angs")==0 ||
212          text.compare(0, 4, "bohr")==0 ||
213          text.compare(0, 4, "a.u.")==0 ||
214          text.compare(0, 2, "au")==0) {
215       return true;
216     } else {
217       return false;
218     }
219   }
220 
Rescale(string text)221   double GAMESSUKFormat::Rescale(string text)
222   {
223     /* Return the correct scale factor given a string identifying the units */
224 
225     if (! IsUnits(text) ){
226       errorMsg << "Problems reading GUK input - bad scale factor: " << text;
227       obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
228       return -1.0;
229     }
230 
231     if (text.compare(0, 4, "angs")==0) {
232       return 1.0;
233     } else if (text.compare(0, 4, "bohr")==0||text.compare(0, 4, "a.u.")==0
234                ||text.compare(0, 2, "au")==0) {
235       return BOHR_TO_ANGSTROM;
236     } else {
237       return -1.0;
238     }
239   }
240 
LabelToAtomicNumber(string label)241   int GAMESSUKFormat::LabelToAtomicNumber(string label)
242   {
243     /*
244      * Given a string with the label for an atom return the atomic number
245      * As we are using the GetAtomicNum function case is not important
246      */
247 
248     // See if the first 2 characters give us a valid atomic #
249     int Z=OBElements::GetAtomicNum(label.substr(0,2).c_str());
250 
251     // If not try the first one
252     if (Z==0) Z=OBElements::GetAtomicNum(label.substr(0,1).c_str());
253 
254     if (Z==0){
255       // Check if it's an x (dummy) atom
256       if(  label.substr(0,1) != "x" && label.substr(0,1) != "X" )
257         {
258           // Houston...
259           errorMsg << "LabelToAtomicNumber got bad Label: " << label << std::endl;
260           obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
261         }
262     }
263     return Z;
264   }
265 
ReadLineCartesian(OBAtom * atom,vector<string> & tokens,double factor)266   bool GAMESSUKFormat::ReadLineCartesian(OBAtom *atom, vector<string> &tokens, double factor)
267   {
268 
269     /*  Read a line defining the Cartesian coordinates for an atom
270      * This assumes the line is formatted in GAMESS-UK input style as:
271      * x y z AtomicNumber Label
272      */
273 
274     bool ok=false;
275     int Z;
276     double x,y,z;
277 
278     // 4th field is the atomic number
279     ok = from_string<int>(Z, tokens.at(3), std::dec);
280     atom->SetAtomicNum(Z);
281 
282     // Read the atom coordinates
283     ok = from_string<double>(x, tokens.at(0), std::dec);
284     if ( ! ok)
285       {
286         // Can't convert to double so see if it's in the variables
287         if (variables.find(tokens[0])==variables.end()) return false;
288         x = variables[tokens[0]];
289       }
290 
291     ok = from_string<double>(y, tokens.at(1), std::dec);
292     if ( ! ok)
293       {
294         // Can't convert to double so see if it's in the variables
295         if (variables.find(tokens[1])==variables.end()) return false;
296         y = variables[tokens[1]];
297       }
298 
299     ok = from_string<double>(z, tokens.at(2), std::dec);
300     if ( ! ok)
301       {
302         // Can't convert to double so see if it's in the variables
303         if (variables.find(tokens[2])==variables.end()) return false;
304         z = variables[tokens[2]];
305       }
306 
307     // Convert to Angstroms
308     x=x*factor;
309     y=y*factor;
310     z=z*factor;
311     atom->SetVector(x, y, z); //set coordinates
312     return true;
313   }
314 
ReadLineZmatrix(OBMol & mol,OBAtom * atom,vector<string> & tokens,double factor,int * zmatLineCount)315   bool GAMESSUKFormat::ReadLineZmatrix(OBMol &mol, OBAtom *atom, vector<string> &tokens, double factor, int *zmatLineCount)
316   {
317     /*
318      * Read a line from a GAMESS-UK input defining an atom in Inernal Coordinates.
319      * We create a list of OBInternalCoords that match the list of atoms in the molecule
320      * that are defined using internal coordinates
321      */
322 
323     double var;
324     bool ok=false;
325     int n;
326 
327     vic.push_back(new OBInternalCoord);
328     atom->SetAtomicNum(LabelToAtomicNumber(tokens[0]));
329 
330     switch (*zmatLineCount) {
331     case 0:
332       break;
333 
334     case 1:
335       if (tokens.size() < 3) {return false;}
336 
337       // Specify the atom that defines the distance to this one
338       ok = from_string<int>(n, tokens.at(1), std::dec);
339       vic[*zmatLineCount]->_a = mol.GetAtom(n);
340 
341       // Get the distance
342       ok = from_string<double>(var, tokens.at(2), std::dec);
343       if ( !ok )
344         {
345           // Can't convert to double so see if it's in the variables
346           if (variables.find(tokens[2])==variables.end()) return false;
347           var = variables[tokens[2]];
348         }
349       vic[*zmatLineCount]->_dst = var;
350       break;
351 
352     case 2:
353       if (tokens.size() < 5) {return false;}
354 
355       // Specify the atom that defines the distance to this one
356       ok = from_string<int>(n, tokens.at(1), std::dec);
357       vic[*zmatLineCount]->_a = mol.GetAtom(n);
358 
359       // Get the distance
360       ok = from_string<double>(var, tokens.at(2), std::dec);
361       if ( !ok )
362         {
363           // Can't convert to double so see if it's in the variables
364           if (variables.find(tokens[2])==variables.end()) return false;
365           var = variables[tokens[2]];
366         }
367       vic[*zmatLineCount]->_dst = var;
368 
369       // Specify atom defining angle
370       ok = from_string<int>(n, tokens.at(3), std::dec);
371       vic[*zmatLineCount]->_b = mol.GetAtom(n);
372       // Get the angle
373       ok = from_string<double>(var, tokens.at(4), std::dec);
374       if ( !ok )
375         {
376           // Can't convert to double so see if it's in the variables
377           if (variables.find(tokens[4])==variables.end()) return false;
378           var = variables[tokens[4]];
379         }
380       vic[*zmatLineCount]->_ang = var;
381       break;
382 
383     default:
384       if (tokens.size() < 7) {return false;}
385 
386       ok = from_string<int>(n, tokens.at(1), std::dec);
387       vic[*zmatLineCount]->_a = mol.GetAtom(n);
388       // Get the distance
389       ok = from_string<double>(var, tokens.at(2), std::dec);
390       if ( !ok )
391         {
392           // Can't convert to double so see if it's in the variables
393           if (variables.find(tokens[2])==variables.end()) return false;
394           var = variables[tokens[2]];
395         }
396       vic[*zmatLineCount]->_dst = var;
397 
398       ok = from_string<int>(n, tokens.at(3), std::dec);
399       vic[*zmatLineCount]->_b = mol.GetAtom(n);
400       // Get the angle
401       ok = from_string<double>(var, tokens.at(4), std::dec);
402       if ( !ok )
403         {
404           // Can't convert to double so see if it's in the variables
405           if (variables.find(tokens[4])==variables.end()) return false;
406           var = variables[tokens[4]];
407         }
408       vic[*zmatLineCount]->_ang = var;
409 
410       ok = from_string<int>(n, tokens.at(5), std::dec);
411       vic[*zmatLineCount]->_c = mol.GetAtom(n);
412       // Get the torsion angle
413       ok = from_string<double>(var, tokens.at(6), std::dec);
414       if ( !ok )
415         {
416           // Can't convert to double so see if it's in the variables
417           if (variables.find(tokens[6])==variables.end()) return false;
418           var = variables[tokens[6]];
419         }
420       vic[*zmatLineCount]->_tor = var;
421     }
422 
423     (*zmatLineCount)++;
424     return true;
425   }
426 
ReadVariables(istream & ifs,double factor,string stopstr)427   bool GAMESSUKFormat::ReadVariables(istream &ifs, double factor, string stopstr)
428   {
429     /*
430      * This takes an input stream that is positioned where the list of variables
431      * starts and the reads the variables into the supplied map
432      *
433      * This is different to ReadGeometry (which takes a vector of strings as input) because
434      * currently the variables always need to be read after the geometry, so we need to save the
435      * geometry and then read the variables. However this means that we can parse the variables
436      * directly into a map and don't need to keep a copy of the specifcation as strings.
437      *
438      * stopstr is a string that defines when we stop reading
439      */
440 
441     string line;
442     vector<string> tokens;
443     bool ok=false;
444     double var;
445 
446     // Now read in all the varibles
447     while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) {
448 
449       // Skip commnents
450       if (EQn(buffer, "#", 1) || EQn(buffer, "?", 1))
451         continue;
452 
453       // Copy line to a C++ string and convert to lower case
454       // & remove leading and trailing spaces
455       line = buffer;
456       // transform(method.begin(), method.end(), method.begin(), ::tolower);
457       ToLower(line);
458       Trim(line);
459 
460       // Check for end of variables
461       if (line.length()==0 && stopstr.length()==0) break;
462       if (stopstr.length()>0 && line.compare(0, stopstr.length(), stopstr)==0) break;
463 
464       // Check for commas & split with that as the separator if necessary
465       if (line.find(',')!=string::npos) {
466         tokenize(tokens, line, ",");
467       } else {
468         tokenize(tokens, line, " \t\n");
469       }
470 
471       ok = from_string<double>(var, tokens.at(3), std::dec);
472       if ( !ok )
473         {
474           errorMsg << "Problems reading a GAMESS-UK  file: "
475                    << "Could not read variable line: " << line;
476           obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
477           return false;
478         }
479       // Add to list of variables
480       variables[tokens[0]]=var*factor;
481     }
482 
483     /*
484       cerr << "Got list of variables: " << endl;
485       for (map<string,double>::iterator i=variables.begin(); i
486       != variables.end(); i++) {
487       cerr << "Name: " << i->first << " Value: " << i->second << endl;
488       }
489     */
490 
491     return true;
492 
493   } // end Read Variables
494 
495 
496   class GAMESSUKInputFormat : public OBMoleculeFormat, public GAMESSUKFormat
497   {
498   public:
499     //Register this format type ID
GAMESSUKInputFormat()500     GAMESSUKInputFormat()
501     {
502       OBConversion::RegisterFormat("gukin",this, "chemical/x-gamess-input");
503       // Command-line keywords
504       //OBConversion::RegisterOptionParam("k", NULL, 1, OBConversion::OUTOPTIONS);
505       // Command-line keyword file
506       //OBConversion::RegisterOptionParam("f", NULL, 1, OBConversion::OUTOPTIONS);
507     }
508 
509 
Description()510     virtual const char* Description() //required
511     {
512       return
513         "GAMESS-UK Input\n";
514     };
515 
SpecificationURL()516     virtual const char* SpecificationURL()
517     {return "http://www.cfs.dl.ac.uk";}; //optional
518 
GetMIMEType()519     virtual const char* GetMIMEType()
520     { return "chemical/x-gamessuk-input"; };
521 
522     //Flags() can return be any the following combined by | or be omitted if none apply
523     // NOTREADABLE  READONEONLY  NOTWRITABLE  WRITEONEONLY
Flags()524     virtual unsigned int Flags()
525     {
526       return READONEONLY; // | NOTREADABLE;
527     };
528 
529     ////////////////////////////////////////////////////
530     /// The "API" interface functions
531     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
532     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
533   };
534 
535   //Make an instance of the format class
536   GAMESSUKInputFormat theGAMESSUKInputFormat;
537 
538 
ReadMolecule(OBBase * pOb,OBConversion * pConv)539   bool GAMESSUKInputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
540 
541   {
542     /*
543      * Stuff to think about:
544      * - At outset check whether we are in zmatrix, cartesian or nw-chem format
545      *   (we currently only suppot homogeneous formats - not mixed).
546      *
547      * For each line need to check:
548      * - Is this a comment (?,#)
549      * - Are the tokens separated by commas, if so use tokenize to split at commas
550      * - Is there an 'end' token on the line
551      *
552      * For each line we want to check that we haven't hit a change from c to zm or vv
553      *
554      */
555 
556     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
557     if (pmol == nullptr)
558       return false;
559 
560     //Define some references so we can use the old parameter names
561     istream& ifs = *pConv->GetInStream();
562     OBMol &mol = *pmol;
563 
564     // Get a default title as the filename
565     const char* title = pConv->GetTitle();
566     mol.BeginModify();
567     mol.SetTitle(title);
568     mol.EndModify();
569 
570     vector<string> geomList, tokens; // list of lines and list of tokens on a line
571     string line; // For convenience so we can refer to lines from the iterator as 'line'
572     ReadMode_t ReadMode=SKIP;
573     double factor=BOHR_TO_ANGSTROM;
574 
575     // Read File and copy geometry specification into geomList
576     while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) {
577 
578       // Skip commnents
579       if (EQn(buffer, "#", 1) || EQn(buffer, "?", 1)) continue;
580 
581       // Copy line to a C++ string and convert to lower case
582       // & remove leading and trailing spaces
583       line = buffer;
584       // transform(method.begin(), method.end(), method.begin(), ::tolower);
585       ToLower(line);
586       Trim(line);
587 
588       // Start of coordinate specifiation
589       if (line.compare(0, 4, "zmat")==0)
590 	{
591 	  ReadMode=ZMATRIX;
592 	  geomList.push_back(line);
593 	  continue;
594 	}
595       else if (line.compare(0, 4, "geom")==0)
596 	{
597 	  ReadMode=CARTESIAN;
598 	  geomList.push_back(line);
599 	  continue;
600 	}
601 
602       // Reading the coordinate specification into the list
603       if (ReadMode==ZMATRIX || ReadMode==CARTESIAN)
604 	{
605 
606 	  // Variables specification - process directly from filestream
607 	  // and then remove from the geometry specification
608 	  if (line.compare(0, 4, "vari")==0 || line.compare(0, 4, "const")==0)
609 	    {
610 
611 	      // Check for commas & split with that as the separator if necessary
612 	      if (line.find(',')!=string::npos)
613 		tokenize(tokens, line, ",");
614 	      else
615 		tokenize(tokens, line, " \t\n");
616 
617 	      // See if we need to rescale
618 	      if (IsUnits(tokens[1])) factor=Rescale(tokens[1]);
619 
620 	      if (! ReadVariables(ifs, factor, "end")) return false;
621 
622 	      ReadMode=SKIP;
623 	      geomList.push_back("end\n");
624 	      continue;
625 	    }
626 
627 	  if (line.compare(0, 3, "end")==0) ReadMode=SKIP;
628 
629 	  geomList.push_back(line);
630 	}
631 
632     }// End while reading loop
633 
634     // Now go and process the coordinate specification if we got any
635     bool ok = ReadGeometry(mol, geomList);
636 
637     if (mol.NumAtoms() == 0) { // e.g., if we're at the end of a file PR#1737209
638       mol.EndModify();
639       return false;
640     } else {
641       if (!pConv->IsOption("b",OBConversion::INOPTIONS))
642         mol.ConnectTheDots();
643       if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
644         mol.PerceiveBondOrders();
645       return ok;
646     }
647 
648   } // End ReadMolecule
649 
650   ////////////////////////////////////////////////////////////////
651 
WriteMolecule(OBBase * pOb,OBConversion * pConv)652   bool GAMESSUKInputFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
653   {
654     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
655     if (pmol == nullptr)
656       return false;
657 
658     //Define some references so we can use the old parameter names
659     ostream &ofs = *pConv->GetOutStream();
660     OBMol &mol = *pmol;
661 
662     char buffer[BUFF_SIZE];
663 
664     ofs << "title" << endl;
665     ofs << mol.GetTitle() << endl << endl;
666 
667     ofs << "#" << endl;
668     ofs << "# NB: Class I directives (e.g. memory, multiplicity, charge etc) go here" << endl;
669     ofs << "#" << endl;
670     ofs << "# For more information see: http://www.cfs.dl.ac.uk/docs/index.shtml" << endl;
671     ofs << "#" << endl;
672     ofs << endl;
673 
674     ofs << "geometry angstrom" << endl;
675     FOR_ATOMS_OF_MOL(atom, mol)
676       {
677         snprintf(buffer, BUFF_SIZE, "%15.8f %15.8f %15.8f %3d %3s\n",
678 		 atom->GetX(),
679 		 atom->GetY(),
680 		 atom->GetZ(),
681 		 atom->GetAtomicNum(),
682 		 OBElements::GetSymbol(atom->GetAtomicNum())
683 		 );
684 	ofs << buffer;
685       }
686     ofs << "end" << endl << endl;
687 
688     ofs << endl;
689     ofs << "basis 6-31G" << endl;
690     ofs << endl;
691 
692     ofs << "#" << endl;
693     ofs << "# NB: Class II directives go here" << endl;
694     ofs << "#" << endl;
695     ofs << "# To perform a dft calculation with b3lyp and medium quadrature uncomment the below" << endl;
696     ofs << "# dft b3lyp" << endl;
697     ofs << "# dft quadrature medium" << endl;
698     ofs << "#" << endl;
699     ofs << endl;
700 
701     ofs << "runtype scf" << endl;
702     ofs << endl;
703     ofs << "enter" << endl;
704 
705     return(true);
706   } //End WriteMolecule
707 
708 
709   class GAMESSUKOutputFormat : public OBMoleculeFormat, public GAMESSUKFormat
710   {
711   public:
712     //Register this format type ID
GAMESSUKOutputFormat()713     GAMESSUKOutputFormat()
714     { OBConversion::RegisterFormat("gukout",this, "chemical/x-gamess-output"); }
715 
Description()716     virtual const char* Description() //required
717     { return "GAMESS-UK Output\n"; };
718 
SpecificationURL()719     virtual const char* SpecificationURL()
720     {return "http://www.cfs.dl.ac.uk";}; //optional
721 
GetMIMEType()722     virtual const char* GetMIMEType()
723     { return "chemical/x-gamessuk-output"; };
724 
725     ////////////////////////////////////////////////////
726     /// The "API" interface functions
727     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
728 
729   private:
730     enum RunType_t { UNKNOWN, SINGLEPOINT, OPTXYZ, OPTZMAT, SADDLE, FREQUENCIES };
731     vector<string> tokens, geomList; // list of lines and list of tokens on a line
732     string line; // For convenience so we can refer to lines from the iterator as 'line'
733     bool ReadInputZmatrix( OBMol &mol, std::istream &ifs );
734     bool ReadInitialCartesian( OBMol &mol, std::istream &ifs );
735     bool ReadOptGeomXyz1( OBMol &mol, std::istream &ifs );
736     bool ReadOptGeomXyz2( OBMol &mol, std::istream &ifs );
737     bool ReadNormalModesHessian( OBMol &mol, std::istream &ifs);
738     bool ReadNormalModesForce( OBMol &mol, std::istream &ifs);
739   };
740 
741   //Make an instance of the format class
742   GAMESSUKOutputFormat theGAMESSUKOutputFormat;
743 
ReadInputZmatrix(OBMol & mol,std::istream & ifs)744   bool GAMESSUKOutputFormat::ReadInputZmatrix( OBMol &mol, std::istream &ifs )
745   {
746     /* The zmatrix entered by the user
747      * REM:  need to add stuff for "automatic z-matrix generation" as we currently
748      * ignore the zmatrix & just read the cartesian coordinates
749      */
750     geomList.clear();
751 
752     // skip 2 lines
753     ifs.getline(buffer, BUFF_SIZE) && ifs.getline(buffer, BUFF_SIZE);
754 
755     // Stick a header line first
756     geomList.push_back("zmatrix bohr");
757 
758     // Read zmatrix into list until blank line
759     while (ifs.good() && ifs.getline(buffer, BUFF_SIZE) && strlen(buffer) != 0)
760       {
761         line = buffer;
762         // transform(method.begin(), method.end(), method.begin(), ::tolower);
763         ToLower(line);
764         Trim(line);
765         geomList.push_back(line);
766       }
767 
768     // Skip 2 lines
769     ifs.getline(buffer, BUFF_SIZE);
770     ifs.getline(buffer, BUFF_SIZE);
771 
772     // Check if line is variables line
773     if (strstr(buffer,"name            input  type     hessian         minima") != nullptr)
774       {
775         // Skip additional line to be where variables are printed
776         ifs.getline(buffer, BUFF_SIZE);
777         // Read in the variables till we hit blank line
778         if (! ReadVariables(ifs, 1.0, "")) return false;
779       }
780 
781     // Now go and process the geometry
782     return ReadGeometry(mol, geomList);
783   } // ReadInputZmatrix
784 
ReadInitialCartesian(OBMol & mol,std::istream & ifs)785   bool GAMESSUKOutputFormat::ReadInitialCartesian( OBMol &mol, std::istream &ifs )
786   {
787     bool ok=false;
788     double x,y,z;
789     int n;
790 
791     // Skip 3 lines
792     ifs.getline(buffer, BUFF_SIZE) &&
793       ifs.getline(buffer, BUFF_SIZE) &&
794       ifs.getline(buffer, BUFF_SIZE);
795 
796     // Create regex for the coords
797     //                     ------label--------   -------charge-------- < seems enough for a match
798     string pattern(" *\\* *[a-zA-Z]{1,2}[0-9]* *[0-9]{1,3}\\.[0-9]{1}");
799     bool iok;
800 #ifdef _MSC_VER
801     std::tr1::regex myregex;
802     try {
803       myregex.assign(pattern,
804                      std::tr1::regex_constants::extended |
805                      std::tr1::regex_constants::nosubs);
806       iok = true;
807     } catch (std::tr1::regex_error ex) {
808       iok = false;
809     }
810 #else
811     regex_t *myregex = new regex_t;
812     iok = regcomp(myregex, pattern.c_str(), REG_EXTENDED | REG_NOSUB)==0;
813 #endif
814     if (!iok) cerr << "Error compiling regex in GUK OUTPUT!\n";
815 
816     // Read in the coordinates - we process them directly rather
817     // then use ReadGeometry as we probably should do...
818     mol.BeginModify();
819     while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){
820 
821       // End of geometry block
822       if (strstr(buffer, "*************************") != nullptr) break;
823 #ifdef _MSC_VER
824       if (std::tr1::regex_search(buffer, myregex)) {
825 #else
826         if (regexec(myregex, buffer, 0, nullptr, 0) == 0) {
827 #endif
828           //cerr << "Got Coord line: " << buffer << endl;
829           OBAtom *atom = mol.NewAtom();
830           tokenize(tokens,buffer," ");
831 
832           ok = from_string<int>(n, tokens.at(2), std::dec);
833           atom->SetAtomicNum(n);
834           ok = from_string<double>(x, tokens.at(3), std::dec);
835           x=x*BOHR_TO_ANGSTROM;
836           ok = from_string<double>(y, tokens.at(4), std::dec);
837           y=y*BOHR_TO_ANGSTROM;
838           ok = from_string<double>(z, tokens.at(5), std::dec);
839           z=z*BOHR_TO_ANGSTROM;
840           atom->SetVector(x, y, z);
841         }
842       }
843       mol.EndModify();
844 #ifndef _MSC_VER
845       regfree(myregex);
846 #endif
847       return true;
848     } // End ReadInitalCartesian
849 
850 
851     bool GAMESSUKOutputFormat::ReadOptGeomXyz1( OBMol &mol, std::istream &ifs )
852     {
853       bool ok=false;
854       double x,y,z;
855       int n;
856 
857       // Clear the Molecule as we're going to start from scratch again.
858       mol.BeginModify();
859       mol.Clear();
860 
861       // FF to start of coordinate specification
862       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) {
863         if (strstr(buffer,
864                    "atom     znuc       x             y             z") != nullptr) break;
865       }
866 
867       // Skip 2 lines - should then be at the coordinates
868       ifs.getline(buffer, BUFF_SIZE) &&
869         ifs.getline(buffer, BUFF_SIZE);
870 
871       // Read in the coordinates - we process them directly rather
872       // then use ReadGeometry as we probably should do...
873       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){
874 
875         // End of geometry block
876         if (strstr(buffer, "*************************") != nullptr) break;
877 
878         //cerr << "Got Coord line: " << buffer << endl;
879         OBAtom *atom = mol.NewAtom();
880         tokenize(tokens,buffer," ");
881 
882         ok = from_string<int>(n, tokens.at(2), std::dec);
883         atom->SetAtomicNum(n);
884         ok = from_string<double>(x, tokens.at(3), std::dec);
885         x=x*BOHR_TO_ANGSTROM;
886         ok = from_string<double>(y, tokens.at(4), std::dec);
887         y=y*BOHR_TO_ANGSTROM;
888         ok = from_string<double>(z, tokens.at(5), std::dec);
889         z=z*BOHR_TO_ANGSTROM;
890         atom->SetVector(x, y, z);
891 
892       }
893 
894       mol.EndModify();
895       return true;
896     } // End ReadOptGeomXyz
897 
898     bool GAMESSUKOutputFormat::ReadOptGeomXyz2( OBMol &mol, std::istream &ifs )
899     {
900       bool ok=false;
901       double x,y,z;
902       int n;
903 
904       // Clear the Molecule as we're going to start from scratch again.
905       mol.BeginModify();
906       mol.Clear();
907 
908       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) {
909         if (strstr(buffer,
910                    "       x              y              z            chg  tag") != nullptr) break;
911       }
912 
913       // Skip 1 line - should then be at the coordinates
914       ifs.getline(buffer, BUFF_SIZE);
915 
916       // Read in the coordinates - we process them directly rather
917       // then use ReadGeometry as we probably should do...
918       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)){
919 
920         // End of geometry block
921         if (strstr(buffer, "============================================================") != nullptr) break;
922 
923         //cerr << "Got Coord line: " << buffer << endl;
924         OBAtom *atom = mol.NewAtom();
925         tokenize(tokens,buffer," ");
926 
927         ok = from_string<int>(n, tokens.at(3), std::dec);
928         atom->SetAtomicNum(n);
929         ok = from_string<double>(x, tokens.at(0), std::dec);
930         x=x*BOHR_TO_ANGSTROM;
931         ok = from_string<double>(y, tokens.at(1), std::dec);
932         y=y*BOHR_TO_ANGSTROM;
933         ok = from_string<double>(z, tokens.at(2), std::dec);
934         z=z*BOHR_TO_ANGSTROM;
935         atom->SetVector(x, y, z);
936       }
937 
938       mol.EndModify();
939       return true;
940 
941     } // End ReadOptGeomZmat
942 
943     bool GAMESSUKOutputFormat::ReadNormalModesHessian( OBMol &mol, std::istream &ifs)
944     {
945 
946       bool ok=false;
947       double dtmp,dtmp2;
948 
949       int ncols = 8; // Think this is always the case
950       int natoms = mol.NumAtoms();
951       int maxroot = natoms*3;
952 
953       // Create data structures
954       std::vector< double > frequencies, intensities;
955       std::vector< std::vector< vector3 > > Lx;
956 
957       // Set up data structures with null data
958       for( int i=0; i<maxroot; i++ )
959         {
960           std::vector< vector3 > atoml;
961           for( int j=0; j < natoms; j++ )
962             {
963               atoml.push_back( vector3(0.0,0.0,0.0) );
964             }
965           Lx.push_back( atoml );
966         }
967 
968       ifs.getline(buffer, BUFF_SIZE); // skip "===============" line
969 
970       int root7;
971       for ( int root1=0; root1 < maxroot; root1+=ncols )
972         {
973           root7 = root1 + ncols;
974           root7 = min(maxroot,root7);
975 
976           //Skip 6 lines to col header with frequencies
977           for( int j=0; j < 6; j++ )
978             ifs.getline(buffer, BUFF_SIZE);
979 
980           tokenize(tokens,buffer," \t\n");
981           for( std::size_t si=0; si < tokens.size(); si++ )
982             {
983               ok = from_string<double>(dtmp, tokens.at(si), std::dec);
984               frequencies.push_back(dtmp);
985               intensities.push_back( 0.0 ); // Add placeholder data
986             }
987 
988           // Skip 2 lines to where data matrix starts
989           ifs.getline(buffer, BUFF_SIZE);
990           ifs.getline(buffer, BUFF_SIZE);
991 
992           int mycols=root7-root1;
993           // Loop over atoms & the x,y,z
994           int atomcount=0;
995           for ( int i=0; i<maxroot; i+=3 )
996             {
997               for ( int j=0; j<3; j++ )
998                 {
999                   ifs.getline(buffer, BUFF_SIZE);
1000                   //std::cout << "GOT LINE:" << buffer <<std::endl;
1001                   tokenize(tokens,buffer," \t\n");
1002                   int start=1;
1003                   if ( j == 0 )
1004                     start=3;
1005                   for ( int k=0; k<mycols; k++ )
1006                     {
1007                       //std::cout << "Lx[ " << root1+k << " ]" <<
1008                       //  "][ " << atomcount << " ] [ " << j << " ] = " << tokens.at(start+k) << std::endl;
1009                       ok = from_string<double>(dtmp, tokens.at(start+k), std::dec);
1010                       if ( j==0)
1011                         Lx[ root1+k ][ atomcount ].SetX( dtmp );
1012                       else if ( j==1 )
1013                         Lx[ root1+k ][ atomcount ].SetY( dtmp );
1014                       else if ( j==2 )
1015                         Lx[ root1+k ][ atomcount ].SetZ( dtmp );
1016                     }
1017                 } // End j loop
1018               atomcount+=1;
1019             } // End loop over atoms
1020         } // loop over root1
1021 
1022       // Now skip down to read in intensities
1023       for( int i=0; i<7; i++ )
1024         {
1025           ifs.getline(buffer, BUFF_SIZE);
1026         }
1027 
1028       // loop until we've read them all in
1029       for( std::size_t si=0; si<frequencies.size(); si++ )
1030         {
1031           ifs.getline(buffer, BUFF_SIZE);
1032           // End of info
1033           if (strstr(buffer, "============") != nullptr) break;
1034           tokenize(tokens,buffer," \t\n");
1035 
1036           ok = from_string<double>(dtmp, tokens.at(1), std::dec);
1037           ok = from_string<double>(dtmp2, tokens.at(6), std::dec);
1038           // Now match them up
1039           for( std::size_t sj=0; sj<frequencies.size(); sj++ )
1040             {
1041               if ( std::abs( frequencies.at(sj) - dtmp ) < 0.01 )
1042                 {
1043                   intensities[sj]= dtmp2;
1044                   continue;
1045                 }
1046             }
1047         }
1048 
1049       //for (int i=0; i< frequencies.size(); i++ )
1050       //  std::cout << "Frequency: " << frequencies.at(i) << " : " << intensities.at(i) << std::endl;
1051 
1052       if(frequencies.size()>0)
1053         {
1054           OBVibrationData* vd = new OBVibrationData;
1055           vd->SetData(Lx, frequencies, intensities);
1056           vd->SetOrigin(fileformatInput);
1057           mol.SetData(vd);
1058         }
1059       return ok;
1060     } // End ReadNormalModesHessian
1061 
1062     bool GAMESSUKOutputFormat::ReadNormalModesForce( OBMol &mol, std::istream &ifs)
1063     {
1064 
1065       bool ok=false;
1066       double dtmp;
1067 
1068       int ncols = 9; // Think this is always the case
1069       int natoms = mol.NumAtoms();
1070       int maxroot = natoms*3;
1071       int start,mycols;
1072 
1073       // Create data structures
1074       std::vector< double > frequencies, intensities;
1075       std::vector< std::vector< vector3 > > Lx;
1076 
1077       // Set up data structures with null data
1078       for( int i=0; i<maxroot; i++ )
1079         {
1080           std::vector< vector3 > atoml;
1081           for( int j=0; j < natoms; j++ )
1082             {
1083               atoml.push_back( vector3(0.0,0.0,0.0) );
1084             }
1085           Lx.push_back( atoml );
1086         }
1087 
1088       ifs.getline(buffer, BUFF_SIZE); // skip "===============" line
1089 
1090       int root7;
1091       for ( int root1=0; root1 < maxroot; root1+=ncols )
1092         {
1093           root7 = root1 + ncols;
1094           root7 = min(maxroot,root7);
1095           mycols=root7-root1;
1096 
1097           //Skip 6 lines to col header with frequencies
1098           for( int j=0; j < 6; j++ )
1099             ifs.getline(buffer, BUFF_SIZE);
1100 
1101           line=buffer;
1102           // Need  to manually chop up line
1103           start=20; // Numbers start at col 20 & are 11 characters long
1104           for( int i=0; i<mycols; i++)
1105             {
1106               ok = from_string<double>(dtmp, line.substr(start,12), std::dec);
1107               frequencies.push_back(dtmp);
1108               intensities.push_back( 10.0 ); // Intensities aren't printed so just use 10
1109               start+=12;
1110             }
1111 
1112           // Skip 2 lines to where data matrix starts
1113           ifs.getline(buffer, BUFF_SIZE);
1114           ifs.getline(buffer, BUFF_SIZE);
1115 
1116           // Loop over atoms & the x,y,z
1117           int atomcount=0;
1118           for ( int i=0; i<maxroot; i+=3 )
1119             {
1120               //for j in range(3):
1121               for ( int j=0; j<3; j++ )
1122                 {
1123                   ifs.getline(buffer, BUFF_SIZE);
1124                   //std::cout << "GOT LINE:" << buffer <<std::endl;
1125                   tokenize(tokens,buffer," \t\n");
1126                   start=1;
1127                   if ( j == 0 )
1128                     start=3;
1129                   for ( int k=0; k<mycols; k++ )
1130                     {
1131                       // std::cout << "Lx[ " << root1+k << " ]" <<
1132                       //  "][ " << atomcount << " ] [ " << j << " ] = " << tokens.at(start+k) << std::endl;
1133                       ok = from_string<double>(dtmp, tokens.at(start+k), std::dec);
1134                       if ( j==0)
1135                         Lx[ root1+k ][ atomcount ].SetX( dtmp );
1136                       else if ( j==1 )
1137                         Lx[ root1+k ][ atomcount ].SetY( dtmp );
1138                       else if ( j==2 )
1139                         Lx[ root1+k ][ atomcount ].SetZ( dtmp );
1140                     }
1141                 } // End j loop
1142               atomcount+=1;
1143             } // End loop over atoms
1144         } // loop over root1
1145 
1146 
1147       //for (int i=0; i< frequencies.size(); i++ )
1148       //  std::cout << "Frequency: " << frequencies.at(i) << " : " << intensities.at(i) << std::endl;
1149 
1150       if(frequencies.size()>0)
1151         {
1152           OBVibrationData* vd = new OBVibrationData;
1153           vd->SetData(Lx, frequencies, intensities);
1154           vd->SetOrigin(fileformatInput);
1155           mol.SetData(vd);
1156         }
1157       return ok;
1158     } // End ReadNormalModesForce
1159 
1160     /*
1161       bool GAMESSUKOutputFormat::ReadOptGeomZmat( OBMol &mol, std::istream &ifs )
1162       {
1163 
1164       //Below was for reading in zmatricies - ignore for the time being
1165 
1166       // Original geometry specification should still be in geomList
1167       // So just update the variables
1168       //cerr << "Got converged for OPTZMAT\n";
1169 
1170       // FF to variable specification
1171       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE)) {
1172       if (strstr(buffer,
1173       " variable           value                hessian") != NULL) break;
1174       }
1175       // Skip a line - should then be at variable specification
1176       ifs.getline(buffer, BUFF_SIZE);
1177 
1178       // Process them
1179       if (! ReadVariables(ifs, BOHR_TO_ANGSTROM,
1180       "===============================================")) return false;
1181 
1182       // Now go and process with the geometry we read before
1183       return ReadGeometry(mol, geomList);
1184 
1185       } //ReadOptGeomZmat
1186     */
1187 
1188     bool GAMESSUKOutputFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv) {
1189 
1190       /*
1191         Read a GAMESS-UK output file. The reader is currently set up to only return one molecule, i.e
1192         if the run is some sort of optimisation run, then only the optimised geometry is returned.
1193 
1194         Previously we parsed in the z-matrix - and the code to do that is still here and should work.
1195         However, the zmatrix is not actually used anywhere in OpenBabel currently - it's just converted
1196         to Cartesians, and this code appears to be buggy, so for the time being we just stick to reading
1197         Cartesians.
1198       */
1199 
1200       OBMol *pmol = dynamic_cast<OBMol*>(pOb);
1201       if (pmol == nullptr)
1202         return false;
1203 
1204       //Define some references so we can use the old parameter names
1205       istream& ifs = *pConv->GetInStream();
1206       OBMol &mol = *pmol;
1207 
1208       // Get a default title as the filename
1209       const char* title = pConv->GetTitle();
1210       mol.BeginModify();
1211       mol.SetTitle(title);
1212       mol.EndModify();
1213 
1214       RunType_t RunType=UNKNOWN;
1215       bool ok;
1216       std::string runt;
1217 
1218       while (ifs.good() && ifs.getline(buffer, BUFF_SIZE))
1219         {
1220 
1221           if (strstr(buffer, "                              input z-matrix") != nullptr)
1222             {
1223               /* OpenBabel's handling of zmatricies is currently too buggy and the zmatrix
1224                * read in isn't currently used - it's just converted to cartesians, so we
1225                * can skip this for the time being
1226                */
1227               continue;
1228               /*
1229                 ok = ReadInputZmatrix( mol, ifs );
1230                 // Set Runtype to SINGLEPOINT so we don't read in the cartesians
1231                 RunType=SINGLEPOINT;
1232               */
1233             } // End Reading user z-matrix
1234 
1235           // Read the cartesian coordinates if we've not read in the ZMATRIX
1236           if (strstr(buffer, "*            charge       x             y              z       shells") != nullptr &&
1237               RunType==UNKNOWN)
1238             ok = ReadInitialCartesian( mol, ifs );
1239 
1240           // Determine the RunType - affects how we move on from here.
1241           if (strstr(buffer, " * RUN TYPE") != nullptr)
1242             {
1243               tokenize(tokens,buffer," \t\n");
1244               runt=tokens[3].substr(0,5);
1245               if(runt=="optxy") RunType=OPTXYZ;
1246               else if (runt=="optim") RunType=OPTZMAT;
1247               else if (runt=="saddl") RunType=SADDLE;
1248               continue;
1249             } // End RUNTYPE
1250 
1251           // Read the optimised geometry
1252           if (strstr(buffer, "optimization converged") != nullptr)
1253             {
1254               if (RunType==OPTXYZ)
1255                 ok = ReadOptGeomXyz1( mol, ifs );
1256               else if (RunType==OPTZMAT || RunType==SADDLE)
1257                 ok = ReadOptGeomXyz2( mol, ifs );
1258             } // End read optimised geometry
1259 
1260           // Frequencies for runtype hessian
1261           if (strstr(buffer, "cartesians to normal") != nullptr)
1262             ok = ReadNormalModesHessian( mol, ifs);
1263 
1264           // Frequencies for runtype force
1265           if (strstr(buffer, "eigenvectors of cartesian") != nullptr)
1266             ok = ReadNormalModesForce( mol, ifs);
1267 
1268         } // End Reading loop
1269 
1270       if (mol.NumAtoms() == 0) { // Something went wrong
1271         mol.EndModify();
1272         return false;
1273       } else {
1274         mol.BeginModify();
1275         if (!pConv->IsOption("b",OBConversion::INOPTIONS))
1276           mol.ConnectTheDots();
1277         if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
1278           mol.PerceiveBondOrders();
1279         mol.EndModify();
1280         return true;
1281       }
1282 
1283     } // End GAMESSUKOutputFormat::ReadMolecule
1284 
1285 
1286   } //namespace OpenBabel
1287