1 /*
2 *
3 *  msi2lmp.exe
4 *
5 *   v3.9.9 AK- Teach msi2lmp to not generate dihedrals with identical 1-4 atoms
6 *
7 *   v3.9.8 AK- Improved whitespace handling in parsing topology and force
8 *              field files to avoid bogus warnings about type name truncation
9 *
10 *   v3.9.7 AK- Add check to enforce that Class1/OPLS-AA use A-B parameter
11 *              conventions in force field file and Class2 uses r-eps conventions
12 *
13 *   v3.9.6 AK- Refactoring of MDF file parser with more consistent
14 *              handling of compile time constants MAX_NAME and MAX_STRING
15 *
16 *   v3.9.5 AK- Add TopoTools style force field parameter type hints
17 *
18 *   v3.9.4 AK- Make force field style hints optional with a flag
19 *
20 *   v3.9.3 AK- Bugfix for triclinic cells.
21 *
22 *   v3.9.2 AK- Support for writing out force field style hints
23 *
24 *   v3.9.1 AK- Bugfix for Class2. Free allocated memory. Print version number.
25 *
26 *   v3.9 AK  - Rudimentary support for OPLS-AA
27 *
28 *   v3.8 AK  - Some refactoring and cleanup of global variables
29 *            - Bugfixes for argument parsing and improper definitions
30 *            - improved handling of box dimensions and image flags
31 *            - port to compiling on windows using MinGW
32 *            - more consistent print level handling
33 *            - more consistent handling of missing parameters
34 *            - Added a regression test script with examples.
35 *
36 *   V3.7 STM - Added support for triclinic cells
37 *
38 *   v3.6 KLA - Changes to output to either lammps 2001 (F90 version) or to
39 *              lammps 2005 (C++ version)
40 *
41 *   v3.4 JEC - a number of minor changes due to way newline and EOF are generated
42 *              on Materials Studio generated .car and .mdf files as well as odd
43 *              behavior out of newer Linux IO libraries. ReadMdfFile was restructured
44 *              in the process.
45 *
46 *   v3.1 JEC - changed IO interface to standard in/out, forcefield file
47 *              location can be indicated by environmental variable; added
48 *              printing options, consistency checks and forcefield
49 *              parameter versions sensitivity (highest one used)
50 *
51 *   v3.0 JEC - program substantially rewritten to reduce execution time
52 *              and be 98 % dynamic in memory use (still fixed limits on
53 *              number of parameter types for different internal coordinate
54 *              sets)
55 *
56 *   v2.0 MDP - got internal coordinate information from mdf file and
57 *              forcefield parameters from frc file thus eliminating
58 *              need for Discover
59 *
60 *   V1.0 SL  - original version. Used .car file and internal coordinate
61 *              information from Discover to produce LAMMPS data file.
62 *
63 *  This program uses the .car and .mdf files from MSI/Biosym's INSIGHT
64 *  program to produce a LAMMPS data file.
65 *
66 *  The program is started by supplying information at the command prompt
67 * according to the usage described below.
68 *
69 *  USAGE: msi2lmp3 ROOTNAME {-print #} {-class #} {-frc FRC_FILE} {-ignore} {-nocenter} {-oldstyle} {-shift X Y Z}
70 *
71 *  -- msi2lmp3 is the name of the executable
72 *  -- ROOTNAME is the base name of the .car and .mdf files
73 *  -- all other flags are optional and can be abbreviated (e.g. -p instead of -print)
74 *
75 *  -- -print
76 *        # is the print level:  0  - silent except for errors
77 *                               1  - minimal (default)
78 *                               2  - more verbose
79 *                               3  - even more verbose
80 *  -- -class
81 *        # is the class of forcefield to use (I  or 1 = Class I e.g., CVFF, clayff)
82 *                                            (II or 2 = Class II e.g., CFFx, COMPASS)
83 *                                            (O  or 0 = OPLS-AA)
84 *     default is -class I
85 *
86 *  -- -ignore   - tells msi2lmp to ignore warnings and errors and keep going
87 *
88 *  -- -nocenter - tells msi2lmp to not center the box around the (geometrical)
89 *                 center of the atoms, but around the origin
90 *
91 *  -- -oldstyle - tells msi2lmp to write out a data file without style hints
92 *                 (to be compatible with older LAMMPS versions)
93 *
94 *  -- -shift    - tells msi2lmp to shift the entire system (box and coordinates)
95 *                 by a vector (default: 0.0 0.0 0.0)
96 *
97 *  -- -frc      - specifies name of the forcefield file (e.g., cff91)
98 *
99 *     If the name includes a hard wired directory (i.e., if the name
100 *     starts with . or /), then the name is used alone. Otherwise,
101 *     the program looks for the forcefield file in $MSI2LMP_LIBRARY.
102 *     If $MSI2LMP_LIBRARY is not set, then the directory ../frc_files
103 *     (relative to the current directory) is used.
104 *
105 *     If the file name does not include a dot after the first
106 *     character, then .frc is appended to the name.
107 *
108 *     For example,  -frc cvff (assumes cvff.frc is in $MSI2LMP_LIBRARY
109 *                              or .)
110 *
111 *                   -frc cff/cff91 (assumes cff91.frc is in
112 *                                   $MSI2LMP_LIBRARY/cff or ./cff)
113 *
114 *                   -frc /usr/local/forcefields/cff95 (absolute
115 *                                                             location)
116 *
117 *     By default, the program uses $MSI2LMP_LIBRARY/cvff.frc
118 *
119 *  -- output is written to a file called ROOTNAME.data
120 *
121 *
122 ****************************************************************
123 *
124 * msi2lmp
125 *
126 * This is the third version of a program that generates a LAMMPS
127 * data file based on the information in a MSI car file (atom
128 * coordinates) and mdf file (molecular topology). A key part of
129 * the program looks up forcefield parameters from an MSI frc file.
130 *
131 * The first version was written by Steve Lustig at Dupont, but
132 * required using Discover to derive internal coordinates and
133 * forcefield parameters
134 *
135 * The second version was written by Michael Peachey while an
136 * intern in the Cray Chemistry Applications Group managed
137 * by John Carpenter. This version derived internal coordinates
138 * from the mdf file and looked up parameters in the frc file
139 * thus eliminating the need for Discover.
140 *
141 * The third version was written by John Carpenter to optimize
142 * the performance of the program for large molecular systems
143 * (the original  code for deriving atom numbers was quadratic in time)
144 * and to make the program fully dynamic. The second version used
145 * fixed dimension arrays for the internal coordinates.
146 *
147 * November 2000
148 */
149 
150 #include "msi2lmp.h"
151 
152 #include <stdlib.h>
153 #include <string.h>
154 
155 #ifdef _WIN32
156 #include <ctype.h>
157 #endif
158 
/* global variables */

/* settings that main() assigns from the command line */

char   *rootname;             /* heap copy of the first command line argument */
char   *FrcFileName = NULL;   /* heap-allocated name of the force field file */
int    forcefield = 0;        /* bit mask built from FF_TYPE_* constants */
int    pflag;                 /* print level (-print) */
int    iflag;                 /* nonzero: do not stop on errors (-ignore) */
int    centerflag = 1;        /* cleared by -nocenter */
int    hintflag = 1;          /* cleared by -oldstyle */
double shift[3];              /* translation vector (-shift) */

/* further flags and cell data; these are not assigned in main() */
/* NOTE(review): pbc/box presumably hold the cell description - confirm */

double pbc[6];
double box[3][3];
int    periodic = 1;
int    TriclinicFlag = 0;
int    ljtypeflag = 0;
int    replicate[3];

/* per-molecule bookkeeping */

int   *no_atoms;
int    no_molecules;

/* totals of the individual list entries */

int    total_no_atoms = 0;
int    total_no_bonds = 0;
int    total_no_angles = 0;
int    total_no_dihedrals = 0;
int    total_no_oops = 0;
int    total_no_angle_angles = 0;

/* number of distinct types of each kind */

int    no_atom_types = 0;
int    no_bond_types = 0;
int    no_angle_types = 0;
int    no_dihedral_types = 0;
int    no_oop_types = 0;
int    no_angleangle_types = 0;

/* file handles */

FILE   *CarF = NULL;
FILE   *MdfF = NULL;
FILE   *FrcF = NULL;
FILE   *PrmF = NULL;
FILE   *RptF = NULL;

/* dynamically allocated lists; main() releases them before returning */

struct Atom *atoms = NULL;
struct MoleculeList *molecule = NULL;

struct BondList *bonds = NULL;
struct AngleList *angles = NULL;
struct DihedralList *dihedrals = NULL;
struct OOPList *oops = NULL;
struct AngleAngleList *angleangles = NULL;

struct AtomTypeList *atomtypes = NULL;
struct BondTypeList *bondtypes = NULL;
struct AngleTypeList *angletypes = NULL;
struct DihedralTypeList *dihedraltypes = NULL;
struct OOPTypeList *ooptypes = NULL;
struct AngleAngleTypeList *angleangletypes = NULL;
209 
condexit(int val)210 void condexit(int val)
211 {
212     if (iflag == 0) exit(val);
213 }
214 
/*
 * Check that a command line flag is followed by a usable value.
 *
 *   arg  - the argument vector
 *   flag - full name of the flag; only used in the diagnostic message
 *   num  - index into arg at which the value is expected
 *   argc - number of entries in arg
 *
 * Returns 0 if arg[num] exists and does not start with '-'.
 * Otherwise a message is printed to stdout and 1 is returned.
 */
static int check_arg(char **arg, const char *flag, int num, int argc)
{
  const char *value;

  /* ran off the end of the argument vector */
  if (argc <= num) {
    printf("Missing argument to \"%s\" flag\n",flag);
    return 1;
  }

  /* the next word looks like another flag, not like a value */
  value = arg[num];
  if (*value == '-') {
    printf("Incorrect argument to \"%s\" flag: %s\n",flag,value);
    return 1;
  }

  return 0;
}
227 
main(int argc,char * argv[])228 int main (int argc, char *argv[])
229 {
230   int n,i,found_sep;
231   const char *frc_dir_name = NULL;
232   const char *frc_file_name = NULL;
233 
234   pflag = 1;
235   iflag = 0;
236   forcefield = FF_TYPE_CLASS1 | FF_TYPE_COMMON;
237   shift[0] = shift[1] = shift[2] = 0.0;
238 
239   frc_dir_name = getenv("MSI2LMP_LIBRARY");
240 
241   if (argc < 2) {
242     printf("usage: %s <rootname> [-class <I|1|II|2>] [-frc <path to frc file>] [-print #] [-ignore] [-nocenter] [-oldstyle]\n",argv[0]);
243     return 1;
244   } else { /* rootname was supplied as first argument, copy to rootname */
245     int len = strlen(argv[1]) + 1;
246     rootname = (char *)malloc(len);
247     strcpy(rootname,argv[1]);
248   }
249 
250   n = 2;
251   while (n < argc) {
252     if (strncmp(argv[n],"-c",2) == 0) {
253       n++;
254       if (check_arg(argv,"-class",n,argc))
255         return 2;
256       if ((strcmp(argv[n],"I") == 0) || (strcmp(argv[n],"1") == 0)) {
257         forcefield = FF_TYPE_CLASS1 | FF_TYPE_COMMON;
258       } else if ((strcmp(argv[n],"II") == 0) || (strcmp(argv[n],"2") == 0)) {
259         forcefield = FF_TYPE_CLASS2 | FF_TYPE_COMMON;
260       } else if ((strcmp(argv[n],"O") == 0) || (strcmp(argv[n],"0") == 0)) {
261         forcefield = FF_TYPE_OPLSAA | FF_TYPE_COMMON;
262       } else {
263         printf("Unrecognized Forcefield class: %s\n",argv[n]);
264         return 3;
265       }
266     } else if (strncmp(argv[n],"-f",2) == 0) {
267       n++;
268       if (check_arg(argv,"-frc",n,argc))
269         return 4;
270       frc_file_name = argv[n];
271     } else if (strncmp(argv[n],"-s",2) == 0) {
272       if (n+3 > argc) {
273         printf("Missing argument(s) to \"-shift\" flag\n");
274         return 1;
275       }
276       shift[0] = atof(argv[++n]);
277       shift[1] = atof(argv[++n]);
278       shift[2] = atof(argv[++n]);
279     } else if (strncmp(argv[n],"-i",2) == 0 ) {
280       iflag = 1;
281     } else if (strncmp(argv[n],"-n",2) == 0 ) {
282       centerflag = 0;
283     } else if (strncmp(argv[n],"-o",2) == 0 ) {
284       hintflag = 0;
285     } else if (strncmp(argv[n],"-p",2) == 0) {
286       n++;
287       if (check_arg(argv,"-print",n,argc))
288         return 5;
289       pflag = atoi(argv[n]);
290     } else {
291       printf("Unrecognized option: %s\n",argv[n]);
292       return 6;
293     }
294     n++;
295   }
296 
297   /* set defaults, if nothing else was given */
298   if (frc_dir_name == NULL) {
299 #if (_WIN32)
300     frc_dir_name = "..\\frc_files";
301 #else
302     frc_dir_name = "../frc_files";
303 #endif
304   }
305 
306   if (frc_file_name == NULL)
307     frc_file_name = "cvff.frc";
308 
309   found_sep=0;
310 #ifdef _WIN32
311   if (isalpha(frc_file_name[0]) && (frc_file_name[1] == ':'))
312     found_sep=1; /* windows drive letter => full path. */
313 #endif
314 
315   n = strlen(frc_file_name);
316   for (i=0; i < n; ++i) {
317 #ifdef _WIN32
318     if ((frc_file_name[i] == '/') || (frc_file_name[i] == '\\'))
319       found_sep=1+i;
320 #else
321     if (frc_file_name[i] == '/')
322       found_sep=1+i;
323 #endif
324   }
325 
326   /* full pathname given */
327   if (found_sep) {
328     i = 0;
329     /* need to append extension? */
330     if ((n < 5) || (strcmp(frc_file_name+n-4,".frc") !=0))
331       i=1;
332 
333     FrcFileName = (char *)malloc(n+1+i*4);
334     strcpy(FrcFileName,frc_file_name);
335     if (i) strcat(FrcFileName,".frc");
336   } else {
337     i = 0;
338     /* need to append extension? */
339     if ((n < 5) || (strcmp(frc_file_name+n-4,".frc") !=0))
340       i=1;
341 
342     FrcFileName = (char *)malloc(n+2+i*4+strlen(frc_dir_name));
343     strcpy(FrcFileName,frc_dir_name);
344 #ifdef _WIN32
345     strcat(FrcFileName,"\\");
346 #else
347     strcat(FrcFileName,"/");
348 #endif
349     strcat(FrcFileName,frc_file_name);
350     if (i) strcat(FrcFileName,".frc");
351   }
352 
353 
354   if (pflag > 0) {
355     puts("\nRunning msi2lmp " MSI2LMP_VERSION "\n");
356     if (forcefield & FF_TYPE_CLASS1) puts(" Forcefield: Class I");
357     if (forcefield & FF_TYPE_CLASS2) puts(" Forcefield: Class II");
358     if (forcefield & FF_TYPE_OPLSAA) puts(" Forcefield: OPLS-AA");
359     printf(" Forcefield file name: %s\n",FrcFileName);
360     if (centerflag) puts(" Output is recentered around geometrical center");
361     if (hintflag) puts(" Output contains style flag hints");
362     else puts(" Style flag hints disabled");
363     printf(" System translated by: %g %g %g\n",shift[0],shift[1],shift[2]);
364   }
365 
366   n = 0;
367   if (forcefield & FF_TYPE_CLASS1) {
368     if (strstr(FrcFileName,"cvff") != NULL) ++n;
369     if (strstr(FrcFileName,"clayff") != NULL) ++n;
370   } else if (forcefield & FF_TYPE_OPLSAA) {
371     if (strstr(FrcFileName,"oplsaa") != NULL) ++n;
372   } else if (forcefield & FF_TYPE_CLASS2) {
373     if (strstr(FrcFileName,"pcff") != NULL) ++n;
374     if (strstr(FrcFileName,"cff91") != NULL) ++n;
375     if (strstr(FrcFileName,"compass") != NULL) ++n;
376   }
377 
378   if (n == 0) {
379     if (iflag > 0) fputs(" WARNING",stderr);
380     else           fputs(" Error  ",stderr);
381 
382     fputs("- forcefield name and class appear to be inconsistent\n\n",stderr);
383     if (iflag == 0) return 7;
384   }
385 
386   /* Read in .car file */
387   ReadCarFile();
388 
389   /*Read in .mdf file */
390 
391   ReadMdfFile();
392 
393   /* Define bonds, angles, etc...*/
394 
395   if (pflag > 0)
396     printf("\n Building internal coordinate lists \n");
397   MakeLists();
398 
399   /* Read .frc file into memory */
400 
401   if (pflag > 0)
402     printf("\n Reading forcefield file \n");
403   ReadFrcFile();
404 
405   /* Get forcefield parameters */
406 
407   if (pflag > 0)
408     printf("\n Get force field parameters for this system\n");
409   GetParameters();
410 
411   /* Do internal check of internal coordinate lists */
412   if (pflag > 0)
413     printf("\n Check parameters for internal consistency\n");
414   CheckLists();
415 
416   /* Write out the final data */
417   WriteDataFile(rootname);
418 
419   /* free up memory to detect possible memory corruption */
420   free(rootname);
421   free(FrcFileName);
422   ClearFrcData();
423 
424   for (n=0; n < no_molecules; n++) {
425     free(molecule[n].residue);
426   }
427 
428   free(no_atoms);
429   free(molecule);
430   free(atoms);
431   free(atomtypes);
432   if (bonds) free(bonds);
433   if (bondtypes) free(bondtypes);
434   if (angles) free(angles);
435   if (angletypes) free(angletypes);
436   if (dihedrals) free(dihedrals);
437   if (dihedraltypes) free(dihedraltypes);
438   if (oops) free(oops);
439   if (ooptypes) free(ooptypes);
440   if (angleangles) free(angleangles);
441   if (angleangletypes) free(angleangletypes);
442 
443   if (pflag > 0)
444     printf("\nNormal program termination\n");
445   return 0;
446 }
447