1 /*
2 *
3 * msi2lmp.exe
4 *
5 * v3.9.9 AK- Teach msi2lmp to not generate dihedrals with identical 1-4 atoms
6 *
7 * v3.9.8 AK- Improved whitespace handling in parsing topology and force
8 * field files to avoid bogus warnings about type name truncation
9 *
10 * v3.9.7 AK- Add check to enforce that Class1/OPLS-AA use A-B parameter
* conventions in force field file and Class2 use r-eps conventions
12 *
13 * v3.9.6 AK- Refactoring of MDF file parser with more consistent
14 * handling of compile time constants MAX_NAME and MAX_STRING
15 *
16 * v3.9.5 AK- Add TopoTools style force field parameter type hints
17 *
18 * v3.9.4 AK- Make force field style hints optional with a flag
19 *
20 * v3.9.3 AK- Bugfix for triclinic cells.
21 *
22 * v3.9.2 AK- Support for writing out force field style hints
23 *
24 * v3.9.1 AK- Bugfix for Class2. Free allocated memory. Print version number.
25 *
26 * v3.9 AK - Rudimentary support for OPLS-AA
27 *
28 * v3.8 AK - Some refactoring and cleanup of global variables
29 * - Bugfixes for argument parsing and improper definitions
30 * - improved handling of box dimensions and image flags
31 * - port to compiling on windows using MinGW
32 * - more consistent print level handling
33 * - more consistent handling of missing parameters
34 * - Added a regression test script with examples.
35 *
36 * V3.7 STM - Added support for triclinic cells
37 *
38 * v3.6 KLA - Changes to output to either lammps 2001 (F90 version) or to
39 * lammps 2005 (C++ version)
40 *
41 * v3.4 JEC - a number of minor changes due to way newline and EOF are generated
42 * on Materials Studio generated .car and .mdf files as well as odd
43 * behavior out of newer Linux IO libraries. ReadMdfFile was restructured
44 * in the process.
45 *
46 * v3.1 JEC - changed IO interface to standard in/out, forcefield file
47 * location can be indicated by environmental variable; added
48 * printing options, consistency checks and forcefield
49 * parameter versions sensitivity (highest one used)
50 *
51 * v3.0 JEC - program substantially rewritten to reduce execution time
52 * and be 98 % dynamic in memory use (still fixed limits on
53 * number of parameter types for different internal coordinate
54 * sets)
55 *
56 * v2.0 MDP - got internal coordinate information from mdf file and
57 * forcefield parameters from frc file thus eliminating
58 * need for Discover
59 *
60 * V1.0 SL - original version. Used .car file and internal coordinate
61 * information from Discover to produce LAMMPS data file.
62 *
* This program uses the .car and .mdf files from MSI/Biosym's INSIGHT
64 * program to produce a LAMMPS data file.
65 *
66 * The program is started by supplying information at the command prompt
67 * according to the usage described below.
68 *
* USAGE: msi2lmp3 ROOTNAME {-print #} {-class #} {-frc FRC_FILE} {-ignore} {-nocenter} {-oldstyle} {-shift X Y Z}
70 *
71 * -- msi2lmp3 is the name of the executable
72 * -- ROOTNAME is the base name of the .car and .mdf files
* -- all other flags are optional and can be abbreviated (e.g. -p instead of -print)
74 *
75 * -- -print
76 * # is the print level: 0 - silent except for errors
77 * 1 - minimal (default)
78 * 2 - more verbose
79 * 3 - even more verbose
80 * -- -class
81 * # is the class of forcefield to use (I or 1 = Class I e.g., CVFF, clayff)
82 * (II or 2 = Class II e.g., CFFx, COMPASS)
83 * (O or 0 = OPLS-AA)
84 * default is -class I
85 *
86 * -- -ignore - tells msi2lmp to ignore warnings and errors and keep going
87 *
88 * -- -nocenter - tells msi2lmp to not center the box around the (geometrical)
89 * center of the atoms, but around the origin
90 *
91 * -- -oldstyle - tells msi2lmp to write out a data file without style hints
92 * (to be compatible with older LAMMPS versions)
93 *
94 * -- -shift - tells msi2lmp to shift the entire system (box and coordinates)
95 * by a vector (default: 0.0 0.0 0.0)
96 *
97 * -- -frc - specifies name of the forcefield file (e.g., cff91)
98 *
99 * If the name includes a hard wired directory (i.e., if the name
100 * starts with . or /), then the name is used alone. Otherwise,
101 * the program looks for the forcefield file in $MSI2LMP_LIBRARY.
102 * If $MSI2LMP_LIBRARY is not set, then the current directory is
103 * used.
104 *
105 * If the file name does not include a dot after the first
106 * character, then .frc is appended to the name.
107 *
108 * For example, -frc cvff (assumes cvff.frc is in $MSI2LMP_LIBRARY
109 * or .)
110 *
111 * -frc cff/cff91 (assumes cff91.frc is in
112 * $MSI2LMP_LIBRARY/cff or ./cff)
113 *
114 * -frc /usr/local/forcefields/cff95 (absolute
115 * location)
116 *
117 * By default, the program uses $MSI2LMP_LIBRARY/cvff.frc
118 *
119 * -- output is written to a file called ROOTNAME.data
120 *
121 *
122 ****************************************************************
123 *
124 * msi2lmp
125 *
126 * This is the third version of a program that generates a LAMMPS
127 * data file based on the information in a MSI car file (atom
128 * coordinates) and mdf file (molecular topology). A key part of
129 * the program looks up forcefield parameters from an MSI frc file.
130 *
131 * The first version was written by Steve Lustig at Dupont, but
132 * required using Discover to derive internal coordinates and
133 * forcefield parameters
134 *
135 * The second version was written by Michael Peachey while an
* intern in the Cray Chemistry Applications Group managed
137 * by John Carpenter. This version derived internal coordinates
138 * from the mdf file and looked up parameters in the frc file
139 * thus eliminating the need for Discover.
140 *
141 * The third version was written by John Carpenter to optimize
142 * the performance of the program for large molecular systems
143 * (the original code for deriving atom numbers was quadratic in time)
144 * and to make the program fully dynamic. The second version used
145 * fixed dimension arrays for the internal coordinates.
146 *
147 * November 2000
148 */
149
150 #include "msi2lmp.h"
151
152 #include <stdlib.h>
153 #include <string.h>
154
155 #ifdef _WIN32
156 #include <ctype.h>
157 #endif
158
/* global variables */

/* Program-wide state defined in this file. main() sets the command line
 * related values; the remaining variables are not assigned in this file
 * and are presumably filled in by the reader/list/parameter routines that
 * main() calls (TODO confirm against those source files). */

char *rootname;             /* heap copy of argv[1]; passed to WriteDataFile() */
double pbc[6];              /* not used in this file; presumably cell parameters from the .car file - TODO confirm */
double box[3][3];           /* not used in this file; presumably the simulation box - TODO confirm */
double shift[3];            /* translation vector, set by the -shift flag (default 0 0 0) */
int periodic = 1;           /* not used in this file; presumably 1 for a periodic system - TODO confirm */
int TriclinicFlag = 0;      /* not used in this file; presumably nonzero for triclinic cells - TODO confirm */
int forcefield = 0;         /* bitwise OR of FF_TYPE_* flags, selected by the -class flag */
int centerflag = 1;         /* cleared by -nocenter; nonzero: output is recentered */
int hintflag = 1;           /* cleared by -oldstyle; nonzero: output contains style hints */
int ljtypeflag = 0;         /* not used in this file */

int pflag;                  /* print level, set by -print (main() defaults it to 1) */
int iflag;                  /* set to 1 by -ignore; condexit() only exits when it is 0 */
int *no_atoms;              /* heap array, freed at the end of main() */
int no_molecules;           /* number of entries in molecule[] (loop bound in main()) */
int replicate[3];           /* not used in this file */
int total_no_atoms = 0;
int total_no_bonds = 0;
int total_no_angles = 0;
int total_no_dihedrals = 0;
int total_no_angle_angles = 0;
int total_no_oops = 0;
int no_atom_types = 0;
int no_bond_types = 0;
int no_angle_types = 0;
int no_dihedral_types = 0;
int no_oop_types = 0;
int no_angleangle_types = 0;
char *FrcFileName = NULL;   /* heap string with the force field file path, built in main() */
/* File handles; none of them is opened or closed in this file. */
FILE *CarF = NULL;
FILE *FrcF = NULL;
FILE *PrmF = NULL;
FILE *MdfF = NULL;
FILE *RptF = NULL;

/* Heap-allocated lists; main() frees all of them before terminating. */
struct Atom *atoms = NULL;
struct MoleculeList *molecule = NULL;
struct BondList *bonds = NULL;
struct AngleList *angles = NULL;
struct DihedralList *dihedrals = NULL;
struct OOPList *oops = NULL;
struct AngleAngleList *angleangles = NULL;
struct AtomTypeList *atomtypes = NULL;
struct BondTypeList *bondtypes = NULL;
struct AngleTypeList *angletypes = NULL;
struct DihedralTypeList *dihedraltypes = NULL;
struct OOPTypeList *ooptypes = NULL;
struct AngleAngleTypeList *angleangletypes = NULL;
209
condexit(int val)210 void condexit(int val)
211 {
212 if (iflag == 0) exit(val);
213 }
214
/* Validate the value that follows a command line flag.
 *
 *   arg  - the argument vector
 *   flag - name of the flag being processed (used in messages only)
 *   num  - index in arg[] where the value of the flag is expected
 *   argc - number of entries in arg[]
 *
 * Returns 0 if arg[num] exists and does not start with '-'.
 * Otherwise prints a message to stdout and returns 1. */
static int check_arg(char **arg, const char *flag, int num, int argc)
{
  int status = 0;

  if (num >= argc) {
    /* ran off the end of the argument list */
    printf("Missing argument to \"%s\" flag\n",flag);
    status = 1;
  } else if (arg[num][0] == '-') {
    /* the next word looks like another flag, not a value */
    printf("Incorrect argument to \"%s\" flag: %s\n",flag,arg[num]);
    status = 1;
  }

  return status;
}
227
main(int argc,char * argv[])228 int main (int argc, char *argv[])
229 {
230 int n,i,found_sep;
231 const char *frc_dir_name = NULL;
232 const char *frc_file_name = NULL;
233
234 pflag = 1;
235 iflag = 0;
236 forcefield = FF_TYPE_CLASS1 | FF_TYPE_COMMON;
237 shift[0] = shift[1] = shift[2] = 0.0;
238
239 frc_dir_name = getenv("MSI2LMP_LIBRARY");
240
241 if (argc < 2) {
242 printf("usage: %s <rootname> [-class <I|1|II|2>] [-frc <path to frc file>] [-print #] [-ignore] [-nocenter] [-oldstyle]\n",argv[0]);
243 return 1;
244 } else { /* rootname was supplied as first argument, copy to rootname */
245 int len = strlen(argv[1]) + 1;
246 rootname = (char *)malloc(len);
247 strcpy(rootname,argv[1]);
248 }
249
250 n = 2;
251 while (n < argc) {
252 if (strncmp(argv[n],"-c",2) == 0) {
253 n++;
254 if (check_arg(argv,"-class",n,argc))
255 return 2;
256 if ((strcmp(argv[n],"I") == 0) || (strcmp(argv[n],"1") == 0)) {
257 forcefield = FF_TYPE_CLASS1 | FF_TYPE_COMMON;
258 } else if ((strcmp(argv[n],"II") == 0) || (strcmp(argv[n],"2") == 0)) {
259 forcefield = FF_TYPE_CLASS2 | FF_TYPE_COMMON;
260 } else if ((strcmp(argv[n],"O") == 0) || (strcmp(argv[n],"0") == 0)) {
261 forcefield = FF_TYPE_OPLSAA | FF_TYPE_COMMON;
262 } else {
263 printf("Unrecognized Forcefield class: %s\n",argv[n]);
264 return 3;
265 }
266 } else if (strncmp(argv[n],"-f",2) == 0) {
267 n++;
268 if (check_arg(argv,"-frc",n,argc))
269 return 4;
270 frc_file_name = argv[n];
271 } else if (strncmp(argv[n],"-s",2) == 0) {
272 if (n+3 > argc) {
273 printf("Missing argument(s) to \"-shift\" flag\n");
274 return 1;
275 }
276 shift[0] = atof(argv[++n]);
277 shift[1] = atof(argv[++n]);
278 shift[2] = atof(argv[++n]);
279 } else if (strncmp(argv[n],"-i",2) == 0 ) {
280 iflag = 1;
281 } else if (strncmp(argv[n],"-n",2) == 0 ) {
282 centerflag = 0;
283 } else if (strncmp(argv[n],"-o",2) == 0 ) {
284 hintflag = 0;
285 } else if (strncmp(argv[n],"-p",2) == 0) {
286 n++;
287 if (check_arg(argv,"-print",n,argc))
288 return 5;
289 pflag = atoi(argv[n]);
290 } else {
291 printf("Unrecognized option: %s\n",argv[n]);
292 return 6;
293 }
294 n++;
295 }
296
297 /* set defaults, if nothing else was given */
298 if (frc_dir_name == NULL) {
299 #if (_WIN32)
300 frc_dir_name = "..\\frc_files";
301 #else
302 frc_dir_name = "../frc_files";
303 #endif
304 }
305
306 if (frc_file_name == NULL)
307 frc_file_name = "cvff.frc";
308
309 found_sep=0;
310 #ifdef _WIN32
311 if (isalpha(frc_file_name[0]) && (frc_file_name[1] == ':'))
312 found_sep=1; /* windows drive letter => full path. */
313 #endif
314
315 n = strlen(frc_file_name);
316 for (i=0; i < n; ++i) {
317 #ifdef _WIN32
318 if ((frc_file_name[i] == '/') || (frc_file_name[i] == '\\'))
319 found_sep=1+i;
320 #else
321 if (frc_file_name[i] == '/')
322 found_sep=1+i;
323 #endif
324 }
325
326 /* full pathname given */
327 if (found_sep) {
328 i = 0;
329 /* need to append extension? */
330 if ((n < 5) || (strcmp(frc_file_name+n-4,".frc") !=0))
331 i=1;
332
333 FrcFileName = (char *)malloc(n+1+i*4);
334 strcpy(FrcFileName,frc_file_name);
335 if (i) strcat(FrcFileName,".frc");
336 } else {
337 i = 0;
338 /* need to append extension? */
339 if ((n < 5) || (strcmp(frc_file_name+n-4,".frc") !=0))
340 i=1;
341
342 FrcFileName = (char *)malloc(n+2+i*4+strlen(frc_dir_name));
343 strcpy(FrcFileName,frc_dir_name);
344 #ifdef _WIN32
345 strcat(FrcFileName,"\\");
346 #else
347 strcat(FrcFileName,"/");
348 #endif
349 strcat(FrcFileName,frc_file_name);
350 if (i) strcat(FrcFileName,".frc");
351 }
352
353
354 if (pflag > 0) {
355 puts("\nRunning msi2lmp " MSI2LMP_VERSION "\n");
356 if (forcefield & FF_TYPE_CLASS1) puts(" Forcefield: Class I");
357 if (forcefield & FF_TYPE_CLASS2) puts(" Forcefield: Class II");
358 if (forcefield & FF_TYPE_OPLSAA) puts(" Forcefield: OPLS-AA");
359 printf(" Forcefield file name: %s\n",FrcFileName);
360 if (centerflag) puts(" Output is recentered around geometrical center");
361 if (hintflag) puts(" Output contains style flag hints");
362 else puts(" Style flag hints disabled");
363 printf(" System translated by: %g %g %g\n",shift[0],shift[1],shift[2]);
364 }
365
366 n = 0;
367 if (forcefield & FF_TYPE_CLASS1) {
368 if (strstr(FrcFileName,"cvff") != NULL) ++n;
369 if (strstr(FrcFileName,"clayff") != NULL) ++n;
370 } else if (forcefield & FF_TYPE_OPLSAA) {
371 if (strstr(FrcFileName,"oplsaa") != NULL) ++n;
372 } else if (forcefield & FF_TYPE_CLASS2) {
373 if (strstr(FrcFileName,"pcff") != NULL) ++n;
374 if (strstr(FrcFileName,"cff91") != NULL) ++n;
375 if (strstr(FrcFileName,"compass") != NULL) ++n;
376 }
377
378 if (n == 0) {
379 if (iflag > 0) fputs(" WARNING",stderr);
380 else fputs(" Error ",stderr);
381
382 fputs("- forcefield name and class appear to be inconsistent\n\n",stderr);
383 if (iflag == 0) return 7;
384 }
385
386 /* Read in .car file */
387 ReadCarFile();
388
389 /*Read in .mdf file */
390
391 ReadMdfFile();
392
393 /* Define bonds, angles, etc...*/
394
395 if (pflag > 0)
396 printf("\n Building internal coordinate lists \n");
397 MakeLists();
398
399 /* Read .frc file into memory */
400
401 if (pflag > 0)
402 printf("\n Reading forcefield file \n");
403 ReadFrcFile();
404
405 /* Get forcefield parameters */
406
407 if (pflag > 0)
408 printf("\n Get force field parameters for this system\n");
409 GetParameters();
410
411 /* Do internal check of internal coordinate lists */
412 if (pflag > 0)
413 printf("\n Check parameters for internal consistency\n");
414 CheckLists();
415
416 /* Write out the final data */
417 WriteDataFile(rootname);
418
419 /* free up memory to detect possible memory corruption */
420 free(rootname);
421 free(FrcFileName);
422 ClearFrcData();
423
424 for (n=0; n < no_molecules; n++) {
425 free(molecule[n].residue);
426 }
427
428 free(no_atoms);
429 free(molecule);
430 free(atoms);
431 free(atomtypes);
432 if (bonds) free(bonds);
433 if (bondtypes) free(bondtypes);
434 if (angles) free(angles);
435 if (angletypes) free(angletypes);
436 if (dihedrals) free(dihedrals);
437 if (dihedraltypes) free(dihedraltypes);
438 if (oops) free(oops);
439 if (ooptypes) free(ooptypes);
440 if (angleangles) free(angleangles);
441 if (angleangletypes) free(angleangletypes);
442
443 if (pflag > 0)
444 printf("\nNormal program termination\n");
445 return 0;
446 }
447