1 /* MACHINE GENERATED FILE, DO NOT EDIT! */
2 
3 #define VMDPLUGIN molfile_netcdfplugin
4 #define STATIC_PLUGIN 1
5 
6 /***************************************************************************
7  *cr
8  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
9  *cr                        University of Illinois
10  *cr                         All Rights Reserved
11  *cr
12  ***************************************************************************/
13 
14 /***************************************************************************
15  * RCS INFORMATION:
16  *
17  *      $RCSfile: netcdfplugin.c,v $
18  *      $Author: johns $       $Locker:  $             $State: Exp $
19  *      $Revision: 1.29 $       $Date: 2016/11/28 05:01:54 $
20  *
21  ***************************************************************************/
22 
23 /*
24  * NetCDF based trajectories, used by AMBER 9, MMTK, etc.
25  */
26 
27 #include <netcdf.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <ctype.h>
32 #include "molfile_plugin.h"
33 
34 #define CDF_TYPE_UNKNOWN 0
35 #define CDF_TYPE_AMBER   1
36 #define CDF_TYPE_MMTK    2
37 
38 #define CDF_SUCCESS      0
39 #define CDF_ERR         -1
40 
41 typedef struct {
42   int trajectorytype;
43   int step_numberdimid;
44   size_t step_numberdim;
45   int minor_step_numberdimid;
46   size_t minor_step_numberdim;
47   int atom_numberdimid;
48   size_t atom_numberdim;
49   int xyzdimid;
50   size_t xyzdim;
51   int box_size_lengthdimid;
52   size_t box_size_lengthdim;
53   int description_lengthdimid;
54   size_t description_lengthdim;
55   char *description;
56   int description_id;
57   int step_id;
58   int time_id;
59   int box_size_id;
60   int configuration_id;
61   int has_box;
62   char *comment;
63 } mmtkdata;
64 
65 typedef struct {
66   int is_restart;
67   int is_trajectory;
68   int has_box;
69   int atomdimid;
70   size_t atomdim;
71   int spatialdimid;
72   size_t spatialdim;
73   int framedimid;
74   size_t framedim;
75   char *conventionversion;
76   char *title;
77   char *application;
78   char *program;
79   char *programversion;
80   int spatial_id;                  /* "xyz" */
81   int cell_spatial_id;             /* "abc" */
82   int cell_angular_id;             /* "alpha, beta, gamma" */
83   int time_id;                     /* frame time in picoseconds */
84   int coordinates_id;              /* coords in angstroms */
85   char *coordinates_units;         /* coordinates units */
86   float coordinates_scalefactor;   /* coordinates scaling factor */
87   int cell_lengths_id;             /* cell lengths in angstroms */
88   char *cell_lengths_units;        /* cell lengths units */
89   float cell_lengths_scalefactor;  /* cell lengths scaling factor */
90   int cell_angles_id;              /* cell angles in degrees */
91   char *cell_angles_units;         /* cell angles units */
92   float cell_angles_scalefactor;   /* cell angles scaling factor */
93   int velocities_id;               /* velocities in angstroms/picosecond */
94 } amberdata;
95 
96 
97 typedef struct {
98   /* sub-format independent data */
99   int ncid;
100   int type;
101   int natoms;
102   int curframe;
103   char *conventions;
104 
105   /* stuff used by AMBER */
106   amberdata amber;
107 
108   /* stuff used by MMTK */
109   mmtkdata mmtk;
110 
111 } cdfdata;
112 
113 
close_cdf_read(void * mydata)114 static void close_cdf_read(void *mydata) {
115   cdfdata *cdf = (cdfdata *)mydata;
116 
117   nc_close(cdf->ncid);
118 
119   /* AMBER stuff */
120   if (cdf->amber.title)
121     free(cdf->amber.title);
122 
123   if (cdf->amber.application)
124     free(cdf->amber.application);
125 
126   if (cdf->amber.program)
127     free(cdf->amber.program);
128 
129   if (cdf->amber.programversion)
130     free(cdf->amber.programversion);
131 
132   if (cdf->amber.conventionversion)
133     free(cdf->amber.conventionversion);
134 
135   if (cdf->amber.coordinates_units)
136     free(cdf->amber.coordinates_units);
137 
138   if (cdf->amber.cell_lengths_units)
139     free(cdf->amber.cell_lengths_units);
140 
141   /* MMTK stuff */
142   if (cdf->mmtk.comment)
143     free(cdf->mmtk.comment);
144 
145   /* format independent stuff */
146   if (cdf->conventions)
147     free(cdf->conventions);
148 
149   free(cdf);
150 }
151 
152 
153 
open_amber_cdf_read(cdfdata * cdf)154 static int open_amber_cdf_read(cdfdata *cdf) {
155   int rc;
156   size_t len;
157   amberdata *amber = &cdf->amber;
158 
159   /* check if this is a restart file or not */
160   if (!strcmp(cdf->conventions, "AMBERRESTART")) {
161     amber->is_restart = 1;
162   } else {
163     amber->is_trajectory = 1;
164   }
165 
166   /* global attrib: "ConventionVersion" -- required */
167   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "ConventionVersion", &len);
168   if (rc == NC_NOERR && len > 0) {
169     amber->conventionversion = (char *) malloc((len+1) * sizeof(char));
170     nc_get_att_text(cdf->ncid, NC_GLOBAL, "ConventionVersion", amber->conventionversion);
171     amber->conventionversion[len] = '\0';
172     printf("netcdfplugin) %s follows AMBER conventions version '%s'\n",
173            (amber->is_restart) ? "restart file" : "trajectory",
174            amber->conventionversion);
175   } else {
176     return CDF_ERR;
177   }
178 
179   /* at this point we know that this is an AMBER trajectory or restart file */
180   cdf->type = CDF_TYPE_AMBER;
181 
182   /* initialize default scaling factors so they are always set to a sane */
183   /* value even if problems occur later */
184   amber->coordinates_scalefactor = 1.0;
185   amber->cell_lengths_scalefactor = 1.0;
186   amber->cell_angles_scalefactor = 1.0;
187 
188   /* global attrib: "program" -- required */
189   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "program", &len);
190   if (rc == NC_NOERR && len > 0) {
191     amber->program = (char *) malloc((len+1) * sizeof(char));
192     nc_get_att_text(cdf->ncid, NC_GLOBAL, "program", amber->program);
193     amber->program[len] = '\0';
194     printf("netcdfplugin) AMBER: program '%s'\n", amber->program);
195   } else {
196     printf("netcdfplugin) AMBER: Missing required 'program' global attribute, corrupt file?\n");
197   }
198 
199 
200   /* global attrib: "programVersion" -- required */
201   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "programVersion", &len);
202   if (rc == NC_NOERR && len > 0) {
203     amber->programversion = (char *) malloc((len+1) * sizeof(char));
204     nc_get_att_text(cdf->ncid, NC_GLOBAL, "programVersion", amber->programversion);
205     amber->programversion[len] = '\0';
206     printf("netcdfplugin) AMBER: program version '%s'\n", amber->programversion);
207   } else {
208     printf("netcdfplugin) AMBER: Missing required 'programVersion' global attribute, corrupt file?\n");
209   }
210 
211 
212   /* global attrib: "title" -- optional */
213   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "title", &len);
214   if (rc == NC_NOERR && len > 0) {
215     amber->title = (char *) malloc((len+1) * sizeof(char));
216     nc_get_att_text(cdf->ncid, NC_GLOBAL, "title", amber->title);
217     amber->title[len] = '\0';
218     printf("netcdfplugin) AMBER: title '%s'\n", amber->title);
219   }
220 
221 
222   /* global attrib: "application" -- optional */
223   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "application", &len);
224   if (rc == NC_NOERR && len > 0) {
225     amber->application = (char *) malloc((len+1) * sizeof(char));
226     nc_get_att_text(cdf->ncid, NC_GLOBAL, "application", amber->application);
227     amber->application[len] = '\0';
228     printf("netcdfplugin) AMBER: application '%s'\n", amber->application);
229   }
230 
231 
232 /* XXX lots of additional error checking is needed below... */
233 
234   /* read in spatial dimension */
235   rc = nc_inq_dimid(cdf->ncid, "spatial", &amber->spatialdimid);
236   if (rc == NC_NOERR) {
237     rc = nc_inq_dimlen(cdf->ncid, amber->spatialdimid, &amber->spatialdim);
238     if (rc == NC_NOERR) {
239       printf("netcdfplugin) AMBER: spatial dimension: %ld\n", (long)amber->spatialdim);
240     } else {
241       printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
242       printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
243       amber->spatialdim = 3;
244     }
245   } else {
246     printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
247     printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
248     amber->spatialdim = 3;
249   }
250 
251   /* read in atom dimension */
252   rc = nc_inq_dimid(cdf->ncid, "atom", &amber->atomdimid);
253   if (rc == NC_NOERR) {
254     rc = nc_inq_dimlen(cdf->ncid, amber->atomdimid, &amber->atomdim);
255     if (rc == NC_NOERR) {
256       printf("netcdfplugin) AMBER: atom dimension: %ld\n", (long)amber->atomdim);
257       cdf->natoms = amber->atomdim; /* copy to format independent part */
258     } else  {
259       printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
260       return CDF_ERR;
261     }
262   } else {
263     printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
264     return CDF_ERR;
265   }
266 
267   /* if this is a trajectory, read in frame dimension */
268   if (amber->is_trajectory) {
269     rc = nc_inq_dimid(cdf->ncid, "frame", &amber->framedimid);
270     if (rc == NC_NOERR) {
271       rc = nc_inq_dimlen(cdf->ncid, amber->framedimid, &amber->framedim);
272       if (rc == NC_NOERR) {
273         printf("netcdfplugin) AMBER: frame dimension: %ld\n", (long)amber->framedim);
274       } else {
275         printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
276         return CDF_ERR;
277       }
278     } else {
279       printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
280       return CDF_ERR;
281     }
282   }
283 
284 
285   /*
286    * get ID values for all of the variables we're interested in
287    */
288 #if 0
289   /* VMD can live without the various human readable label variables. */
290   rc = nc_inq_varid(cdf->ncid, "spatial", &amber->spatial_id);
291   if (rc != NC_NOERR)
292     return CDF_ERR;
293 
294   rc = nc_inq_varid(cdf->ncid, "cell_spatial", &amber->cell_spatial_id);
295   if (rc != NC_NOERR)
296     return CDF_ERR;
297 
298   rc = nc_inq_varid(cdf->ncid, "cell_angular", &amber->cell_angular_id);
299   if (rc != NC_NOERR)
300     return CDF_ERR;
301 #endif
302 
303   /* VMD requires coordinates at a minimum */
304   rc = nc_inq_varid(cdf->ncid, "coordinates", &amber->coordinates_id);
305   if (rc != NC_NOERR) {
306     printf("netcdfplugin) AMBER: no coordinates variable, nothing to load\n");
307     return CDF_ERR;
308   }
309 
310   /* Coordinate units */
311   rc = nc_inq_attlen(cdf->ncid, amber->coordinates_id, "units", &len);
312   if (rc == NC_NOERR && len > 0) {
313     amber->coordinates_units = (char *) malloc((len+1) * sizeof(char));
314     nc_get_att_text(cdf->ncid, amber->coordinates_id, "units", amber->coordinates_units);
315     amber->coordinates_units[len] = '\0';
316     printf("netcdfplugin) AMBER: coordinates units: '%s'\n", amber->coordinates_units);
317   } else {
318     printf("netcdfplugin) AMBER: no coordinates units attribute, Angstroms assumed\n");
319   }
320 
321   /* Coordinate scaling factor to get to Angstroms */
322   if (nc_get_att_float(cdf->ncid, amber->coordinates_id, "scale_factor", &amber->coordinates_scalefactor) != NC_NOERR) {
323     printf("netcdfplugin) AMBER: no coordinates scalefactor attribute, 1.0 assumed\n");
324   }
325   printf("netcdfplugin) AMBER: coordinates scalefactor: %f\n", amber->coordinates_scalefactor);
326 
327 #if 0
328   /* we don't need velocities at this time */
329   rc = nc_inq_varid(cdf->ncid, "velocities", &amber->velocities_id);
330   if (rc != NC_NOERR) {
331     printf("netcdfplugin) AMBER: missing velocities variable, aborting\n");
332     return CDF_ERR;
333   }
334 #endif
335 
336   /* optional periodic cell info */
337   rc = nc_inq_varid(cdf->ncid, "cell_lengths", &amber->cell_lengths_id);
338   if (rc == NC_NOERR) {
339     rc = nc_inq_varid(cdf->ncid, "cell_angles", &amber->cell_angles_id);
340     if (rc == NC_NOERR) {
341       printf("netcdfplugin) AMBER trajectory contains periodic cell information\n");
342       amber->has_box = 1;
343 
344       /* Cell lengths units */
345       rc = nc_inq_attlen(cdf->ncid, amber->cell_lengths_id, "units", &len);
346       if (rc == NC_NOERR && len > 0) {
347         amber->cell_lengths_units = (char *) malloc((len+1) * sizeof(char));
348         nc_get_att_text(cdf->ncid, amber->cell_lengths_id, "units", amber->cell_lengths_units);
349         amber->cell_lengths_units[len] = '\0';
350         printf("netcdfplugin) AMBER: cell lengths units: '%s'\n", amber->cell_lengths_units);
351       } else {
352         printf("netcdfplugin) AMBER: no cell lengths units attribute, Angstroms assumed\n");
353       }
354 
355       /* Cell lengths scaling factor to get to Angstroms */
356       if (nc_get_att_float(cdf->ncid, amber->cell_lengths_id, "scale_factor", &amber->cell_lengths_scalefactor) != NC_NOERR) {
357         printf("netcdfplugin) AMBER: no cell lengths scalefactor attribute, 1.0 assumed\n");
358       }
359       printf("netcdfplugin) AMBER: cell lengths scalefactor: %f\n", amber->cell_lengths_scalefactor);
360 
361       /* Cell angles units */
362       rc = nc_inq_attlen(cdf->ncid, amber->cell_angles_id, "units", &len);
363       if (rc == NC_NOERR && len > 0) {
364         amber->cell_angles_units = (char *) malloc((len+1) * sizeof(char));
365         nc_get_att_text(cdf->ncid, amber->cell_angles_id, "units", amber->cell_angles_units);
366         amber->cell_angles_units[len] = '\0';
367         printf("netcdfplugin) AMBER: cell angles units: '%s'\n", amber->cell_angles_units);
368       } else {
369         printf("netcdfplugin) AMBER: no cell angles units attribute, Degrees assumed\n");
370       }
371 
372       /* Cell angles scaling factor to get to degrees */
373       if (nc_get_att_float(cdf->ncid, amber->cell_angles_id, "scale_factor", &amber->cell_angles_scalefactor) != NC_NOERR) {
374         printf("netcdfplugin) AMBER: no cell angles scalefactor attribute, 1.0 assumed\n");
375       }
376       printf("netcdfplugin) AMBER: cell angles scalefactor: %f\n", amber->cell_angles_scalefactor);
377     }
378   }
379 
380   return CDF_SUCCESS;
381 }
382 
383 
open_mmtk_cdf_read(cdfdata * cdf,int conventionsknown)384 static int open_mmtk_cdf_read(cdfdata *cdf, int conventionsknown) {
385   int rc;
386   size_t len;
387   mmtkdata *mmtk = &cdf->mmtk;
388 
389   /* If conventions specify MMTK then we're safe to continue */
390   /* and we know what we're dealing with */
391   if (conventionsknown) {
392     cdf->type = CDF_TYPE_MMTK;
393   }
394 
395   /* global attrib: "trajectory_type" (new format) */
396   rc = nc_get_att_int(cdf->ncid, NC_GLOBAL, "trajectory_type", &mmtk->trajectorytype);
397   if (rc == NC_NOERR) {
398     printf("netcdfplugin) MMTK trajectory type: %d\n", mmtk->trajectorytype);
399   } else {
400     printf("netcdfplugin) Assuming MMTK trajectory type: %d\n", mmtk->trajectorytype);
401     mmtk->trajectorytype = 0;
402   }
403 
404   /* read in spatial dimension */
405   rc = nc_inq_dimid(cdf->ncid, "xyz", &mmtk->xyzdimid);
406   if (rc == NC_NOERR) {
407     rc = nc_inq_dimlen(cdf->ncid, mmtk->xyzdimid, &mmtk->xyzdim);
408     if (rc == NC_NOERR)
409       printf("netcdfplugin) MMTK: xyz dimension: %ld\n", (long)mmtk->xyzdim);
410     else
411       return CDF_ERR;
412   } else {
413     return CDF_ERR;
414   }
415 
416   /* read in atom dimension */
417   rc = nc_inq_dimid(cdf->ncid, "atom_number", &mmtk->atom_numberdimid);
418   if (rc == NC_NOERR) {
419     rc = nc_inq_dimlen(cdf->ncid, mmtk->atom_numberdimid, &mmtk->atom_numberdim);
420     if (rc == NC_NOERR) {
421       printf("netcdfplugin) MMTK: atom_number dimension: %ld\n", (long)mmtk->atom_numberdim);
422       cdf->natoms = mmtk->atom_numberdim; /* copy to format independent part */
423     } else {
424       return CDF_ERR;
425     }
426   } else {
427     return CDF_ERR;
428   }
429 
430 
431   /* read in frame dimension */
432   rc = nc_inq_dimid(cdf->ncid, "step_number", &mmtk->step_numberdimid);
433   if (rc == NC_NOERR) {
434     rc = nc_inq_dimlen(cdf->ncid, mmtk->step_numberdimid, &mmtk->step_numberdim);
435     if (rc == NC_NOERR)
436       printf("netcdfplugin) MMTK: step_number dimension: %ld\n", (long)mmtk->step_numberdim);
437     else
438       return CDF_ERR;
439   } else {
440     return CDF_ERR;
441   }
442 
443 
444   /* read in minor step number dimension */
445   rc = nc_inq_dimid(cdf->ncid, "minor_step_number", &mmtk->minor_step_numberdimid);
446   if (rc == NC_NOERR) {
447     rc = nc_inq_dimlen(cdf->ncid, mmtk->minor_step_numberdimid, &mmtk->minor_step_numberdim);
448     if (rc == NC_NOERR)
449       printf("netcdfplugin) MMTK: minor_step_number dimension: %ld\n", (long)mmtk->minor_step_numberdim);
450     else
451       return CDF_ERR;
452   } else if (rc == NC_EBADDIM) {
453     printf("netcdfplugin) MMTK: no minor_step_number dimension\n");
454     mmtk->minor_step_numberdim = 0;
455   } else {
456     return CDF_ERR;
457   }
458 
459 
460   /* read in description_length dimension */
461   rc = nc_inq_dimid(cdf->ncid, "description_length", &mmtk->description_lengthdimid);
462   if (rc == NC_NOERR) {
463     rc = nc_inq_dimlen(cdf->ncid, mmtk->description_lengthdimid, &mmtk->description_lengthdim);
464     if (rc == NC_NOERR)
465       printf("netcdfplugin) MMTK: description_length dimension: %ld\n", (long)mmtk->description_lengthdim);
466     else
467       return CDF_ERR;
468   } else {
469     return CDF_ERR;
470   }
471 
472 
473   /* get ID values for all of the variables we're interested in */
474   rc = nc_inq_varid(cdf->ncid, "configuration", &mmtk->configuration_id);
475   if (rc != NC_NOERR)
476     return CDF_ERR;
477 
478   rc = nc_inq_varid(cdf->ncid, "description", &mmtk->description_id);
479   if (rc != NC_NOERR)
480     return CDF_ERR;
481 
482   /* check for PBC */
483   rc = nc_inq_varid(cdf->ncid, "box_size", &mmtk->box_size_id);
484   if (rc == NC_NOERR) {
485     mmtk->has_box = 1;
486     printf("netcdfplugin) MMTK: system has periodic boundary conditions\n");
487   }
488   else if (rc == NC_ENOTVAR)
489     mmtk->has_box = 0;
490   else
491     return CDF_ERR;
492 
493 
494   /* global attrib: "comment" -- optional */
495   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "comment", &len);
496   if (rc == NC_NOERR && len > 0) {
497     mmtk->comment = (char *) malloc((len+1) * sizeof(char));
498     nc_get_att_text(cdf->ncid, NC_GLOBAL, "comment", mmtk->comment);
499     mmtk->comment[len] = '\0';
500     printf("netcdfplugin) MMTK: comment '%s'\n", mmtk->comment);
501   }
502 
503   /* at this point we know that this is an MMTK trajectory */
504   if (!conventionsknown) {
505     printf("netcdfplugin) File is an old format MMTK trajectory without conventions\n");
506     cdf->type = CDF_TYPE_MMTK;
507   }
508 
509   return CDF_SUCCESS;
510 }
511 
512 
open_cdf_read(const char * filename,const char * filetype,int * natoms)513 static void *open_cdf_read(const char *filename, const char *filetype,
514                            int *natoms) {
515   int ncid, rc;
516   size_t len;
517   cdfdata *cdf;
518 
519   rc = nc_open(filename, NC_NOWRITE, &ncid);
520   if (rc != NC_NOERR) return NULL;
521 
522   cdf = (cdfdata *) malloc(sizeof(cdfdata));
523   memset(cdf, 0, sizeof(cdfdata));
524 
525   cdf->ncid = ncid;
526   cdf->type = CDF_TYPE_UNKNOWN;
527 
528   /* Determine what NetCDF conventions apply to this data, if any */
529   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "Conventions", &len);
530   if (rc == NC_NOERR && len > 0) {
531     cdf->conventions = (char *) malloc((len+1) * sizeof(char));
532     nc_get_att_text(cdf->ncid, NC_GLOBAL, "Conventions", cdf->conventions);
533     cdf->conventions[len] = '\0';
534     printf("netcdfplugin) conventions: '%s'\n", cdf->conventions);
535   }
536 
537   if (cdf->conventions != NULL) {
538     /* Check if this is a file generated by AMBER */
539     if (strstr(cdf->conventions, "AMBER") != NULL) {
540       if (!open_amber_cdf_read(cdf)) {
541         *natoms = cdf->natoms;
542         return cdf;
543       }
544     }
545 
546     /* Check if this is a file generated by MMTK */
547     if (strstr(cdf->conventions, "MMTK") != NULL) {
548       if (!open_mmtk_cdf_read(cdf, 1)) {
549         *natoms = cdf->natoms;
550         return cdf;
551       }
552     }
553   }
554 
555   printf("netcdfplugin) Missing or unrecognized conventions attribute\n");
556   printf("netcdfplugin) checking for old format MMTK NetCDF file...\n");
557 
558   /* If no conventions are specified, then maybe it's from MMTK */
559   if (!open_mmtk_cdf_read(cdf, 0)) {
560     *natoms = cdf->natoms;
561     return cdf;
562   }
563 
564   /* if no conventions are recognized, then we free everything */
565   /* and return failure                                        */
566   close_cdf_read(cdf);
567 
568   return NULL;
569 }
570 
571 /* A very basic bracket counter. It assumes that the expression
572    is syntactically correct. */
find_closing_bracket(char * s)573 static char *find_closing_bracket(char *s) {
574   int count = 1;
575   while (*s && count > 0) {
576     if (*s == '(' || *s == '[')
577       count++;
578     if (*s == ')' || *s == ']')
579       count--;
580     s++;
581   }
582   return s;
583 }
584 
585 /* Simple string replacement routine for fixing atom names. */
atom_name_replace(char * name,char * substring,char letter)586 static void atom_name_replace(char *name, char *substring, char letter) {
587   char *s = strstr(name, substring);
588   if (s != NULL) {
589     *s = letter;
590     strcpy(s+1, s+strlen(substring));
591   }
592 }
593 
atom_name_remove_underscores(char * name)594 static void atom_name_remove_underscores(char *name) {
595   char *s = name;
596   while (1) {
597     s = strchr(s, '_');
598     if (s == NULL)
599       break;
600     strcpy(s, s+1);
601   }
602 }
603 
604 /* Set chainid, resname, and resnum for a range of atoms
605    and fix atom names. */
set_atom_attributes(molfile_atom_t * atoms,int natoms,char ** atom_pointers,char chain_id,char * resname,int resnum,char * start,char * end,int name_correction_type)606 static void set_atom_attributes(molfile_atom_t *atoms, int natoms,
607 				char **atom_pointers, char chain_id,
608 				char *resname, int resnum,
609 				char *start, char *end,
610 				int name_correction_type) {
611   int i;
612   for (i=0; i<natoms; i++)
613     if (atom_pointers[i] > start && atom_pointers[i] < end) {
614       molfile_atom_t *atom = atoms + i;
615       atom->chain[0] = chain_id;
616       atom->chain[1] = '\0';
617       strcpy(atom->resname, resname);
618       atom->resid = resnum;
619       if (name_correction_type == 1 /* proteins */) {
620 	atom_name_replace(atom->name, "_alpha", 'A');
621 	atom_name_replace(atom->name, "_beta", 'B');
622 	atom_name_replace(atom->name, "_gamma", 'G');
623 	atom_name_replace(atom->name, "_delta", 'D');
624 	atom_name_replace(atom->name, "_epsilon", 'E');
625 	atom_name_replace(atom->name, "_zeta", 'Z');
626 	atom_name_replace(atom->name, "_eta", 'H');
627 	atom_name_remove_underscores(atom->name);
628       }
629       else if (name_correction_type == 2 /* nucleic acids */) {
630 	if (strcmp(atom->name, "O_1") == 0)
631 	  strcpy(atom->name, "O1P");
632 	else if (strcmp(atom->name, "O_2") == 0)
633 	  strcpy(atom->name, "O2P");
634 	else if (strcmp(atom->name, "C_1") == 0)
635 	  strcpy(atom->name, "C1'");
636 	else if (strcmp(atom->name, "C_2") == 0)
637 	  strcpy(atom->name, "C2'");
638 	else if (strcmp(atom->name, "C_3") == 0)
639 	  strcpy(atom->name, "C3'");
640 	else if (strcmp(atom->name, "O_3") == 0)
641 	  strcpy(atom->name, "O3'");
642 	else if (strcmp(atom->name, "C_4") == 0)
643 	  strcpy(atom->name, "C4'");
644 	else if (strcmp(atom->name, "O_4") == 0)
645 	  strcpy(atom->name, "O4'");
646 	else if (strcmp(atom->name, "C_5") == 0)
647 	  strcpy(atom->name, "C5'");
648 	else if (strcmp(atom->name, "O_5") == 0)
649 	  strcpy(atom->name, "O5'");
650 	else
651 	  atom_name_remove_underscores(atom->name);
652       }
653     }
654 }
655 
656 /* Get structure from an MMTK trajectory file */
read_mmtk_cdf_structure(void * mydata,int * optflags,molfile_atom_t * atoms)657 static int read_mmtk_cdf_structure(void *mydata, int *optflags,
658                                    molfile_atom_t *atoms) {
659   int i, rc;
660   molfile_atom_t *atom;
661   cdfdata *cdf = (cdfdata *) mydata;
662   mmtkdata *mmtk = &cdf->mmtk;
663   size_t start[3], count[3];
664   char *dstr;
665   char **atom_pointers;
666   int resnum;
667   char resname[8];
668 
669   *optflags = MOLFILE_NOOPTIONS;
670 
671   mmtk->description = (char *) malloc((mmtk->description_lengthdim + 1) * sizeof(char));
672   if (mmtk->description == NULL)
673     return MOLFILE_ERROR;
674 
675   start[0] = cdf->curframe; /* frame */
676   count[0] = mmtk->description_lengthdim;
677 
678   rc = nc_get_vara_text(cdf->ncid, mmtk->description_id,
679                         start, count, mmtk->description);
680   if (rc != NC_NOERR)
681     return MOLFILE_ERROR;
682 
683   /* initialize all atoms with name "X" to start with */
684   /* indicating unknown atom types etc..              */
685   for (i=0; i<cdf->natoms; i++) {
686     atom = atoms + i;
687     strncpy(atom->name, "X", sizeof(atom->name)-1);
688     atom->name[sizeof(atom->name)-1] = '\0';
689     strncpy(atom->type, atom->name, sizeof(atom->type)-1);
690     atom->type[sizeof(atom->type)-1] = '\0';
691     atom->resname[0] = '\0';
692     atom->resid = 1;
693     atom->chain[0] = '\0';
694     atom->segid[0] = '\0';
695   }
696 
697   /* Allocate a pointer array that will hold each atom's location in
698      the description string. This will be used in a second pass through
699      the description string in which residue names and indices will
700      be assigned. */
701   atom_pointers = (char **) malloc(cdf->natoms * sizeof(char *));
702   if (atom_pointers == NULL)
703     return MOLFILE_ERROR;
704 
705   /* First pass: look only at atoms */
706   dstr = mmtk->description;
707   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
708     char *atomstr;
709     atomstr = strstr(dstr, "A('");
710 
711     if (atomstr != NULL) {
712       char name[1024];
713       char *nmstart = NULL;
714       char *nmend = NULL;
715       char *indstart = NULL;
716       char *endp = NULL;
717       int index, len;
718 
719       endp = strchr(atomstr, ')');
720       nmstart = strchr(atomstr, '\'');
721       if (nmstart != NULL)
722         nmend = strchr(nmstart+1, '\'');
723       indstart = strchr(atomstr, ',');
724       if (endp == NULL || nmstart == NULL || nmend == NULL || indstart == NULL) {
725         printf("netcdfplugin) mmtk_read_structure(): unable to parse atom tag\n");
726         break; /* something went wrong */
727       }
728 
729       len = nmend - nmstart - 1;
730       if (len > sizeof(name)) {
731         printf("netcdfplugin) mmtk_read_structure(): bad length: %d\n", len);
732         break; /* something went wrong */
733       }
734       memcpy(name, nmstart+1, len);
735       name[len] = '\0';
736 
737       index = -1;
738       sscanf(indstart, ",%d)", &index);
739       atom_pointers[index] = atomstr;
740 
741       if (index >= 0 && index < cdf->natoms) {
742         atom = atoms + index;
743         strncpy(atom->name, name, sizeof(atom->name)-1);
744         atom->name[sizeof(atom->name)-1] = '\0';
745         strncpy(atom->type, atom->name, sizeof(atom->type)-1);
746         atom->type[sizeof(atom->type)-1] = '\0';
747       }
748 
749       dstr = atomstr+1;
750     } else {
751       break; /* no more atom records found */
752     }
753   }
754 
755   /* Second pass: peptide chains */
756   dstr = mmtk->description;
757   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
758     char *peptide, *pend;
759     char *group, *gend;
760     char *nmstart, *nmend;
761     char chain_id = 'A';
762     char *s;
763 
764     peptide = strstr(dstr, "S('");
765     if (peptide == NULL)
766       break;
767     pend = find_closing_bracket(peptide+2);
768 
769     resnum = 1;
770     group = peptide;
771     while (1) {
772       group = strstr(group, "G('");
773       if (group == NULL || group >= pend)
774 	break;
775       gend = find_closing_bracket(group+2);
776       nmstart = strchr(group, '\'') + 1;
777       nmend = strchr(nmstart, '\'');
778       while (nmend > nmstart && isdigit(*(nmend-1)))
779 	nmend--;
780       if (nmend-nmstart > 7)
781 	nmend = nmstart+7;
782       strncpy(resname, nmstart, nmend-nmstart);
783       resname[nmend-nmstart] = '\0';
784       s = resname;
785       while (*s) {
786 	*s = toupper(*s);
787 	s++;
788       }
789       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
790 			  chain_id, resname, resnum, group, gend, 1);
791       group = gend;
792       resnum++;
793     }
794 
795     if (chain_id == 'Z')
796       chain_id = 'A';
797     else
798 	chain_id++;
799     dstr = pend;
800   }
801 
802   /* Third pass: nucleic acid chains */
803   dstr = mmtk->description;
804   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
805     char *nacid, *nend;
806     char *group, *gend;
807     char *nmstart, *nmend;
808     char chain_id = 'a';
809     char *s;
810 
811     nacid = strstr(dstr, "N('");
812     if (nacid == NULL)
813       break;
814     nend = find_closing_bracket(nacid+2);
815 
816     resnum = 1;
817     group = nacid;
818     while (1) {
819       group = strstr(group, "G('");
820       if (group == NULL || group >= nend)
821 	break;
822       gend = find_closing_bracket(group+2);
823       nmstart = strchr(group, '\'') + 1;
824       nmend = strchr(nmstart, '\'');
825       while (nmend > nmstart && isdigit(*(nmend-1)))
826 	nmend--;
827       if (nmend > nmstart && nmend[-1] == '_')
828 	nmend--;
829       if (nmend-nmstart > 7)
830 	nmend = nmstart+7;
831       strncpy(resname, nmstart, nmend-nmstart);
832       resname[nmend-nmstart] = '\0';
833       s = resname;
834       while (*s) {
835 	*s = toupper(*s);
836 	s++;
837       }
838       if (resname[0] == 'R' || resname[0] == 'D') {
839 	switch (resname[1]) {
840 	case 'A':
841 	  strcpy(resname, "ADE");
842 	  break;
843 	case 'C':
844 	  strcpy(resname, "CYT");
845 	  break;
846 	case 'G':
847 	  strcpy(resname, "GUA");
848 	  break;
849 	case 'T':
850 	  strcpy(resname, "THY");
851 	  break;
852 	case 'U':
853 	  strcpy(resname, "URA");
854 	  break;
855 	}
856       }
857       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
858 			  chain_id, resname, resnum, group, gend, 2);
859       group = gend;
860       resnum++;
861     }
862 
863     if (chain_id == 'z')
864       chain_id = 'a';
865     else
866 	chain_id++;
867     dstr = nend;
868   }
869 
870   /* Fourth pass: non-chain molecules */
871   resnum = 1;
872   dstr = mmtk->description;
873   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
874     char *molecule, *mend;
875     char *nmstart, *nmend;
876 
877     molecule = strstr(dstr, "M('");
878     if (molecule == NULL)
879       break;
880     mend = find_closing_bracket(molecule+2);
881     nmstart = strchr(molecule, '\'') + 1;
882     nmend = strchr(nmstart, '\'');
883     if (strncmp(nmstart, "water", 5) == 0)
884       strcpy(resname, "HOH");
885     else {
886       if (nmend-nmstart > 7)
887 	nmend = nmstart+7;
888       strncpy(resname, nmstart, nmend-nmstart);
889       resname[nmend-nmstart] = '\0';
890     }
891     set_atom_attributes(atoms, cdf->natoms, atom_pointers,
892 			'_', resname, resnum, molecule, mend, 0);
893     resnum++;
894     dstr = mend;
895   }
896 
897   free(atom_pointers);
898 
899   return MOLFILE_SUCCESS;
900 }
901 
902 
read_cdf_structure(void * mydata,int * optflags,molfile_atom_t * atoms)903 static int read_cdf_structure(void *mydata, int *optflags,
904                                    molfile_atom_t *atoms) {
905   cdfdata *cdf = (cdfdata *)mydata;
906 
907   switch (cdf->type) {
908     case CDF_TYPE_AMBER:
909       return MOLFILE_NOSTRUCTUREDATA; /* not an error, just no data */
910 
911     case CDF_TYPE_MMTK:
912       return read_mmtk_cdf_structure(mydata, optflags, atoms);
913   }
914 
915   return MOLFILE_ERROR;
916 }
917 
918 
read_amber_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)919 static int read_amber_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
920   size_t start[3], count[3];
921   cdfdata *cdf = (cdfdata *)mydata;
922   amberdata *amber = &cdf->amber;
923   int rc;
924 
925   /* Read in the atom coordinates and unit cell information */
926   /* only save coords if we're given a valid ts pointer     */
927   /* otherwise VMD wants us to skip it.                     */
928   if (ts != NULL) {
929     if (amber->is_restart) {
930       // restart files only contain one frame, so return if we get called again
931       if (cdf->curframe > 0)
932         return MOLFILE_ERROR;
933 
934       start[0] = 0;             /* atom */
935       start[1] = 0;             /* spatial */
936       start[2] = 0;
937 
938       count[0] = amber->atomdim;
939       count[1] = amber->spatialdim;
940       count[2] = 0;
941 
942       rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id,
943                              start, count, ts->coords);
944       if (rc != NC_NOERR) {
945         printf("netcdfplugin) AMBER: failed to parse restart file coordinates!\n");
946       }
947     } else {
948       start[0] = cdf->curframe; /* frame */
949       start[1] = 0;             /* atom */
950       start[2] = 0;             /* spatial */
951 
952       count[0] = 1;
953       count[1] = amber->atomdim;
954       count[2] = amber->spatialdim;
955 
956       /* parse trajectory timestep */
957       rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id,
958                              start, count, ts->coords);
959     }
960 
961     if (rc != NC_NOERR)
962       return MOLFILE_ERROR;
963 
964     /* apply coordinate scaling factor if not 1.0 */
965     if (amber->coordinates_scalefactor != 1.0) {
966       int i;
967       float s = amber->coordinates_scalefactor;
968       for (i=0; i<natoms*3; i++) {
969         ts->coords[i] *= s;
970       }
971     }
972 
973     /* Read the PBC box info. */
974     if (amber->has_box) {
975       size_t start[3], count[3];
976       double lengths[3];
977       double angles[3];
978 
979       if (amber->is_restart) {
980         start[0] = 0;             /* spatial */
981         start[1] = 0;
982         start[2] = 0;
983         count[0] = amber->spatialdim;
984         count[1] = 0;
985         count[2] = 0;
986       } else {
987         start[0] = cdf->curframe; /* frame */
988         start[1] = 0;             /* spatial */
989         start[2] = 0;
990         count[0] = 1;
991         count[1] = amber->spatialdim;
992         count[2] = 0;
993       }
994 
995       rc = nc_get_vara_double(cdf->ncid, amber->cell_lengths_id,
996                               start, count, lengths);
997       if (rc != NC_NOERR)
998         return MOLFILE_ERROR;
999 
1000       rc = nc_get_vara_double(cdf->ncid, amber->cell_angles_id,
1001                               start, count, angles);
1002       if (rc != NC_NOERR)
1003         return MOLFILE_ERROR;
1004 
1005       ts->A = lengths[0] * amber->cell_lengths_scalefactor;
1006       ts->B = lengths[1] * amber->cell_lengths_scalefactor;
1007       ts->C = lengths[2] * amber->cell_lengths_scalefactor;
1008 
1009       ts->alpha = angles[0] * amber->cell_angles_scalefactor;
1010       ts->beta  = angles[1] * amber->cell_angles_scalefactor;
1011       ts->gamma = angles[2] * amber->cell_angles_scalefactor;
1012     }
1013   }
1014 
1015   cdf->curframe++;
1016   return MOLFILE_SUCCESS;
1017 }
1018 
1019 
read_mmtk_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)1020 static int read_mmtk_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
1021   cdfdata *cdf = (cdfdata *)mydata;
1022   mmtkdata *mmtk = &cdf->mmtk;
1023   int rc;
1024 
1025   /* Read in the atom coordinates and unit cell information */
1026   /* only save coords if we're given a valid ts pointer     */
1027   /* otherwise VMD wants us to skip it.                     */
1028   if (ts != NULL) {
1029     size_t start[4], count[4];
1030     int i;
1031 
1032     if (mmtk->minor_step_numberdim == 0) {
1033       start[0] = cdf->curframe; /* step */
1034       start[1] = 0;             /* atom */
1035       start[2] = 0;             /* spatial */
1036       start[3] = 0;             /* minor step */
1037     }
1038     else {
1039       start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
1040       start[1] = 0;             /* atom */
1041       start[2] = 0;             /* spatial */
1042       start[3] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
1043     }
1044 
1045     count[0] = 1;
1046     count[1] = mmtk->atom_numberdim;
1047     count[2] = mmtk->xyzdim;
1048     count[3] = 1;             /* only want one minor step, regardless */
1049 
1050     rc = nc_get_vara_float(cdf->ncid, mmtk->configuration_id,
1051                            start, count, ts->coords);
1052     if (rc != NC_NOERR)
1053       return MOLFILE_ERROR;
1054 
1055     /* check for allocated but not yet used frame */
1056     if (ts->coords[0] == NC_FILL_FLOAT)
1057       return MOLFILE_ERROR;
1058 
1059     /* scale coordinates from nanometers to angstroms */
1060     for (i=0; i<(3 * mmtk->atom_numberdim); i++) {
1061       ts->coords[i] *= 10.0f;
1062     }
1063 
1064     /* Read the PBC box info. */
1065     if (mmtk->has_box) {
1066       float lengths[3];
1067 
1068       if (mmtk->minor_step_numberdim == 0) {
1069 	start[0] = cdf->curframe; /* step */
1070 	start[1] = 0;             /* box_size */
1071 	start[2] = 0;             /* minor step */
1072       }
1073       else {
1074 	start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
1075 	start[1] = 0;             /* box_size */
1076 	start[2] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
1077       }
1078 
1079       count[0] = 1;
1080       count[1] = 3;
1081       count[2] = 1;
1082 
1083       rc = nc_get_vara_float(cdf->ncid, mmtk->box_size_id,
1084                              start, count, lengths);
1085       if (rc != NC_NOERR)
1086         return MOLFILE_ERROR;
1087 
1088       ts->A = 10.*lengths[0];
1089       ts->B = 10.*lengths[1];
1090       ts->C = 10.*lengths[2];
1091 
1092       ts->alpha = 90.;
1093       ts->beta  = 90.;
1094       ts->gamma = 90.;
1095     }
1096   }
1097 
1098   cdf->curframe++;
1099   return MOLFILE_SUCCESS;
1100 }
1101 
1102 
1103 
read_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)1104 static int read_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
1105   cdfdata *cdf = (cdfdata *)mydata;
1106 
1107   switch (cdf->type) {
1108     case CDF_TYPE_AMBER:
1109       return read_amber_cdf_timestep(mydata, natoms, ts);
1110 
1111     case CDF_TYPE_MMTK:
1112       return read_mmtk_cdf_timestep(mydata, natoms, ts);
1113   }
1114 
1115   return MOLFILE_ERROR;
1116 }
1117 
1118 
1119 static molfile_plugin_t plugin;
1120 
VMDPLUGIN_init(void)1121 VMDPLUGIN_API int VMDPLUGIN_init(void) {
1122   memset(&plugin, 0, sizeof(molfile_plugin_t));
1123   plugin.abiversion = vmdplugin_ABIVERSION;
1124   plugin.type = MOLFILE_PLUGIN_TYPE;
1125   plugin.name = "netcdf";
1126   plugin.prettyname = "NetCDF (AMBER, MMTK)";
1127   plugin.author = "Konrad Hinsen, John Stone";
1128   plugin.majorv = 1;
1129   plugin.minorv = 1;
1130   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
1131   plugin.filename_extension = "nc,ncrst";
1132   plugin.open_file_read = open_cdf_read;
1133   plugin.read_structure = read_cdf_structure;
1134   plugin.read_next_timestep = read_cdf_timestep;
1135   plugin.close_file_read = close_cdf_read;
1136   return VMDPLUGIN_SUCCESS;
1137 }
1138 
VMDPLUGIN_register(void * v,vmdplugin_register_cb cb)1139 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
1140   (*cb)(v, (vmdplugin_t *)&plugin);
1141   return VMDPLUGIN_SUCCESS;
1142 }
1143 
VMDPLUGIN_fini(void)1144 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
1145 
1146