1 /* MACHINE GENERATED FILE, DO NOT EDIT! */
2
3 #define VMDPLUGIN molfile_netcdfplugin
4 #define STATIC_PLUGIN 1
5
6 /***************************************************************************
7 *cr
8 *cr (C) Copyright 1995-2016 The Board of Trustees of the
9 *cr University of Illinois
10 *cr All Rights Reserved
11 *cr
12 ***************************************************************************/
13
14 /***************************************************************************
15 * RCS INFORMATION:
16 *
17 * $RCSfile: netcdfplugin.c,v $
18 * $Author: johns $ $Locker: $ $State: Exp $
19 * $Revision: 1.29 $ $Date: 2016/11/28 05:01:54 $
20 *
21 ***************************************************************************/
22
23 /*
24 * NetCDF based trajectories, used by AMBER 9, MMTK, etc.
25 */
26
27 #include <netcdf.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <ctype.h>
32 #include "molfile_plugin.h"
33
34 #define CDF_TYPE_UNKNOWN 0
35 #define CDF_TYPE_AMBER 1
36 #define CDF_TYPE_MMTK 2
37
38 #define CDF_SUCCESS 0
39 #define CDF_ERR -1
40
41 typedef struct {
42 int trajectorytype;
43 int step_numberdimid;
44 size_t step_numberdim;
45 int minor_step_numberdimid;
46 size_t minor_step_numberdim;
47 int atom_numberdimid;
48 size_t atom_numberdim;
49 int xyzdimid;
50 size_t xyzdim;
51 int box_size_lengthdimid;
52 size_t box_size_lengthdim;
53 int description_lengthdimid;
54 size_t description_lengthdim;
55 char *description;
56 int description_id;
57 int step_id;
58 int time_id;
59 int box_size_id;
60 int configuration_id;
61 int has_box;
62 char *comment;
63 } mmtkdata;
64
65 typedef struct {
66 int is_restart;
67 int is_trajectory;
68 int has_box;
69 int atomdimid;
70 size_t atomdim;
71 int spatialdimid;
72 size_t spatialdim;
73 int framedimid;
74 size_t framedim;
75 char *conventionversion;
76 char *title;
77 char *application;
78 char *program;
79 char *programversion;
80 int spatial_id; /* "xyz" */
81 int cell_spatial_id; /* "abc" */
82 int cell_angular_id; /* "alpha, beta, gamma" */
83 int time_id; /* frame time in picoseconds */
84 int coordinates_id; /* coords in angstroms */
85 char *coordinates_units; /* coordinates units */
86 float coordinates_scalefactor; /* coordinates scaling factor */
87 int cell_lengths_id; /* cell lengths in angstroms */
88 char *cell_lengths_units; /* cell lengths units */
89 float cell_lengths_scalefactor; /* cell lengths scaling factor */
90 int cell_angles_id; /* cell angles in degrees */
91 char *cell_angles_units; /* cell angles units */
92 float cell_angles_scalefactor; /* cell angles scaling factor */
93 int velocities_id; /* velocities in angstroms/picosecond */
94 } amberdata;
95
96
97 typedef struct {
98 /* sub-format independent data */
99 int ncid;
100 int type;
101 int natoms;
102 int curframe;
103 char *conventions;
104
105 /* stuff used by AMBER */
106 amberdata amber;
107
108 /* stuff used by MMTK */
109 mmtkdata mmtk;
110
111 } cdfdata;
112
113
close_cdf_read(void * mydata)114 static void close_cdf_read(void *mydata) {
115 cdfdata *cdf = (cdfdata *)mydata;
116
117 nc_close(cdf->ncid);
118
119 /* AMBER stuff */
120 if (cdf->amber.title)
121 free(cdf->amber.title);
122
123 if (cdf->amber.application)
124 free(cdf->amber.application);
125
126 if (cdf->amber.program)
127 free(cdf->amber.program);
128
129 if (cdf->amber.programversion)
130 free(cdf->amber.programversion);
131
132 if (cdf->amber.conventionversion)
133 free(cdf->amber.conventionversion);
134
135 if (cdf->amber.coordinates_units)
136 free(cdf->amber.coordinates_units);
137
138 if (cdf->amber.cell_lengths_units)
139 free(cdf->amber.cell_lengths_units);
140
141 /* MMTK stuff */
142 if (cdf->mmtk.comment)
143 free(cdf->mmtk.comment);
144
145 /* format independent stuff */
146 if (cdf->conventions)
147 free(cdf->conventions);
148
149 free(cdf);
150 }
151
152
153
open_amber_cdf_read(cdfdata * cdf)154 static int open_amber_cdf_read(cdfdata *cdf) {
155 int rc;
156 size_t len;
157 amberdata *amber = &cdf->amber;
158
159 /* check if this is a restart file or not */
160 if (!strcmp(cdf->conventions, "AMBERRESTART")) {
161 amber->is_restart = 1;
162 } else {
163 amber->is_trajectory = 1;
164 }
165
166 /* global attrib: "ConventionVersion" -- required */
167 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "ConventionVersion", &len);
168 if (rc == NC_NOERR && len > 0) {
169 amber->conventionversion = (char *) malloc((len+1) * sizeof(char));
170 nc_get_att_text(cdf->ncid, NC_GLOBAL, "ConventionVersion", amber->conventionversion);
171 amber->conventionversion[len] = '\0';
172 printf("netcdfplugin) %s follows AMBER conventions version '%s'\n",
173 (amber->is_restart) ? "restart file" : "trajectory",
174 amber->conventionversion);
175 } else {
176 return CDF_ERR;
177 }
178
179 /* at this point we know that this is an AMBER trajectory or restart file */
180 cdf->type = CDF_TYPE_AMBER;
181
182 /* initialize default scaling factors so they are always set to a sane */
183 /* value even if problems occur later */
184 amber->coordinates_scalefactor = 1.0;
185 amber->cell_lengths_scalefactor = 1.0;
186 amber->cell_angles_scalefactor = 1.0;
187
188 /* global attrib: "program" -- required */
189 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "program", &len);
190 if (rc == NC_NOERR && len > 0) {
191 amber->program = (char *) malloc((len+1) * sizeof(char));
192 nc_get_att_text(cdf->ncid, NC_GLOBAL, "program", amber->program);
193 amber->program[len] = '\0';
194 printf("netcdfplugin) AMBER: program '%s'\n", amber->program);
195 } else {
196 printf("netcdfplugin) AMBER: Missing required 'program' global attribute, corrupt file?\n");
197 }
198
199
200 /* global attrib: "programVersion" -- required */
201 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "programVersion", &len);
202 if (rc == NC_NOERR && len > 0) {
203 amber->programversion = (char *) malloc((len+1) * sizeof(char));
204 nc_get_att_text(cdf->ncid, NC_GLOBAL, "programVersion", amber->programversion);
205 amber->programversion[len] = '\0';
206 printf("netcdfplugin) AMBER: program version '%s'\n", amber->programversion);
207 } else {
208 printf("netcdfplugin) AMBER: Missing required 'programVersion' global attribute, corrupt file?\n");
209 }
210
211
212 /* global attrib: "title" -- optional */
213 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "title", &len);
214 if (rc == NC_NOERR && len > 0) {
215 amber->title = (char *) malloc((len+1) * sizeof(char));
216 nc_get_att_text(cdf->ncid, NC_GLOBAL, "title", amber->title);
217 amber->title[len] = '\0';
218 printf("netcdfplugin) AMBER: title '%s'\n", amber->title);
219 }
220
221
222 /* global attrib: "application" -- optional */
223 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "application", &len);
224 if (rc == NC_NOERR && len > 0) {
225 amber->application = (char *) malloc((len+1) * sizeof(char));
226 nc_get_att_text(cdf->ncid, NC_GLOBAL, "application", amber->application);
227 amber->application[len] = '\0';
228 printf("netcdfplugin) AMBER: application '%s'\n", amber->application);
229 }
230
231
232 /* XXX lots of additional error checking is needed below... */
233
234 /* read in spatial dimension */
235 rc = nc_inq_dimid(cdf->ncid, "spatial", &amber->spatialdimid);
236 if (rc == NC_NOERR) {
237 rc = nc_inq_dimlen(cdf->ncid, amber->spatialdimid, &amber->spatialdim);
238 if (rc == NC_NOERR) {
239 printf("netcdfplugin) AMBER: spatial dimension: %ld\n", (long)amber->spatialdim);
240 } else {
241 printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
242 printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
243 amber->spatialdim = 3;
244 }
245 } else {
246 printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
247 printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
248 amber->spatialdim = 3;
249 }
250
251 /* read in atom dimension */
252 rc = nc_inq_dimid(cdf->ncid, "atom", &amber->atomdimid);
253 if (rc == NC_NOERR) {
254 rc = nc_inq_dimlen(cdf->ncid, amber->atomdimid, &amber->atomdim);
255 if (rc == NC_NOERR) {
256 printf("netcdfplugin) AMBER: atom dimension: %ld\n", (long)amber->atomdim);
257 cdf->natoms = amber->atomdim; /* copy to format independent part */
258 } else {
259 printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
260 return CDF_ERR;
261 }
262 } else {
263 printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
264 return CDF_ERR;
265 }
266
267 /* if this is a trajectory, read in frame dimension */
268 if (amber->is_trajectory) {
269 rc = nc_inq_dimid(cdf->ncid, "frame", &amber->framedimid);
270 if (rc == NC_NOERR) {
271 rc = nc_inq_dimlen(cdf->ncid, amber->framedimid, &amber->framedim);
272 if (rc == NC_NOERR) {
273 printf("netcdfplugin) AMBER: frame dimension: %ld\n", (long)amber->framedim);
274 } else {
275 printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
276 return CDF_ERR;
277 }
278 } else {
279 printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
280 return CDF_ERR;
281 }
282 }
283
284
285 /*
286 * get ID values for all of the variables we're interested in
287 */
288 #if 0
289 /* VMD can live without the various human readable label variables. */
290 rc = nc_inq_varid(cdf->ncid, "spatial", &amber->spatial_id);
291 if (rc != NC_NOERR)
292 return CDF_ERR;
293
294 rc = nc_inq_varid(cdf->ncid, "cell_spatial", &amber->cell_spatial_id);
295 if (rc != NC_NOERR)
296 return CDF_ERR;
297
298 rc = nc_inq_varid(cdf->ncid, "cell_angular", &amber->cell_angular_id);
299 if (rc != NC_NOERR)
300 return CDF_ERR;
301 #endif
302
303 /* VMD requires coordinates at a minimum */
304 rc = nc_inq_varid(cdf->ncid, "coordinates", &amber->coordinates_id);
305 if (rc != NC_NOERR) {
306 printf("netcdfplugin) AMBER: no coordinates variable, nothing to load\n");
307 return CDF_ERR;
308 }
309
310 /* Coordinate units */
311 rc = nc_inq_attlen(cdf->ncid, amber->coordinates_id, "units", &len);
312 if (rc == NC_NOERR && len > 0) {
313 amber->coordinates_units = (char *) malloc((len+1) * sizeof(char));
314 nc_get_att_text(cdf->ncid, amber->coordinates_id, "units", amber->coordinates_units);
315 amber->coordinates_units[len] = '\0';
316 printf("netcdfplugin) AMBER: coordinates units: '%s'\n", amber->coordinates_units);
317 } else {
318 printf("netcdfplugin) AMBER: no coordinates units attribute, Angstroms assumed\n");
319 }
320
321 /* Coordinate scaling factor to get to Angstroms */
322 if (nc_get_att_float(cdf->ncid, amber->coordinates_id, "scale_factor", &amber->coordinates_scalefactor) != NC_NOERR) {
323 printf("netcdfplugin) AMBER: no coordinates scalefactor attribute, 1.0 assumed\n");
324 }
325 printf("netcdfplugin) AMBER: coordinates scalefactor: %f\n", amber->coordinates_scalefactor);
326
327 #if 0
328 /* we don't need velocities at this time */
329 rc = nc_inq_varid(cdf->ncid, "velocities", &amber->velocities_id);
330 if (rc != NC_NOERR) {
331 printf("netcdfplugin) AMBER: missing velocities variable, aborting\n");
332 return CDF_ERR;
333 }
334 #endif
335
336 /* optional periodic cell info */
337 rc = nc_inq_varid(cdf->ncid, "cell_lengths", &amber->cell_lengths_id);
338 if (rc == NC_NOERR) {
339 rc = nc_inq_varid(cdf->ncid, "cell_angles", &amber->cell_angles_id);
340 if (rc == NC_NOERR) {
341 printf("netcdfplugin) AMBER trajectory contains periodic cell information\n");
342 amber->has_box = 1;
343
344 /* Cell lengths units */
345 rc = nc_inq_attlen(cdf->ncid, amber->cell_lengths_id, "units", &len);
346 if (rc == NC_NOERR && len > 0) {
347 amber->cell_lengths_units = (char *) malloc((len+1) * sizeof(char));
348 nc_get_att_text(cdf->ncid, amber->cell_lengths_id, "units", amber->cell_lengths_units);
349 amber->cell_lengths_units[len] = '\0';
350 printf("netcdfplugin) AMBER: cell lengths units: '%s'\n", amber->cell_lengths_units);
351 } else {
352 printf("netcdfplugin) AMBER: no cell lengths units attribute, Angstroms assumed\n");
353 }
354
355 /* Cell lengths scaling factor to get to Angstroms */
356 if (nc_get_att_float(cdf->ncid, amber->cell_lengths_id, "scale_factor", &amber->cell_lengths_scalefactor) != NC_NOERR) {
357 printf("netcdfplugin) AMBER: no cell lengths scalefactor attribute, 1.0 assumed\n");
358 }
359 printf("netcdfplugin) AMBER: cell lengths scalefactor: %f\n", amber->cell_lengths_scalefactor);
360
361 /* Cell angles units */
362 rc = nc_inq_attlen(cdf->ncid, amber->cell_angles_id, "units", &len);
363 if (rc == NC_NOERR && len > 0) {
364 amber->cell_angles_units = (char *) malloc((len+1) * sizeof(char));
365 nc_get_att_text(cdf->ncid, amber->cell_angles_id, "units", amber->cell_angles_units);
366 amber->cell_angles_units[len] = '\0';
367 printf("netcdfplugin) AMBER: cell angles units: '%s'\n", amber->cell_angles_units);
368 } else {
369 printf("netcdfplugin) AMBER: no cell angles units attribute, Degrees assumed\n");
370 }
371
372 /* Cell angles scaling factor to get to degrees */
373 if (nc_get_att_float(cdf->ncid, amber->cell_angles_id, "scale_factor", &amber->cell_angles_scalefactor) != NC_NOERR) {
374 printf("netcdfplugin) AMBER: no cell angles scalefactor attribute, 1.0 assumed\n");
375 }
376 printf("netcdfplugin) AMBER: cell angles scalefactor: %f\n", amber->cell_angles_scalefactor);
377 }
378 }
379
380 return CDF_SUCCESS;
381 }
382
383
open_mmtk_cdf_read(cdfdata * cdf,int conventionsknown)384 static int open_mmtk_cdf_read(cdfdata *cdf, int conventionsknown) {
385 int rc;
386 size_t len;
387 mmtkdata *mmtk = &cdf->mmtk;
388
389 /* If conventions specify MMTK then we're safe to continue */
390 /* and we know what we're dealing with */
391 if (conventionsknown) {
392 cdf->type = CDF_TYPE_MMTK;
393 }
394
395 /* global attrib: "trajectory_type" (new format) */
396 rc = nc_get_att_int(cdf->ncid, NC_GLOBAL, "trajectory_type", &mmtk->trajectorytype);
397 if (rc == NC_NOERR) {
398 printf("netcdfplugin) MMTK trajectory type: %d\n", mmtk->trajectorytype);
399 } else {
400 printf("netcdfplugin) Assuming MMTK trajectory type: %d\n", mmtk->trajectorytype);
401 mmtk->trajectorytype = 0;
402 }
403
404 /* read in spatial dimension */
405 rc = nc_inq_dimid(cdf->ncid, "xyz", &mmtk->xyzdimid);
406 if (rc == NC_NOERR) {
407 rc = nc_inq_dimlen(cdf->ncid, mmtk->xyzdimid, &mmtk->xyzdim);
408 if (rc == NC_NOERR)
409 printf("netcdfplugin) MMTK: xyz dimension: %ld\n", (long)mmtk->xyzdim);
410 else
411 return CDF_ERR;
412 } else {
413 return CDF_ERR;
414 }
415
416 /* read in atom dimension */
417 rc = nc_inq_dimid(cdf->ncid, "atom_number", &mmtk->atom_numberdimid);
418 if (rc == NC_NOERR) {
419 rc = nc_inq_dimlen(cdf->ncid, mmtk->atom_numberdimid, &mmtk->atom_numberdim);
420 if (rc == NC_NOERR) {
421 printf("netcdfplugin) MMTK: atom_number dimension: %ld\n", (long)mmtk->atom_numberdim);
422 cdf->natoms = mmtk->atom_numberdim; /* copy to format independent part */
423 } else {
424 return CDF_ERR;
425 }
426 } else {
427 return CDF_ERR;
428 }
429
430
431 /* read in frame dimension */
432 rc = nc_inq_dimid(cdf->ncid, "step_number", &mmtk->step_numberdimid);
433 if (rc == NC_NOERR) {
434 rc = nc_inq_dimlen(cdf->ncid, mmtk->step_numberdimid, &mmtk->step_numberdim);
435 if (rc == NC_NOERR)
436 printf("netcdfplugin) MMTK: step_number dimension: %ld\n", (long)mmtk->step_numberdim);
437 else
438 return CDF_ERR;
439 } else {
440 return CDF_ERR;
441 }
442
443
444 /* read in minor step number dimension */
445 rc = nc_inq_dimid(cdf->ncid, "minor_step_number", &mmtk->minor_step_numberdimid);
446 if (rc == NC_NOERR) {
447 rc = nc_inq_dimlen(cdf->ncid, mmtk->minor_step_numberdimid, &mmtk->minor_step_numberdim);
448 if (rc == NC_NOERR)
449 printf("netcdfplugin) MMTK: minor_step_number dimension: %ld\n", (long)mmtk->minor_step_numberdim);
450 else
451 return CDF_ERR;
452 } else if (rc == NC_EBADDIM) {
453 printf("netcdfplugin) MMTK: no minor_step_number dimension\n");
454 mmtk->minor_step_numberdim = 0;
455 } else {
456 return CDF_ERR;
457 }
458
459
460 /* read in description_length dimension */
461 rc = nc_inq_dimid(cdf->ncid, "description_length", &mmtk->description_lengthdimid);
462 if (rc == NC_NOERR) {
463 rc = nc_inq_dimlen(cdf->ncid, mmtk->description_lengthdimid, &mmtk->description_lengthdim);
464 if (rc == NC_NOERR)
465 printf("netcdfplugin) MMTK: description_length dimension: %ld\n", (long)mmtk->description_lengthdim);
466 else
467 return CDF_ERR;
468 } else {
469 return CDF_ERR;
470 }
471
472
473 /* get ID values for all of the variables we're interested in */
474 rc = nc_inq_varid(cdf->ncid, "configuration", &mmtk->configuration_id);
475 if (rc != NC_NOERR)
476 return CDF_ERR;
477
478 rc = nc_inq_varid(cdf->ncid, "description", &mmtk->description_id);
479 if (rc != NC_NOERR)
480 return CDF_ERR;
481
482 /* check for PBC */
483 rc = nc_inq_varid(cdf->ncid, "box_size", &mmtk->box_size_id);
484 if (rc == NC_NOERR) {
485 mmtk->has_box = 1;
486 printf("netcdfplugin) MMTK: system has periodic boundary conditions\n");
487 }
488 else if (rc == NC_ENOTVAR)
489 mmtk->has_box = 0;
490 else
491 return CDF_ERR;
492
493
494 /* global attrib: "comment" -- optional */
495 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "comment", &len);
496 if (rc == NC_NOERR && len > 0) {
497 mmtk->comment = (char *) malloc((len+1) * sizeof(char));
498 nc_get_att_text(cdf->ncid, NC_GLOBAL, "comment", mmtk->comment);
499 mmtk->comment[len] = '\0';
500 printf("netcdfplugin) MMTK: comment '%s'\n", mmtk->comment);
501 }
502
503 /* at this point we know that this is an MMTK trajectory */
504 if (!conventionsknown) {
505 printf("netcdfplugin) File is an old format MMTK trajectory without conventions\n");
506 cdf->type = CDF_TYPE_MMTK;
507 }
508
509 return CDF_SUCCESS;
510 }
511
512
open_cdf_read(const char * filename,const char * filetype,int * natoms)513 static void *open_cdf_read(const char *filename, const char *filetype,
514 int *natoms) {
515 int ncid, rc;
516 size_t len;
517 cdfdata *cdf;
518
519 rc = nc_open(filename, NC_NOWRITE, &ncid);
520 if (rc != NC_NOERR) return NULL;
521
522 cdf = (cdfdata *) malloc(sizeof(cdfdata));
523 memset(cdf, 0, sizeof(cdfdata));
524
525 cdf->ncid = ncid;
526 cdf->type = CDF_TYPE_UNKNOWN;
527
528 /* Determine what NetCDF conventions apply to this data, if any */
529 rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "Conventions", &len);
530 if (rc == NC_NOERR && len > 0) {
531 cdf->conventions = (char *) malloc((len+1) * sizeof(char));
532 nc_get_att_text(cdf->ncid, NC_GLOBAL, "Conventions", cdf->conventions);
533 cdf->conventions[len] = '\0';
534 printf("netcdfplugin) conventions: '%s'\n", cdf->conventions);
535 }
536
537 if (cdf->conventions != NULL) {
538 /* Check if this is a file generated by AMBER */
539 if (strstr(cdf->conventions, "AMBER") != NULL) {
540 if (!open_amber_cdf_read(cdf)) {
541 *natoms = cdf->natoms;
542 return cdf;
543 }
544 }
545
546 /* Check if this is a file generated by MMTK */
547 if (strstr(cdf->conventions, "MMTK") != NULL) {
548 if (!open_mmtk_cdf_read(cdf, 1)) {
549 *natoms = cdf->natoms;
550 return cdf;
551 }
552 }
553 }
554
555 printf("netcdfplugin) Missing or unrecognized conventions attribute\n");
556 printf("netcdfplugin) checking for old format MMTK NetCDF file...\n");
557
558 /* If no conventions are specified, then maybe it's from MMTK */
559 if (!open_mmtk_cdf_read(cdf, 0)) {
560 *natoms = cdf->natoms;
561 return cdf;
562 }
563
564 /* if no conventions are recognized, then we free everything */
565 /* and return failure */
566 close_cdf_read(cdf);
567
568 return NULL;
569 }
570
571 /* A very basic bracket counter. It assumes that the expression
572 is syntactically correct. */
find_closing_bracket(char * s)573 static char *find_closing_bracket(char *s) {
574 int count = 1;
575 while (*s && count > 0) {
576 if (*s == '(' || *s == '[')
577 count++;
578 if (*s == ')' || *s == ']')
579 count--;
580 s++;
581 }
582 return s;
583 }
584
585 /* Simple string replacement routine for fixing atom names. */
atom_name_replace(char * name,char * substring,char letter)586 static void atom_name_replace(char *name, char *substring, char letter) {
587 char *s = strstr(name, substring);
588 if (s != NULL) {
589 *s = letter;
590 strcpy(s+1, s+strlen(substring));
591 }
592 }
593
atom_name_remove_underscores(char * name)594 static void atom_name_remove_underscores(char *name) {
595 char *s = name;
596 while (1) {
597 s = strchr(s, '_');
598 if (s == NULL)
599 break;
600 strcpy(s, s+1);
601 }
602 }
603
604 /* Set chainid, resname, and resnum for a range of atoms
605 and fix atom names. */
set_atom_attributes(molfile_atom_t * atoms,int natoms,char ** atom_pointers,char chain_id,char * resname,int resnum,char * start,char * end,int name_correction_type)606 static void set_atom_attributes(molfile_atom_t *atoms, int natoms,
607 char **atom_pointers, char chain_id,
608 char *resname, int resnum,
609 char *start, char *end,
610 int name_correction_type) {
611 int i;
612 for (i=0; i<natoms; i++)
613 if (atom_pointers[i] > start && atom_pointers[i] < end) {
614 molfile_atom_t *atom = atoms + i;
615 atom->chain[0] = chain_id;
616 atom->chain[1] = '\0';
617 strcpy(atom->resname, resname);
618 atom->resid = resnum;
619 if (name_correction_type == 1 /* proteins */) {
620 atom_name_replace(atom->name, "_alpha", 'A');
621 atom_name_replace(atom->name, "_beta", 'B');
622 atom_name_replace(atom->name, "_gamma", 'G');
623 atom_name_replace(atom->name, "_delta", 'D');
624 atom_name_replace(atom->name, "_epsilon", 'E');
625 atom_name_replace(atom->name, "_zeta", 'Z');
626 atom_name_replace(atom->name, "_eta", 'H');
627 atom_name_remove_underscores(atom->name);
628 }
629 else if (name_correction_type == 2 /* nucleic acids */) {
630 if (strcmp(atom->name, "O_1") == 0)
631 strcpy(atom->name, "O1P");
632 else if (strcmp(atom->name, "O_2") == 0)
633 strcpy(atom->name, "O2P");
634 else if (strcmp(atom->name, "C_1") == 0)
635 strcpy(atom->name, "C1'");
636 else if (strcmp(atom->name, "C_2") == 0)
637 strcpy(atom->name, "C2'");
638 else if (strcmp(atom->name, "C_3") == 0)
639 strcpy(atom->name, "C3'");
640 else if (strcmp(atom->name, "O_3") == 0)
641 strcpy(atom->name, "O3'");
642 else if (strcmp(atom->name, "C_4") == 0)
643 strcpy(atom->name, "C4'");
644 else if (strcmp(atom->name, "O_4") == 0)
645 strcpy(atom->name, "O4'");
646 else if (strcmp(atom->name, "C_5") == 0)
647 strcpy(atom->name, "C5'");
648 else if (strcmp(atom->name, "O_5") == 0)
649 strcpy(atom->name, "O5'");
650 else
651 atom_name_remove_underscores(atom->name);
652 }
653 }
654 }
655
656 /* Get structure from an MMTK trajectory file */
read_mmtk_cdf_structure(void * mydata,int * optflags,molfile_atom_t * atoms)657 static int read_mmtk_cdf_structure(void *mydata, int *optflags,
658 molfile_atom_t *atoms) {
659 int i, rc;
660 molfile_atom_t *atom;
661 cdfdata *cdf = (cdfdata *) mydata;
662 mmtkdata *mmtk = &cdf->mmtk;
663 size_t start[3], count[3];
664 char *dstr;
665 char **atom_pointers;
666 int resnum;
667 char resname[8];
668
669 *optflags = MOLFILE_NOOPTIONS;
670
671 mmtk->description = (char *) malloc((mmtk->description_lengthdim + 1) * sizeof(char));
672 if (mmtk->description == NULL)
673 return MOLFILE_ERROR;
674
675 start[0] = cdf->curframe; /* frame */
676 count[0] = mmtk->description_lengthdim;
677
678 rc = nc_get_vara_text(cdf->ncid, mmtk->description_id,
679 start, count, mmtk->description);
680 if (rc != NC_NOERR)
681 return MOLFILE_ERROR;
682
683 /* initialize all atoms with name "X" to start with */
684 /* indicating unknown atom types etc.. */
685 for (i=0; i<cdf->natoms; i++) {
686 atom = atoms + i;
687 strncpy(atom->name, "X", sizeof(atom->name)-1);
688 atom->name[sizeof(atom->name)-1] = '\0';
689 strncpy(atom->type, atom->name, sizeof(atom->type)-1);
690 atom->type[sizeof(atom->type)-1] = '\0';
691 atom->resname[0] = '\0';
692 atom->resid = 1;
693 atom->chain[0] = '\0';
694 atom->segid[0] = '\0';
695 }
696
697 /* Allocate a pointer array that will hold each atom's location in
698 the description string. This will be used in a second pass through
699 the description string in which residue names and indices will
700 be assigned. */
701 atom_pointers = (char **) malloc(cdf->natoms * sizeof(char *));
702 if (atom_pointers == NULL)
703 return MOLFILE_ERROR;
704
705 /* First pass: look only at atoms */
706 dstr = mmtk->description;
707 while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
708 char *atomstr;
709 atomstr = strstr(dstr, "A('");
710
711 if (atomstr != NULL) {
712 char name[1024];
713 char *nmstart = NULL;
714 char *nmend = NULL;
715 char *indstart = NULL;
716 char *endp = NULL;
717 int index, len;
718
719 endp = strchr(atomstr, ')');
720 nmstart = strchr(atomstr, '\'');
721 if (nmstart != NULL)
722 nmend = strchr(nmstart+1, '\'');
723 indstart = strchr(atomstr, ',');
724 if (endp == NULL || nmstart == NULL || nmend == NULL || indstart == NULL) {
725 printf("netcdfplugin) mmtk_read_structure(): unable to parse atom tag\n");
726 break; /* something went wrong */
727 }
728
729 len = nmend - nmstart - 1;
730 if (len > sizeof(name)) {
731 printf("netcdfplugin) mmtk_read_structure(): bad length: %d\n", len);
732 break; /* something went wrong */
733 }
734 memcpy(name, nmstart+1, len);
735 name[len] = '\0';
736
737 index = -1;
738 sscanf(indstart, ",%d)", &index);
739 atom_pointers[index] = atomstr;
740
741 if (index >= 0 && index < cdf->natoms) {
742 atom = atoms + index;
743 strncpy(atom->name, name, sizeof(atom->name)-1);
744 atom->name[sizeof(atom->name)-1] = '\0';
745 strncpy(atom->type, atom->name, sizeof(atom->type)-1);
746 atom->type[sizeof(atom->type)-1] = '\0';
747 }
748
749 dstr = atomstr+1;
750 } else {
751 break; /* no more atom records found */
752 }
753 }
754
755 /* Second pass: peptide chains */
756 dstr = mmtk->description;
757 while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
758 char *peptide, *pend;
759 char *group, *gend;
760 char *nmstart, *nmend;
761 char chain_id = 'A';
762 char *s;
763
764 peptide = strstr(dstr, "S('");
765 if (peptide == NULL)
766 break;
767 pend = find_closing_bracket(peptide+2);
768
769 resnum = 1;
770 group = peptide;
771 while (1) {
772 group = strstr(group, "G('");
773 if (group == NULL || group >= pend)
774 break;
775 gend = find_closing_bracket(group+2);
776 nmstart = strchr(group, '\'') + 1;
777 nmend = strchr(nmstart, '\'');
778 while (nmend > nmstart && isdigit(*(nmend-1)))
779 nmend--;
780 if (nmend-nmstart > 7)
781 nmend = nmstart+7;
782 strncpy(resname, nmstart, nmend-nmstart);
783 resname[nmend-nmstart] = '\0';
784 s = resname;
785 while (*s) {
786 *s = toupper(*s);
787 s++;
788 }
789 set_atom_attributes(atoms, cdf->natoms, atom_pointers,
790 chain_id, resname, resnum, group, gend, 1);
791 group = gend;
792 resnum++;
793 }
794
795 if (chain_id == 'Z')
796 chain_id = 'A';
797 else
798 chain_id++;
799 dstr = pend;
800 }
801
802 /* Third pass: nucleic acid chains */
803 dstr = mmtk->description;
804 while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
805 char *nacid, *nend;
806 char *group, *gend;
807 char *nmstart, *nmend;
808 char chain_id = 'a';
809 char *s;
810
811 nacid = strstr(dstr, "N('");
812 if (nacid == NULL)
813 break;
814 nend = find_closing_bracket(nacid+2);
815
816 resnum = 1;
817 group = nacid;
818 while (1) {
819 group = strstr(group, "G('");
820 if (group == NULL || group >= nend)
821 break;
822 gend = find_closing_bracket(group+2);
823 nmstart = strchr(group, '\'') + 1;
824 nmend = strchr(nmstart, '\'');
825 while (nmend > nmstart && isdigit(*(nmend-1)))
826 nmend--;
827 if (nmend > nmstart && nmend[-1] == '_')
828 nmend--;
829 if (nmend-nmstart > 7)
830 nmend = nmstart+7;
831 strncpy(resname, nmstart, nmend-nmstart);
832 resname[nmend-nmstart] = '\0';
833 s = resname;
834 while (*s) {
835 *s = toupper(*s);
836 s++;
837 }
838 if (resname[0] == 'R' || resname[0] == 'D') {
839 switch (resname[1]) {
840 case 'A':
841 strcpy(resname, "ADE");
842 break;
843 case 'C':
844 strcpy(resname, "CYT");
845 break;
846 case 'G':
847 strcpy(resname, "GUA");
848 break;
849 case 'T':
850 strcpy(resname, "THY");
851 break;
852 case 'U':
853 strcpy(resname, "URA");
854 break;
855 }
856 }
857 set_atom_attributes(atoms, cdf->natoms, atom_pointers,
858 chain_id, resname, resnum, group, gend, 2);
859 group = gend;
860 resnum++;
861 }
862
863 if (chain_id == 'z')
864 chain_id = 'a';
865 else
866 chain_id++;
867 dstr = nend;
868 }
869
870 /* Fourth pass: non-chain molecules */
871 resnum = 1;
872 dstr = mmtk->description;
873 while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
874 char *molecule, *mend;
875 char *nmstart, *nmend;
876
877 molecule = strstr(dstr, "M('");
878 if (molecule == NULL)
879 break;
880 mend = find_closing_bracket(molecule+2);
881 nmstart = strchr(molecule, '\'') + 1;
882 nmend = strchr(nmstart, '\'');
883 if (strncmp(nmstart, "water", 5) == 0)
884 strcpy(resname, "HOH");
885 else {
886 if (nmend-nmstart > 7)
887 nmend = nmstart+7;
888 strncpy(resname, nmstart, nmend-nmstart);
889 resname[nmend-nmstart] = '\0';
890 }
891 set_atom_attributes(atoms, cdf->natoms, atom_pointers,
892 '_', resname, resnum, molecule, mend, 0);
893 resnum++;
894 dstr = mend;
895 }
896
897 free(atom_pointers);
898
899 return MOLFILE_SUCCESS;
900 }
901
902
read_cdf_structure(void * mydata,int * optflags,molfile_atom_t * atoms)903 static int read_cdf_structure(void *mydata, int *optflags,
904 molfile_atom_t *atoms) {
905 cdfdata *cdf = (cdfdata *)mydata;
906
907 switch (cdf->type) {
908 case CDF_TYPE_AMBER:
909 return MOLFILE_NOSTRUCTUREDATA; /* not an error, just no data */
910
911 case CDF_TYPE_MMTK:
912 return read_mmtk_cdf_structure(mydata, optflags, atoms);
913 }
914
915 return MOLFILE_ERROR;
916 }
917
918
read_amber_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)919 static int read_amber_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
920 size_t start[3], count[3];
921 cdfdata *cdf = (cdfdata *)mydata;
922 amberdata *amber = &cdf->amber;
923 int rc;
924
925 /* Read in the atom coordinates and unit cell information */
926 /* only save coords if we're given a valid ts pointer */
927 /* otherwise VMD wants us to skip it. */
928 if (ts != NULL) {
929 if (amber->is_restart) {
930 // restart files only contain one frame, so return if we get called again
931 if (cdf->curframe > 0)
932 return MOLFILE_ERROR;
933
934 start[0] = 0; /* atom */
935 start[1] = 0; /* spatial */
936 start[2] = 0;
937
938 count[0] = amber->atomdim;
939 count[1] = amber->spatialdim;
940 count[2] = 0;
941
942 rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id,
943 start, count, ts->coords);
944 if (rc != NC_NOERR) {
945 printf("netcdfplugin) AMBER: failed to parse restart file coordinates!\n");
946 }
947 } else {
948 start[0] = cdf->curframe; /* frame */
949 start[1] = 0; /* atom */
950 start[2] = 0; /* spatial */
951
952 count[0] = 1;
953 count[1] = amber->atomdim;
954 count[2] = amber->spatialdim;
955
956 /* parse trajectory timestep */
957 rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id,
958 start, count, ts->coords);
959 }
960
961 if (rc != NC_NOERR)
962 return MOLFILE_ERROR;
963
964 /* apply coordinate scaling factor if not 1.0 */
965 if (amber->coordinates_scalefactor != 1.0) {
966 int i;
967 float s = amber->coordinates_scalefactor;
968 for (i=0; i<natoms*3; i++) {
969 ts->coords[i] *= s;
970 }
971 }
972
973 /* Read the PBC box info. */
974 if (amber->has_box) {
975 size_t start[3], count[3];
976 double lengths[3];
977 double angles[3];
978
979 if (amber->is_restart) {
980 start[0] = 0; /* spatial */
981 start[1] = 0;
982 start[2] = 0;
983 count[0] = amber->spatialdim;
984 count[1] = 0;
985 count[2] = 0;
986 } else {
987 start[0] = cdf->curframe; /* frame */
988 start[1] = 0; /* spatial */
989 start[2] = 0;
990 count[0] = 1;
991 count[1] = amber->spatialdim;
992 count[2] = 0;
993 }
994
995 rc = nc_get_vara_double(cdf->ncid, amber->cell_lengths_id,
996 start, count, lengths);
997 if (rc != NC_NOERR)
998 return MOLFILE_ERROR;
999
1000 rc = nc_get_vara_double(cdf->ncid, amber->cell_angles_id,
1001 start, count, angles);
1002 if (rc != NC_NOERR)
1003 return MOLFILE_ERROR;
1004
1005 ts->A = lengths[0] * amber->cell_lengths_scalefactor;
1006 ts->B = lengths[1] * amber->cell_lengths_scalefactor;
1007 ts->C = lengths[2] * amber->cell_lengths_scalefactor;
1008
1009 ts->alpha = angles[0] * amber->cell_angles_scalefactor;
1010 ts->beta = angles[1] * amber->cell_angles_scalefactor;
1011 ts->gamma = angles[2] * amber->cell_angles_scalefactor;
1012 }
1013 }
1014
1015 cdf->curframe++;
1016 return MOLFILE_SUCCESS;
1017 }
1018
1019
read_mmtk_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)1020 static int read_mmtk_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
1021 cdfdata *cdf = (cdfdata *)mydata;
1022 mmtkdata *mmtk = &cdf->mmtk;
1023 int rc;
1024
1025 /* Read in the atom coordinates and unit cell information */
1026 /* only save coords if we're given a valid ts pointer */
1027 /* otherwise VMD wants us to skip it. */
1028 if (ts != NULL) {
1029 size_t start[4], count[4];
1030 int i;
1031
1032 if (mmtk->minor_step_numberdim == 0) {
1033 start[0] = cdf->curframe; /* step */
1034 start[1] = 0; /* atom */
1035 start[2] = 0; /* spatial */
1036 start[3] = 0; /* minor step */
1037 }
1038 else {
1039 start[0] = cdf->curframe/mmtk->minor_step_numberdim; /* step */
1040 start[1] = 0; /* atom */
1041 start[2] = 0; /* spatial */
1042 start[3] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
1043 }
1044
1045 count[0] = 1;
1046 count[1] = mmtk->atom_numberdim;
1047 count[2] = mmtk->xyzdim;
1048 count[3] = 1; /* only want one minor step, regardless */
1049
1050 rc = nc_get_vara_float(cdf->ncid, mmtk->configuration_id,
1051 start, count, ts->coords);
1052 if (rc != NC_NOERR)
1053 return MOLFILE_ERROR;
1054
1055 /* check for allocated but not yet used frame */
1056 if (ts->coords[0] == NC_FILL_FLOAT)
1057 return MOLFILE_ERROR;
1058
1059 /* scale coordinates from nanometers to angstroms */
1060 for (i=0; i<(3 * mmtk->atom_numberdim); i++) {
1061 ts->coords[i] *= 10.0f;
1062 }
1063
1064 /* Read the PBC box info. */
1065 if (mmtk->has_box) {
1066 float lengths[3];
1067
1068 if (mmtk->minor_step_numberdim == 0) {
1069 start[0] = cdf->curframe; /* step */
1070 start[1] = 0; /* box_size */
1071 start[2] = 0; /* minor step */
1072 }
1073 else {
1074 start[0] = cdf->curframe/mmtk->minor_step_numberdim; /* step */
1075 start[1] = 0; /* box_size */
1076 start[2] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
1077 }
1078
1079 count[0] = 1;
1080 count[1] = 3;
1081 count[2] = 1;
1082
1083 rc = nc_get_vara_float(cdf->ncid, mmtk->box_size_id,
1084 start, count, lengths);
1085 if (rc != NC_NOERR)
1086 return MOLFILE_ERROR;
1087
1088 ts->A = 10.*lengths[0];
1089 ts->B = 10.*lengths[1];
1090 ts->C = 10.*lengths[2];
1091
1092 ts->alpha = 90.;
1093 ts->beta = 90.;
1094 ts->gamma = 90.;
1095 }
1096 }
1097
1098 cdf->curframe++;
1099 return MOLFILE_SUCCESS;
1100 }
1101
1102
1103
read_cdf_timestep(void * mydata,int natoms,molfile_timestep_t * ts)1104 static int read_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
1105 cdfdata *cdf = (cdfdata *)mydata;
1106
1107 switch (cdf->type) {
1108 case CDF_TYPE_AMBER:
1109 return read_amber_cdf_timestep(mydata, natoms, ts);
1110
1111 case CDF_TYPE_MMTK:
1112 return read_mmtk_cdf_timestep(mydata, natoms, ts);
1113 }
1114
1115 return MOLFILE_ERROR;
1116 }
1117
1118
1119 static molfile_plugin_t plugin;
1120
VMDPLUGIN_init(void)1121 VMDPLUGIN_API int VMDPLUGIN_init(void) {
1122 memset(&plugin, 0, sizeof(molfile_plugin_t));
1123 plugin.abiversion = vmdplugin_ABIVERSION;
1124 plugin.type = MOLFILE_PLUGIN_TYPE;
1125 plugin.name = "netcdf";
1126 plugin.prettyname = "NetCDF (AMBER, MMTK)";
1127 plugin.author = "Konrad Hinsen, John Stone";
1128 plugin.majorv = 1;
1129 plugin.minorv = 1;
1130 plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
1131 plugin.filename_extension = "nc,ncrst";
1132 plugin.open_file_read = open_cdf_read;
1133 plugin.read_structure = read_cdf_structure;
1134 plugin.read_next_timestep = read_cdf_timestep;
1135 plugin.close_file_read = close_cdf_read;
1136 return VMDPLUGIN_SUCCESS;
1137 }
1138
VMDPLUGIN_register(void * v,vmdplugin_register_cb cb)1139 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
1140 (*cb)(v, (vmdplugin_t *)&plugin);
1141 return VMDPLUGIN_SUCCESS;
1142 }
1143
VMDPLUGIN_fini(void)1144 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
1145
1146