1 /* MACHINE GENERATED FILE, DO NOT EDIT! */
2
3 #define VMDPLUGIN molfile_pdbplugin
4 #define STATIC_PLUGIN 1
5
6 /***************************************************************************
7 *cr
8 *cr (C) Copyright 1995-2016 The Board of Trustees of the
9 *cr University of Illinois
10 *cr All Rights Reserved
11 *cr
12 ***************************************************************************/
13
14 /***************************************************************************
15 * RCS INFORMATION:
16 *
17 * $RCSfile: pdbplugin.c,v $
18 * $Author: johns $ $Locker: $ $State: Exp $
19 * $Revision: 1.73 $ $Date: 2016/11/28 05:01:54 $
20 *
21 ***************************************************************************/
22
23 /*
24 * PDB file format specifications:
25 * http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html
26 */
27
28 #include "largefiles.h" /* platform dependent 64-bit file I/O defines */
29
30 #include "molfile_plugin.h"
31 #include "readpdb.h"
32 #include "periodic_table.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 /*
38 * API functions start here
39 */
40
41 typedef struct {
42 FILE *fd;
43 int first_frame;
44 int natoms;
45 molfile_atom_t *atomlist;
46 molfile_metadata_t *meta;
47 int nconect;
48 int nbonds, maxbnum;
49 int *from, *to, *idxmap;
50 } pdbdata;
51
open_pdb_read(const char * filepath,const char * filetype,int * natoms)52 static void *open_pdb_read(const char *filepath, const char *filetype,
53 int *natoms) {
54 FILE *fd;
55 pdbdata *pdb;
56 char pdbstr[PDB_BUFFER_LENGTH];
57 int indx, nconect;
58
59 fd = fopen(filepath, "r");
60 if (!fd)
61 return NULL;
62 pdb = (pdbdata *)malloc(sizeof(pdbdata));
63 pdb->fd = fd;
64 pdb->meta = (molfile_metadata_t *) malloc(sizeof(molfile_metadata_t));
65 memset(pdb->meta, 0, sizeof(molfile_metadata_t));
66
67 pdb->meta->remarklen = 0;
68 pdb->meta->remarks = NULL;
69
70 *natoms=0;
71 nconect=0;
72 do {
73 indx = read_pdb_record(pdb->fd, pdbstr);
74 if (indx == PDB_ATOM) {
75 *natoms += 1;
76 } else if (indx == PDB_CONECT) {
77 nconect++;
78 } else if (indx == PDB_HEADER) {
79 get_pdb_header(pdbstr, pdb->meta->accession, pdb->meta->date, NULL);
80 if (strlen(pdb->meta->accession) > 0)
81 strcpy(pdb->meta->database, "PDB");
82 } else if (indx == PDB_REMARK || indx == PDB_CONECT || indx == PDB_UNKNOWN) {
83 int len=strlen(pdbstr);
84 int newlen = len + pdb->meta->remarklen;
85
86 char *newstr=realloc(pdb->meta->remarks, newlen + 1);
87 if (newstr != NULL) {
88 pdb->meta->remarks = newstr;
89 pdb->meta->remarks[pdb->meta->remarklen] = '\0';
90 memcpy(pdb->meta->remarks + pdb->meta->remarklen, pdbstr, len);
91 pdb->meta->remarks[newlen] = '\0';
92 pdb->meta->remarklen = newlen;
93 }
94 }
95
96 } while (indx != PDB_END && indx != PDB_EOF);
97
98 /* If no atoms were found, this is probably not a PDB file! */
99 if (!*natoms) {
100 fprintf(stderr, "PDB file '%s' contains no atoms.\n", filepath);
101 if (pdb->meta->remarks != NULL)
102 free(pdb->meta->remarks);
103 if (pdb->meta != NULL)
104 free(pdb->meta);
105 free(pdb);
106 return NULL;
107 }
108
109 rewind(pdb->fd); /* if ok, rewind file and prepare to parse it for real */
110 pdb->natoms = *natoms;
111 pdb->nconect = nconect;
112 pdb->nbonds = 0;
113 pdb->maxbnum = 0;
114 pdb->from = NULL;
115 pdb->to = NULL;
116 pdb->idxmap = NULL;
117 pdb->atomlist = NULL;
118
119 #if defined(VMDUSECONECTRECORDS)
120 /* allocate atom index translation table if we have 99,999 atoms or less */
121 /* and we have conect records to process */
122 if (pdb->natoms < 100000 && pdb->nconect > 0) {
123 pdb->idxmap = (int *) malloc(100000 * sizeof(int));
124 memset(pdb->idxmap, 0, 100000 * sizeof(int));
125 }
126 #endif
127
128 return pdb;
129 }
130
read_pdb_structure(void * mydata,int * optflags,molfile_atom_t * atoms)131 static int read_pdb_structure(void *mydata, int *optflags,
132 molfile_atom_t *atoms) {
133 pdbdata *pdb = (pdbdata *)mydata;
134 molfile_atom_t *atom;
135 char pdbrec[PDB_BUFFER_LENGTH];
136 int i, rectype, atomserial, pteidx;
137 char ridstr[8];
138 char elementsymbol[3];
139 int badptecount = 0;
140 long fpos = ftell(pdb->fd);
141
142 *optflags = MOLFILE_INSERTION | MOLFILE_OCCUPANCY | MOLFILE_BFACTOR |
143 MOLFILE_ALTLOC | MOLFILE_ATOMICNUMBER | MOLFILE_BONDSSPECIAL;
144
145 i = 0;
146 do {
147 rectype = read_pdb_record(pdb->fd, pdbrec);
148 switch (rectype) {
149 case PDB_ATOM:
150 atom = atoms+i;
151 get_pdb_fields(pdbrec, strlen(pdbrec), &atomserial,
152 atom->name, atom->resname, atom->chain, atom->segid,
153 ridstr, atom->insertion, atom->altloc, elementsymbol,
154 NULL, NULL, NULL, &atom->occupancy, &atom->bfactor);
155
156 if (pdb->idxmap != NULL && atomserial < 100000) {
157 pdb->idxmap[atomserial] = i; /* record new serial number translation */
158 }
159
160 atom->resid = atoi(ridstr);
161
162 /* determine atomic number from the element symbol */
163 pteidx = get_pte_idx_from_string(elementsymbol);
164 atom->atomicnumber = pteidx;
165 if (pteidx != 0) {
166 atom->mass = get_pte_mass(pteidx);
167 atom->radius = get_pte_vdw_radius(pteidx);
168 } else {
169 badptecount++; /* unrecognized element */
170 }
171
172 strcpy(atom->type, atom->name);
173 i++;
174 break;
175
176 case PDB_CONECT:
177 /* only read CONECT records for structures where we know they can */
178 /* be valid for all of the atoms in the structure */
179 if (pdb->idxmap != NULL) {
180 get_pdb_conect(pdbrec, pdb->natoms, pdb->idxmap,
181 &pdb->maxbnum, &pdb->nbonds, &pdb->from, &pdb->to);
182 }
183 break;
184
185 default:
186 /* other record types are ignored in the structure callback */
187 /* and are dealt with in the timestep callback or elsewhere */
188 break;
189 }
190 } while (rectype != PDB_END && rectype != PDB_EOF);
191
192 fseek(pdb->fd, fpos, SEEK_SET);
193
194 /* if all atoms are recognized, set the mass and radius flags too, */
195 /* otherwise let VMD guess these for itself using it's own methods */
196 if (badptecount == 0) {
197 *optflags |= MOLFILE_MASS | MOLFILE_RADIUS;
198 }
199
200 return MOLFILE_SUCCESS;
201 }
202
read_bonds(void * v,int * nbonds,int ** fromptr,int ** toptr,float ** bondorder,int ** bondtype,int * nbondtypes,char *** bondtypename)203 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr,
204 float ** bondorder,int **bondtype,
205 int *nbondtypes, char ***bondtypename) {
206 pdbdata *pdb = (pdbdata *)v;
207
208 *nbonds = 0;
209 *fromptr = NULL;
210 *toptr = NULL;
211 *bondorder = NULL; /* PDB files don't have bond order information */
212 *bondtype = NULL;
213 *nbondtypes = 0;
214 *bondtypename = NULL;
215
216 /* The newest plugin API allows us to return CONECT records as
217 * additional bonds above and beyond what the distance search returns.
218 * Without that feature, we otherwise have to check completeness and
219 * ignore them if they don't look to be fully specified for this molecule */
220 #if !defined(MOLFILE_BONDSSPECIAL)
221 if (pdb->natoms >= 100000) {
222 printf("pdbplugin) Warning: more than 99,999 atoms, ignored CONECT records\n");
223 return MOLFILE_SUCCESS;
224 } else if (((float) pdb->nconect / (float) pdb->natoms) <= 0.85) {
225 printf("pdbplugin) Warning: Probable incomplete bond structure specified,\n");
226 printf("pdbplugin) ignoring CONECT records\n");
227 return MOLFILE_SUCCESS;
228 } else if (pdb->nconect == 0) {
229 return MOLFILE_SUCCESS;
230 }
231 #endif
232
233 *nbonds = pdb->nbonds;
234 *fromptr = pdb->from;
235 *toptr = pdb->to;
236
237 return MOLFILE_SUCCESS;
238 }
239
240
241 /*
242 *
243 */
read_next_timestep(void * v,int natoms,molfile_timestep_t * ts)244 static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) {
245 pdbdata *pdb = (pdbdata *)v;
246 char pdbstr[PDB_BUFFER_LENGTH];
247 int indx, i;
248 float *x, *y, *z;
249 float occup, bfac;
250 if (pdb->natoms == 0)
251 return MOLFILE_ERROR; /* EOF */
252 if (ts) {
253 x = ts->coords;
254 y = x+1;
255 z = x+2;
256 } else {
257 x = y = z = 0;
258 }
259 i = 0;
260 do {
261 indx = read_pdb_record(pdb->fd, pdbstr);
262 if((indx == PDB_END || indx == PDB_EOF) && (i < pdb->natoms)) {
263 return MOLFILE_ERROR;
264 } else if(indx == PDB_ATOM) {
265 if(i++ >= pdb->natoms) {
266 break;
267 }
268 /* just get the coordinates, and store them */
269 if (ts) {
270 get_pdb_coordinates(pdbstr, x, y, z, &occup, &bfac);
271 x += 3;
272 y += 3;
273 z += 3;
274 }
275 } else if (indx == PDB_CRYST1) {
276 if (ts) {
277 get_pdb_cryst1(pdbstr, &ts->alpha, &ts->beta, &ts->gamma,
278 &ts->A, &ts->B, &ts->C);
279 }
280 }
281 } while(!(indx == PDB_END || indx == PDB_EOF));
282
283 return MOLFILE_SUCCESS;
284 }
285
close_pdb_read(void * v)286 static void close_pdb_read(void *v) {
287 pdbdata *pdb = (pdbdata *)v;
288 if (pdb->fd != NULL)
289 fclose(pdb->fd);
290 if (pdb->idxmap != NULL)
291 free(pdb->idxmap);
292 if (pdb->meta->remarks != NULL)
293 free(pdb->meta->remarks);
294 if (pdb->meta != NULL)
295 free(pdb->meta);
296 free(pdb);
297 }
298
open_file_write(const char * path,const char * filetype,int natoms)299 static void *open_file_write(const char *path, const char *filetype,
300 int natoms) {
301
302 FILE *fd;
303 pdbdata *pdb;
304 fd = fopen(path, "w");
305 if (!fd) {
306 fprintf(stderr, "Unable to open file %s for writing\n", path);
307 return NULL;
308 }
309 pdb = (pdbdata *)malloc(sizeof(pdbdata));
310 pdb->fd = fd;
311 pdb->natoms = natoms;
312 pdb->atomlist = NULL;
313 pdb->first_frame = 1;
314 return pdb;
315 }
316
write_structure(void * v,int optflags,const molfile_atom_t * atoms)317 static int write_structure(void *v, int optflags,
318 const molfile_atom_t *atoms) {
319
320 int i;
321 pdbdata *pdb = (pdbdata *)v;
322 int natoms = pdb->natoms;
323 pdb->atomlist = (molfile_atom_t *)malloc(natoms*sizeof(molfile_atom_t));
324 memcpy(pdb->atomlist, atoms, natoms*sizeof(molfile_atom_t));
325
326 /* If occ, bfactor, and insertion aren't given, we assign defaultvalues. */
327 if (!(optflags & MOLFILE_OCCUPANCY)) {
328 for (i=0; i<natoms; i++) pdb->atomlist[i].occupancy = 0.0f;
329 }
330 if (!(optflags & MOLFILE_BFACTOR)) {
331 for (i=0; i<natoms; i++) pdb->atomlist[i].bfactor= 0.0f;
332 }
333 if (!(optflags & MOLFILE_INSERTION)) {
334 for (i=0; i<natoms; i++) {
335 pdb->atomlist[i].insertion[0] =' ';
336 pdb->atomlist[i].insertion[1] ='\0';
337 }
338 }
339 if (!(optflags & MOLFILE_ALTLOC)) {
340 for (i=0; i<natoms; i++) {
341 pdb->atomlist[i].altloc[0]=' ';
342 pdb->atomlist[i].altloc[1]='\0';
343 }
344 }
345 if (!(optflags & MOLFILE_ATOMICNUMBER)) {
346 for (i=0; i<natoms; i++) pdb->atomlist[i].atomicnumber = 0;
347 }
348
349 /* TODO: put bonds into CONECT records? */
350 return MOLFILE_SUCCESS;
351 }
352
353 /* SEQRES records look like this:
354
355 COLUMNS DATA TYPE FIELD DEFINITION
356 ---------------------------------------------------------------------------------
357 1 - 6 Record name "SEQRES"
358
359 9 - 10 Integer serNum Serial number of the SEQRES record
360 for the current chain. Starts at 1
361 and increments by one each line.
362 Reset to 1 for each chain.
363
364 12 Character chainID Chain identifier. This may be any
365 single legal character, including a
366 blank which is used if there is
367 only one chain.
368
369 14 - 17 Integer numRes Number of residues in the chain.
370 This value is repeated on every
371 record.
372
373 20 - 22 Residue name resName Residue name.
374
375 24 - 26 Residue name resName Residue name.
376
377 ... and so forth out to 68-70, for a total of 13 in each line (except possibly
378 the last.
379
380 source:
381 http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/part_35.html
382 */
383
384 /*
385 * However, we don't use them right now because of several issues that
386 * can't presently be resolved satisfactorily in VMD:
387
388 According to the RCSB, SEQRES records have to contain all residues, not
389 just those in the structure, which means VMD will usually produce incorrect
390 output and there's nothing we can do about it. The RCSB actually specifies
391 that all residues in the chain have to present in the SEQRES records, even
392 if they're not in the structure.
393
394 We can never know which residues to output. Our current system of outputting
395 everything is just terrible when you have 20,000 waters in your system; we
396 have to fix this immediately. We could almost get away with making a hash
397 table of the names of protein and nucleic acid residues and only write chains
398 containing those residues. However, there's this little snippet from the
399 specification:
400
401 * Heterogens which are integrated into the backbone of the chain are listed
402 as being part of the chain and are included in the SEQRES records for
403 that chain.
404
405 That means that we can never know what might appear in the sequence unless we
406 also read HET records and keep track of them in VMD as well. We shouldn't
407 get people depending on such fallible SEQRES records.
408
409 And of course, there's the fact that no other program that we know of besides
410 CE needs these SEQRES records.
411
412 * Uncomment the write_seqres line in write_timestep to turn them back on.
413 */
414
415
416 #if 0
417 static void write_seqres(FILE * fd, int natoms, const molfile_atom_t *atomlist) {
418 int i=0;
419 while (i < natoms) {
420 int k, serNum;
421 int j = i;
422 int ires, nres = 1;
423 int resid = atomlist[i].resid;
424 /* Count up the number of residues in the chain */
425 const char *chain = atomlist[i].chain;
426 while (j < natoms && !strcmp(chain, atomlist[j].chain)) {
427 if (resid != atomlist[j].resid) {
428 nres++;
429 resid = atomlist[j].resid;
430 }
431 j++;
432 }
433 /* There are nres residues in the chain, from atoms i to j. */
434 serNum = 1;
435 ires = 1;
436 resid = atomlist[i].resid;
437 fprintf(fd, "SEQRES %2d %c %4d ", serNum, chain[0], nres);
438 serNum = 2;
439 fprintf(fd, "%3s ", atomlist[i].resname);
440 for (k=i; k<j; k++) {
441 if (resid != atomlist[k].resid) {
442 resid = atomlist[k].resid;
443 if (!(ires % 13)) {
444 fprintf(fd, "\nSEQRES %2d %c %4d ", serNum, chain[0], nres);
445 serNum++;
446 }
447 fprintf(fd, "%3s ", atomlist[k].resname);
448 ires++;
449 }
450 }
451 i = j;
452 fprintf(fd, "\n");
453 }
454 }
455 #endif
456
457 /*
458 CRYST1 records look like this:
459 The CRYST1 record presents the unit cell parameters, space group, and Z value. If the structure was not determined by crystallographic means, CRYST1 simply defines a unit cube.
460
461
462 Record Format
463
464 COLUMNS DATA TYPE FIELD DEFINITION
465 -------------------------------------------------------------
466 1 - 6 Record name "CRYST1"
467
468 7 - 15 Real(9.3) a a (Angstroms).
469
470 16 - 24 Real(9.3) b b (Angstroms).
471
472 25 - 33 Real(9.3) c c (Angstroms).
473
474 34 - 40 Real(7.2) alpha alpha (degrees).
475
476 41 - 47 Real(7.2) beta beta (degrees).
477
478 48 - 54 Real(7.2) gamma gamma (degrees).
479
480 56 - 66 LString sGroup Space group.
481
482 67 - 70 Integer z Z value.
483
484 * If the coordinate entry describes a structure determined by a technique
485 other than crystallography, CRYST1 contains a = b = c = 1.0, alpha =
486 beta = gamma = 90 degrees, space group = P 1, and Z = 1.
487
488 We will use "P 1" and "1" for space group and z value, as recommended, but
489 we'll populate the other fields with the unit cell information we do have.
490
491 */
492
write_cryst1(FILE * fd,const molfile_timestep_t * ts)493 static void write_cryst1(FILE *fd, const molfile_timestep_t *ts) {
494 fprintf(fd, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1 1\n",
495 ts->A, ts->B, ts->C, ts->alpha, ts->beta, ts->gamma);
496 }
497
498
write_timestep(void * v,const molfile_timestep_t * ts)499 static int write_timestep(void *v, const molfile_timestep_t *ts) {
500 pdbdata *pdb = (pdbdata *)v;
501 const molfile_atom_t *atom;
502 const float *pos;
503 int i;
504 char elementsymbol[3];
505
506 if (pdb->natoms == 0)
507 return MOLFILE_SUCCESS;
508
509 if (pdb->first_frame) {
510 /* Turn off SEQRES writing for now; see comments above.
511 write_seqres(pdb->fd, pdb->natoms, pdb->atomlist);
512 */
513 write_cryst1(pdb->fd, ts);
514 pdb->first_frame = 0;
515 }
516 atom = pdb->atomlist;
517 pos = ts->coords;
518 for (i=0; i<pdb->natoms; i++) {
519 /*
520 * The 8.3 format for position, occupancy, and bfactor permits values
521 * only in the range of -999.9994 to 9999.9994 (so that they round
522 * to the range [-999.999, 9999.999]). If values fall outside of that
523 * range, fail and emit an error message rather than generate a
524 * misformatted PDB file.
525 */
526 #define PDBBAD(x) ((x) < -999.9994f || (x) > 9999.9994f)
527 if (PDBBAD(pos[0]) || PDBBAD(pos[1]) || PDBBAD(pos[2]) ||
528 PDBBAD(atom->occupancy) || PDBBAD(atom->bfactor)) {
529 fprintf(stderr, "PDB WRITE ERROR: Position, occupancy, or b-factor (beta) for atom %d\n", i);
530 fprintf(stderr, " cannot be written in PDB format.\n");
531 fprintf(stderr, " File will be truncated.\n");
532 return MOLFILE_ERROR;
533 }
534
535 /* check the atomicnumber and format the atomic element symbol string */
536 strcpy(elementsymbol, (atom->atomicnumber < 1) ? " " : get_pte_label(atom->atomicnumber));
537 elementsymbol[0] = toupper(elementsymbol[0]);
538 elementsymbol[1] = toupper(elementsymbol[1]);
539
540 if (!write_raw_pdb_record(pdb->fd,
541 "ATOM ", i+1, atom->name, atom->resname, atom->resid,
542 atom->insertion, atom->altloc, elementsymbol,
543 pos[0], pos[1], pos[2],
544 atom->occupancy, atom->bfactor, atom->chain, atom->segid)) {
545 fprintf(stderr,
546 "PDB: Error encoutered writing atom %d; file may be incomplete.\n",
547 i+1);
548 return MOLFILE_ERROR;
549 }
550 ++atom;
551 pos += 3;
552 }
553 fprintf(pdb->fd, "END\n");
554
555 return MOLFILE_SUCCESS;
556 }
557
close_file_write(void * v)558 static void close_file_write(void *v) {
559 pdbdata *pdb = (pdbdata *)v;
560 fclose(pdb->fd);
561 free(pdb->atomlist);
562 free(pdb);
563 }
564
read_molecule_metadata(void * v,molfile_metadata_t ** metadata)565 static int read_molecule_metadata(void *v, molfile_metadata_t **metadata) {
566 pdbdata *pdb = (pdbdata *)v;
567 *metadata = pdb->meta;
568 return MOLFILE_SUCCESS;
569 }
570
571 /*
572 * Initialization stuff down here
573 */
574
575 static molfile_plugin_t plugin;
576
VMDPLUGIN_init()577 VMDPLUGIN_API int VMDPLUGIN_init() {
578 memset(&plugin, 0, sizeof(molfile_plugin_t));
579 plugin.abiversion = vmdplugin_ABIVERSION;
580 plugin.type = MOLFILE_PLUGIN_TYPE;
581 plugin.name = "pdb";
582 plugin.prettyname = "PDB";
583 plugin.author = "Justin Gullingsrud, John Stone";
584 plugin.majorv = 1;
585 plugin.minorv = 16;
586 plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
587 plugin.filename_extension = "pdb,ent";
588 plugin.open_file_read = open_pdb_read;
589 plugin.read_structure = read_pdb_structure;
590 plugin.read_bonds = read_bonds;
591 plugin.read_next_timestep = read_next_timestep;
592 plugin.close_file_read = close_pdb_read;
593 plugin.open_file_write = open_file_write;
594 plugin.write_structure = write_structure;
595 plugin.write_timestep = write_timestep;
596 plugin.close_file_write = close_file_write;
597 plugin.read_molecule_metadata = read_molecule_metadata;
598 return VMDPLUGIN_SUCCESS;
599 }
600
VMDPLUGIN_register(void * v,vmdplugin_register_cb cb)601 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
602 (*cb)(v, (vmdplugin_t *)&plugin);
603 return VMDPLUGIN_SUCCESS;
604 }
605
VMDPLUGIN_fini()606 VMDPLUGIN_API int VMDPLUGIN_fini() {
607 return VMDPLUGIN_SUCCESS;
608 }
609
610