1 /* MACHINE GENERATED FILE, DO NOT EDIT! */
2 
3 #define VMDPLUGIN molfile_pdbplugin
4 #define STATIC_PLUGIN 1
5 
6 /***************************************************************************
7  *cr
8  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
9  *cr                        University of Illinois
10  *cr                         All Rights Reserved
11  *cr
12  ***************************************************************************/
13 
14 /***************************************************************************
15  * RCS INFORMATION:
16  *
17  *      $RCSfile: pdbplugin.c,v $
18  *      $Author: johns $       $Locker:  $             $State: Exp $
19  *      $Revision: 1.73 $       $Date: 2016/11/28 05:01:54 $
20  *
21  ***************************************************************************/
22 
23 /*
24  * PDB file format specifications:
25  *   http://www.rcsb.org/pdb/static.do?p=file_formats/pdb/index.html
26  */
27 
28 #include "largefiles.h"   /* platform dependent 64-bit file I/O defines */
29 
30 #include "molfile_plugin.h"
31 #include "readpdb.h"
32 #include "periodic_table.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 /*
38  * API functions start here
39  */
40 
41 typedef struct {
42   FILE *fd;
43   int first_frame;
44   int natoms;
45   molfile_atom_t *atomlist;
46   molfile_metadata_t *meta;
47   int nconect;
48   int nbonds, maxbnum;
49   int *from, *to, *idxmap;
50 } pdbdata;
51 
open_pdb_read(const char * filepath,const char * filetype,int * natoms)52 static void *open_pdb_read(const char *filepath, const char *filetype,
53     int *natoms) {
54   FILE *fd;
55   pdbdata *pdb;
56   char pdbstr[PDB_BUFFER_LENGTH];
57   int indx, nconect;
58 
59   fd = fopen(filepath, "r");
60   if (!fd)
61     return NULL;
62   pdb = (pdbdata *)malloc(sizeof(pdbdata));
63   pdb->fd = fd;
64   pdb->meta = (molfile_metadata_t *) malloc(sizeof(molfile_metadata_t));
65   memset(pdb->meta, 0, sizeof(molfile_metadata_t));
66 
67   pdb->meta->remarklen = 0;
68   pdb->meta->remarks = NULL;
69 
70   *natoms=0;
71   nconect=0;
72   do {
73     indx = read_pdb_record(pdb->fd, pdbstr);
74     if (indx == PDB_ATOM) {
75       *natoms += 1;
76     } else if (indx == PDB_CONECT) {
77       nconect++;
78     } else if (indx == PDB_HEADER) {
79       get_pdb_header(pdbstr, pdb->meta->accession, pdb->meta->date, NULL);
80       if (strlen(pdb->meta->accession) > 0)
81         strcpy(pdb->meta->database, "PDB");
82     } else if (indx == PDB_REMARK || indx == PDB_CONECT || indx == PDB_UNKNOWN) {
83       int len=strlen(pdbstr);
84       int newlen = len + pdb->meta->remarklen;
85 
86       char *newstr=realloc(pdb->meta->remarks, newlen + 1);
87       if (newstr != NULL) {
88         pdb->meta->remarks = newstr;
89         pdb->meta->remarks[pdb->meta->remarklen] = '\0';
90         memcpy(pdb->meta->remarks + pdb->meta->remarklen, pdbstr, len);
91         pdb->meta->remarks[newlen] = '\0';
92         pdb->meta->remarklen = newlen;
93       }
94     }
95 
96   } while (indx != PDB_END && indx != PDB_EOF);
97 
98   /* If no atoms were found, this is probably not a PDB file! */
99   if (!*natoms) {
100     fprintf(stderr, "PDB file '%s' contains no atoms.\n", filepath);
101     if (pdb->meta->remarks != NULL)
102       free(pdb->meta->remarks);
103     if (pdb->meta != NULL)
104       free(pdb->meta);
105     free(pdb);
106     return NULL;
107   }
108 
109   rewind(pdb->fd); /* if ok, rewind file and prepare to parse it for real */
110   pdb->natoms = *natoms;
111   pdb->nconect = nconect;
112   pdb->nbonds = 0;
113   pdb->maxbnum = 0;
114   pdb->from = NULL;
115   pdb->to = NULL;
116   pdb->idxmap = NULL;
117   pdb->atomlist = NULL;
118 
119 #if defined(VMDUSECONECTRECORDS)
120   /* allocate atom index translation table if we have 99,999 atoms or less */
121   /* and we have conect records to process                                 */
122   if (pdb->natoms < 100000 && pdb->nconect > 0) {
123     pdb->idxmap = (int *) malloc(100000 * sizeof(int));
124     memset(pdb->idxmap, 0, 100000 * sizeof(int));
125   }
126 #endif
127 
128   return pdb;
129 }
130 
read_pdb_structure(void * mydata,int * optflags,molfile_atom_t * atoms)131 static int read_pdb_structure(void *mydata, int *optflags,
132     molfile_atom_t *atoms) {
133   pdbdata *pdb = (pdbdata *)mydata;
134   molfile_atom_t *atom;
135   char pdbrec[PDB_BUFFER_LENGTH];
136   int i, rectype, atomserial, pteidx;
137   char ridstr[8];
138   char elementsymbol[3];
139   int badptecount = 0;
140   long fpos = ftell(pdb->fd);
141 
142   *optflags = MOLFILE_INSERTION | MOLFILE_OCCUPANCY | MOLFILE_BFACTOR |
143               MOLFILE_ALTLOC | MOLFILE_ATOMICNUMBER | MOLFILE_BONDSSPECIAL;
144 
145   i = 0;
146   do {
147     rectype = read_pdb_record(pdb->fd, pdbrec);
148     switch (rectype) {
149     case PDB_ATOM:
150       atom = atoms+i;
151       get_pdb_fields(pdbrec, strlen(pdbrec), &atomserial,
152           atom->name, atom->resname, atom->chain, atom->segid,
153           ridstr, atom->insertion, atom->altloc, elementsymbol,
154           NULL, NULL, NULL, &atom->occupancy, &atom->bfactor);
155 
156       if (pdb->idxmap != NULL && atomserial < 100000) {
157         pdb->idxmap[atomserial] = i; /* record new serial number translation */
158       }
159 
160       atom->resid = atoi(ridstr);
161 
162       /* determine atomic number from the element symbol */
163       pteidx = get_pte_idx_from_string(elementsymbol);
164       atom->atomicnumber = pteidx;
165       if (pteidx != 0) {
166         atom->mass = get_pte_mass(pteidx);
167         atom->radius = get_pte_vdw_radius(pteidx);
168       } else {
169         badptecount++; /* unrecognized element */
170       }
171 
172       strcpy(atom->type, atom->name);
173       i++;
174       break;
175 
176     case PDB_CONECT:
177       /* only read CONECT records for structures where we know they can */
178       /* be valid for all of the atoms in the structure                 */
179       if (pdb->idxmap != NULL) {
180         get_pdb_conect(pdbrec, pdb->natoms, pdb->idxmap,
181                        &pdb->maxbnum, &pdb->nbonds, &pdb->from, &pdb->to);
182       }
183       break;
184 
185     default:
186       /* other record types are ignored in the structure callback */
187       /* and are dealt with in the timestep callback or elsewhere */
188       break;
189     }
190   } while (rectype != PDB_END && rectype != PDB_EOF);
191 
192   fseek(pdb->fd, fpos, SEEK_SET);
193 
194   /* if all atoms are recognized, set the mass and radius flags too,  */
195   /* otherwise let VMD guess these for itself using it's own methods  */
196   if (badptecount == 0) {
197     *optflags |= MOLFILE_MASS | MOLFILE_RADIUS;
198   }
199 
200   return MOLFILE_SUCCESS;
201 }
202 
read_bonds(void * v,int * nbonds,int ** fromptr,int ** toptr,float ** bondorder,int ** bondtype,int * nbondtypes,char *** bondtypename)203 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr,
204                       float ** bondorder,int **bondtype,
205                       int *nbondtypes, char ***bondtypename) {
206   pdbdata *pdb = (pdbdata *)v;
207 
208   *nbonds = 0;
209   *fromptr = NULL;
210   *toptr = NULL;
211   *bondorder = NULL; /* PDB files don't have bond order information */
212   *bondtype = NULL;
213   *nbondtypes = 0;
214   *bondtypename = NULL;
215 
216 /* The newest plugin API allows us to return CONECT records as
217  * additional bonds above and beyond what the distance search returns.
218  * Without that feature, we otherwise have to check completeness and
219  * ignore them if they don't look to be fully specified for this molecule */
220 #if !defined(MOLFILE_BONDSSPECIAL)
221   if (pdb->natoms >= 100000) {
222     printf("pdbplugin) Warning: more than 99,999 atoms, ignored CONECT records\n");
223     return MOLFILE_SUCCESS;
224   } else if (((float) pdb->nconect / (float) pdb->natoms) <= 0.85) {
225     printf("pdbplugin) Warning: Probable incomplete bond structure specified,\n");
226     printf("pdbplugin)          ignoring CONECT records\n");
227     return MOLFILE_SUCCESS;
228   } else if (pdb->nconect == 0) {
229     return MOLFILE_SUCCESS;
230   }
231 #endif
232 
233   *nbonds = pdb->nbonds;
234   *fromptr = pdb->from;
235   *toptr = pdb->to;
236 
237   return MOLFILE_SUCCESS;
238 }
239 
240 
241 /*
242  *
243  */
read_next_timestep(void * v,int natoms,molfile_timestep_t * ts)244 static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) {
245   pdbdata *pdb = (pdbdata *)v;
246   char pdbstr[PDB_BUFFER_LENGTH];
247   int indx, i;
248   float *x, *y, *z;
249   float occup, bfac;
250   if (pdb->natoms == 0)
251     return MOLFILE_ERROR; /* EOF */
252   if (ts) {
253     x = ts->coords;
254     y = x+1;
255     z = x+2;
256   } else {
257     x = y = z = 0;
258   }
259   i = 0;
260   do {
261     indx = read_pdb_record(pdb->fd, pdbstr);
262     if((indx == PDB_END || indx == PDB_EOF) && (i < pdb->natoms)) {
263       return MOLFILE_ERROR;
264     } else if(indx == PDB_ATOM) {
265       if(i++ >= pdb->natoms) {
266         break;
267       }
268       /* just get the coordinates, and store them */
269       if (ts) {
270         get_pdb_coordinates(pdbstr, x, y, z, &occup, &bfac);
271         x += 3;
272         y += 3;
273         z += 3;
274       }
275     } else if (indx == PDB_CRYST1) {
276       if (ts) {
277         get_pdb_cryst1(pdbstr, &ts->alpha, &ts->beta, &ts->gamma,
278                                &ts->A, &ts->B, &ts->C);
279       }
280     }
281   } while(!(indx == PDB_END || indx == PDB_EOF));
282 
283   return MOLFILE_SUCCESS;
284 }
285 
close_pdb_read(void * v)286 static void close_pdb_read(void *v) {
287   pdbdata *pdb = (pdbdata *)v;
288   if (pdb->fd != NULL)
289     fclose(pdb->fd);
290   if (pdb->idxmap != NULL)
291     free(pdb->idxmap);
292   if (pdb->meta->remarks != NULL)
293     free(pdb->meta->remarks);
294   if (pdb->meta != NULL)
295     free(pdb->meta);
296   free(pdb);
297 }
298 
open_file_write(const char * path,const char * filetype,int natoms)299 static void *open_file_write(const char *path, const char *filetype,
300     int natoms) {
301 
302   FILE *fd;
303   pdbdata *pdb;
304   fd = fopen(path, "w");
305   if (!fd) {
306     fprintf(stderr, "Unable to open file %s for writing\n", path);
307     return NULL;
308   }
309   pdb = (pdbdata *)malloc(sizeof(pdbdata));
310   pdb->fd = fd;
311   pdb->natoms = natoms;
312   pdb->atomlist = NULL;
313   pdb->first_frame = 1;
314   return pdb;
315 }
316 
write_structure(void * v,int optflags,const molfile_atom_t * atoms)317 static int write_structure(void *v, int optflags,
318     const molfile_atom_t *atoms) {
319 
320   int i;
321   pdbdata *pdb = (pdbdata *)v;
322   int natoms = pdb->natoms;
323   pdb->atomlist = (molfile_atom_t *)malloc(natoms*sizeof(molfile_atom_t));
324   memcpy(pdb->atomlist, atoms, natoms*sizeof(molfile_atom_t));
325 
326   /* If occ, bfactor, and insertion aren't given, we assign defaultvalues. */
327   if (!(optflags & MOLFILE_OCCUPANCY)) {
328     for (i=0; i<natoms; i++) pdb->atomlist[i].occupancy = 0.0f;
329   }
330   if (!(optflags & MOLFILE_BFACTOR)) {
331     for (i=0; i<natoms; i++) pdb->atomlist[i].bfactor= 0.0f;
332   }
333   if (!(optflags & MOLFILE_INSERTION)) {
334     for (i=0; i<natoms; i++) {
335       pdb->atomlist[i].insertion[0] =' ';
336       pdb->atomlist[i].insertion[1] ='\0';
337     }
338   }
339   if (!(optflags & MOLFILE_ALTLOC)) {
340     for (i=0; i<natoms; i++) {
341       pdb->atomlist[i].altloc[0]=' ';
342       pdb->atomlist[i].altloc[1]='\0';
343     }
344   }
345   if (!(optflags & MOLFILE_ATOMICNUMBER)) {
346     for (i=0; i<natoms; i++) pdb->atomlist[i].atomicnumber = 0;
347   }
348 
349   /* TODO: put bonds into CONECT records? */
350   return MOLFILE_SUCCESS;
351 }
352 
353 /* SEQRES records look like this:
354 
355 COLUMNS        DATA TYPE       FIELD         DEFINITION
356 ---------------------------------------------------------------------------------
357  1 -  6        Record name     "SEQRES"
358 
359  9 - 10        Integer         serNum        Serial number of the SEQRES record
360                                              for the current chain.  Starts at 1
361                                              and increments by one each line.
362                                              Reset to 1 for each chain.
363 
364 12             Character       chainID       Chain identifier.  This may be any
365                                              single legal character, including a
366                                              blank which is used if there is
367                                              only one chain.
368 
369 14 - 17        Integer         numRes        Number of residues in the chain.
370                                              This value is repeated on every
371                                              record.
372 
373 20 - 22        Residue name    resName       Residue name.
374 
375 24 - 26        Residue name    resName       Residue name.
376 
377 ... and so forth out to 68-70, for a total of 13 in each line (except possibly
378 the last.
379 
380 source:
381 http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/part_35.html
382 */
383 
384 /*
385  * However, we don't use them right now because of several issues that
386  * can't presently be resolved satisfactorily in VMD:
387 
388 According to the RCSB, SEQRES records have to contain all residues, not
389 just those in the structure, which means VMD will usually produce incorrect
390 output and there's nothing we can do about it.  The RCSB actually specifies
391 that all residues in the chain have to present in the SEQRES records, even
392 if they're not in the structure.
393 
394 We can never know which residues to output.  Our current system of outputting
395 everything is just terrible when you have 20,000 waters in your system; we
396 have to fix this immediately.  We could almost get away with making a hash
397 table of the names of protein and nucleic acid residues and only write chains
398 containing those residues.  However, there's this little snippet from the
399 specification:
400 
401 * Heterogens which are integrated into the backbone of the chain are listed
402   as being part of the chain and are included in the SEQRES records for
403   that chain.
404 
405 That means that we can never know what might appear in the sequence unless we
406 also read HET records and keep track of them in VMD as well.  We shouldn't
407 get people depending on such fallible SEQRES records.
408 
409 And of course, there's the fact that no other program that we know of besides
410 CE needs these SEQRES records.
411 
412  * Uncomment the write_seqres line in write_timestep to turn them back on.
413  */
414 
415 
416 #if 0
417 static void write_seqres(FILE * fd, int natoms, const molfile_atom_t *atomlist) {
418   int i=0;
419   while (i < natoms) {
420     int k, serNum;
421     int j = i;
422     int ires, nres = 1;
423     int resid = atomlist[i].resid;
424     /* Count up the number of residues in the chain */
425     const char *chain = atomlist[i].chain;
426     while (j < natoms && !strcmp(chain, atomlist[j].chain)) {
427       if (resid != atomlist[j].resid) {
428         nres++;
429         resid = atomlist[j].resid;
430       }
431       j++;
432     }
433     /* There are nres residues in the chain, from atoms i to j. */
434     serNum = 1;
435     ires = 1;
436     resid = atomlist[i].resid;
437     fprintf(fd, "SEQRES  %2d %c %4d  ",  serNum, chain[0], nres);
438     serNum = 2;
439     fprintf(fd, "%3s ", atomlist[i].resname);
440     for (k=i; k<j; k++) {
441       if (resid != atomlist[k].resid) {
442         resid = atomlist[k].resid;
443         if (!(ires % 13)) {
444           fprintf(fd, "\nSEQRES  %2d %c %4d  ",  serNum, chain[0], nres);
445           serNum++;
446         }
447         fprintf(fd, "%3s ", atomlist[k].resname);
448         ires++;
449       }
450     }
451     i = j;
452     fprintf(fd, "\n");
453   }
454 }
455 #endif
456 
457 /*
458 CRYST1 records look like this:
459 The CRYST1 record presents the unit cell parameters, space group, and Z value. If the structure was not determined by crystallographic means, CRYST1 simply defines a unit cube.
460 
461 
462 Record Format
463 
464 COLUMNS       DATA TYPE      FIELD         DEFINITION
465 -------------------------------------------------------------
466  1 -  6       Record name    "CRYST1"
467 
468  7 - 15       Real(9.3)      a             a (Angstroms).
469 
470 16 - 24       Real(9.3)      b             b (Angstroms).
471 
472 25 - 33       Real(9.3)      c             c (Angstroms).
473 
474 34 - 40       Real(7.2)      alpha         alpha (degrees).
475 
476 41 - 47       Real(7.2)      beta          beta (degrees).
477 
478 48 - 54       Real(7.2)      gamma         gamma (degrees).
479 
480 56 - 66       LString        sGroup        Space group.
481 
482 67 - 70       Integer        z             Z value.
483 
484 * If the coordinate entry describes a structure determined by a technique
485 other than crystallography, CRYST1 contains a = b = c = 1.0, alpha =
486 beta = gamma = 90 degrees, space group = P 1, and Z = 1.
487 
488 We will use "P 1" and "1" for space group and z value, as recommended, but
489 we'll populate the other fields with the unit cell information we do have.
490 
491 */
492 
write_cryst1(FILE * fd,const molfile_timestep_t * ts)493 static void write_cryst1(FILE *fd, const molfile_timestep_t *ts) {
494   fprintf(fd, "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n",
495     ts->A, ts->B, ts->C, ts->alpha, ts->beta, ts->gamma);
496 }
497 
498 
write_timestep(void * v,const molfile_timestep_t * ts)499 static int write_timestep(void *v, const molfile_timestep_t *ts) {
500   pdbdata *pdb = (pdbdata *)v;
501   const molfile_atom_t *atom;
502   const float *pos;
503   int i;
504   char elementsymbol[3];
505 
506   if (pdb->natoms == 0)
507     return MOLFILE_SUCCESS;
508 
509   if (pdb->first_frame) {
510     /* Turn off SEQRES writing for now; see comments above.
511     write_seqres(pdb->fd, pdb->natoms, pdb->atomlist);
512     */
513     write_cryst1(pdb->fd, ts);
514     pdb->first_frame = 0;
515   }
516   atom = pdb->atomlist;
517   pos = ts->coords;
518   for (i=0; i<pdb->natoms; i++) {
519     /*
520      * The 8.3 format for position, occupancy, and bfactor permits values
521      * only in the range of -999.9994 to 9999.9994 (so that they round
522      * to the range [-999.999, 9999.999]).  If values fall outside of that
523      * range, fail and emit an error message rather than generate a
524      * misformatted PDB file.
525      */
526 #define PDBBAD(x) ((x) < -999.9994f || (x) > 9999.9994f)
527     if (PDBBAD(pos[0]) || PDBBAD(pos[1]) || PDBBAD(pos[2]) ||
528 		PDBBAD(atom->occupancy) || PDBBAD(atom->bfactor)) {
529 	    fprintf(stderr, "PDB WRITE ERROR: Position, occupancy, or b-factor (beta) for atom %d\n", i);
530       fprintf(stderr, "                 cannot be written in PDB format.\n");
531       fprintf(stderr, "                 File will be truncated.\n");
532       return MOLFILE_ERROR;
533     }
534 
535     /* check the atomicnumber and format the atomic element symbol string */
536     strcpy(elementsymbol, (atom->atomicnumber < 1) ? "  " : get_pte_label(atom->atomicnumber));
537     elementsymbol[0] = toupper(elementsymbol[0]);
538     elementsymbol[1] = toupper(elementsymbol[1]);
539 
540     if (!write_raw_pdb_record(pdb->fd,
541         "ATOM  ", i+1, atom->name, atom->resname, atom->resid,
542         atom->insertion, atom->altloc, elementsymbol,
543         pos[0], pos[1], pos[2],
544         atom->occupancy, atom->bfactor, atom->chain, atom->segid)) {
545       fprintf(stderr,
546           "PDB: Error encoutered writing atom %d; file may be incomplete.\n",
547           i+1);
548       return MOLFILE_ERROR;
549     }
550     ++atom;
551     pos += 3;
552   }
553   fprintf(pdb->fd, "END\n");
554 
555   return MOLFILE_SUCCESS;
556 }
557 
close_file_write(void * v)558 static void close_file_write(void *v) {
559   pdbdata *pdb = (pdbdata *)v;
560   fclose(pdb->fd);
561   free(pdb->atomlist);
562   free(pdb);
563 }
564 
read_molecule_metadata(void * v,molfile_metadata_t ** metadata)565 static int read_molecule_metadata(void *v, molfile_metadata_t **metadata) {
566   pdbdata *pdb = (pdbdata *)v;
567   *metadata = pdb->meta;
568   return MOLFILE_SUCCESS;
569 }
570 
571 /*
572  * Initialization stuff down here
573  */
574 
575 static molfile_plugin_t plugin;
576 
VMDPLUGIN_init()577 VMDPLUGIN_API int VMDPLUGIN_init() {
578   memset(&plugin, 0, sizeof(molfile_plugin_t));
579   plugin.abiversion = vmdplugin_ABIVERSION;
580   plugin.type = MOLFILE_PLUGIN_TYPE;
581   plugin.name = "pdb";
582   plugin.prettyname = "PDB";
583   plugin.author = "Justin Gullingsrud, John Stone";
584   plugin.majorv = 1;
585   plugin.minorv = 16;
586   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
587   plugin.filename_extension = "pdb,ent";
588   plugin.open_file_read = open_pdb_read;
589   plugin.read_structure = read_pdb_structure;
590   plugin.read_bonds = read_bonds;
591   plugin.read_next_timestep = read_next_timestep;
592   plugin.close_file_read = close_pdb_read;
593   plugin.open_file_write = open_file_write;
594   plugin.write_structure = write_structure;
595   plugin.write_timestep = write_timestep;
596   plugin.close_file_write = close_file_write;
597   plugin.read_molecule_metadata = read_molecule_metadata;
598   return VMDPLUGIN_SUCCESS;
599 }
600 
VMDPLUGIN_register(void * v,vmdplugin_register_cb cb)601 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
602   (*cb)(v, (vmdplugin_t *)&plugin);
603   return VMDPLUGIN_SUCCESS;
604 }
605 
VMDPLUGIN_fini()606 VMDPLUGIN_API int VMDPLUGIN_fini() {
607   return VMDPLUGIN_SUCCESS;
608 }
609 
610