1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23
24"""PDB structure files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDB`
25========================================================================
26
27MDAnalysis reads coordinates from PDB files and additional optional
28data such as B-factors. It is also possible to substitute a PDB file
29instead of PSF file in order to define the list of atoms (but no
30connectivity information will be available in this case).
31
32PDB files contain both coordinate and atom information. It is also possible to
33write trajectories as multi-frame (or multi-model) PDB files. This is not very
34space efficient but is sometimes the lowest common denominator for exchanging
35trajectories. Single frame and multi-frame PDB files are automatically
36recognized; the only difference is thath the single-frame PDB is represented as
37a trajectory with only one frame.
38
39In order to write a selection to a PDB file one typically uses the
40:meth:`MDAnalysis.core.groups.AtomGroup.write` method of the selection::
41
42  calphas = universe.select_atoms("name CA")
43  calphas.write("calpha_only.pdb")
44
45This uses the coordinates from the current timestep of the trajectory.
46
47In order to write a PDB trajectory one needs to first obtain a multi-frame
48writer (keyword *multiframe* = ``True``) and then iterate through the
49trajectory, while writing each frame::
50
51  calphas = universe.select_atoms("name CA")
52  with MDAnalysis.Writer("calpha_traj.pdb", multiframe=True) as W:
53      for ts in u.trajectory:
54          W.write(calphas)
55
56It is important to *always close the trajectory* when done because only at this
57step is the final END_ record written, which is required by the `PDB 3.2
58standard`_. Using the writer as a context manager ensures that this always
59happens.
60
61
62Capabilities
63------------
64
65A pure-Python implementation for PDB files commonly encountered in MD
66simulations comes under the names :class:`PDBReader` and :class:`PDBWriter`. It
67only implements a subset of the `PDB 3.2 standard`_ and also allows some
68typical enhancements such as 4-letter resids (introduced by CHARMM/NAMD).
69
70The :class:`PDBReader` can read multi-frame PDB files and represents
71them as a trajectory. The :class:`PDBWriter` can write single and
72multi-frame PDB files as specified by the *multiframe* keyword. By default, it
73writes single frames. On the other hand, the :class:`MultiPDBWriter` is set up
74to write a PDB trajectory by default (equivalent to using *multiframe* =
75``True``).
76
77
78Examples for working with PDB files
79-----------------------------------
80
81A **single frame PDB** can be written with the
82:meth:`~MDAnalysis.core.groups.AtomGroup.write` method of any
83:class:`~MDAnalysis.core.groups.AtomGroup`::
84
85   protein = u.select_atoms("protein")
86   protein.write("protein.pdb")
87
88Alternatively, get the single frame writer and supply the
89:class:`~MDAnalysis.core.groups.AtomGroup`::
90
91  protein = u.select_atoms("protein")
92  with MDAnalysis.Writer("protein.pdb") as pdb:
93      pdb.write(protein)
94
95In order to write a **multi-frame PDB trajectory** from a universe *u* one can
96do the following::
97
98  with MDAnalysis.Writer("all.pdb", multiframe=True) as pdb:
99      for ts in u.trajectory:
100          pdb.write(u)
101
102Similarly, writing only a protein::
103
104  protein = u.select_atoms("protein")
105  with MDAnalysis.Writer("protein.pdb", multiframe=True) as pdb:
106      for ts in u.trajectory:
107          pdb.write(protein)
108
109
110
111Classes
112-------
113
114.. versionchanged:: 0.16.0
115   PDB readers and writers based on :class:`Bio.PDB.PDBParser` were retired and
116   removed.
117
118
119.. autoclass:: PDBReader
120   :members:
121
122.. autoclass:: PDBWriter
123   :members:
124
125   .. automethod:: _check_pdb_coordinates
126   .. automethod:: _write_pdb_bonds
127   .. automethod:: _update_frame
128   .. automethod:: _write_timestep
129
130.. autoclass:: MultiPDBWriter
131   :members:
132
133.. autoclass:: ExtendedPDBReader
134   :members:
135   :inherited-members:
136
137
138.. _`PDB 3.2 standard`:
139    http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html
140
141"""
142from __future__ import absolute_import
143
144from six.moves import range, zip
145from six import StringIO, BytesIO
146
147import io
148import os
149import errno
150import itertools
151import textwrap
152import warnings
153import logging
154import collections
155import numpy as np
156
157from ..core import flags
158from ..lib import util
159from . import base
160from ..topology.core import guess_atom_element
161from ..core.universe import Universe
162from ..exceptions import NoDataError
163
164
165logger = logging.getLogger("MDAnalysis.coordinates.PBD")
166
167# Pairs of residue name / atom name in use to deduce PDB formatted atom names
168Pair = collections.namedtuple('Atom', 'resname name')
169
170class PDBReader(base.ReaderBase):
171    """PDBReader that reads a `PDB-formatted`_ file, no frills.
172
173    The following *PDB records* are parsed (see `PDB coordinate section`_ for
174    details):
175
176     - *CRYST1* for unitcell A,B,C, alpha,beta,gamma
177     - *ATOM* or *HETATM* for serial,name,resName,chainID,resSeq,x,y,z,occupancy,tempFactor
178     - *HEADER* (:attr:`header`), *TITLE* (:attr:`title`), *COMPND*
179       (:attr:`compound`), *REMARK* (:attr:`remarks`)
180     - all other lines are ignored
181
182    Reads multi-`MODEL`_ PDB files as trajectories.
183
184    .. _PDB-formatted:
185       http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html
186    .. _PDB coordinate section:
187       http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html
188    .. _MODEL:
189       http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
190
191    =============  ============  ===========  =============================================
192    COLUMNS        DATA  TYPE    FIELD        DEFINITION
193    =============  ============  ===========  =============================================
194    1 -  6         Record name   "CRYST1"
195    7 - 15         Real(9.3)     a              a (Angstroms).
196    16 - 24        Real(9.3)     b              b (Angstroms).
197    25 - 33        Real(9.3)     c              c (Angstroms).
198    34 - 40        Real(7.2)     alpha          alpha (degrees).
199    41 - 47        Real(7.2)     beta           beta (degrees).
200    48 - 54        Real(7.2)     gamma          gamma (degrees).
201
202    1 -  6         Record name   "ATOM  "
203    7 - 11         Integer       serial       Atom  serial number.
204    13 - 16        Atom          name         Atom name.
205    17             Character     altLoc       Alternate location indicator.
206    18 - 21        Residue name  resName      Residue name.
207    22             Character     chainID      Chain identifier.
208    23 - 26        Integer       resSeq       Residue sequence number.
209    27             AChar         iCode        Code for insertion of residues.
210    31 - 38        Real(8.3)     x            Orthogonal coordinates for X in Angstroms.
211    39 - 46        Real(8.3)     y            Orthogonal coordinates for Y in Angstroms.
212    47 - 54        Real(8.3)     z            Orthogonal coordinates for Z in Angstroms.
213    55 - 60        Real(6.2)     occupancy    Occupancy.
214    61 - 66        Real(6.2)     tempFactor   Temperature  factor.
215    67 - 76        String        segID        (unofficial CHARMM extension ?)
216    77 - 78        LString(2)    element      Element symbol, right-justified.
217    79 - 80        LString(2)    charge       Charge  on the atom.
218    =============  ============  ===========  =============================================
219
220
221    See Also
222    --------
223    :class:`PDBWriter`
224    :class:`PDBReader`
225       implements a larger subset of the header records,
226       which are accessible as :attr:`PDBReader.metadata`.
227
228
229    .. versionchanged:: 0.11.0
230       * Frames now 0-based instead of 1-based
231       * New :attr:`title` (list with all TITLE lines).
232    .. versionchanged:: 0.19.1
233       Can now read PDB files with DOS line endings
234    """
235    format = ['PDB', 'ENT']
236    units = {'time': None, 'length': 'Angstrom'}
237
238    def __init__(self, filename, **kwargs):
239        """Read coordinates from *filename*.
240
241        *filename* can be a gzipped or bzip2ed compressed PDB file.
242
243        If the pdb file contains multiple MODEL records then it is
244        read as a trajectory where the MODEL numbers correspond to
245        frame numbers.
246        """
247        super(PDBReader, self).__init__(filename, **kwargs)
248
249        try:
250            self.n_atoms = kwargs['n_atoms']
251        except KeyError:
252            # hackish, but should work and keeps things DRY
253            # regular MDA usage via Universe doesn't follow this route
254            from MDAnalysis.topology import PDBParser
255
256            with PDBParser.PDBParser(self.filename) as p:
257                top = p.parse()
258            self.n_atoms = top.n_atoms
259
260        self.model_offset = kwargs.pop("model_offset", 0)
261
262        self.header = header = ""
263        self.title = title = []
264        self.compound = compound = []
265        self.remarks = remarks = []
266
267        self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
268
269        # Record positions in file of CRYST and MODEL headers
270        # then build frame offsets to start at the minimum of these
271        # This allows CRYST to come either before or after MODEL
272        # This assumes that **either**
273        # - pdbfile has a single CRYST (NVT)
274        # - pdbfile has a CRYST for every MODEL (NPT)
275        models = []
276        crysts = []
277
278        # hack for streamIO
279        if isinstance(filename, util.NamedStream) and isinstance(filename.stream, StringIO):
280            filename.stream = BytesIO(filename.stream.getvalue().encode())
281
282        pdbfile = self._pdbfile = util.anyopen(filename, 'rb')
283
284        line = "magical"
285        while line:
286            # need to use readline so tell gives end of line
287            # (rather than end of current chunk)
288            line = pdbfile.readline()
289
290            if line[:5] == b'MODEL':
291                models.append(pdbfile.tell())
292            elif line[:5] == b'CRYST':
293                # remove size of line to get **start** of CRYST line
294                crysts.append(pdbfile.tell() - len(line))
295            elif line[:6] == b'HEADER':
296                # classification = line[10:50]
297                # date = line[50:59]
298                # idCode = line[62:66]
299                header = line[10:66].decode()
300            elif line[:5] == b'TITLE':
301                title.append(line[8:80].strip().decode())
302            elif line[:6] == b'COMPND':
303                compound.append(line[7:80].strip().decode())
304            elif line[:6] == b'REMARK':
305                remarks.append(line[6:].strip().decode())
306
307        end = pdbfile.tell()  # where the file ends
308
309        if not models:
310            # No model entries
311            # so read from start of file to read first frame
312            models.append(0)
313        if len(crysts) == len(models):
314            offsets = [min(a, b) for a, b in zip(models, crysts)]
315        else:
316            offsets = models
317        # Position of the start of each frame
318        self._start_offsets = offsets
319        # Position of the end of each frame
320        self._stop_offsets = offsets[1:] + [end]
321        self.n_frames = len(offsets)
322
323        self._read_frame(0)
324
325    def Writer(self, filename, **kwargs):
326        """Returns a PDBWriter for *filename*.
327
328        Parameters
329        ----------
330        filename : str
331            filename of the output PDB file
332
333        Returns
334        -------
335        :class:`PDBWriter`
336
337        """
338        kwargs.setdefault('multiframe', self.n_frames > 1)
339        return PDBWriter(filename, **kwargs)
340
341    def _reopen(self):
342        # Pretend the current TS is -1 (in 0 based) so "next" is the
343        # 0th frame
344        self.close()
345        self._pdbfile = util.anyopen(self.filename, 'rb')
346        self.ts.frame = -1
347
348    def _read_next_timestep(self, ts=None):
349        if ts is None:
350            ts = self.ts
351        else:
352            # TODO: cleanup _read_frame() to use a "free" Timestep
353            raise NotImplementedError("PDBReader cannot assign to a timestep")
354        # frame is 1-based. Normally would add 1 to frame before calling
355        # self._read_frame to retrieve the subsequent ts. But self._read_frame
356        # assumes it is being passed a 0-based frame, and adjusts.
357        frame = self.frame + 1
358        return self._read_frame(frame)
359
360    def _read_frame(self, frame):
361        try:
362            start = self._start_offsets[frame]
363            stop = self._stop_offsets[frame]
364        except IndexError:  # out of range of known frames
365            raise IOError
366
367        pos = 0
368        occupancy = np.ones(self.n_atoms)
369
370        # Seek to start and read until start of next frame
371        self._pdbfile.seek(start)
372        chunk = self._pdbfile.read(stop - start).decode()
373
374        tmp_buf = []
375        for line in chunk.splitlines():
376            if line[:6] in ('ATOM  ', 'HETATM'):
377                # we only care about coordinates
378                tmp_buf.append([line[30:38], line[38:46], line[46:54]])
379                # TODO import bfactors - might these change?
380                try:
381                    # does an implicit str -> float conversion
382                    occupancy[pos] = line[54:60]
383                except ValueError:
384                    # Be tolerant for ill-formated or empty occupancies
385                    pass
386                pos += 1
387            elif line[:6] == 'CRYST1':
388                # does an implicit str -> float conversion
389                self.ts._unitcell[:] = [line[6:15], line[15:24],
390                                        line[24:33], line[33:40],
391                                        line[40:47], line[47:54]]
392
393        # doing the conversion from list to array at the end is faster
394        self.ts.positions = tmp_buf
395
396        # check if atom number changed
397        if pos != self.n_atoms:
398            raise ValueError("Read an incorrect number of atoms\n"
399                             "Expected {expected} got {actual}"
400                             "".format(expected=self.n_atoms, actual=pos+1))
401
402        if self.convert_units:
403            # both happen inplace
404            self.convert_pos_from_native(self.ts._pos)
405            self.convert_pos_from_native(self.ts._unitcell[:3])
406        self.ts.frame = frame
407        self.ts.data['occupancy'] = occupancy
408        return self.ts
409
410    def close(self):
411        self._pdbfile.close()
412
413
414class PDBWriter(base.WriterBase):
415    """PDB writer that implements a subset of the `PDB 3.2 standard`_ .
416
417    PDB format as used by NAMD/CHARMM: 4-letter resnames and segID are allowed,
418    altLoc is written.
419
420    The :class:`PDBWriter` can be used to either dump a coordinate
421    set to a PDB file (operating as a "single frame writer", selected with the
422    constructor keyword *multiframe* = ``False``, the default) or by writing a
423    PDB "movie" (multi frame mode, *multiframe* = ``True``), consisting of
424    multiple models (using the MODEL_ and ENDMDL_ records).
425
426    .. _`PDB 3.2 standard`:
427       http://www.wwpdb.org/documentation/file-format-content/format32/v3.2.html
428    .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
429    .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
430    .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT
431
432
433    Note
434    ----
435    Writing bonds currently only works when writing a whole :class:`Universe`
436    and if bond information is available in the topology.  (For selections
437    smaller than the whole :class:`Universe`, the atom numbering in the CONECT_
438    records would not match the numbering of the atoms in the new PDB file and
439    therefore a :exc:`NotImplementedError` is raised.)
440
441    The maximum frame number that can be stored in a PDB file is 9999 and it
442    will wrap around (see :meth:`MODEL` for further details).
443
444
445    See Also
446    --------
447    This class is identical to :class:`MultiPDBWriter` with the one
448    exception that it defaults to writing single-frame PDB files as if
449    `multiframe` = ``False`` was selected.
450
451
452    .. versionchanged:: 0.7.5
453       Initial support for multi-frame PDB files.
454
455    .. versionchanged:: 0.7.6
456       The *multiframe* keyword was added to select the writing mode. The
457       writing of bond information (CONECT_ records) is now disabled by default
458       but can be enabled with the *bonds* keyword.
459
460    .. versionchanged:: 0.11.0
461       Frames now 0-based instead of 1-based
462
463    .. versionchanged:: 0.14.0
464       PDB doesn't save charge information
465
466    """
467    fmt = {
468        'ATOM': (
469            "ATOM  {serial:5d} {name:<4s}{altLoc:<1s}{resName:<4s}"
470            "{chainID:1s}{resSeq:4d}{iCode:1s}"
471            "   {pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}{occupancy:6.2f}"
472            "{tempFactor:6.2f}      {segID:<4s}{element:>2s}\n"),
473        'REMARK': "REMARK     {0}\n",
474        'COMPND': "COMPND    {0}\n",
475        'HEADER': "HEADER    {0}\n",
476        'TITLE': "TITLE     {0}\n",
477        'MODEL': "MODEL     {0:>4d}\n",
478        'NUMMDL': "NUMMDL    {0:5d}\n",
479        'ENDMDL': "ENDMDL\n",
480        'END': "END\n",
481        'CRYST1': ("CRYST1{box[0]:9.3f}{box[1]:9.3f}{box[2]:9.3f}"
482                   "{ang[0]:7.2f}{ang[1]:7.2f}{ang[2]:7.2f} "
483                   "{spacegroup:<11s}{zvalue:4d}\n"),
484        'CONECT': "CONECT{0}\n"
485    }
486    format = ['PDB', 'ENT']
487    units = {'time': None, 'length': 'Angstrom'}
488    pdb_coor_limits = {"min": -999.9995, "max": 9999.9995}
489    #: wrap comments into REMARK records that are not longer than
490    # :attr:`remark_max_length` characters.
491    remark_max_length = 66
492    multiframe = False
493
494    # These attributes are used to deduce how to format the atom name.
495    ions = ('FE', 'AS', 'ZN', 'MG', 'MN', 'CO', 'BR',
496            'CU', 'TA', 'MO', 'AL', 'BE', 'SE', 'PT',
497            'EU', 'NI', 'IR', 'RH', 'AU', 'GD', 'RU')
498    # Mercurial can be confused for hydrogen gamma. Yet, mercurial is
499    # rather rare in the PDB. Here are all the residues that contain
500    # mercurial.
501    special_hg = ('CMH', 'EMC', 'MBO', 'MMC', 'HGB', 'BE7', 'PMB')
502    # Chloride can be confused for a carbon. Here are the residues that
503    # contain chloride.
504    special_cl = ('0QE', 'CPT', 'DCE', 'EAA', 'IMN', 'OCZ', 'OMY', 'OMZ',
505                  'UN9', '1N1', '2T8', '393', '3MY', 'BMU', 'CLM', 'CP6',
506                  'DB8', 'DIF', 'EFZ', 'LUR', 'RDC', 'UCL', 'XMM', 'HLT',
507                  'IRE', 'LCP', 'PCI', 'VGH')
508    # In these pairs, the atom name is aligned on the first column
509    # (column 13).
510    include_pairs = (Pair('OEC', 'CA1'),
511                     Pair('PLL', 'PD'),
512                     Pair('OEX', 'CA1'))
513    # In these pairs, the atom name is aligned on the second column
514    # (column 14), but other rules would align them on the first column.
515    exclude_pairs = (Pair('C14', 'C14'), Pair('C15', 'C15'),
516                     Pair('F9F', 'F9F'), Pair('OAN', 'OAN'),
517                     Pair('BLM', 'NI'), Pair('BZG', 'CO'),
518                     Pair('BZG', 'NI'), Pair('VNL', 'CO1'),
519                     Pair('VNL', 'CO2'), Pair('PF5', 'FE1'),
520                     Pair('PF5', 'FE2'), Pair('UNL', 'UNL'))
521
522    def __init__(self, filename, bonds="conect", n_atoms=None, start=0, step=1,
523                 remarks="Created by PDBWriter",
524                 convert_units=None, multiframe=None):
525        """Create a new PDBWriter
526
527        Parameters
528        ----------
529        filename: str
530           name of output file
531        start: int (optional)
532           starting timestep (the first frame will have MODEL number `start` + 1
533           because the PDB standard prescribes MODEL numbers starting at 1)
534        step: int (optional)
535           skip between subsequent timesteps
536        remarks: str (optional)
537           comments to annotate pdb file (added to the *TITLE* record); note that
538           any remarks from the trajectory that serves as input are
539           written to REMARK records with lines longer than :attr:`remark_max_length` (66
540           characters) being wrapped.
541        convert_units: str (optional)
542           units are converted to the MDAnalysis base format; ``None`` selects
543           the value of :data:`MDAnalysis.core.flags` ['convert_lengths']
544        bonds : {"conect", "all", None} (optional)
545           If set to "conect", then only write those bonds that were already
546           defined in an input PDB file as PDB CONECT_ record. If set to "all",
547           write all bonds (including guessed ones) to the file. ``None`` does
548           not write any bonds. The default is "conect".
549        multiframe: bool (optional)
550           ``False``: write a single frame to the file; ``True``: create a
551           multi frame PDB file in which frames are written as MODEL_ ... ENDMDL_
552           records. If ``None``, then the class default is chosen.    [``None``]
553
554
555        .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT
556        .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
557        .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
558
559        """
560        # n_atoms = None : dummy keyword argument
561        # (not used, but Writer() always provides n_atoms as the second argument)
562
563        # TODO: - remarks should be a list of lines and written to REMARK
564        #       - additional title keyword could contain line for TITLE
565
566        self.filename = filename
567        if convert_units is None:
568            convert_units = flags['convert_lengths']
569        # convert length and time to base units
570        self.convert_units = convert_units
571        self._multiframe = self.multiframe if multiframe is None else multiframe
572        self.bonds = bonds
573
574        if start < 0:
575            raise ValueError("'Start' must be a positive value")
576
577        self.start =  self.frames_written = start
578        self.step = step
579        self.remarks = remarks
580
581        self.pdbfile = util.anyopen(self.filename, 'wt')  # open file on init
582        self.has_END = False
583        self.first_frame_done = False
584
585    def close(self):
586        """Close PDB file and write END record"""
587        if hasattr(self, 'pdbfile') and self.pdbfile is not None:
588            if not self.has_END:
589                self.END()
590            else:
591                logger.warning("END record has already been written"
592                               " before the final closing of the file")
593            self.pdbfile.close()
594        self.pdbfile = None
595
596    def _write_pdb_title(self):
597        if self._multiframe:
598            self.TITLE("MDANALYSIS FRAMES FROM {0:d}, STEP {1:d}: {2!s}"
599                       "".format(self.start, self.step, self.remarks))
600        else:
601            self.TITLE("MDANALYSIS FRAME {0:d}: {1!s}"
602                       "".format(self.start, self.remarks))
603
604    def _write_pdb_header(self):
605        if self.first_frame_done == True:
606            return
607
608        self.first_frame_done = True
609        u = self.obj.universe
610        self.HEADER(u.trajectory)
611
612        self._write_pdb_title()
613
614        self.COMPND(u.trajectory)
615        try:
616            # currently inconsistent: DCDReader gives a string,
617            # PDB*Reader a list, so always get a list
618            # split long single lines into chunks of 66 chars
619            remarks = []
620            for line in util.asiterable(u.trajectory.remarks):
621                remarks.extend(textwrap.wrap(line, self.remark_max_length))
622            self.REMARK(*remarks)
623        except AttributeError:
624            pass
625        self.CRYST1(self.convert_dimensions_to_unitcell(u.trajectory.ts))
626
627    def _check_pdb_coordinates(self):
628        """Check if the coordinate values fall within the range allowed for PDB files.
629
630        Deletes the output file if this is the first frame or if frames have
631        already been written (in multi-frame mode) adds a REMARK instead of the
632        coordinates and closes the file.
633
634        Raises :exc:`ValueError` if the coordinates fail the check.
635        """
636        atoms = self.obj.atoms  # make sure to use atoms (Issue 46)
637        # can write from selection == Universe (Issue 49)
638        coor = atoms.positions
639
640        # check if any coordinates are illegal (coordinates are already in
641        # Angstroem per package default)
642        if self.has_valid_coordinates(self.pdb_coor_limits, coor):
643            return True
644        # note the precarious close() here: we know that the file is open and
645        # we now prepare to remove what we have already written (header and
646        # such) or add a REMARK (which allows the user to look at the
647        # previously written frames)
648        if self.frames_written > 1:
649            self.REMARK("Incomplete multi-frame trajectory.",
650                        "Coordinates for the current frame cannot be "
651                        "represented in the PDB format.")
652            self.close()
653        else:
654            self.close()
655            try:
656                os.remove(self.filename)
657            except OSError as err:
658                if err.errno == errno.ENOENT:
659                    pass
660        raise ValueError("PDB files must have coordinate values between "
661                         "{0:.3f} and {1:.3f} Angstroem: file writing was "
662                         "aborted.".format(self.pdb_coor_limits["min"],
663                                           self.pdb_coor_limits["max"]))
664
665    def _write_pdb_bonds(self):
666        """Writes out all the bond records"""
667        if self.bonds is None:
668            return
669
670        if not self.obj or not hasattr(self.obj.universe, 'bonds'):
671            return
672
673        bondset = set(itertools.chain(*(a.bonds for a in self.obj.atoms)))
674
675        mapping = {index: i for i, index in enumerate(self.obj.atoms.indices)}
676
677        # Write out only the bonds that were defined in CONECT records
678        if self.bonds == "conect":
679            bonds = ((bond[0].index, bond[1].index) for bond in bondset if not bond.is_guessed)
680        elif self.bonds == "all":
681            bonds = ((bond[0].index, bond[1].index) for bond in bondset)
682        else:
683            raise ValueError("bonds has to be either None, 'conect' or 'all'")
684
685        con = collections.defaultdict(list)
686        for a1, a2 in bonds:
687            if not (a1 in mapping and a2 in mapping):
688                continue
689            con[a2].append(a1)
690            con[a1].append(a2)
691
692        atoms = np.sort(self.obj.atoms.indices)
693
694        conect = ([a] + sorted(con[a])
695                  for a in atoms if a in con)
696        connect = ([mapping[x] for x in row] for row in conect)
697
698        for c in conect:
699            self.CONECT(c)
700
701    def _update_frame(self, obj):
702        """Method to initialize important attributes in writer from a AtomGroup or Universe *obj*.
703
704        Attributes initialized/updated:
705
706        * :attr:`PDBWriter.obj` (the entity that provides topology information *and*
707          coordinates, either a :class:`~MDAnalysis.core.groups.AtomGroup` or a whole
708          :class:`~MDAnalysis.core.universe.Universe`)
709        * :attr:`PDBWriter.trajectory` (the underlying trajectory
710          :class:`~MDAnalysis.coordinates.base.Reader`)
711        * :attr:`PDBWriter.timestep` (the underlying trajectory
712          :class:`~MDAnalysis.coordinates.base.Timestep`)
713
714        Before calling :meth:`write_next_timestep` this method **must** be
715        called at least once to enable extracting topology information from the
716        current frame.
717        """
718        if isinstance(obj, base.Timestep):
719            raise TypeError("PDBWriter cannot write Timestep objects "
720                            "directly, since they lack topology information ("
721                            "atom names and types) required in PDB files")
722        if len(obj.atoms) == 0:
723            raise IndexError("Cannot write empty AtomGroup")
724
725        # remember obj for some of other methods --- NOTE: this is an evil/lazy
726        # hack...
727        self.obj = obj
728        self.ts = obj.universe.trajectory.ts
729
730    def write(self, obj):
731        """Write object *obj* at current trajectory frame to file.
732
733        *obj* can be a selection (i.e. a
734        :class:`~MDAnalysis.core.groups.AtomGroup`) or a whole
735        :class:`~MDAnalysis.core.universe.Universe`.
736
737        The last letter of the :attr:`~MDAnalysis.core.groups.Atom.segid` is
738        used as the PDB chainID (but see :meth:`~PDBWriter.ATOM` for
739        details).
740
741        Parameters
742        ----------
743        obj
744            The :class:`~MDAnalysis.core.groups.AtomGroup` or
745            :class:`~MDAnalysis.core.universe.Universe` to write.
746        """
747
748        self._update_frame(obj)
749        self._write_pdb_header()
750        # Issue 105: with write() ONLY write a single frame; use
751        # write_all_timesteps() to dump everything in one go, or do the
752        # traditional loop over frames
753        self.write_next_timestep(self.ts, multiframe=self._multiframe)
754        self._write_pdb_bonds()
755        # END record is written when file is being close()d
756
757    def write_all_timesteps(self, obj):
758        """Write all timesteps associated with *obj* to the PDB file.
759
760        *obj* can be a :class:`~MDAnalysis.core.groups.AtomGroup`
761        or a :class:`~MDAnalysis.core.universe.Universe`.
762
763        The method writes the frames from the one specified as *start* until
764        the end, using a step of *step* (*start* and *step* are set in the
765        constructor). Thus, if *u* is a Universe then ::
766
767           u.trajectory[-2]
768           pdb = PDBWriter("out.pdb", u.atoms.n_atoms)
769           pdb.write_all_timesteps(u)
770
771        will write a PDB trajectory containing the last 2 frames and ::
772
773           pdb = PDBWriter("out.pdb", u.atoms.n_atoms, start=12, step=2)
774           pdb.write_all_timesteps(u)
775
776        will be writing frames 12, 14, 16, ...
777
778        .. versionchanged:: 0.11.0
779           Frames now 0-based instead of 1-based
780        """
781
782        self._update_frame(obj)
783        self._write_pdb_header()
784
785        start, step = self.start, self.step
786        traj = obj.universe.trajectory
787
788        # Start from trajectory[0]/frame 0, if there are more than 1 frame.
789        # If there is only 1 frame, the traj.frames is not like a python list:
790        # accessing trajectory[-1] raises key error.
791        if not start and traj.n_frames > 1:
792            start = traj.frame
793
794        for framenumber in range(start, len(traj), step):
795            traj[framenumber]
796            self.write_next_timestep(self.ts, multiframe=True)
797
798        self._write_pdb_bonds()
799        self.close()
800
801        # Set the trajectory to the starting position
802        traj[start]
803
804    def write_next_timestep(self, ts=None, **kwargs):
805        '''write a new timestep to the PDB file
806
807        :Keywords:
808          *ts*
809             :class:`base.Timestep` object containing coordinates to be written to trajectory file;
810             if ``None`` then :attr:`PDBWriter.ts`` is tried.
811          *multiframe*
812             ``False``: write a single frame (default); ``True`` behave as a trajectory writer
813
814        .. Note::
815
816           Before using this method with another :class:`base.Timestep` in the *ts*
817           argument, :meth:`PDBWriter._update_frame` *must* be called
818           with the :class:`~MDAnalysis.core.groups.AtomGroup.Universe` as
819           its argument so that topology information can be gathered.
820        '''
821        if ts is None:
822            try:
823                ts = self.ts
824            except AttributeError:
825                raise NoDataError("PBDWriter: no coordinate data to write to "
826                                  "trajectory file")
827        self._check_pdb_coordinates()
828        self._write_timestep(ts, **kwargs)
829
830    def _deduce_PDB_atom_name(self, atomname, resname):
831        """Deduce how the atom name should be aligned.
832
833        Atom name format can be deduced from the atom type, yet atom type is
834        not always available. This function uses the atom name and residue name
835        to deduce how the atom name should be formatted. The rules in use got
836        inferred from an analysis of the PDB. See gist at
837        <https://gist.github.com/jbarnoud/37a524330f29b5b7b096> for more
838        details.
839        """
840        if atomname == '':
841            return ''
842        if len(atomname) >= 4:
843            return atomname[:4]
844        elif len(atomname) == 1:
845            return ' {}  '.format(atomname)
846        elif ((resname == atomname
847               or atomname[:2] in self.ions
848               or atomname == 'UNK'
849               or (resname in self.special_hg and atomname[:2] == 'HG')
850               or (resname in self.special_cl and atomname[:2] == 'CL')
851               or Pair(resname, atomname) in self.include_pairs)
852              and Pair(resname, atomname) not in self.exclude_pairs):
853            return '{:<4}'.format(atomname)
854        return ' {:<3}'.format(atomname)
855
856    def _write_timestep(self, ts, multiframe=False):
857        """Write a new timestep *ts* to file
858
859        Does unit conversion if necessary.
860
861        By setting *multiframe* = ``True``, MODEL_ ... ENDMDL_ records are
862        written to represent trajectory frames in a multi-model PDB file. (At
863        the moment we do *not* write the NUMMDL_ record.)
864
865        The *multiframe* = ``False`` keyword signals that the
866        :class:`PDBWriter` is in single frame mode and no MODEL_
867        records are written.
868
869        .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
870        .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
871        .. _NUMMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#NUMMDL
872
873        .. versionchanged:: 0.7.6
874           The *multiframe* keyword was added, which completely determines if
875           MODEL records are written. (Previously, this was decided based on the
876           underlying trajectory and only if ``len(traj) > 1`` would MODEL records
877           have been written.)
878
879        """
880        atoms = self.obj.atoms
881        pos = atoms.positions
882        if self.convert_units:
883            pos = self.convert_pos_to_native(pos, inplace=False)
884
885        if multiframe:
886            self.MODEL(self.frames_written + 1)
887
888        # Make zero assumptions on what information the AtomGroup has!
889        # theoretically we could get passed only indices!
890        def get_attr(attrname, default):
891            """Try and pull info off atoms, else fake it
892
893            attrname - the field to pull of AtomGroup (plural!)
894            default - default value in case attrname not found
895            """
896            try:
897                return getattr(atoms, attrname)
898            except AttributeError:
899                if self.frames_written == 0:
900                    warnings.warn("Found no information for attr: '{}'"
901                                  " Using default value of '{}'"
902                                  "".format(attrname, default))
903                return np.array([default] * len(atoms))
904        altlocs = get_attr('altLocs', ' ')
905        resnames = get_attr('resnames', 'UNK')
906        icodes = get_attr('icodes', ' ')
907        segids = get_attr('segids', ' ')
908        resids = get_attr('resids', 1)
909        occupancies = get_attr('occupancies', 1.0)
910        tempfactors = get_attr('tempfactors', 0.0)
911        atomnames = get_attr('names', 'X')
912
913        for i, atom in enumerate(atoms):
914            vals = {}
915            vals['serial'] = util.ltruncate_int(i + 1, 5)  # check for overflow here?
916            vals['name'] = self._deduce_PDB_atom_name(atomnames[i], resnames[i])
917            vals['altLoc'] = altlocs[i][:1]
918            vals['resName'] = resnames[i][:4]
919            vals['chainID'] = segids[i][:1]
920            vals['resSeq'] = util.ltruncate_int(resids[i], 4)
921            vals['iCode'] = icodes[i][:1]
922            vals['pos'] = pos[i]  # don't take off atom so conversion works
923            vals['occupancy'] = occupancies[i]
924            vals['tempFactor'] = tempfactors[i]
925            vals['segID'] = segids[i][:4]
926            vals['element'] = guess_atom_element(atomnames[i].strip())[:2]
927
928            # .. _ATOM: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ATOM
929            self.pdbfile.write(self.fmt['ATOM'].format(**vals))
930        if multiframe:
931            self.ENDMDL()
932        self.frames_written += 1
933
934    def HEADER(self, trajectory):
935        """Write HEADER_ record.
936
937        .. _HEADER: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html#HEADER
938
939        """
940        if not hasattr(trajectory, 'header'):
941            return
942        header = trajectory.header
943        self.pdbfile.write(self.fmt['HEADER'].format(header))
944
945    def TITLE(self, *title):
946        """Write TITLE_ record.
947
948        .. _TITLE: http://www.wwpdb.org/documentation/file-format-content/format32/sect2.html
949
950        """
951        line = " ".join(title)  # TODO: should do continuation automatically
952        self.pdbfile.write(self.fmt['TITLE'].format(line))
953
954    def REMARK(self, *remarks):
955        """Write generic REMARK_ record (without number).
956
957        Each string provided in *remarks* is written as a separate REMARK_
958        record.
959
960        See also `REMARK (update)`_.
961
962        .. _REMARK: http://www.wwpdb.org/documentation/file-format-content/format32/remarks1.html
963        .. _REMARK (update): http://www.wwpdb.org/documentation/file-format-content/format32/remarks2.html
964
965        """
966        for remark in remarks:
967            self.pdbfile.write(self.fmt['REMARK'].format(remark))
968
969    def COMPND(self, trajectory):
970        if not hasattr(trajectory, 'compound'):
971            return
972        compound = trajectory.compound
973        for c in compound:
974            self.pdbfile.write(self.fmt['COMPND'].format(c))
975
976    def CRYST1(self, dimensions, spacegroup='P 1', zvalue=1):
977        """Write CRYST1_ record.
978
979        .. _CRYST1: http://www.wwpdb.org/documentation/file-format-content/format32/sect8.html#CRYST1
980
981        """
982        self.pdbfile.write(self.fmt['CRYST1'].format(
983            box=dimensions[:3],
984            ang=dimensions[3:],
985            spacegroup=spacegroup,
986            zvalue=zvalue))
987
988    def MODEL(self, modelnumber):
989        """Write the MODEL_ record.
990
991        .. note::
992
993           The maximum MODEL number is limited to 9999 in the PDB
994           standard (i.e., 4 digits). If frame numbers are larger than
995           9999, they will wrap around, i.e., 9998, 9999, 0, 1, 2, ...
996
997
998        .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
999
1000
1001        .. versionchanged:: 0.19.0
1002           Maximum model number is enforced.
1003
1004        """
1005        self.pdbfile.write(self.fmt['MODEL'].format(int(str(modelnumber)[-4:])))
1006
1007    def END(self):
1008        """Write END_ record.
1009
1010        Only a single END record is written. Calling END multiple times has no
1011        effect. Because :meth:`~PDBWriter.close` also calls this
1012        method right before closing the file it is recommended to *not* call
1013        :meth:`~PDBWriter.END` explicitly.
1014
1015        .. _END: http://www.wwpdb.org/documentation/file-format-content/format32/sect11.html#END
1016
1017        """
1018        if not self.has_END:
1019            # only write a single END record
1020            self.pdbfile.write(self.fmt['END'])
1021        self.has_END = True
1022
1023    def ENDMDL(self):
1024        """Write the ENDMDL_ record.
1025
1026        .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
1027
1028        """
1029        self.pdbfile.write(self.fmt['ENDMDL'])
1030
1031    def CONECT(self, conect):
1032        """Write CONECT_ record.
1033
1034        .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT
1035
1036        """
1037        conect = ["{0:5d}".format(entry + 1) for entry in conect]
1038        conect = "".join(conect)
1039        self.pdbfile.write(self.fmt['CONECT'].format(conect))
1040
1041
1042class ExtendedPDBReader(PDBReader):
1043    """PDBReader that reads a PDB-formatted file with five-digit residue numbers.
1044
1045    This reader does not conform to the `PDB 3.2 standard`_ because it allows
1046    five-digit residue numbers that may take up columns 23 to 27 (inclusive)
1047    instead of being confined to 23-26 (with column 27 being reserved for the
1048    insertion code in the PDB standard). PDB files in this format are written
1049    by popular programs such as VMD_.
1050
1051    See Also
1052    --------
1053    :class:`PDBReader`
1054
1055
1056    .. _VMD: http://www.ks.uiuc.edu/Research/vmd/
1057
1058    .. versionadded:: 0.8
1059    """
1060    format = "XPDB"
1061
1062
1063class MultiPDBWriter(PDBWriter):
1064    """PDB writer that implements a subset of the `PDB 3.2 standard`_ .
1065
1066    PDB format as used by NAMD/CHARMM: 4-letter resnames and segID, altLoc
1067    is written.
1068
1069    By default, :class:`MultiPDBWriter` writes a PDB "movie" (multi frame mode,
1070    *multiframe* = ``True``), consisting of multiple models (using the MODEL_
1071    and ENDMDL_ records).
1072
1073
1074    .. _MODEL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#MODEL
1075    .. _ENDMDL: http://www.wwpdb.org/documentation/file-format-content/format32/sect9.html#ENDMDL
1076    .. _CONECT: http://www.wwpdb.org/documentation/file-format-content/format32/sect10.html#CONECT
1077
1078
1079    See Also
1080    --------
1081    This class is identical to :class:`PDBWriter` with the one
1082    exception that it defaults to writing multi-frame PDB files instead of
1083    single frames.
1084
1085
1086    .. versionadded:: 0.7.6
1087
1088    """
1089    format = ['PDB', 'ENT']
1090    multiframe = True  # For Writer registration
1091    singleframe = False
1092