1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23"""
24GRO file format --- :mod:`MDAnalysis.coordinates.GRO`
25======================================================
26
27Classes to read and write Gromacs_ GRO_ coordinate files; see the notes on the
28`GRO format`_ which includes a conversion routine for the box.
29
30
31Writing GRO files
32-----------------
33
34By default any written GRO files will renumber the atom ids to move sequentially
35from 1.  This can be disabled, and instead the original atom ids kept, by
36using the `reindex=False` keyword argument.  This is useful when writing a
37subsection of a larger Universe while wanting to preserve the original
38identities of atoms.
39
40For example::
41
42   >>> u = mda.Universe()`
43
44   >>> u.atoms.write('out.gro', reindex=False)
45
46   # OR
47   >>> with mda.Writer('out.gro', reindex=False) as w:
48   ...     w.write(u.atoms)
49
50
51Classes
52-------
53
54.. autoclass:: Timestep
55   :members:
56
57.. autoclass:: GROReader
58   :members:
59
60.. autoclass:: GROWriter
61   :members:
62
63
64Developer notes: ``GROWriter`` format strings
65---------------------------------------------
66
67The :class:`GROWriter` class has a :attr:`GROWriter.fmt` attribute, which is a dictionary of different
68strings for writing lines in ``.gro`` files.  These are as follows:
69
70``n_atoms``
71  For the first line of the gro file, supply the number of atoms in the system.
72  E.g.::
73
74      fmt['n_atoms'].format(42)
75
76``xyz``
77  An atom line without velocities.  Requires that the 'resid', 'resname',
78  'name', 'index' and 'pos' keys be supplied.
79  E.g.::
80
81     fmt['xyz'].format(resid=1, resname='SOL', name='OW2', index=2, pos=(0.0, 1.0, 2.0))
82
83``xyz_v``
84  As above, but with velocities.  Needs an additional keyword 'vel'.
85
86``box_orthorhombic``
87  The final line of the gro file which gives box dimensions.  Requires
88  the box keyword to be given, which should be the three cartesian dimensions.
89  E.g.::
90
91     fmt['box_orthorhombic'].format(box=(10.0, 10.0, 10.0))
92
93``box_triclinic``
94  As above, but for a non orthorhombic box. Requires the box keyword, but this
95  time as a length 9 vector.  This is a flattened version of the (3,3) triclinic
96  vector representation of the unit cell.  The rearrangement into the odd
97  gromacs order is done automatically.
98
99
100.. _Gromacs: http://www.gromacs.org
101.. _GRO: http://manual.gromacs.org/current/online/gro.html
102.. _GRO format: http://chembytes.wikidot.com/g-grofile
103
104"""
105from __future__ import absolute_import
106
107from six.moves import range, zip
108import itertools
109import warnings
110
111import numpy as np
112
113from . import base
114from .core import triclinic_box, triclinic_vectors
115from ..core import flags
116from ..exceptions import NoDataError
117from ..lib import util
118
119
120class Timestep(base.Timestep):
121    _ts_order_x = [0, 3, 4]
122    _ts_order_y = [5, 1, 6]
123    _ts_order_z = [7, 8, 2]
124
125    def _init_unitcell(self):
126        return np.zeros(9, dtype=np.float32)
127
128    @property
129    def dimensions(self):
130        """unitcell dimensions (A, B, C, alpha, beta, gamma)
131
132        GRO::
133
134          8.00170   8.00170   5.65806   0.00000   0.00000   0.00000   0.00000   4.00085   4.00085
135
136        PDB::
137
138          CRYST1   80.017   80.017   80.017  60.00  60.00  90.00 P 1           1
139
140        XTC: c.trajectory.ts._unitcell::
141
142          array([[ 80.00515747,   0.        ,   0.        ],
143                 [  0.        ,  80.00515747,   0.        ],
144                 [ 40.00257874,  40.00257874,  56.57218552]], dtype=float32)
145        """
146        # unit cell line (from http://manual.gromacs.org/current/online/gro.html)
147        # v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
148        # 0     1     2      3     4     5     6    7     8
149        # This information now stored as _ts_order_x/y/z to keep DRY
150        x = self._unitcell[self._ts_order_x]
151        y = self._unitcell[self._ts_order_y]
152        z = self._unitcell[self._ts_order_z]  # this ordering is correct! (checked it, OB)
153        return triclinic_box(x, y, z)
154
155    @dimensions.setter
156    def dimensions(self, box):
157        x, y, z = triclinic_vectors(box)
158        np.put(self._unitcell, self._ts_order_x, x)
159        np.put(self._unitcell, self._ts_order_y, y)
160        np.put(self._unitcell, self._ts_order_z, z)
161
162
163class GROReader(base.SingleFrameReaderBase):
164    """Reader for the Gromacs GRO structure format.
165
166    .. versionchanged:: 0.11.0
167       Frames now 0-based instead of 1-based
168    """
169    format = 'GRO'
170    units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'}
171    _Timestep = Timestep
172
173    def _read_first_frame(self):
174        with util.openany(self.filename, 'rt') as grofile:
175            # Read first two lines to get number of atoms
176            grofile.readline()
177            self.n_atoms = n_atoms = int(grofile.readline())
178            # and the third line to get the spacing between coords (cs)
179            # (dependent upon the GRO file precision)
180            first_atomline = grofile.readline()
181            cs = first_atomline[25:].find('.') + 1
182
183            # Always try, and maybe add them later
184            velocities = np.zeros((n_atoms, 3), dtype=np.float32)
185
186            self.ts = ts = self._Timestep(n_atoms,
187                                          **self._ts_kwargs)
188
189            missed_vel = False
190
191            grofile.seek(0)
192            for pos, line in enumerate(grofile, start=-2):
193                # 2 header lines, 1 box line at end
194                if pos == n_atoms:
195                    unitcell = np.float32(line.split())
196                    continue
197                if pos < 0:
198                    continue
199
200                ts._pos[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3)]
201                try:
202                    velocities[pos] = [line[20 + cs * i:20 + cs * (i + 1)] for i in range(3, 6)]
203                except ValueError:
204                    # Remember that we got this error
205                    missed_vel = True
206
207        if np.any(velocities):
208            ts.velocities = velocities
209            if missed_vel:
210                warnings.warn("Not all velocities were present.  "
211                              "Unset velocities set to zero.")
212
213        self.ts.frame = 0  # 0-based frame number
214
215        if len(unitcell) == 3:
216            # special case: a b c --> (a 0 0) (b 0 0) (c 0 0)
217            # see Timestep.dimensions() above for format (!)
218            self.ts._unitcell[:3] = unitcell
219        elif len(unitcell) == 9:
220            self.ts._unitcell[:] = unitcell  # fill all
221        else:  # or maybe raise an error for wrong format??
222            warnings.warn("GRO unitcell has neither 3 nor 9 entries --- might be wrong.")
223            self.ts._unitcell[:len(unitcell)] = unitcell  # fill linearly ... not sure about this
224        if self.convert_units:
225            self.convert_pos_from_native(self.ts._pos)  # in-place !
226            self.convert_pos_from_native(self.ts._unitcell)  # in-place ! (all are lengths)
227            if self.ts.has_velocities:
228                # converts nm/ps to A/ps units
229                self.convert_velocities_from_native(self.ts._velocities)
230
231    def Writer(self, filename, n_atoms=None, **kwargs):
232        """Returns a CRDWriter for *filename*.
233
234        Parameters
235        ----------
236        filename: str
237            filename of the output GRO file
238
239        Returns
240        -------
241        :class:`GROWriter`
242
243        """
244        if n_atoms is None:
245            n_atoms = self.n_atoms
246        return GROWriter(filename, n_atoms=n_atoms, **kwargs)
247
248
249class GROWriter(base.WriterBase):
250    """GRO Writer that conforms to the Trajectory API.
251
252    Will attempt to write the following information from the topology:
253     - atom name (defaults to 'X')
254     - resnames (defaults to 'UNK')
255     - resids (defaults to '1')
256
257    Note
258    ----
259    The precision is hard coded to three decimal places
260
261
262    .. versionchanged:: 0.11.0
263       Frames now 0-based instead of 1-based
264    .. versionchanged:: 0.13.0
265       Now strictly writes positions with 3dp precision.
266       and velocities with 4dp.
267       Removed the `convert_dimensions_to_unitcell` method,
268       use `Timestep.triclinic_dimensions` instead.
269       Now now writes velocities where possible.
270    .. versionchanged:: 0.18.0
271       Added `reindex` keyword argument to allow original atom
272       ids to be kept.
273    """
274
275    format = 'GRO'
276    units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'}
277    gro_coor_limits = {'min': -999.9995, 'max': 9999.9995}
278
279    #: format strings for the GRO file (all include newline); precision
280    #: of 3 decimal places is hard-coded here.
281    fmt = {
282        'n_atoms': "{0:5d}\n",  # number of atoms
283        # coordinates output format, see http://chembytes.wikidot.com/g-grofile
284        'xyz': "{resid:>5d}{resname:<5.5s}{name:>5.5s}{index:>5d}{pos[0]:8.3f}{pos[1]:8.3f}{pos[2]:8.3f}\n",
285        # unitcell
286        'box_orthorhombic': "{box[0]:10.5f}{box[1]:10.5f}{box[2]:10.5f}\n",
287        'box_triclinic': "{box[0]:10.5f}{box[4]:10.5f}{box[8]:10.5f}{box[1]:10.5f}{box[2]:10.5f}{box[3]:10.5f}{box[5]:10.5f}{box[6]:10.5f}{box[7]:10.5f}\n"
288    }
289    fmt['xyz_v'] = fmt['xyz'][:-1] + "{vel[0]:8.4f}{vel[1]:8.4f}{vel[2]:8.4f}\n"
290
291    def __init__(self, filename, convert_units=None, n_atoms=None, **kwargs):
292        """Set up a GROWriter with a precision of 3 decimal places.
293
294        Parameters
295        -----------
296        filename : str
297            output filename
298
299        n_atoms : int (optional)
300            number of atoms
301
302        convert_units : str (optional)
303            units are converted to the MDAnalysis base format; ``None`` selects
304            the value of :data:`MDAnalysis.core.flags` ['convert_lengths']
305
306        reindex : bool (optional)
307            By default, all the atoms were reindexed to have a atom id starting
308            from 1. [``True``] However, this behaviour can be turned off by
309            specifying `reindex` ``=False``.
310
311        Note
312        ----
313        To use the reindex keyword, user can follow the two examples given
314        below.::
315
316           u = mda.Universe()
317
318        Usage 1::
319
320           u.atoms.write('out.gro', reindex=False)
321
322        Usage 2::
323
324           with mda.Writer('out.gro', reindex=False) as w:
325               w.write(u.atoms)
326
327        """
328        self.filename = util.filename(filename, ext='gro')
329        self.n_atoms = n_atoms
330        self.reindex = kwargs.pop('reindex', True)
331
332        if convert_units is None:
333            convert_units = flags['convert_lengths']
334        self.convert_units = convert_units  # convert length and time to base units
335
336    def write(self, obj):
337        """Write selection at current trajectory frame to file.
338
339        Parameters
340        -----------
341        obj : AtomGroup or Universe or :class:`Timestep`
342
343        Note
344        ----
345        The GRO format only allows 5 digits for *resid* and *atom
346        number*. If these numbers become larger than 99,999 then this
347        routine will chop off the leading digits.
348
349
350        .. versionchanged:: 0.7.6
351           *resName* and *atomName* are truncated to a maximum of 5 characters
352        .. versionchanged:: 0.16.0
353           `frame` kwarg has been removed
354        """
355        # write() method that complies with the Trajectory API
356
357        try:
358
359            # make sure to use atoms (Issue 46)
360            ag_or_ts = obj.atoms
361            # can write from selection == Universe (Issue 49)
362
363        except AttributeError:
364            if isinstance(obj, base.Timestep):
365                ag_or_ts = obj.copy()
366            else:
367                raise TypeError("No Timestep found in obj argument")
368
369        try:
370            velocities = ag_or_ts.velocities
371        except NoDataError:
372            has_velocities = False
373        else:
374            has_velocities = True
375
376        # Check for topology information
377        missing_topology = []
378        try:
379            names = ag_or_ts.names
380        except (AttributeError, NoDataError):
381            names = itertools.cycle(('X',))
382            missing_topology.append('names')
383        try:
384            resnames = ag_or_ts.resnames
385        except (AttributeError, NoDataError):
386            resnames = itertools.cycle(('UNK',))
387            missing_topology.append('resnames')
388        try:
389            resids = ag_or_ts.resids
390        except (AttributeError, NoDataError):
391            resids = itertools.cycle((1,))
392            missing_topology.append('resids')
393
394        if not self.reindex:
395            try:
396                atom_indices = ag_or_ts.ids
397            except (AttributeError, NoDataError):
398                atom_indices = range(1, ag_or_ts.n_atoms+1)
399                missing_topology.append('ids')
400        else:
401            atom_indices = range(1, ag_or_ts.n_atoms + 1)
402        if missing_topology:
403            warnings.warn(
404                "Supplied AtomGroup was missing the following attributes: "
405                "{miss}. These will be written with default values. "
406                "Alternatively these can be supplied as keyword arguments."
407                "".format(miss=', '.join(missing_topology)))
408
409        positions = ag_or_ts.positions
410
411        if self.convert_units:
412            # Convert back to nm from Angstroms,
413            # Not inplace because AtomGroup is not a copy
414            positions = self.convert_pos_to_native(positions, inplace=False)
415            if has_velocities:
416                velocities = self.convert_velocities_to_native(velocities, inplace=False)
417        # check if any coordinates are illegal
418        # (checks the coordinates in native nm!)
419        if not self.has_valid_coordinates(self.gro_coor_limits, positions):
420            raise ValueError("GRO files must have coordinate values between "
421                             "{0:.3f} and {1:.3f} nm: No file was written."
422                             "".format(self.gro_coor_limits["min"],
423                                       self.gro_coor_limits["max"]))
424
425        with util.openany(self.filename, 'wt') as output_gro:
426            # Header
427            output_gro.write('Written by MDAnalysis\n')
428            output_gro.write(self.fmt['n_atoms'].format(ag_or_ts.n_atoms))
429
430            # Atom descriptions and coords
431            # Dont use enumerate here,
432            # all attributes could be infinite cycles!
433            for atom_index, resid, resname, name in zip(
434                    range(ag_or_ts.n_atoms), resids, resnames, names):
435                truncated_atom_index = util.ltruncate_int(atom_indices[atom_index], 5)
436                truncated_resid = util.ltruncate_int(resid, 5)
437                if has_velocities:
438                    output_gro.write(self.fmt['xyz_v'].format(
439                        resid=truncated_resid,
440                        resname=resname,
441                        index=truncated_atom_index,
442                        name=name,
443                        pos=positions[atom_index],
444                        vel=velocities[atom_index],
445                    ))
446                else:
447                    output_gro.write(self.fmt['xyz'].format(
448                        resid=truncated_resid,
449                        resname=resname,
450                        index=truncated_atom_index,
451                        name=name,
452                        pos=positions[atom_index]
453                    ))
454
455            # Footer: box dimensions
456            if np.allclose(ag_or_ts.dimensions[3:], [90., 90., 90.]):
457                box = self.convert_pos_to_native(
458                    ag_or_ts.dimensions[:3], inplace=False)
459                # orthorhombic cell, only lengths along axes needed in gro
460                output_gro.write(self.fmt['box_orthorhombic'].format(
461                    box=box)
462                )
463            else:
464
465                try:  # for AtomGroup/Universe
466                    tri_dims = obj.universe.coord.triclinic_dimensions
467                except AttributeError:  # for Timestep
468                    tri_dims = obj.triclinic_dimensions
469
470                # full output
471                box = self.convert_pos_to_native(tri_dims.flatten(), inplace=False)
472                output_gro.write(self.fmt['box_triclinic'].format(box=box))
473