1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23"""
24PQR topology parser
25===================
26
27Read atoms with charges from a PQR_ file (as written by PDB2PQR_). No
28connectivity is deduced.
29
30Note
31----
32The file format is described in :mod:`MDAnalysis.coordinates.PQR`.
33
34
35Classes
36-------
37
38.. autoclass:: PQRParser
39   :members:
40   :inherited-members:
41
42
43.. _PQR:     https://apbs-pdb2pqr.readthedocs.io/en/latest/formats/pqr.html
44.. _APBS:    https://apbs-pdb2pqr.readthedocs.io/en/latest/apbs/index.html
45.. _PDB2PQR: https://apbs-pdb2pqr.readthedocs.io/en/latest/pdb2pqr/index.html
46.. _PDB:     http://www.wwpdb.org/documentation/file-format
47
48"""
49from __future__ import absolute_import
50
51import numpy as np
52
53from . import guessers
54from ..lib.util import openany
55from ..core.topologyattrs import (
56    Atomids,
57    Atomnames,
58    Atomtypes,
59    Charges,
60    ICodes,
61    Masses,
62    Radii,
63    RecordTypes,
64    Resids,
65    Resnums,
66    Resnames,
67    Segids,
68)
69from ..core.topology import Topology
70from .base import TopologyReaderBase, squash_by, change_squash
71
72
73class PQRParser(TopologyReaderBase):
74    """Parse atom information from PQR file *filename*.
75
76    Creates a MDAnalysis Topology with the following attributes
77     - Atomids
78     - Atomnames
79     - Charges
80     - Radii
81     - RecordTypes (ATOM/HETATM)
82     - Resids
83     - Resnames
84     - Segids
85
86    Guesses the following:
87     - atomtypes (if not present, Gromacs generated PQR files have these)
88     - masses
89
90    .. versionchanged:: 0.9.0
91       Read chainID from a PQR file and use it as segid (before we always used
92       'SYSTEM' as the new segid).
93    .. versionchanged:: 0.16.1
94       Now reads insertion codes and splits into new residues around these
95    .. versionchanged:: 0.18.0
96       Added parsing of Record types
97       Can now read PQR files from Gromacs, these provide atom type as last column
98       but don't have segids
99    """
100    format = 'PQR'
101
102    @staticmethod
103    def guess_flavour(line):
104        """Guess which variant of PQR format this line is
105
106        Parameters
107        ----------
108        line : str
109          entire line of PQR file starting with ATOM/HETATM
110
111        Returns
112        -------
113        flavour : str
114          ORIGINAL / GROMACS / NO_CHAINID
115
116        .. versionadded:: 0.18.0
117        """
118        fields = line.split()
119        if len(fields) == 11:
120            try:
121                float(fields[-1])
122            except ValueError:
123                flavour = 'GROMACS'
124            else:
125                flavour = 'ORIGINAL'
126        else:
127            flavour = 'NO_CHAINID'
128        return flavour
129
130    def parse(self, **kwargs):
131        """Parse atom information from PQR file *filename*.
132
133        Returns
134        -------
135        A MDAnalysis Topology object
136        """
137        record_types = []
138        serials = []
139        names = []
140        resnames = []
141        chainIDs = []
142        resids = []
143        icodes = []
144        charges = []
145        radii = []
146        elements = []
147
148        flavour = None
149
150        with openany(self.filename) as f:
151            for line in f:
152                if not line.startswith(("ATOM", "HETATM")):
153                    continue
154                fields = line.split()
155
156                if flavour is None:
157                    flavour = self.guess_flavour(line)
158                if flavour == 'ORIGINAL':
159                    (recordName, serial, name, resName,
160                     chainID, resSeq, x, y, z, charge,
161                     radius) = fields
162                elif flavour == 'GROMACS':
163                    (recordName, serial, name, resName,
164                     resSeq, x, y, z, charge,
165                     radius, element) = fields
166                    chainID = "SYSTEM"
167                    elements.append(element)
168                elif flavour == 'NO_CHAINID':
169                    # files without the chainID
170                    (recordName, serial, name, resName,
171                     resSeq, x, y, z, charge, radius) = fields
172                    chainID = "SYSTEM"
173
174                try:
175                    resid = int(resSeq)
176                except ValueError:
177                    # has icode present
178                    resid = int(resSeq[:-1])
179                    icode = resSeq[-1]
180                else:
181                    icode = ''
182
183                record_types.append(recordName)
184                serials.append(serial)
185                names.append(name)
186                resnames.append(resName)
187                resids.append(resid)
188                icodes.append(icode)
189                charges.append(charge)
190                radii.append(radius)
191                chainIDs.append(chainID)
192
193        n_atoms = len(serials)
194
195        if not elements:
196            atomtypes = guessers.guess_types(names)
197            guessed_types = True
198        else:
199            atomtypes = elements
200            guessed_types = False
201        masses = guessers.guess_masses(atomtypes)
202
203        attrs = []
204        attrs.append(Atomids(np.array(serials, dtype=np.int32)))
205        attrs.append(Atomnames(np.array(names, dtype=object)))
206        attrs.append(Charges(np.array(charges, dtype=np.float32)))
207        attrs.append(Atomtypes(atomtypes, guessed=guessed_types))
208        attrs.append(Masses(masses, guessed=True))
209        attrs.append(RecordTypes(np.array(record_types, dtype=object)))
210        attrs.append(Radii(np.array(radii, dtype=np.float32)))
211
212        resids = np.array(resids, dtype=np.int32)
213        icodes = np.array(icodes, dtype=object)
214        resnames = np.array(resnames, dtype=object)
215        chainIDs = np.array(chainIDs, dtype=object)
216
217        residx, (resids, resnames, icodes, chainIDs) = change_squash(
218            (resids, resnames, icodes, chainIDs),
219            (resids, resnames, icodes, chainIDs))
220
221        n_residues = len(resids)
222        attrs.append(Resids(resids))
223        attrs.append(Resnums(resids.copy()))
224        attrs.append(Resnames(resnames))
225        attrs.append(ICodes(icodes))
226
227        segidx, chainIDs = squash_by(chainIDs)[:2]
228
229        n_segments = len(chainIDs)
230        attrs.append(Segids(chainIDs))
231
232        top = Topology(n_atoms, n_residues, n_segments,
233                       attrs=attrs,
234                       atom_resindex=residx,
235                       residue_segindex=segidx)
236
237        return top
238