1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23"""
24CRD topology parser
25===================
26
27Read a list of atoms from a CHARMM CARD coordinate file (CRD_)
28to build a basic topology.  Reads atom ids (ATOMNO), atom names (TYPES),
29resids (RESID), residue numbers (RESNO), residue names (RESNames), segment ids
30(SEGID) and tempfactor (Weighting).  Atom element and mass are guessed based on
31the name of the atom.
32
33Residues are detected through a change is either resid or resname
34while segments are detected according to changes in segid.
35
36.. _CRD: https://www.charmmtutorial.org/index.php/CHARMM:The_Basics
37
38
39Classes
40-------
41
42.. autoclass:: CRDParser
43   :members:
44   :inherited-members:
45
46"""
47from __future__ import absolute_import
48
49import numpy as np
50
51from ..lib.util import openany, FORTRANReader
52from .base import TopologyReaderBase, change_squash
53from . import guessers
54from ..core.topology import Topology
55from ..core.topologyattrs import (
56    Atomids,
57    Atomnames,
58    Atomtypes,
59    Masses,
60    Resids,
61    Resnames,
62    Resnums,
63    Segids,
64    Tempfactors,
65)
66
67
68class CRDParser(TopologyReaderBase):
69    """Parse a CHARMM CARD coordinate file for topology information.
70
71    Reads the following Attributes:
72     - Atomids
73     - Atomnames
74     - Tempfactors
75     - Resids
76     - Resnames
77     - Resnums
78     - Segids
79
80    Guesses the following attributes:
81     - Atomtypes
82     - Masses
83    """
84    format = 'CRD'
85
86    def parse(self, **kwargs):
87        """Create the Topology object
88
89        Returns
90        -------
91        MDAnalysis Topology object
92
93        Todo
94        ----
95        Could use the resnum and temp factor better
96        """
97        extformat = FORTRANReader('2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10')
98        stdformat = FORTRANReader('2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5')
99
100        atomids = []
101        atomnames = []
102        tempfactors = []
103        resids = []
104        resnames = []
105        resnums = []
106        segids = []
107
108        with openany(self.filename) as crd:
109            for linenum, line in enumerate(crd):
110                # reading header
111                if line.split()[0] == '*':
112                    continue
113                elif line.split()[-1] == 'EXT' and int(line.split()[0]):
114                    r = extformat
115                    continue
116                elif line.split()[0] == line.split()[-1] and line.split()[0] != '*':
117                    r = stdformat
118                    continue
119                # anything else should be an atom
120                try:
121                    (serial, resnum, resName, name,
122                     x, y, z, segid, resid, tempFactor) = r.read(line)
123                except:
124                    raise ValueError("Check CRD format at line {0}: {1}"
125                                     "".format(linenum + 1, line.rstrip()))
126
127                atomids.append(serial)
128                atomnames.append(name)
129                tempfactors.append(tempFactor)
130                resids.append(resid)
131                resnames.append(resName)
132                resnums.append(resnum)
133                segids.append(segid)
134
135        # Convert to np arrays
136        atomids = np.array(atomids, dtype=np.int32)
137        atomnames = np.array(atomnames, dtype=object)
138        tempfactors = np.array(tempfactors, dtype=np.float32)
139        resids = np.array(resids, dtype=np.int32)
140        resnames = np.array(resnames, dtype=object)
141        resnums = np.array(resnums, dtype=np.int32)
142        segids = np.array(segids, dtype=object)
143
144        # Guess some attributes
145        atomtypes = guessers.guess_types(atomnames)
146        masses = guessers.guess_masses(atomtypes)
147
148        atom_residx, (res_resids, res_resnames, res_resnums, res_segids) = change_squash(
149            (resids, resnames), (resids, resnames, resnums, segids))
150        res_segidx, (seg_segids,) = change_squash(
151            (res_segids,), (res_segids,))
152
153        top = Topology(len(atomids), len(res_resids), len(seg_segids),
154                       attrs=[
155                           Atomids(atomids),
156                           Atomnames(atomnames),
157                           Atomtypes(atomtypes, guessed=True),
158                           Masses(masses, guessed=True),
159                           Tempfactors(tempfactors),
160                           Resids(res_resids),
161                           Resnames(res_resnames),
162                           Resnums(res_resnums),
163                           Segids(seg_segids),
164                       ],
165                       atom_resindex=atom_residx,
166                       residue_segindex=res_segidx)
167
168        return top
169