1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
3#
4# MDAnalysis --- https://www.mdanalysis.org
5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
6# (see the file AUTHORS for the full list of names)
7#
8# Released under the GNU Public Licence, v2 or any higher version
9#
10# Please cite your use of MDAnalysis in published work:
11#
12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17#
18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21#
22
23"""\
24Core Topology object --- :mod:`MDAnalysis.core.topology`
25========================================================
26
27.. versionadded:: 0.16.0
28
29:class:`Topology` is the core object that holds all topology information.
30
31TODO: Add in-depth discussion.
32
33Notes
34-----
35For developers: In MDAnalysis 0.16.0 this new topology system was
36introduced and discussed as issue `#363`_; this issue contains key
37information and discussions on the new system. The issue number *363*
38is also being used as a short-hand in discussions to refer to the new
39topology system.
40
41
42.. _`#363`: https://github.com/MDAnalysis/mdanalysis/issues/363
43
44Classes
45-------
46
47.. autoclass:: Topology
48   :members:
49.. autoclass:: TransTable
50   :members:
51
52Helper functions
53----------------
54
55.. autofunction:: make_downshift_arrays
56
57"""
58from __future__ import absolute_import
59
60from six.moves import zip
61import numpy as np
62
63from .topologyattrs import Atomindices, Resindices, Segindices
64from ..exceptions import NoDataError
65
66
67# TODO Notes:
68#   Could make downshift tables lazily built! This would
69#     a) Make these not get built when not used
70#     b) Optimise moving multiple atoms between residues as only built once
71#     afterwards
72
73#   Could optimise moves by only updating the two parent tables rather than
74#   rebuilding everything!
75
76
77def make_downshift_arrays(upshift, nparents):
78    """From an upwards translation table, create the opposite direction
79
80    Turns a many to one mapping (eg atoms to residues) to a one to many mapping
81    (residues to atoms)
82
83    Parameters
84    ----------
85    upshift : array_like
86        Array of integers describing which parent each item belongs to
87    nparents : integer
88        Total number of parents that exist.
89
90    Returns
91    -------
92    downshift : array_like (dtype object)
93        An array of arrays, each containing the indices of the children
94        of each parent.  Length `nparents` + 1
95
96    Examples
97    --------
98
99    To find the residue to atom mappings for a given atom to residue mapping:
100
101    >>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2])
102    >>> make_downshift_arrays(atom2res)
103    array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object)
104
105    Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5
106
107    Notes
108    -----
109    The final entry in the return array will be ``None`` to ensure that the
110    dtype of the array is :class:`object`.
111
112    .. warning:: This means negative indexing should **never**
113                 be used with these arrays.
114    """
115    order = np.argsort(upshift)
116
117    upshift_sorted = upshift[order]
118    borders = [None] + list(np.nonzero(np.diff(upshift_sorted))[0] + 1) + [None]
119
120    # returns an array of arrays
121    downshift = []
122    counter = -1
123    # don't use enumerate, we modify counter in place
124    for x, y in zip(borders[:-1], borders[1:]):
125        counter += 1
126        # If parent is skipped, eg (0, 0, 2, 2, etc)
127        while counter != upshift[order[x:y][0]]:
128            downshift.append(np.array([], dtype=np.intp))
129            counter += 1
130        downshift.append(np.sort(np.array(order[x:y], copy=True, dtype=np.intp)))
131    # Add entries for childless parents at end of range
132    while counter < (nparents - 1):
133        downshift.append(np.array([], dtype=np.intp))
134        counter += 1
135    # Add None to end of array to force it to be of type Object
136    # Without this, a rectangular array gets squashed into a single array
137    downshift.append(None)
138    return np.array(downshift, dtype=object)
139
140
141class TransTable(object):
142    """Membership tables with methods to translate indices across levels.
143
144    There are three levels; Atom, Residue and Segment.  Each Atom **must**
145    belong in a Residue, each Residue **must** belong to a Segment.
146
147    When translating upwards, eg finding which Segment a Residue belongs in,
148    a single numpy array is returned.  When translating downwards, two options
149    are available; a concatenated result (suffix `_1`) or a list for each parent
150    object (suffix `_2d`).
151
152    Parameters
153    ----------
154    n_atoms : int
155        number of atoms in topology
156    n_residues : int
157        number of residues in topology
158    n_segments : int
159        number of segments in topology
160    atom_resindex : 1-D array
161        resindex for each atom in the topology; the number of unique values in
162        this array must be <= `n_residues`, and the array must be length
163        `n_atoms`; giving None defaults to placing all atoms in residue 0
164    residue_segindex : 1-D array
165        segindex for each residue in the topology; the number of unique values
166        in this array must be <= `n_segments`, and the array must be length
167        `n_residues`; giving None defaults to placing all residues in segment 0
168
169
170    Attributes
171    ----------
172    n_atoms : int
173        number of atoms in topology
174    n_residues : int
175        number of residues in topology
176    n_segments : int
177        number of segments in topology
178    size
179        tuple describing the shape of the TransTable
180
181    Methods
182    -------
183    atoms2residues(aix)
184        Returns the residue index for many atom indices
185    residues2atoms_1d(rix)
186        All atoms in the residues represented by *rix*
187    residues2atoms_2d(rix)
188        List of atom indices for each residue in *rix*
189    residues2segments(rix)
190        Segment indices for each residue in *rix*
191    segments2residues_1d(six)
192        Similar to `residues2atoms_1d`
193    segments2residues_2d(six)
194        Similar to `residues2atoms_2d`
195    atoms2segments(aix)
196        Segment indices for each atom in *aix*
197    segments2atoms_1d(six)
198        Similar to `residues2atoms_1d`
199    segments2atoms_2d(six)
200        Similar to `residues2atoms_2d`
201
202    """
203    def __init__(self,
204                 n_atoms, n_residues, n_segments,  # Size of tables
205                 atom_resindex=None, residue_segindex=None,  # Contents of tables
206                 ):
207        self.n_atoms = n_atoms
208        self.n_residues = n_residues
209        self.n_segments = n_segments
210
211        # built atom-to-residue mapping, and vice-versa
212        if atom_resindex is None:
213            self._AR = np.zeros(n_atoms, dtype=np.intp)
214        else:
215            self._AR = np.asarray(atom_resindex, dtype=np.intp).copy()
216            if not len(self._AR) == n_atoms:
217                raise ValueError("atom_resindex must be len n_atoms")
218        self._RA = make_downshift_arrays(self._AR, n_residues)
219
220        # built residue-to-segment mapping, and vice-versa
221        if residue_segindex is None:
222            self._RS = np.zeros(n_residues, dtype=np.intp)
223        else:
224            self._RS = np.asarray(residue_segindex, dtype=np.intp).copy()
225            if not len(self._RS) == n_residues:
226                raise ValueError("residue_segindex must be len n_residues")
227        self._SR = make_downshift_arrays(self._RS, n_segments)
228
229    def copy(self):
230        """Return a deepcopy of this Transtable"""
231        return self.__class__(self.n_atoms, self.n_residues, self.n_segments,
232                              atom_resindex=self._AR, residue_segindex=self._RS)
233
234    @property
235    def size(self):
236        """The shape of the table, (n_atoms, n_residues, n_segments)"""
237        return (self.n_atoms, self.n_residues, self.n_segments)
238
239    def atoms2residues(self, aix):
240        """Get residue indices for each atom.
241
242        Parameters
243        ----------
244        aix : array
245            atom indices
246
247        Returns
248        -------
249        rix : array
250            residue index for each atom
251
252        """
253        return self._AR[aix]
254
255    def residues2atoms_1d(self, rix):
256        """Get atom indices collectively represented by given residue indices.
257
258        Parameters
259        ----------
260        rix : array
261            residue indices
262
263        Returns
264        -------
265        aix : array
266            indices of atoms present in residues, collectively
267
268        """
269        try:
270            return np.concatenate([self._RA[r] for r in rix])
271        except TypeError:  # integers aren't iterable, raises TypeError
272            return self._RA[rix].copy()  # don't accidentally send a view!
273
274    def residues2atoms_2d(self, rix):
275        """Get atom indices represented by each residue index.
276
277        Parameters
278        ----------
279        rix : array
280            residue indices
281
282        Returns
283        -------
284        raix : list
285            each element corresponds to a residue index, in order given in
286            `rix`, with each element being an array of the atom indices present
287            in that residue
288
289        """
290        try:
291            return [self._RA[r].copy() for r in rix]
292        except TypeError:
293            return [self._RA[rix].copy()]  # why would this be singular for 2d?
294
295    def residues2segments(self, rix):
296        """Get segment indices for each residue.
297
298        Parameters
299        ----------
300        rix : array
301            residue indices
302
303        Returns
304        -------
305        six : array
306            segment index for each residue
307
308        """
309        return self._RS[rix]
310
311    def segments2residues_1d(self, six):
312        """Get residue indices collectively represented by given segment indices
313
314        Parameters
315        ----------
316        six : array
317            segment indices
318
319        Returns
320        -------
321        rix : array
322            sorted indices of residues present in segments, collectively
323
324        """
325        try:
326            return np.concatenate([self._SR[s] for s in six])
327        except TypeError:
328            return self._SR[six].copy()
329
330    def segments2residues_2d(self, six):
331        """Get residue indices represented by each segment index.
332
333        Parameters
334        ----------
335        six : array
336            residue indices
337
338        Returns
339        -------
340        srix : list
341            each element corresponds to a segment index, in order given in
342            `six`, with each element being an array of the residue indices
343            present in that segment
344
345        """
346        try:
347            return [self._SR[s].copy() for s in six]
348        except TypeError:
349            return [self._SR[six].copy()]
350
351    # Compound moves, does 2 translations
352    def atoms2segments(self, aix):
353        """Get segment indices for each atom.
354
355        Parameters
356        ----------
357        aix : array
358            atom indices
359
360        Returns
361        -------
362        rix : array
363            segment index for each atom
364
365        """
366        rix = self.atoms2residues(aix)
367        return self.residues2segments(rix)
368
369    def segments2atoms_1d(self, six):
370        """Get atom indices collectively represented by given segment indices.
371
372        Parameters
373        ----------
374        six : array
375            segment indices
376
377        Returns
378        -------
379        aix : array
380            sorted indices of atoms present in segments, collectively
381
382        """
383        rixs = self.segments2residues_2d(six)
384        return np.concatenate([self.residues2atoms_1d(rix)
385                               for rix in rixs])
386
387    def segments2atoms_2d(self, six):
388        """Get atom indices represented by each segment index.
389
390        Parameters
391        ----------
392        six : array
393            residue indices
394
395        Returns
396        -------
397        saix : list
398            each element corresponds to a segment index, in order given in
399            `six`, with each element being an array of the atom indices present
400            in that segment
401
402        """
403        # residues in EACH
404        rixs = self.segments2residues_2d(six)
405        return [self.residues2atoms_1d(rix) for rix in rixs]
406
407    # Move between different groups.
408    def move_atom(self, aix, rix):
409        """Move aix to be in rix"""
410        self._AR[aix] = rix
411        self._RA = make_downshift_arrays(self._AR, self.n_residues)
412
413    def move_residue(self, rix, six):
414        """Move rix to be in six"""
415        self._RS[rix] = six
416        self._SR = make_downshift_arrays(self._RS, self.n_segments)
417
418    def add_Residue(self, segidx):
419        # segidx - index of parent
420        self.n_residues += 1
421        self._RA = make_downshift_arrays(self._AR, self.n_residues)
422        self._RS = np.concatenate([self._RS, np.array([segidx])])
423        self._SR = make_downshift_arrays(self._RS, self.n_segments)
424
425        return self.n_residues - 1
426
427    def add_Segment(self):
428        self.n_segments += 1
429        # self._RS remains the same, no residues point to the new segment yet
430        self._SR = make_downshift_arrays(self._RS, self.n_segments)
431
432        return self.n_segments - 1
433
434
435class Topology(object):
436    """In-memory, array-based topology database.
437
438    The topology model of MDanalysis features atoms, which must each be a
439    member of one residue. Each residue, in turn, must be a member of one
440    segment. The details of maintaining this heirarchy, and mappings of atoms
441    to residues, residues to segments, and vice-versa, are handled internally
442    by this object.
443
444    """
445
446    def __init__(self, n_atoms=1, n_res=1, n_seg=1,
447                 attrs=None,
448                 atom_resindex=None,
449                 residue_segindex=None):
450        """
451        Parameters
452        ----------
453        n_atoms : int
454            number of atoms in topology. Must be larger then 1 at each level
455        n_residues : int
456            number of residues in topology. Must be larger then 1 at each level
457        n_segments : int
458            number of segments in topology. Must be larger then 1 at each level
459        attrs : TopologyAttr objects
460            components of the topology to be included
461        atom_resindex : array
462            1-D array giving the resindex of each atom in the system
463        residue_segindex : array
464            1-D array giving the segindex of each residue in the system
465
466        """
467        self.tt = TransTable(n_atoms, n_res, n_seg,
468                             atom_resindex=atom_resindex,
469                             residue_segindex=residue_segindex)
470
471        if attrs is None:
472            attrs = []
473        # add core TopologyAttrs that give access to indices
474        attrs.extend((Atomindices(), Resindices(), Segindices()))
475
476        # attach the TopologyAttrs
477        self.attrs = []
478        for topologyattr in attrs:
479            self.add_TopologyAttr(topologyattr)
480
481    def copy(self):
482        """Return a deepcopy of this Topology"""
483        new = self.__class__(1, 1, 1)
484        # copy the tt
485        new.tt = self.tt.copy()
486        # remove indices
487        for attr in self.attrs:
488            if isinstance(attr, (Atomindices, Resindices, Segindices)):
489                continue
490            new.add_TopologyAttr(attr.copy())
491        return new
492
493    @property
494    def n_atoms(self):
495        return self.tt.n_atoms
496
497    @property
498    def n_residues(self):
499        return self.tt.n_residues
500
501    @property
502    def n_segments(self):
503        return self.tt.n_segments
504
505    def add_TopologyAttr(self, topologyattr):
506        """Add a new TopologyAttr to the Topology.
507
508        Parameters
509        ----------
510        topologyattr : TopologyAttr
511
512        """
513        self.attrs.append(topologyattr)
514        topologyattr.top = self
515        self.__setattr__(topologyattr.attrname, topologyattr)
516
517    @property
518    def guessed_attributes(self):
519        """A list of the guessed attributes in this topology"""
520        return filter(lambda x: x.is_guessed, self.attrs)
521
522    @property
523    def read_attributes(self):
524        """A list of the attributes read from the topology"""
525        return filter(lambda x: not x.is_guessed, self.attrs)
526
527    def add_Residue(self, segment, **new_attrs):
528        """
529        Returns
530        -------
531        residx of the new Residue
532
533        Raises
534        ------
535        NoDataError
536          If not all data was provided.  This error is raised before any
537        """
538        # Check that all data is here before making any changes
539        for attr in self.attrs:
540            if not attr.per_object == 'residue':
541                continue
542            if attr.singular not in new_attrs:
543                missing = (attr.singular for attr in self.attrs
544                           if (attr.per_object == 'residue' and
545                               attr.singular not in new_attrs))
546                raise NoDataError("Missing the following attributes for the new"
547                                  " Residue: {}".format(', '.join(missing)))
548
549        # Resize topology table
550        residx = self.tt.add_Residue(segment.segindex)
551
552        # Add new value to each attribute
553        for attr in self.attrs:
554            if not attr.per_object == 'residue':
555                continue
556            newval = new_attrs[attr.singular]
557            attr.values = np.concatenate([attr.values, np.array([newval])])
558
559        return residx
560
561    def add_Segment(self, **new_attrs):
562        for attr in self.attrs:
563            if attr.per_object == 'segment':
564                if attr.singular not in new_attrs:
565                    missing = (attr.singular for attr in self.attrs
566                               if (attr.per_object == 'segment' and
567                                   attr.singular not in new_attrs))
568                    raise NoDataError("Missing the following attributes for the"
569                                      " new Segment: {}"
570                                      "".format(', '.join(missing)))
571
572        segidx = self.tt.add_Segment()
573
574        for attr in self.attrs:
575            if not attr.per_object == 'segment':
576                continue
577            newval = new_attrs[attr.singular]
578            attr.values = np.concatenate([attr.values, np.array([newval])])
579
580        return segidx
581