1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- 2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 3# 4# MDAnalysis --- https://www.mdanalysis.org 5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors 6# (see the file AUTHORS for the full list of names) 7# 8# Released under the GNU Public Licence, v2 or any higher version 9# 10# Please cite your use of MDAnalysis in published work: 11# 12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, 13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. 14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics 15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th 16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. 17# 18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. 19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. 20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 21# 22 23"""\ 24Core Topology object --- :mod:`MDAnalysis.core.topology` 25======================================================== 26 27.. versionadded:: 0.16.0 28 29:class:`Topology` is the core object that holds all topology information. 30 31TODO: Add in-depth discussion. 32 33Notes 34----- 35For developers: In MDAnalysis 0.16.0 this new topology system was 36introduced and discussed as issue `#363`_; this issue contains key 37information and discussions on the new system. The issue number *363* 38is also being used as a short-hand in discussions to refer to the new 39topology system. 40 41 42.. _`#363`: https://github.com/MDAnalysis/mdanalysis/issues/363 43 44Classes 45------- 46 47.. autoclass:: Topology 48 :members: 49.. autoclass:: TransTable 50 :members: 51 52Helper functions 53---------------- 54 55.. autofunction:: make_downshift_arrays 56 57""" 58from __future__ import absolute_import 59 60from six.moves import zip 61import numpy as np 62 63from .topologyattrs import Atomindices, Resindices, Segindices 64from ..exceptions import NoDataError 65 66 67# TODO Notes: 68# Could make downshift tables lazily built! This would 69# a) Make these not get built when not used 70# b) Optimise moving multiple atoms between residues as only built once 71# afterwards 72 73# Could optimise moves by only updating the two parent tables rather than 74# rebuilding everything! 75 76 77def make_downshift_arrays(upshift, nparents): 78 """From an upwards translation table, create the opposite direction 79 80 Turns a many to one mapping (eg atoms to residues) to a one to many mapping 81 (residues to atoms) 82 83 Parameters 84 ---------- 85 upshift : array_like 86 Array of integers describing which parent each item belongs to 87 nparents : integer 88 Total number of parents that exist. 89 90 Returns 91 ------- 92 downshift : array_like (dtype object) 93 An array of arrays, each containing the indices of the children 94 of each parent. Length `nparents` + 1 95 96 Examples 97 -------- 98 99 To find the residue to atom mappings for a given atom to residue mapping: 100 101 >>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2]) 102 >>> make_downshift_arrays(atom2res) 103 array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object) 104 105 Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5 106 107 Notes 108 ----- 109 The final entry in the return array will be ``None`` to ensure that the 110 dtype of the array is :class:`object`. 111 112 .. warning:: This means negative indexing should **never** 113 be used with these arrays. 114 """ 115 order = np.argsort(upshift) 116 117 upshift_sorted = upshift[order] 118 borders = [None] + list(np.nonzero(np.diff(upshift_sorted))[0] + 1) + [None] 119 120 # returns an array of arrays 121 downshift = [] 122 counter = -1 123 # don't use enumerate, we modify counter in place 124 for x, y in zip(borders[:-1], borders[1:]): 125 counter += 1 126 # If parent is skipped, eg (0, 0, 2, 2, etc) 127 while counter != upshift[order[x:y][0]]: 128 downshift.append(np.array([], dtype=np.intp)) 129 counter += 1 130 downshift.append(np.sort(np.array(order[x:y], copy=True, dtype=np.intp))) 131 # Add entries for childless parents at end of range 132 while counter < (nparents - 1): 133 downshift.append(np.array([], dtype=np.intp)) 134 counter += 1 135 # Add None to end of array to force it to be of type Object 136 # Without this, a rectangular array gets squashed into a single array 137 downshift.append(None) 138 return np.array(downshift, dtype=object) 139 140 141class TransTable(object): 142 """Membership tables with methods to translate indices across levels. 143 144 There are three levels; Atom, Residue and Segment. Each Atom **must** 145 belong in a Residue, each Residue **must** belong to a Segment. 146 147 When translating upwards, eg finding which Segment a Residue belongs in, 148 a single numpy array is returned. When translating downwards, two options 149 are available; a concatenated result (suffix `_1`) or a list for each parent 150 object (suffix `_2d`). 151 152 Parameters 153 ---------- 154 n_atoms : int 155 number of atoms in topology 156 n_residues : int 157 number of residues in topology 158 n_segments : int 159 number of segments in topology 160 atom_resindex : 1-D array 161 resindex for each atom in the topology; the number of unique values in 162 this array must be <= `n_residues`, and the array must be length 163 `n_atoms`; giving None defaults to placing all atoms in residue 0 164 residue_segindex : 1-D array 165 segindex for each residue in the topology; the number of unique values 166 in this array must be <= `n_segments`, and the array must be length 167 `n_residues`; giving None defaults to placing all residues in segment 0 168 169 170 Attributes 171 ---------- 172 n_atoms : int 173 number of atoms in topology 174 n_residues : int 175 number of residues in topology 176 n_segments : int 177 number of segments in topology 178 size 179 tuple describing the shape of the TransTable 180 181 Methods 182 ------- 183 atoms2residues(aix) 184 Returns the residue index for many atom indices 185 residues2atoms_1d(rix) 186 All atoms in the residues represented by *rix* 187 residues2atoms_2d(rix) 188 List of atom indices for each residue in *rix* 189 residues2segments(rix) 190 Segment indices for each residue in *rix* 191 segments2residues_1d(six) 192 Similar to `residues2atoms_1d` 193 segments2residues_2d(six) 194 Similar to `residues2atoms_2d` 195 atoms2segments(aix) 196 Segment indices for each atom in *aix* 197 segments2atoms_1d(six) 198 Similar to `residues2atoms_1d` 199 segments2atoms_2d(six) 200 Similar to `residues2atoms_2d` 201 202 """ 203 def __init__(self, 204 n_atoms, n_residues, n_segments, # Size of tables 205 atom_resindex=None, residue_segindex=None, # Contents of tables 206 ): 207 self.n_atoms = n_atoms 208 self.n_residues = n_residues 209 self.n_segments = n_segments 210 211 # built atom-to-residue mapping, and vice-versa 212 if atom_resindex is None: 213 self._AR = np.zeros(n_atoms, dtype=np.intp) 214 else: 215 self._AR = np.asarray(atom_resindex, dtype=np.intp).copy() 216 if not len(self._AR) == n_atoms: 217 raise ValueError("atom_resindex must be len n_atoms") 218 self._RA = make_downshift_arrays(self._AR, n_residues) 219 220 # built residue-to-segment mapping, and vice-versa 221 if residue_segindex is None: 222 self._RS = np.zeros(n_residues, dtype=np.intp) 223 else: 224 self._RS = np.asarray(residue_segindex, dtype=np.intp).copy() 225 if not len(self._RS) == n_residues: 226 raise ValueError("residue_segindex must be len n_residues") 227 self._SR = make_downshift_arrays(self._RS, n_segments) 228 229 def copy(self): 230 """Return a deepcopy of this Transtable""" 231 return self.__class__(self.n_atoms, self.n_residues, self.n_segments, 232 atom_resindex=self._AR, residue_segindex=self._RS) 233 234 @property 235 def size(self): 236 """The shape of the table, (n_atoms, n_residues, n_segments)""" 237 return (self.n_atoms, self.n_residues, self.n_segments) 238 239 def atoms2residues(self, aix): 240 """Get residue indices for each atom. 241 242 Parameters 243 ---------- 244 aix : array 245 atom indices 246 247 Returns 248 ------- 249 rix : array 250 residue index for each atom 251 252 """ 253 return self._AR[aix] 254 255 def residues2atoms_1d(self, rix): 256 """Get atom indices collectively represented by given residue indices. 257 258 Parameters 259 ---------- 260 rix : array 261 residue indices 262 263 Returns 264 ------- 265 aix : array 266 indices of atoms present in residues, collectively 267 268 """ 269 try: 270 return np.concatenate([self._RA[r] for r in rix]) 271 except TypeError: # integers aren't iterable, raises TypeError 272 return self._RA[rix].copy() # don't accidentally send a view! 273 274 def residues2atoms_2d(self, rix): 275 """Get atom indices represented by each residue index. 276 277 Parameters 278 ---------- 279 rix : array 280 residue indices 281 282 Returns 283 ------- 284 raix : list 285 each element corresponds to a residue index, in order given in 286 `rix`, with each element being an array of the atom indices present 287 in that residue 288 289 """ 290 try: 291 return [self._RA[r].copy() for r in rix] 292 except TypeError: 293 return [self._RA[rix].copy()] # why would this be singular for 2d? 294 295 def residues2segments(self, rix): 296 """Get segment indices for each residue. 297 298 Parameters 299 ---------- 300 rix : array 301 residue indices 302 303 Returns 304 ------- 305 six : array 306 segment index for each residue 307 308 """ 309 return self._RS[rix] 310 311 def segments2residues_1d(self, six): 312 """Get residue indices collectively represented by given segment indices 313 314 Parameters 315 ---------- 316 six : array 317 segment indices 318 319 Returns 320 ------- 321 rix : array 322 sorted indices of residues present in segments, collectively 323 324 """ 325 try: 326 return np.concatenate([self._SR[s] for s in six]) 327 except TypeError: 328 return self._SR[six].copy() 329 330 def segments2residues_2d(self, six): 331 """Get residue indices represented by each segment index. 332 333 Parameters 334 ---------- 335 six : array 336 residue indices 337 338 Returns 339 ------- 340 srix : list 341 each element corresponds to a segment index, in order given in 342 `six`, with each element being an array of the residue indices 343 present in that segment 344 345 """ 346 try: 347 return [self._SR[s].copy() for s in six] 348 except TypeError: 349 return [self._SR[six].copy()] 350 351 # Compound moves, does 2 translations 352 def atoms2segments(self, aix): 353 """Get segment indices for each atom. 354 355 Parameters 356 ---------- 357 aix : array 358 atom indices 359 360 Returns 361 ------- 362 rix : array 363 segment index for each atom 364 365 """ 366 rix = self.atoms2residues(aix) 367 return self.residues2segments(rix) 368 369 def segments2atoms_1d(self, six): 370 """Get atom indices collectively represented by given segment indices. 371 372 Parameters 373 ---------- 374 six : array 375 segment indices 376 377 Returns 378 ------- 379 aix : array 380 sorted indices of atoms present in segments, collectively 381 382 """ 383 rixs = self.segments2residues_2d(six) 384 return np.concatenate([self.residues2atoms_1d(rix) 385 for rix in rixs]) 386 387 def segments2atoms_2d(self, six): 388 """Get atom indices represented by each segment index. 389 390 Parameters 391 ---------- 392 six : array 393 residue indices 394 395 Returns 396 ------- 397 saix : list 398 each element corresponds to a segment index, in order given in 399 `six`, with each element being an array of the atom indices present 400 in that segment 401 402 """ 403 # residues in EACH 404 rixs = self.segments2residues_2d(six) 405 return [self.residues2atoms_1d(rix) for rix in rixs] 406 407 # Move between different groups. 408 def move_atom(self, aix, rix): 409 """Move aix to be in rix""" 410 self._AR[aix] = rix 411 self._RA = make_downshift_arrays(self._AR, self.n_residues) 412 413 def move_residue(self, rix, six): 414 """Move rix to be in six""" 415 self._RS[rix] = six 416 self._SR = make_downshift_arrays(self._RS, self.n_segments) 417 418 def add_Residue(self, segidx): 419 # segidx - index of parent 420 self.n_residues += 1 421 self._RA = make_downshift_arrays(self._AR, self.n_residues) 422 self._RS = np.concatenate([self._RS, np.array([segidx])]) 423 self._SR = make_downshift_arrays(self._RS, self.n_segments) 424 425 return self.n_residues - 1 426 427 def add_Segment(self): 428 self.n_segments += 1 429 # self._RS remains the same, no residues point to the new segment yet 430 self._SR = make_downshift_arrays(self._RS, self.n_segments) 431 432 return self.n_segments - 1 433 434 435class Topology(object): 436 """In-memory, array-based topology database. 437 438 The topology model of MDanalysis features atoms, which must each be a 439 member of one residue. Each residue, in turn, must be a member of one 440 segment. The details of maintaining this heirarchy, and mappings of atoms 441 to residues, residues to segments, and vice-versa, are handled internally 442 by this object. 443 444 """ 445 446 def __init__(self, n_atoms=1, n_res=1, n_seg=1, 447 attrs=None, 448 atom_resindex=None, 449 residue_segindex=None): 450 """ 451 Parameters 452 ---------- 453 n_atoms : int 454 number of atoms in topology. Must be larger then 1 at each level 455 n_residues : int 456 number of residues in topology. Must be larger then 1 at each level 457 n_segments : int 458 number of segments in topology. Must be larger then 1 at each level 459 attrs : TopologyAttr objects 460 components of the topology to be included 461 atom_resindex : array 462 1-D array giving the resindex of each atom in the system 463 residue_segindex : array 464 1-D array giving the segindex of each residue in the system 465 466 """ 467 self.tt = TransTable(n_atoms, n_res, n_seg, 468 atom_resindex=atom_resindex, 469 residue_segindex=residue_segindex) 470 471 if attrs is None: 472 attrs = [] 473 # add core TopologyAttrs that give access to indices 474 attrs.extend((Atomindices(), Resindices(), Segindices())) 475 476 # attach the TopologyAttrs 477 self.attrs = [] 478 for topologyattr in attrs: 479 self.add_TopologyAttr(topologyattr) 480 481 def copy(self): 482 """Return a deepcopy of this Topology""" 483 new = self.__class__(1, 1, 1) 484 # copy the tt 485 new.tt = self.tt.copy() 486 # remove indices 487 for attr in self.attrs: 488 if isinstance(attr, (Atomindices, Resindices, Segindices)): 489 continue 490 new.add_TopologyAttr(attr.copy()) 491 return new 492 493 @property 494 def n_atoms(self): 495 return self.tt.n_atoms 496 497 @property 498 def n_residues(self): 499 return self.tt.n_residues 500 501 @property 502 def n_segments(self): 503 return self.tt.n_segments 504 505 def add_TopologyAttr(self, topologyattr): 506 """Add a new TopologyAttr to the Topology. 507 508 Parameters 509 ---------- 510 topologyattr : TopologyAttr 511 512 """ 513 self.attrs.append(topologyattr) 514 topologyattr.top = self 515 self.__setattr__(topologyattr.attrname, topologyattr) 516 517 @property 518 def guessed_attributes(self): 519 """A list of the guessed attributes in this topology""" 520 return filter(lambda x: x.is_guessed, self.attrs) 521 522 @property 523 def read_attributes(self): 524 """A list of the attributes read from the topology""" 525 return filter(lambda x: not x.is_guessed, self.attrs) 526 527 def add_Residue(self, segment, **new_attrs): 528 """ 529 Returns 530 ------- 531 residx of the new Residue 532 533 Raises 534 ------ 535 NoDataError 536 If not all data was provided. This error is raised before any 537 """ 538 # Check that all data is here before making any changes 539 for attr in self.attrs: 540 if not attr.per_object == 'residue': 541 continue 542 if attr.singular not in new_attrs: 543 missing = (attr.singular for attr in self.attrs 544 if (attr.per_object == 'residue' and 545 attr.singular not in new_attrs)) 546 raise NoDataError("Missing the following attributes for the new" 547 " Residue: {}".format(', '.join(missing))) 548 549 # Resize topology table 550 residx = self.tt.add_Residue(segment.segindex) 551 552 # Add new value to each attribute 553 for attr in self.attrs: 554 if not attr.per_object == 'residue': 555 continue 556 newval = new_attrs[attr.singular] 557 attr.values = np.concatenate([attr.values, np.array([newval])]) 558 559 return residx 560 561 def add_Segment(self, **new_attrs): 562 for attr in self.attrs: 563 if attr.per_object == 'segment': 564 if attr.singular not in new_attrs: 565 missing = (attr.singular for attr in self.attrs 566 if (attr.per_object == 'segment' and 567 attr.singular not in new_attrs)) 568 raise NoDataError("Missing the following attributes for the" 569 " new Segment: {}" 570 "".format(', '.join(missing))) 571 572 segidx = self.tt.add_Segment() 573 574 for attr in self.attrs: 575 if not attr.per_object == 'segment': 576 continue 577 newval = new_attrs[attr.singular] 578 attr.values = np.concatenate([attr.values, np.array([newval])]) 579 580 return segidx 581