1# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- 2# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 3# 4# MDAnalysis --- https://www.mdanalysis.org 5# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors 6# (see the file AUTHORS for the full list of names) 7# 8# Released under the GNU Public Licence, v2 or any higher version 9# 10# Please cite your use of MDAnalysis in published work: 11# 12# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, 13# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. 14# MDAnalysis: A Python package for the rapid analysis of molecular dynamics 15# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th 16# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. 17# 18# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. 19# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. 20# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 21# 22""" 23Calculating root mean square quantities --- :mod:`MDAnalysis.analysis.rms` 24========================================================================== 25 26:Author: Oliver Beckstein, David L. Dotson, John Detlefs 27:Year: 2016 28:Copyright: GNU Public License v2 29 30.. versionadded:: 0.7.7 31.. versionchanged:: 0.11.0 32 Added :class:`RMSF` analysis. 33.. versionchanged:: 0.16.0 34 Refactored RMSD to fit AnalysisBase API 35 36The module contains code to analyze root mean square quantities such 37as the coordinat root mean square distance (:class:`RMSD`) or the 38per-residue root mean square fluctuations (:class:`RMSF`). 39 40This module uses the fast QCP algorithm [Theobald2005]_ to calculate 41the root mean square distance (RMSD) between two coordinate sets (as 42implemented in 43:func:`MDAnalysis.lib.qcprot.CalcRMSDRotationalMatrix`). 44 45When using this module in published work please cite [Theobald2005]_. 46 47 48See Also 49-------- 50:mod:`MDAnalysis.analysis.align` 51 aligning structures based on RMSD 52:mod:`MDAnalysis.lib.qcprot` 53 implements the fast RMSD algorithm. 54 55 56Example applications 57-------------------- 58 59Calculating RMSD for multiple domains 60~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 61 62In this example we will globally fit a protein to a reference 63structure and investigate the relative movements of domains by 64computing the RMSD of the domains to the reference. The example is a 65DIMS trajectory of adenylate kinase, which samples a large 66closed-to-open transition. The protein consists of the CORE, LID, and 67NMP domain. 68 69* superimpose on the closed structure (frame 0 of the trajectory), 70 using backbone atoms 71 72* calculate the backbone RMSD and RMSD for CORE, LID, NMP (backbone atoms) 73 74The trajectory is included with the test data files. The data in 75:attr:`RMSD.rmsd` is plotted with :func:`matplotlib.pyplot.plot`:: 76 77 import MDAnalysis 78 from MDAnalysis.tests.datafiles import PSF,DCD,CRD 79 u = MDAnalysis.Universe(PSF,DCD) 80 ref = MDAnalysis.Universe(PSF,DCD) # reference closed AdK (1AKE) (with the default ref_frame=0) 81 #ref = MDAnalysis.Universe(PSF,CRD) # reference open AdK (4AKE) 82 83 import MDAnalysis.analysis.rms 84 85 R = MDAnalysis.analysis.rms.RMSD(u, ref, 86 select="backbone", # superimpose on whole backbone of the whole protein 87 groupselections=["backbone and (resid 1-29 or resid 60-121 or resid 160-214)", # CORE 88 "backbone and resid 122-159", # LID 89 "backbone and resid 30-59"], # NMP 90 filename="rmsd_all_CORE_LID_NMP.dat") 91 R.run() 92 93 import matplotlib.pyplot as plt 94 rmsd = R.rmsd.T # transpose makes it easier for plotting 95 time = rmsd[1] 96 fig = plt.figure(figsize=(4,4)) 97 ax = fig.add_subplot(111) 98 ax.plot(time, rmsd[2], 'k-', label="all") 99 ax.plot(time, rmsd[3], 'k--', label="CORE") 100 ax.plot(time, rmsd[4], 'r--', label="LID") 101 ax.plot(time, rmsd[5], 'b--', label="NMP") 102 ax.legend(loc="best") 103 ax.set_xlabel("time (ps)") 104 ax.set_ylabel(r"RMSD ($\\AA$)") 105 fig.savefig("rmsd_all_CORE_LID_NMP_ref1AKE.pdf") 106 107 108Functions 109--------- 110 111.. autofunction:: rmsd 112 113Analysis classes 114---------------- 115 116.. autoclass:: RMSD 117 :members: 118 :inherited-members: 119 120 .. attribute:: rmsd 121 122 Contains the time series of the RMSD as an N×3 :class:`numpy.ndarray` 123 array with content ``[[frame, time (ps), RMSD (A)], [...], ...]``. 124 125.. autoclass:: RMSF 126 :members: 127 :inherited-members: 128 129 .. attribute:: rmsf 130 131 Results are stored in this N-length :class:`numpy.ndarray` array, 132 giving RMSFs for each of the given atoms. 133 134""" 135from __future__ import division, absolute_import 136 137from six.moves import zip 138from six import string_types 139 140import numpy as np 141 142import logging 143import warnings 144 145import MDAnalysis.lib.qcprot as qcp 146from MDAnalysis.analysis.base import AnalysisBase 147from MDAnalysis.exceptions import SelectionError, NoDataError 148from MDAnalysis.lib.log import ProgressMeter 149from MDAnalysis.lib.util import asiterable, iterable, get_weights, deprecate 150 151 152logger = logging.getLogger('MDAnalysis.analysis.rmsd') 153 154 155def rmsd(a, b, weights=None, center=False, superposition=False): 156 r"""Returns RMSD between two coordinate sets `a` and `b`. 157 158 `a` and `b` are arrays of the coordinates of N atoms of shape 159 :math:`N times 3` as generated by, e.g., 160 :meth:`MDAnalysis.core.groups.AtomGroup.positions`. 161 162 Note 163 ---- 164 If you use trajectory data from simulations performed under **periodic 165 boundary conditions** then you *must make your molecules whole* before 166 performing RMSD calculations so that the centers of mass of the mobile and 167 reference structure are properly superimposed. 168 169 170 Parameters 171 ---------- 172 a : array_like 173 coordinates to align to `b` 174 b : array_like 175 coordinates to align to (same shape as `a`) 176 weights : array_like (optional) 177 1D array with weights, use to compute weighted average 178 center : bool (optional) 179 subtract center of geometry before calculation. With weights given 180 compute weighted average as center. 181 superposition : bool (optional) 182 perform a rotational and translational superposition with the fast QCP 183 algorithm [Theobald2005]_ before calculating the RMSD; implies 184 ``center=True``. 185 186 Returns 187 ------- 188 rmsd : float 189 RMSD between `a` and `b` 190 191 Notes 192 ----- 193 The RMSD :math:`\rho(t)` as a function of time is calculated as 194 195 .. math:: 196 197 \rho(t) = \sqrt{\frac{1}{N} \sum_{i=1}^N w_i \left(\mathbf{x}_i(t) 198 - \mathbf{x}_i^{\text{ref}}\right)^2} 199 200 It is the Euclidean distance in configuration space of the current 201 configuration (possibly after optimal translation and rotation) from a 202 reference configuration divided by :math:`1/\sqrt{N}` where :math:`N` is 203 the number of coordinates. 204 205 The weights :math:`w_i` are calculated from the input weights 206 `weights` :math:`w'_i` as relative to the mean: 207 208 .. math:: 209 210 w_i = \frac{w'_i}{\langle w' \rangle} 211 212 213 Example 214 ------- 215 >>> u = Universe(PSF,DCD) 216 >>> bb = u.select_atoms('backbone') 217 >>> A = bb.positions.copy() # coordinates of first frame 218 >>> u.trajectory[-1] # forward to last frame 219 >>> B = bb.positions.copy() # coordinates of last frame 220 >>> rmsd(A, B, center=True) 221 3.9482355416565049 222 223 .. versionchanged: 0.8.1 224 *center* keyword added 225 .. versionchanged: 0.14.0 226 *superposition* keyword added 227 228 """ 229 230 a = np.asarray(a, dtype=np.float64) 231 b = np.asarray(b, dtype=np.float64) 232 N = b.shape[0] 233 234 if a.shape != b.shape: 235 raise ValueError('a and b must have same shape') 236 237 # superposition only works if structures are centered 238 if center or superposition: 239 # make copies (do not change the user data!) 240 # weights=None is equivalent to all weights 1 241 a = a - np.average(a, axis=0, weights=weights) 242 b = b - np.average(b, axis=0, weights=weights) 243 244 if weights is not None: 245 if len(weights) != len(a): 246 raise ValueError('weights must have same length as a and b') 247 # weights are constructed as relative to the mean 248 weights = np.asarray(weights, dtype=np.float64) / np.mean(weights) 249 250 if superposition: 251 return qcp.CalcRMSDRotationalMatrix(a, b, N, None, weights) 252 else: 253 if weights is not None: 254 return np.sqrt(np.sum(weights[:, np.newaxis] 255 * ((a - b) ** 2)) / N) 256 else: 257 return np.sqrt(np.sum((a - b) ** 2) / N) 258 259 260def process_selection(select): 261 """Return a canonical selection dictionary. 262 263 Parameters 264 ---------- 265 select : str or tuple or dict 266 267 - `str` -> Any valid string selection 268 - `dict` -> ``{'mobile':sel1, 'reference':sel2}`` 269 - `tuple` -> ``(sel1, sel2)`` 270 271 Returns 272 ------- 273 dict 274 selections for 'reference' and 'mobile'. Values are guarenteed to be 275 iterable (so that one can provide selections to retain order) 276 277 Notes 278 ----- 279 The dictionary input for `select` can be generated by 280 :func:`fasta2select` based on a ClustalW_ or STAMP_ sequence alignment. 281 """ 282 283 if isinstance(select, string_types): 284 select = {'reference': str(select), 'mobile': str(select)} 285 elif type(select) is tuple: 286 try: 287 select = {'mobile': select[0], 'reference': select[1]} 288 except IndexError: 289 raise IndexError("select must contain two selection strings " 290 "(reference, mobile)") 291 elif type(select) is dict: 292 # compatability hack to use new nomenclature 293 try: 294 select['mobile'] 295 select['reference'] 296 except KeyError: 297 raise KeyError("select dictionary must contain entries for keys " 298 "'mobile' and 'reference'.") 299 else: 300 raise TypeError("'select' must be either a string, 2-tuple, or dict") 301 select['mobile'] = asiterable(select['mobile']) 302 select['reference'] = asiterable(select['reference']) 303 return select 304 305 306class RMSD(AnalysisBase): 307 r"""Class to perform RMSD analysis on a trajectory. 308 309 The RMSD will be computed for two groups of atoms and all frames in the 310 trajectory belonging to `atomgroup`. The groups of atoms are obtained by 311 applying the selection selection `select` to the changing `atomgroup` and 312 the fixed `reference`. 313 314 Note 315 ---- 316 If you use trajectory data from simulations performed under **periodic 317 boundary conditions** then you *must make your molecules whole* before 318 performing RMSD calculations so that the centers of mass of the selected 319 and reference structure are properly superimposed. 320 321 322 Run the analysis with :meth:`RMSD.run`, which stores the results 323 in the array :attr:`RMSD.rmsd`. 324 325 """ 326 def __init__(self, atomgroup, reference=None, select='all', 327 groupselections=None, filename="rmsd.dat", 328 weights=None, tol_mass=0.1, ref_frame=0, **kwargs): 329 # DEPRECATION: remove filename kwarg in 1.0 330 r"""Parameters 331 ---------- 332 atomgroup : AtomGroup or Universe 333 Group of atoms for which the RMSD is calculated. If a trajectory is 334 associated with the atoms then the computation iterates over the 335 trajectory. 336 reference : AtomGroup or Universe (optional) 337 Group of reference atoms; if ``None`` then the current frame of 338 `atomgroup` is used. 339 select : str or dict or tuple (optional) 340 The selection to operate on; can be one of: 341 342 1. any valid selection string for 343 :meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` that 344 produces identical selections in `atomgroup` and `reference`; or 345 346 2. a dictionary ``{'mobile': sel1, 'reference': sel2}`` where *sel1* 347 and *sel2* are valid selection strings that are applied to 348 `atomgroup` and `reference` respectively (the 349 :func:`MDAnalysis.analysis.align.fasta2select` function returns such 350 a dictionary based on a ClustalW_ or STAMP_ sequence alignment); or 351 352 3. a tuple ``(sel1, sel2)`` 353 354 When using 2. or 3. with *sel1* and *sel2* then these selection strings 355 are applied to `atomgroup` and `reference` respectively and should 356 generate *groups of equivalent atoms*. *sel1* and *sel2* can each also 357 be a *list of selection strings* to generate a 358 :class:`~MDAnalysis.core.groups.AtomGroup` with defined atom order as 359 described under :ref:`ordered-selections-label`). 360 361 groupselections : list (optional) 362 A list of selections as described for `select`, with the difference 363 that these selections are *always applied to the full universes*, 364 i.e., ``atomgroup.universe.select_atoms(sel1)`` and 365 ``reference.universe.select_atoms(sel2)``. Each selection describes 366 additional RMSDs to be computed *after the structures have been 367 superimposed* according to `select`. No additional fitting is 368 performed.The output contains one additional column for each 369 selection. 370 371 .. Note:: Experimental feature. Only limited error checking 372 implemented. 373 filename : str (optional) 374 write RMSD into file with :meth:`RMSD.save` 375 376 .. deprecated:; 0.19.0 377 `filename` will be removed together with :meth:`save` in 1.0. 378 379 weights : {"mass", ``None``} or array_like (optional) 380 choose weights. With ``"mass"`` uses masses as weights; with ``None`` 381 weigh each atom equally. If a float array of the same length as 382 `atomgroup` is provided, use each element of the `array_like` as a 383 weight for the corresponding atom in `atomgroup`. 384 tol_mass : float (optional) 385 Reject match if the atomic masses for matched atoms differ by more 386 than `tol_mass`. 387 ref_frame : int (optional) 388 frame index to select frame from `reference` 389 verbose : bool (optional) 390 Show detailed progress of the calculation if set to ``True``; the 391 default is ``False``. 392 393 Raises 394 ------ 395 SelectionError 396 If the selections from `atomgroup` and `reference` do not match. 397 TypeError 398 If `weights` is not of the appropriate type; see also 399 :func:`MDAnalysis.lib.util.get_weights` 400 ValueError 401 If `weights` are not compatible with `atomgroup` (not the same 402 length) or if it is not a 1D array (see 403 :func:`MDAnalysis.lib.util.get_weights`). 404 405 A :exc:`ValueError` is also raised if `weights` are not compatible 406 with `groupselections`: only equal weights (``weights=None``) or 407 mass-weighted (``weights="mass"``) are supported for additional 408 `groupselections`. 409 410 Notes 411 ----- 412 The root mean square deviation :math:`\rho(t)` of a group of :math:`N` 413 atoms relative to a reference structure as a function of time is 414 calculated as 415 416 .. math:: 417 418 \rho(t) = \sqrt{\frac{1}{N} \sum_{i=1}^N w_i \left(\mathbf{x}_i(t) 419 - \mathbf{x}_i^{\text{ref}}\right)^2} 420 421 The weights :math:`w_i` are calculated from the input weights `weights` 422 :math:`w'_i` as relative to the mean of the input weights: 423 424 .. math:: 425 426 w_i = \frac{w'_i}{\langle w' \rangle} 427 428 The selected coordinates from `atomgroup` are optimally superimposed 429 (translation and rotation) on the `reference` coordinates at each time step 430 as to minimize the RMSD. Douglas Theobald's fast QCP algorithm 431 [Theobald2005]_ is used for the rotational superposition and to calculate 432 the RMSD (see :mod:`MDAnalysis.lib.qcprot` for implementation details). 433 434 The class runs various checks on the input to ensure that the two atom 435 groups can be compared. This includes a comparison of atom masses (i.e., 436 only the positions of atoms of the same mass will be considered to be 437 correct for comparison). If masses should not be checked, just set 438 `tol_mass` to a large value such as 1000. 439 440 .. _ClustalW: http://www.clustal.org/ 441 .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/ 442 443 444 See Also 445 -------- 446 rmsd 447 448 449 .. versionadded:: 0.7.7 450 .. versionchanged:: 0.8 451 `groupselections` added 452 .. versionchanged:: 0.16.0 453 Flexible weighting scheme with new `weights` keyword. 454 .. deprecated:: 0.16.0 455 Instead of ``mass_weighted=True`` (removal in 0.17.0) use new 456 ``weights='mass'``; refactored to fit with AnalysisBase API 457 .. versionchanged:: 0.17.0 458 removed deprecated `mass_weighted` keyword; `groupselections` 459 are *not* rotationally superimposed any more. 460 .. deprecated:: 0.19.0 461 `filename` will be removed in 1.0 462 463 """ 464 super(RMSD, self).__init__(atomgroup.universe.trajectory, 465 **kwargs) 466 self.atomgroup = atomgroup 467 self.reference = reference if reference is not None else self.atomgroup 468 469 select = process_selection(select) 470 self.groupselections = ([process_selection(s) for s in groupselections] 471 if groupselections is not None else []) 472 self.weights = weights 473 self.tol_mass = tol_mass 474 self.ref_frame = ref_frame 475 self.filename = filename # DEPRECATED in 0.19.0, remove in 1.0.0 476 477 self.ref_atoms = self.reference.select_atoms(*select['reference']) 478 self.mobile_atoms = self.atomgroup.select_atoms(*select['mobile']) 479 480 if len(self.ref_atoms) != len(self.mobile_atoms): 481 err = ("Reference and trajectory atom selections do " 482 "not contain the same number of atoms: " 483 "N_ref={0:d}, N_traj={1:d}".format(self.ref_atoms.n_atoms, 484 self.mobile_atoms.n_atoms)) 485 logger.exception(err) 486 raise SelectionError(err) 487 logger.info("RMS calculation " 488 "for {0:d} atoms.".format(len(self.ref_atoms))) 489 mass_mismatches = (np.absolute((self.ref_atoms.masses - 490 self.mobile_atoms.masses)) > 491 self.tol_mass) 492 493 if np.any(mass_mismatches): 494 # diagnostic output: 495 logger.error("Atoms: reference | mobile") 496 for ar, at in zip(self.ref_atoms, self.mobile_atoms): 497 if ar.name != at.name: 498 logger.error("{0!s:>4} {1:3d} {2!s:>3} {3!s:>3} {4:6.3f}" 499 "| {5!s:>4} {6:3d} {7!s:>3} {8!s:>3}" 500 "{9:6.3f}".format(ar.segid, ar.resid, 501 ar.resname, ar.name, 502 ar.mass, at.segid, at.resid, 503 at.resname, at.name, 504 at.mass)) 505 errmsg = ("Inconsistent selections, masses differ by more than" 506 "{0:f}; mis-matching atoms" 507 "are shown above.".format(self.tol_mass)) 508 logger.error(errmsg) 509 raise SelectionError(errmsg) 510 del mass_mismatches 511 512 # TODO: 513 # - make a group comparison a class that contains the checks above 514 # - use this class for the *select* group and the additional 515 # *groupselections* groups each a dict with reference/mobile 516 self._groupselections_atoms = [ 517 { 518 'reference': self.reference.universe.select_atoms(*s['reference']), 519 'mobile': self.atomgroup.universe.select_atoms(*s['mobile']), 520 } 521 for s in self.groupselections] 522 # sanity check 523 for igroup, (sel, atoms) in enumerate(zip(self.groupselections, 524 self._groupselections_atoms)): 525 if len(atoms['mobile']) != len(atoms['reference']): 526 logger.exception('SelectionError: Group Selection') 527 raise SelectionError( 528 "Group selection {0}: {1} | {2}: Reference and trajectory " 529 "atom selections do not contain the same number of atoms: " 530 "N_ref={3}, N_traj={4}".format( 531 igroup, sel['reference'], sel['mobile'], 532 len(atoms['reference']), len(atoms['mobile']))) 533 534 # Explicitly check for "mass" because this option CAN 535 # be used with groupselection. (get_weights() returns the mass array 536 # for "mass") 537 if not iterable(self.weights) and self.weights == "mass": 538 pass 539 else: 540 self.weights = get_weights(self.mobile_atoms, self.weights) 541 542 # cannot use arbitrary weight array (for superposition) with 543 # groupselections because arrays will not match 544 if (len(self.groupselections) > 0 and ( 545 iterable(self.weights) or self.weights not in ("mass", None))): 546 raise ValueError("groupselections can only be combined with " 547 "weights=None or weights='mass', not a weight " 548 "array.") 549 550 # initialized to note for testing the save function 551 self.rmsd = None 552 553 554 def _prepare(self): 555 self._n_atoms = self.mobile_atoms.n_atoms 556 557 if not iterable(self.weights) and self.weights == 'mass': 558 self.weights = self.ref_atoms.masses 559 if self.weights is not None: 560 self.weights = np.asarray(self.weights, dtype=np.float64) / np.mean(self.weights) 561 562 current_frame = self.reference.universe.trajectory.ts.frame 563 564 try: 565 # Move to the ref_frame 566 # (coordinates MUST be stored in case the ref traj is advanced 567 # elsewhere or if ref == mobile universe) 568 self.reference.universe.trajectory[self.ref_frame] 569 self._ref_com = self.ref_atoms.center(self.weights) 570 # makes a copy 571 self._ref_coordinates = self.ref_atoms.positions - self._ref_com 572 if self._groupselections_atoms: 573 self._groupselections_ref_coords64 = [(self.reference. 574 select_atoms(*s['reference']). 575 positions.astype(np.float64)) for s in 576 self.groupselections] 577 finally: 578 # Move back to the original frame 579 self.reference.universe.trajectory[current_frame] 580 581 self._ref_coordinates64 = self._ref_coordinates.astype(np.float64) 582 583 if self._groupselections_atoms: 584 # Only carry out a rotation if we want to calculate secondary 585 # RMSDs. 586 # R: rotation matrix that aligns r-r_com, x~-x~com 587 # (x~: selected coordinates, x: all coordinates) 588 # Final transformed traj coordinates: x' = (x-x~_com)*R + ref_com 589 self._rot = np.zeros(9, dtype=np.float64) # allocate space 590 self._R = self._rot.reshape(3, 3) 591 else: 592 self._rot = None 593 594 self.rmsd = np.zeros((self.n_frames, 595 3 + len(self._groupselections_atoms))) 596 597 self._pm.format = ("RMSD {rmsd:5.2f} A at frame " 598 "{step:5d}/{numsteps} [{percentage:5.1f}%]\r") 599 self._mobile_coordinates64 = self.mobile_atoms.positions.copy().astype(np.float64) 600 601 def _single_frame(self): 602 mobile_com = self.mobile_atoms.center(self.weights).astype(np.float64) 603 self._mobile_coordinates64[:] = self.mobile_atoms.positions 604 self._mobile_coordinates64 -= mobile_com 605 606 self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time 607 608 if self._groupselections_atoms: 609 # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate 610 # array with float64 datatype (float32 leads to errors up to 1e-3 in 611 # RMSD). Note that R is defined in such a way that it acts **to the 612 # left** so that we can easily use broadcasting and save one 613 # expensive numpy transposition. 614 615 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix( 616 self._ref_coordinates64, self._mobile_coordinates64, 617 self._n_atoms, self._rot, self.weights) 618 619 self._R[:, :] = self._rot.reshape(3, 3) 620 # Transform each atom in the trajectory (use inplace ops to 621 # avoid copying arrays) (Marginally (~3%) faster than 622 # "ts.positions[:] = (ts.positions - x_com) * R + ref_com".) 623 self._ts.positions[:] -= mobile_com 624 625 # R acts to the left & is broadcasted N times. 626 self._ts.positions[:] = np.dot(self._ts.positions, self._R) 627 self._ts.positions += self._ref_com 628 629 # 2) calculate secondary RMSDs (without any further 630 # superposition) 631 for igroup, (refpos, atoms) in enumerate( 632 zip(self._groupselections_ref_coords64, 633 self._groupselections_atoms), 3): 634 self.rmsd[self._frame_index, igroup] = rmsd( 635 refpos, atoms['mobile'].positions, 636 weights=self.weights, 637 center=False, superposition=False) 638 else: 639 # only calculate RMSD by setting the Rmatrix to None (no need 640 # to carry out the rotation as we already get the optimum RMSD) 641 self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix( 642 self._ref_coordinates64, self._mobile_coordinates64, 643 self._n_atoms, None, self.weights) 644 645 self._pm.rmsd = self.rmsd[self._frame_index, 2] 646 647 @deprecate(release="0.19.0", remove="1.0.0", 648 message="You can instead use " 649 "``np.savetxt(filename, RMSD.rmsd)``.") 650 def save(self, filename=None): 651 """Save RMSD from :attr:`RMSD.rmsd` to text file *filename*. 652 653 Parameters 654 ---------- 655 filename : str (optional) 656 if no filename is given the default provided to the constructor is 657 used. 658 659 """ 660 filename = filename or self.filename 661 if filename is not None: 662 if self.rmsd is None: 663 raise NoDataError("rmsd has not been calculated yet") 664 np.savetxt(filename, self.rmsd) 665 logger.info("Wrote RMSD timeseries to file %r", filename) 666 return filename 667 668 669class RMSF(AnalysisBase): 670 r"""Calculate RMSF of given atoms across a trajectory. 671 672 Note 673 ---- 674 No RMSD-superposition is performed; it is assumed that the user is 675 providing a trajectory where the protein of interest has been structurally 676 aligned to a reference structure (see the Examples section below). The 677 protein also has be whole because periodic boundaries are not taken into 678 account. 679 680 681 Run the analysis with :meth:`RMSF.run`, which stores the results 682 in the array :attr:`RMSF.rmsf`. 683 684 """ 685 def __init__(self, atomgroup, **kwargs): 686 r"""Parameters 687 ---------- 688 atomgroup : AtomGroup 689 Atoms for which RMSF is calculated 690 start : int (optional) 691 starting frame, default None becomes 0. 692 stop : int (optional) 693 Frame index to stop analysis. Default: None becomes 694 n_frames. Iteration stops *before* this frame number, 695 which means that the trajectory would be read until the end. 696 step : int (optional) 697 step between frames, default None becomes 1. 698 verbose : bool (optional) 699 Show detailed progress of the calculation if set to ``True``; the 700 default is ``False``. 701 702 Raises 703 ------ 704 ValueError 705 raised if negative values are calculated, which indicates that a 706 numerical overflow or underflow occured 707 708 709 Notes 710 ----- 711 The root mean square fluctuation of an atom :math:`i` is computed as the 712 time average 713 714 .. math:: 715 716 \rho_i = \sqrt{\left\langle (\mathbf{x}_i - \langle\mathbf{x}_i\rangle)^2 \right\rangle} 717 718 No mass weighting is performed. 719 720 This method implements an algorithm for computing sums of squares while 721 avoiding overflows and underflows [Welford1962]_. 722 723 724 Examples 725 -------- 726 727 In this example we calculate the residue RMSF fluctuations by analyzing 728 the :math:`\text{C}_\alpha` atoms. First we need to fit the trajectory 729 to the average structure as a reference. That requires calculating the 730 average structure first. Because we need to analyze and manipulate the 731 same trajectory multiple times, we are going to load it into memory 732 using the :mod:`~MDAnalysis.coordinates.MemoryReader`. (If your 733 trajectory does not fit into memory, you will need to :ref:`write out 734 intermediate trajectories <writing-trajectories>` to disk or 735 :ref:`generate an in-memory universe 736 <creating-in-memory-trajectory-label>` that only contains, say, the 737 protein):: 738 739 import MDAnalysis as mda 740 from MDAnalysis.analysis import align 741 742 from MDAnalysis.tests.datafiles import TPR, XTC 743 744 u = mda.Universe(TPR, XTC, in_memory=True) 745 protein = u.select_atoms("protein") 746 747 # 1) need a step to center and make whole: this trajectory 748 # contains the protein being split across periodic boundaries 749 # 750 # TODO 751 752 # 2) fit to the initial frame to get a better average structure 753 # (the trajectory is changed in memory) 754 prealigner = align.AlignTraj(u, select="protein and name CA", in_memory=True).run() 755 756 # 3) reference = average structure 757 reference_coordinates = u.trajectory.timeseries(asel=protein).mean(axis=1) 758 # make a reference structure (need to reshape into a 1-frame "trajectory") 759 reference = mda.Merge(protein).load_new( 760 reference_coordinates[:, None, :], order="afc") 761 762 We created a new universe ``reference`` that contains a single frame 763 with the averaged coordinates of the protein. Now we need to fit the 764 whole trajectory to the reference by minimizing the RMSD. We use 765 :class:`MDAnalysis.analysis.align.AlignTraj`:: 766 767 aligner = align.AlignTraj(u, reference, select="protein and name CA", in_memory=True).run() 768 769 The trajectory is now fitted to the reference (the RMSD is stored as 770 `aligner.rmsd` for further inspection). Now we can calculate the RMSF:: 771 772 from MDAnalysis.analysis.rms import RMSF 773 774 calphas = protein.select_atoms("name CA") 775 rmsfer = RMSF(calphas, verbose=True).run() 776 777 and plot:: 778 779 import matplotlib.pyplot as plt 780 781 plt.plot(calphas.resnums, rmsfer.rmsf) 782 783 784 785 References 786 ---------- 787 .. [Welford1962] B. P. Welford (1962). "Note on a Method for 788 Calculating Corrected Sums of Squares and Products." Technometrics 789 4(3):419-420. 790 791 792 .. versionadded:: 0.11.0 793 .. versionchanged:: 0.16.0 794 refactored to fit with AnalysisBase API 795 .. deprecated:: 0.16.0 796 the keyword argument `quiet` is deprecated in favor of `verbose`. 797 .. versionchanged:: 0.17.0 798 removed unused keyword `weights` 799 800 """ 801 super(RMSF, self).__init__(atomgroup.universe.trajectory, **kwargs) 802 self.atomgroup = atomgroup 803 804 def _prepare(self): 805 self.sumsquares = np.zeros((self.atomgroup.n_atoms, 3)) 806 self.mean = self.sumsquares.copy() 807 808 def _single_frame(self): 809 k = self._frame_index 810 self.sumsquares += (k / (k+1.0)) * (self.atomgroup.positions - self.mean) ** 2 811 self.mean = (k * self.mean + self.atomgroup.positions) / (k + 1) 812 813 def _conclude(self): 814 k = self._frame_index 815 self.rmsf = np.sqrt(self.sumsquares.sum(axis=1) / (k + 1)) 816 817 if not (self.rmsf >= 0).all(): 818 raise ValueError("Some RMSF values negative; overflow " + 819 "or underflow occurred") 820