1# ----------------------------------------------------------------------------- 2# Author(s) of Original Implementation: 3# Douglas L. Theobald 4# Department of Biochemistry 5# MS 009 6# Brandeis University 7# 415 South St 8# Waltham, MA 02453 9# USA 10# 11# dtheobald@brandeis.edu 12# 13# Pu Liu 14# Johnson & Johnson Pharmaceutical Research and Development, L.L.C. 15# 665 Stockton Drive 16# Exton, PA 19341 17# USA 18# 19# pliu24@its.jnj.com 20# 21# For the original code written in C see: 22# http://theobald.brandeis.edu/qcp/ 23# 24# 25# Author of Python Port: 26# Joshua L. Adelman 27# Department of Biological Sciences 28# University of Pittsburgh 29# Pittsburgh, PA 15260 30# 31# jla65@pitt.edu 32# 33# 34# If you use this QCP rotation calculation method in a publication, please 35# reference: 36# 37# Douglas L. Theobald (2005) 38# "Rapid calculation of RMSD using a quaternion-based characteristic 39# polynomial." 40# Acta Crystallographica A 61(4):478-480. 41# 42# Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010) 43# "Fast determination of the optimal rotational matrix for macromolecular 44# superpositions." 45# J. Comput. Chem. 31, 1561-1563. 46# 47# 48# Copyright (c) 2009-2010, Pu Liu and Douglas L. Theobald 49# Copyright (c) 2011 Joshua L. Adelman 50# Copyright (c) 2016 Robert R. Delgado 51# All rights reserved. 52# 53# Redistribution and use in source and binary forms, with or without modification, are permitted 54# provided that the following conditions are met: 55# 56# * Redistributions of source code must retain the above copyright notice, this list of 57# conditions and the following disclaimer. 58# * Redistributions in binary form must reproduce the above copyright notice, this list 59# of conditions and the following disclaimer in the documentation and/or other materials 60# provided with the distribution. 61# * Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to 62# endorse or promote products derived from this software without specific prior written 63# permission. 64# 65# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 66# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 67# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 68# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 69# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 70# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 71# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 72# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 73# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 74# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 75# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 76# ----------------------------------------------------------------------------- 77 78""" 79Fast QCP RMSD structure alignment --- :mod:`MDAnalysis.lib.qcprot` 80================================================================== 81 82:Author: Joshua L. Adelman, University of Pittsburgh 83:Author: Robert R. Delgado, Cornell University and Arizona State University 84:Contact: jla65@pitt.edu 85:Year: 2011, 2016 86:Licence: BSD 87 88PyQCPROT_ is a python/cython implementation of Douglas Theobald's QCP 89method for calculating the minimum RMSD between two structures 90[Theobald2005]_ and determining the optimal least-squares rotation 91matrix [Liu2010]_. 92 93A full description of the method, along with the original C implementation can 94be found at http://theobald.brandeis.edu/qcp/ 95 96See Also 97-------- 98MDAnalysis.analysis.align: 99 Align structures using :func:`CalcRMSDRotationalMatrix` 100MDAnalysis.analysis.rms.rmsd: 101 Calculate the RMSD between two structures using 102 :func:`CalcRMSDRotationalMatrix` 103 104 105.. versionchanged:: 0.16.0 106 Call signatures were changed to directly interface with MDAnalysis 107 coordinate arrays: shape (N, 3) 108 109References 110---------- 111 112If you use this QCP rotation calculation method in a publication, please 113reference: 114 115.. [Theobald2005] Douglas L. Theobald (2005) 116 "Rapid calculation of RMSD using a quaternion-based characteristic 117 polynomial." Acta Crystallographica A 61(4):478-480. 118 119.. [Liu2010] Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010) 120 "Fast determination of the optimal rotational matrix for macromolecular 121 superpositions." J. Comput. Chem. 31, 1561-1563. 122 123.. _PyQCPROT: https://github.com/synapticarbors/pyqcprot 124 125 126Functions 127--------- 128 129Users will typically use the :func:`CalcRMSDRotationalMatrix` function. 130 131.. autofunction:: CalcRMSDRotationalMatrix 132 133.. autofunction:: InnerProduct 134 135.. autofunction:: FastCalcRMSDAndRotation 136 137""" 138 139import numpy as np 140cimport numpy as np 141 142from ..due import due, BibTeX, Doi 143 144# providing DOI for this citation doesnt seem to work (as of 22/04/18) 145_QCBIB = """\ 146@article{qcprot2, 147author = {Pu Liu and Dimitris K. Agrafiotis and Douglas L. Theobald}, 148title = {Fast determination of the optimal rotational matrix for macromolecular superpositions}, 149journal = {Journal of Computational Chemistry}, 150volume = {31}, 151number = {7}, 152pages = {1561-1563}, 153doi = {10.1002/jcc.21439}, 154} 155""" 156 157due.cite(Doi("10.1107/s0108767305015266"), 158 description="QCProt implementation", 159 path="MDAnalysis.lib.qcprot", 160 cite_module=True) 161due.cite(BibTeX(_QCBIB), 162 description="QCProt implementation", 163 path="MDAnalysis.lib.qcprot", 164 cite_module=True) 165 166 167import cython 168 169cdef extern from "math.h": 170 double sqrt(double x) 171 double fabs(double x) 172 173@cython.boundscheck(False) 174@cython.wraparound(False) 175def InnerProduct(np.ndarray[np.float64_t, ndim=1] A, 176 np.ndarray[np.float64_t, ndim=2] coords1, 177 np.ndarray[np.float64_t, ndim=2] coords2, 178 int N, 179 np.ndarray[np.float64_t, ndim=1] weight): 180 """Calculate the inner product of two structures. 181 182 Parameters 183 ---------- 184 A : ndarray np.float64_t 185 result inner product array, modified in place 186 coords1 : ndarray np.float64_t 187 reference structure 188 coord2 : ndarray np.float64_t 189 candidate structure 190 N : int 191 size of system 192 weights : ndarray np.float64_t (optional) 193 use to calculate weighted inner product 194 195 196 197 Returns 198 ------- 199 E0 : float 200 0.5 * (G1 + G2), can be used as input for :func:`FastCalcRMSDAndRotation` 201 202 Notes 203 ----- 204 1. You MUST center the structures, coords1 and coords2, before calling this 205 function. 206 207 2. Coordinates are stored as Nx3 arrays (as everywhere else in MDAnalysis). 208 209 .. versionchanged:: 0.16.0 210 Array size changed from 3xN to Nx3. 211 """ 212 213 cdef double x1, x2, y1, y2, z1, z2 214 cdef unsigned int i 215 cdef double G1, G2 216 217 G1 = 0.0 218 G2 = 0.0 219 220 A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0 221 222 if (weight is not None): 223 for i in range(N): 224 x1 = weight[i] * coords1[i, 0] 225 y1 = weight[i] * coords1[i, 1] 226 z1 = weight[i] * coords1[i, 2] 227 228 G1 += x1 * coords1[i, 0] + y1 * coords1[i, 1] + z1 * coords1[i, 2] 229 230 x2 = coords2[i, 0] 231 y2 = coords2[i, 1] 232 z2 = coords2[i, 2] 233 234 G2 += weight[i] * (x2 * x2 + y2 * y2 + z2 * z2) 235 236 A[0] += (x1 * x2) 237 A[1] += (x1 * y2) 238 A[2] += (x1 * z2) 239 240 A[3] += (y1 * x2) 241 A[4] += (y1 * y2) 242 A[5] += (y1 * z2) 243 244 A[6] += (z1 * x2) 245 A[7] += (z1 * y2) 246 A[8] += (z1 * z2) 247 248 else: 249 for i in range(N): 250 x1 = coords1[i, 0] 251 y1 = coords1[i, 1] 252 z1 = coords1[i, 2] 253 254 G1 += (x1 * x1 + y1 * y1 + z1 * z1) 255 256 x2 = coords2[i, 0] 257 y2 = coords2[i, 1] 258 z2 = coords2[i, 2] 259 260 G2 += (x2 * x2 + y2 * y2 + z2 * z2) 261 262 A[0] += (x1 * x2) 263 A[1] += (x1 * y2) 264 A[2] += (x1 * z2) 265 266 A[3] += (y1 * x2) 267 A[4] += (y1 * y2) 268 A[5] += (y1 * z2) 269 270 A[6] += (z1 * x2) 271 A[7] += (z1 * y2) 272 A[8] += (z1 * z2) 273 274 return (G1 + G2) * 0.5 275 276@cython.boundscheck(False) 277@cython.wraparound(False) 278def CalcRMSDRotationalMatrix(np.ndarray[np.float64_t, ndim=2] ref, 279 np.ndarray[np.float64_t, ndim=2] conf, 280 int N, 281 np.ndarray[np.float64_t, ndim=1] rot, 282 np.ndarray[np.float64_t, ndim=1] weights): 283 """ 284 Calculate the RMSD & rotational matrix. 285 286 Parameters 287 ---------- 288 ref : ndarray, np.float64_t 289 reference structure coordinates 290 conf : ndarray, np.float64_t 291 condidate structure coordinates 292 N : int 293 size of the system 294 rot : ndarray, np.float64_t 295 array to store rotation matrix. Must be flat 296 weights : ndarray, npfloat64_t (optional) 297 weights for each component 298 299 Returns 300 ------- 301 rmsd : float 302 RMSD value 303 304 .. versionchanged:: 0.16.0 305 Array size changed from 3xN to Nx3. 306 """ 307 cdef double E0 308 cdef np.ndarray[np.float64_t, ndim = 1] A = np.zeros(9,dtype = np.float64) 309 310 E0 = InnerProduct(A, conf, ref, N, weights) 311 return FastCalcRMSDAndRotation(rot, A, E0, N) 312 313def FastCalcRMSDAndRotation(np.ndarray[np.float64_t, ndim=1] rot, 314 np.ndarray[np.float64_t, ndim=1] A, 315 double E0, int N): 316 """ 317 Calculate the RMSD, and/or the optimal rotation matrix. 318 319 Parameters 320 ---------- 321 rot : ndarray np.float64_t 322 result rotation matrix, modified inplace 323 A : ndarray np.float64_t 324 the inner product of two structures 325 E0 : float64 326 0.5 * (G1 + G2) 327 N : int 328 size of the system 329 330 Returns 331 ------- 332 rmsd : float 333 RMSD value for two structures 334 335 336 .. versionchanged:: 0.16.0 337 Array sized changed from 3xN to Nx3. 338 """ 339 cdef double rmsd 340 cdef double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz 341 cdef double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2, 342 cdef double SyzSzymSyySzz2, Sxx2Syy2Szz2Syz2Szy2, Sxy2Sxz2Syx2Szx2, 343 cdef double SxzpSzx, SyzpSzy, SxypSyx, SyzmSzy, 344 cdef double SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy 345 346 cdef np.ndarray[np.float64_t, ndim=1] C = np.zeros(4,) 347 cdef unsigned int i 348 cdef double mxEigenV 349 cdef double oldg = 0.0 350 cdef double b, a, delta, rms, qsqr 351 cdef double q1, q2, q3, q4, normq 352 cdef double a11, a12, a13, a14, a21, a22, a23, a24 353 cdef double a31, a32, a33, a34, a41, a42, a43, a44 354 cdef double a2, x2, y2, z2 355 cdef double xy, az, zx, ay, yz, ax 356 cdef double a3344_4334, a3244_4234, a3243_4233, a3143_4133,a3144_4134, a3142_4132 357 cdef double evecprec = 1e-6 358 cdef double evalprec = 1e-14 359 360 cdef double a1324_1423, a1224_1422, a1223_1322, a1124_1421, a1123_1321, a1122_1221 361 Sxx = A[0] 362 Sxy = A[1] 363 Sxz = A[2] 364 Syx = A[3] 365 Syy = A[4] 366 Syz = A[5] 367 Szx = A[6] 368 Szy = A[7] 369 Szz = A[8] 370 371 Sxx2 = Sxx * Sxx 372 Syy2 = Syy * Syy 373 Szz2 = Szz * Szz 374 375 Sxy2 = Sxy * Sxy 376 Syz2 = Syz * Syz 377 Sxz2 = Sxz * Sxz 378 379 Syx2 = Syx * Syx 380 Szy2 = Szy * Szy 381 Szx2 = Szx * Szx 382 383 SyzSzymSyySzz2 = 2.0 * (Syz*Szy - Syy*Szz) 384 Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2 385 386 C[2] = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2) 387 C[1] = 8.0 * (Sxx*Syz*Szy + Syy*Szx*Sxz + Szz*Sxy*Syx - Sxx*Syy*Szz - Syz*Szx*Sxy - Szy*Syx*Sxz) 388 389 SxzpSzx = Sxz + Szx 390 SyzpSzy = Syz + Szy 391 SxypSyx = Sxy + Syx 392 SyzmSzy = Syz - Szy 393 SxzmSzx = Sxz - Szx 394 SxymSyx = Sxy - Syx 395 SxxpSyy = Sxx + Syy 396 SxxmSyy = Sxx - Syy 397 Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2 398 399 C[0] = (Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 400 + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) 401 + (-(SxzpSzx)*(SyzmSzy)+(SxymSyx)*(SxxmSyy-Szz)) * (-(SxzmSzx)*(SyzpSzy)+(SxymSyx)*(SxxmSyy+Szz)) 402 + (-(SxzpSzx)*(SyzpSzy)-(SxypSyx)*(SxxpSyy-Szz)) * (-(SxzmSzx)*(SyzmSzy)-(SxypSyx)*(SxxpSyy+Szz)) 403 + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz)) 404 + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz))) 405 406 mxEigenV = E0 407 for i in range(50): 408 oldg = mxEigenV 409 x2 = mxEigenV*mxEigenV 410 b = (x2 + C[2])*mxEigenV 411 a = b + C[1] 412 delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a)) 413 mxEigenV -= delta 414 if (fabs(mxEigenV - oldg) < fabs((evalprec)*mxEigenV)): 415 break 416 417 #if (i == 50): 418 # print "\nMore than %d iterations needed!\n" % (i) 419 420 # the fabs() is to guard against extremely small, 421 # but *negative* numbers due to floating point error 422 rms = sqrt(fabs(2.0 * (E0 - mxEigenV)/N)) 423 424 if (rot is None): 425 return rms # Don't bother with rotation. 426 427 a11 = SxxpSyy + Szz-mxEigenV 428 a12 = SyzmSzy 429 a13 = - SxzmSzx 430 a14 = SxymSyx 431 a21 = SyzmSzy 432 a22 = SxxmSyy - Szz-mxEigenV 433 a23 = SxypSyx 434 a24= SxzpSzx 435 a31 = a13 436 a32 = a23 437 a33 = Syy-Sxx-Szz - mxEigenV 438 a34 = SyzpSzy 439 a41 = a14 440 a42 = a24 441 a43 = a34 442 a44 = Szz - SxxpSyy - mxEigenV 443 a3344_4334 = a33 * a44 - a43 * a34 444 a3244_4234 = a32 * a44-a42*a34 445 a3243_4233 = a32 * a43 - a42 * a33 446 a3143_4133 = a31 * a43-a41*a33 447 a3144_4134 = a31 * a44 - a41 * a34 448 a3142_4132 = a31 * a42-a41*a32 449 q1 = a22*a3344_4334-a23*a3244_4234+a24*a3243_4233 450 q2 = -a21*a3344_4334+a23*a3144_4134-a24*a3143_4133 451 q3 = a21*a3244_4234-a22*a3144_4134+a24*a3142_4132 452 q4 = -a21*a3243_4233+a22*a3143_4133-a23*a3142_4132 453 454 qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4 455 456 # The following code tries to calculate another column in the adjoint matrix 457 # when the norm of the current column is too small. Usually this commented 458 # block will never be activated. To be absolutely safe this should be 459 # uncommented, but it is most likely unnecessary. 460 461 if (qsqr < evecprec): 462 q1 = a12*a3344_4334 - a13*a3244_4234 + a14*a3243_4233 463 q2 = -a11*a3344_4334 + a13*a3144_4134 - a14*a3143_4133 464 q3 = a11*a3244_4234 - a12*a3144_4134 + a14*a3142_4132 465 q4 = -a11*a3243_4233 + a12*a3143_4133 - a13*a3142_4132 466 qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4 467 468 if (qsqr < evecprec): 469 a1324_1423 = a13 * a24 - a14 * a23 470 a1224_1422 = a12 * a24 - a14 * a22 471 a1223_1322 = a12 * a23 - a13 * a22 472 a1124_1421 = a11 * a24 - a14 * a21 473 a1123_1321 = a11 * a23 - a13 * a21 474 a1122_1221 = a11 * a22 - a12 * a21 475 476 q1 = a42 * a1324_1423 - a43 * a1224_1422 + a44 * a1223_1322 477 q2 = -a41 * a1324_1423 + a43 * a1124_1421 - a44 * a1123_1321 478 q3 = a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221 479 q4 = -a41 * a1223_1322 + a42 * a1123_1321 - a43 * a1122_1221 480 qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4 481 482 if (qsqr < evecprec): 483 q1 = a32 * a1324_1423 - a33 * a1224_1422 + a34 * a1223_1322 484 q2 = -a31 * a1324_1423 + a33 * a1124_1421 - a34 * a1123_1321 485 q3 = a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221 486 q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221 487 qsqr = q1*q1 + q2 *q2 + q3*q3 + q4*q4 488 489 if (qsqr < evecprec): 490 # if qsqr is still too small, return the identity matrix. # 491 rot[0] = rot[4] = rot[8] = 1.0 492 rot[1] = rot[2] = rot[3] = rot[5] = rot[6] = rot[7] = 0.0 493 494 return 495 496 497 normq = sqrt(qsqr) 498 q1 /= normq 499 q2 /= normq 500 q3 /= normq 501 q4 /= normq 502 503 a2 = q1 * q1 504 x2 = q2 * q2 505 y2 = q3 * q3 506 z2 = q4 * q4 507 508 xy = q2 * q3 509 az = q1 * q4 510 zx = q4 * q2 511 ay = q1 * q3 512 yz = q3 * q4 513 ax = q1 * q2 514 515 rot[0] = a2 + x2 - y2 - z2 516 rot[1] = 2 * (xy + az) 517 rot[2] = 2 * (zx - ay) 518 rot[3] = 2 * (xy - az) 519 rot[4] = a2 - x2 + y2 - z2 520 rot[5] = 2 * (yz + ax) 521 rot[6] = 2 * (zx + ay) 522 rot[7] = 2 * (yz - ax) 523 rot[8] = a2 - x2 - y2 + z2 524 525 return rms 526 527