1# -----------------------------------------------------------------------------
2#    Author(s) of Original Implementation:
3#                  Douglas L. Theobald
4#                  Department of Biochemistry
5#                  MS 009
6#                  Brandeis University
7#                  415 South St
8#                  Waltham, MA  02453
9#                  USA
10#
11#                  dtheobald@brandeis.edu
12#
13#                  Pu Liu
14#                  Johnson & Johnson Pharmaceutical Research and Development, L.L.C.
15#                  665 Stockton Drive
16#                  Exton, PA  19341
17#                  USA
18#
19#                  pliu24@its.jnj.com
20#
21#                  For the original code written in C see:
22#                  http://theobald.brandeis.edu/qcp/
23#
24#
25#    Author of Python Port:
26#                  Joshua L. Adelman
27#                  Department of Biological Sciences
28#                  University of Pittsburgh
29#                  Pittsburgh, PA 15260
30#
31#                  jla65@pitt.edu
32#
33#
34#    If you use this QCP rotation calculation method in a publication, please
35#    reference:
36#
37#      Douglas L. Theobald (2005)
38#      "Rapid calculation of RMSD using a quaternion-based characteristic
39#      polynomial."
40#      Acta Crystallographica A 61(4):478-480.
41#
42#      Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010)
43#      "Fast determination of the optimal rotational matrix for macromolecular
44#      superpositions."
45#      J. Comput. Chem. 31, 1561-1563.
46#
47#
48#    Copyright (c) 2009-2010, Pu Liu and Douglas L. Theobald
49#    Copyright (c) 2011       Joshua L. Adelman
50#    Copyright (c) 2016       Robert R. Delgado
51#    All rights reserved.
52#
53#    Redistribution and use in source and binary forms, with or without modification, are permitted
54#    provided that the following conditions are met:
55#
56#    * Redistributions of source code must retain the above copyright notice, this list of
57#      conditions and the following disclaimer.
58#    * Redistributions in binary form must reproduce the above copyright notice, this list
59#      of conditions and the following disclaimer in the documentation and/or other materials
60#      provided with the distribution.
61#    * Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to
62#      endorse or promote products derived from this software without specific prior written
63#      permission.
64#
65#    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
66#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
67#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
68#    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
69#    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
70#    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
71#    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
72#    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
73#    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
74#    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
75#    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
76# -----------------------------------------------------------------------------
77
78"""
79Fast QCP RMSD structure alignment --- :mod:`MDAnalysis.lib.qcprot`
80==================================================================
81
82:Author:   Joshua L. Adelman, University of Pittsburgh
83:Author:   Robert R. Delgado, Cornell University and Arizona State University
84:Contact:  jla65@pitt.edu
85:Year:     2011, 2016
86:Licence:  BSD
87
88PyQCPROT_ is a python/cython implementation of Douglas Theobald's QCP
89method for calculating the minimum RMSD between two structures
90[Theobald2005]_ and determining the optimal least-squares rotation
91matrix [Liu2010]_.
92
93A full description of the method, along with the original C implementation can
94be found at http://theobald.brandeis.edu/qcp/
95
96See Also
97--------
98MDAnalysis.analysis.align:
99    Align structures using :func:`CalcRMSDRotationalMatrix`
100MDAnalysis.analysis.rms.rmsd:
101    Calculate the RMSD between two structures using
102    :func:`CalcRMSDRotationalMatrix`
103
104
105.. versionchanged:: 0.16.0
106   Call signatures were changed to directly interface with MDAnalysis
107   coordinate arrays: shape (N, 3)
108
109References
110----------
111
112If you use this QCP rotation calculation method in a publication, please
113reference:
114
115.. [Theobald2005] Douglas L. Theobald (2005)
116   "Rapid calculation of RMSD using a quaternion-based characteristic
117   polynomial."  Acta Crystallographica A 61(4):478-480.
118
119.. [Liu2010] Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010)
120   "Fast determination of the optimal rotational matrix for macromolecular
121   superpositions."  J. Comput. Chem. 31, 1561-1563.
122
123.. _PyQCPROT: https://github.com/synapticarbors/pyqcprot
124
125
126Functions
127---------
128
129Users will typically use the :func:`CalcRMSDRotationalMatrix` function.
130
131.. autofunction:: CalcRMSDRotationalMatrix
132
133.. autofunction:: InnerProduct
134
135.. autofunction:: FastCalcRMSDAndRotation
136
137"""
138
139import numpy as np
140cimport numpy as np
141
142from ..due import due, BibTeX, Doi
143
144# providing DOI for this citation doesnt seem to work (as of 22/04/18)
145_QCBIB = """\
146@article{qcprot2,
147author = {Pu Liu and Dimitris K. Agrafiotis and Douglas L. Theobald},
148title = {Fast determination of the optimal rotational matrix for macromolecular superpositions},
149journal = {Journal of Computational Chemistry},
150volume = {31},
151number = {7},
152pages = {1561-1563},
153doi = {10.1002/jcc.21439},
154}
155"""
156
157due.cite(Doi("10.1107/s0108767305015266"),
158         description="QCProt implementation",
159         path="MDAnalysis.lib.qcprot",
160         cite_module=True)
161due.cite(BibTeX(_QCBIB),
162         description="QCProt implementation",
163         path="MDAnalysis.lib.qcprot",
164         cite_module=True)
165
166
167import cython
168
169cdef extern from "math.h":
170    double sqrt(double x)
171    double fabs(double x)
172
173@cython.boundscheck(False)
174@cython.wraparound(False)
175def InnerProduct(np.ndarray[np.float64_t, ndim=1] A,
176                 np.ndarray[np.float64_t, ndim=2] coords1,
177                 np.ndarray[np.float64_t, ndim=2] coords2,
178                 int N,
179                 np.ndarray[np.float64_t, ndim=1] weight):
180    """Calculate the inner product of two structures.
181
182    Parameters
183    ----------
184    A : ndarray np.float64_t
185        result inner product array, modified in place
186    coords1 : ndarray np.float64_t
187        reference structure
188    coord2 : ndarray np.float64_t
189        candidate structure
190    N : int
191        size of system
192    weights : ndarray np.float64_t (optional)
193        use to calculate weighted inner product
194
195
196
197    Returns
198    -------
199    E0 : float
200    0.5 * (G1 + G2), can be used as input for :func:`FastCalcRMSDAndRotation`
201
202    Notes
203    -----
204    1. You MUST center the structures, coords1 and coords2, before calling this
205       function.
206
207    2. Coordinates are stored as Nx3 arrays (as everywhere else in MDAnalysis).
208
209    .. versionchanged:: 0.16.0
210       Array size changed from 3xN to Nx3.
211    """
212
213    cdef double          x1, x2, y1, y2, z1, z2
214    cdef unsigned int    i
215    cdef double          G1, G2
216
217    G1 = 0.0
218    G2 = 0.0
219
220    A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0
221
222    if (weight is not None):
223        for i in range(N):
224            x1 = weight[i] * coords1[i, 0]
225            y1 = weight[i] * coords1[i, 1]
226            z1 = weight[i] * coords1[i, 2]
227
228            G1 += x1 * coords1[i, 0] + y1 * coords1[i, 1] + z1 * coords1[i, 2]
229
230            x2 = coords2[i, 0]
231            y2 = coords2[i, 1]
232            z2 = coords2[i, 2]
233
234            G2 += weight[i] * (x2 * x2 + y2 * y2 + z2 * z2)
235
236            A[0] +=  (x1 * x2)
237            A[1] +=  (x1 * y2)
238            A[2] +=  (x1 * z2)
239
240            A[3] +=  (y1 * x2)
241            A[4] +=  (y1 * y2)
242            A[5] +=  (y1 * z2)
243
244            A[6] +=  (z1 * x2)
245            A[7] +=  (z1 * y2)
246            A[8] +=  (z1 * z2)
247
248    else:
249        for i in range(N):
250            x1 = coords1[i, 0]
251            y1 = coords1[i, 1]
252            z1 = coords1[i, 2]
253
254            G1 += (x1 * x1 + y1 * y1 + z1 * z1)
255
256            x2 = coords2[i, 0]
257            y2 = coords2[i, 1]
258            z2 = coords2[i, 2]
259
260            G2 += (x2 * x2 + y2 * y2 + z2 * z2)
261
262            A[0] +=  (x1 * x2)
263            A[1] +=  (x1 * y2)
264            A[2] +=  (x1 * z2)
265
266            A[3] +=  (y1 * x2)
267            A[4] +=  (y1 * y2)
268            A[5] +=  (y1 * z2)
269
270            A[6] +=  (z1 * x2)
271            A[7] +=  (z1 * y2)
272            A[8] +=  (z1 * z2)
273
274    return (G1 + G2) * 0.5
275
276@cython.boundscheck(False)
277@cython.wraparound(False)
278def CalcRMSDRotationalMatrix(np.ndarray[np.float64_t, ndim=2] ref,
279                             np.ndarray[np.float64_t, ndim=2] conf,
280                             int N,
281                             np.ndarray[np.float64_t, ndim=1] rot,
282                             np.ndarray[np.float64_t, ndim=1] weights):
283    """
284    Calculate the RMSD & rotational matrix.
285
286    Parameters
287    ----------
288    ref : ndarray, np.float64_t
289        reference structure coordinates
290    conf : ndarray, np.float64_t
291        condidate structure coordinates
292    N : int
293        size of the system
294    rot : ndarray, np.float64_t
295        array to store rotation matrix. Must be flat
296    weights : ndarray, npfloat64_t (optional)
297        weights for each component
298
299    Returns
300    -------
301    rmsd : float
302        RMSD value
303
304    .. versionchanged:: 0.16.0
305       Array size changed from 3xN to Nx3.
306    """
307    cdef double E0
308    cdef np.ndarray[np.float64_t, ndim = 1] A = np.zeros(9,dtype = np.float64)
309
310    E0 = InnerProduct(A, conf, ref, N, weights)
311    return FastCalcRMSDAndRotation(rot, A, E0, N)
312
313def FastCalcRMSDAndRotation(np.ndarray[np.float64_t, ndim=1] rot,
314                            np.ndarray[np.float64_t, ndim=1] A,
315                            double E0, int N):
316    """
317    Calculate the RMSD, and/or the optimal rotation matrix.
318
319    Parameters
320    ----------
321    rot : ndarray np.float64_t
322        result rotation matrix, modified inplace
323    A : ndarray np.float64_t
324        the inner product of two structures
325    E0 : float64
326        0.5 * (G1 + G2)
327    N : int
328        size of the system
329
330    Returns
331    -------
332    rmsd : float
333        RMSD value for two structures
334
335
336    .. versionchanged:: 0.16.0
337       Array sized changed from 3xN to Nx3.
338    """
339    cdef double rmsd
340    cdef double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz
341    cdef double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2,
342    cdef double SyzSzymSyySzz2, Sxx2Syy2Szz2Syz2Szy2, Sxy2Sxz2Syx2Szx2,
343    cdef double SxzpSzx, SyzpSzy, SxypSyx, SyzmSzy,
344    cdef double SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy
345
346    cdef np.ndarray[np.float64_t, ndim=1] C = np.zeros(4,)
347    cdef unsigned int i
348    cdef double mxEigenV
349    cdef double oldg = 0.0
350    cdef double b, a, delta, rms, qsqr
351    cdef double q1, q2, q3, q4, normq
352    cdef double a11, a12, a13, a14, a21, a22, a23, a24
353    cdef double a31, a32, a33, a34, a41, a42, a43, a44
354    cdef double a2, x2, y2, z2
355    cdef double xy, az, zx, ay, yz, ax
356    cdef double a3344_4334, a3244_4234, a3243_4233, a3143_4133,a3144_4134, a3142_4132
357    cdef double evecprec = 1e-6
358    cdef double evalprec = 1e-14
359
360    cdef double a1324_1423, a1224_1422, a1223_1322, a1124_1421, a1123_1321, a1122_1221
361    Sxx = A[0]
362    Sxy = A[1]
363    Sxz = A[2]
364    Syx = A[3]
365    Syy = A[4]
366    Syz = A[5]
367    Szx = A[6]
368    Szy = A[7]
369    Szz = A[8]
370
371    Sxx2 = Sxx * Sxx
372    Syy2 = Syy * Syy
373    Szz2 = Szz * Szz
374
375    Sxy2 = Sxy * Sxy
376    Syz2 = Syz * Syz
377    Sxz2 = Sxz * Sxz
378
379    Syx2 = Syx * Syx
380    Szy2 = Szy * Szy
381    Szx2 = Szx * Szx
382
383    SyzSzymSyySzz2 = 2.0 * (Syz*Szy - Syy*Szz)
384    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2
385
386    C[2] = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
387    C[1] = 8.0 * (Sxx*Syz*Szy + Syy*Szx*Sxz + Szz*Sxy*Syx - Sxx*Syy*Szz - Syz*Szx*Sxy - Szy*Syx*Sxz)
388
389    SxzpSzx = Sxz + Szx
390    SyzpSzy = Syz + Szy
391    SxypSyx = Sxy + Syx
392    SyzmSzy = Syz - Szy
393    SxzmSzx = Sxz - Szx
394    SxymSyx = Sxy - Syx
395    SxxpSyy = Sxx + Syy
396    SxxmSyy = Sxx - Syy
397    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2
398
399    C[0] = (Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2
400         + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2)
401         + (-(SxzpSzx)*(SyzmSzy)+(SxymSyx)*(SxxmSyy-Szz)) * (-(SxzmSzx)*(SyzpSzy)+(SxymSyx)*(SxxmSyy+Szz))
402         + (-(SxzpSzx)*(SyzpSzy)-(SxypSyx)*(SxxpSyy-Szz)) * (-(SxzmSzx)*(SyzmSzy)-(SxypSyx)*(SxxpSyy+Szz))
403         + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz))
404         + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)))
405
406    mxEigenV = E0
407    for i in range(50):
408        oldg = mxEigenV
409        x2 = mxEigenV*mxEigenV
410        b = (x2 + C[2])*mxEigenV
411        a = b + C[1]
412        delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a))
413        mxEigenV -= delta
414        if (fabs(mxEigenV - oldg) < fabs((evalprec)*mxEigenV)):
415            break
416
417    #if (i == 50):
418    #   print "\nMore than %d iterations needed!\n" % (i)
419
420    # the fabs() is to guard against extremely small,
421    # but *negative* numbers due to floating point error
422    rms = sqrt(fabs(2.0 * (E0 - mxEigenV)/N))
423
424    if (rot is None):
425        return rms # Don't bother with rotation.
426
427    a11 = SxxpSyy + Szz-mxEigenV
428    a12 = SyzmSzy
429    a13 = - SxzmSzx
430    a14 = SxymSyx
431    a21 = SyzmSzy
432    a22 = SxxmSyy - Szz-mxEigenV
433    a23 = SxypSyx
434    a24= SxzpSzx
435    a31 = a13
436    a32 = a23
437    a33 = Syy-Sxx-Szz - mxEigenV
438    a34 = SyzpSzy
439    a41 = a14
440    a42 = a24
441    a43 = a34
442    a44 = Szz - SxxpSyy - mxEigenV
443    a3344_4334 = a33 * a44 - a43 * a34
444    a3244_4234 = a32 * a44-a42*a34
445    a3243_4233 = a32 * a43 - a42 * a33
446    a3143_4133 = a31 * a43-a41*a33
447    a3144_4134 = a31 * a44 - a41 * a34
448    a3142_4132 = a31 * a42-a41*a32
449    q1 =  a22*a3344_4334-a23*a3244_4234+a24*a3243_4233
450    q2 = -a21*a3344_4334+a23*a3144_4134-a24*a3143_4133
451    q3 =  a21*a3244_4234-a22*a3144_4134+a24*a3142_4132
452    q4 = -a21*a3243_4233+a22*a3143_4133-a23*a3142_4132
453
454    qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4
455
456    # The following code tries to calculate another column in the adjoint matrix
457    # when the norm of the current column is too small. Usually this commented
458    # block will never be activated. To be absolutely safe this should be
459    # uncommented, but it is most likely unnecessary.
460
461    if (qsqr < evecprec):
462        q1 =  a12*a3344_4334 - a13*a3244_4234 + a14*a3243_4233
463        q2 = -a11*a3344_4334 + a13*a3144_4134 - a14*a3143_4133
464        q3 =  a11*a3244_4234 - a12*a3144_4134 + a14*a3142_4132
465        q4 = -a11*a3243_4233 + a12*a3143_4133 - a13*a3142_4132
466        qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4
467
468        if (qsqr < evecprec):
469            a1324_1423 = a13 * a24 - a14 * a23
470            a1224_1422 = a12 * a24 - a14 * a22
471            a1223_1322 = a12 * a23 - a13 * a22
472            a1124_1421 = a11 * a24 - a14 * a21
473            a1123_1321 = a11 * a23 - a13 * a21
474            a1122_1221 = a11 * a22 - a12 * a21
475
476            q1 =  a42 * a1324_1423 - a43 * a1224_1422 + a44 * a1223_1322
477            q2 = -a41 * a1324_1423 + a43 * a1124_1421 - a44 * a1123_1321
478            q3 =  a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221
479            q4 = -a41 * a1223_1322 + a42 * a1123_1321 - a43 * a1122_1221
480            qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4
481
482            if (qsqr < evecprec):
483                q1 =  a32 * a1324_1423 - a33 * a1224_1422 + a34 * a1223_1322
484                q2 = -a31 * a1324_1423 + a33 * a1124_1421 - a34 * a1123_1321
485                q3 =  a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221
486                q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221
487                qsqr = q1*q1 + q2 *q2 + q3*q3 + q4*q4
488
489                if (qsqr < evecprec):
490                    # if qsqr is still too small, return the identity matrix. #
491                    rot[0] = rot[4] = rot[8] = 1.0
492                    rot[1] = rot[2] = rot[3] = rot[5] = rot[6] = rot[7] = 0.0
493
494                    return
495
496
497    normq = sqrt(qsqr)
498    q1 /= normq
499    q2 /= normq
500    q3 /= normq
501    q4 /= normq
502
503    a2 = q1 * q1
504    x2 = q2 * q2
505    y2 = q3 * q3
506    z2 = q4 * q4
507
508    xy = q2 * q3
509    az = q1 * q4
510    zx = q4 * q2
511    ay = q1 * q3
512    yz = q3 * q4
513    ax = q1 * q2
514
515    rot[0] = a2 + x2 - y2 - z2
516    rot[1] = 2 * (xy + az)
517    rot[2] = 2 * (zx - ay)
518    rot[3] = 2 * (xy - az)
519    rot[4] = a2 - x2 + y2 - z2
520    rot[5] = 2 * (yz + ax)
521    rot[6] = 2 * (zx + ay)
522    rot[7] = 2 * (yz - ax)
523    rot[8] = a2 - x2 - y2 + z2
524
525    return rms
526
527