1# Copyright 2017 Jacob D. Durrant 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import absolute_import 16from __future__ import print_function 17from scoria import dumbpy as numpy 18from scoria.Quaternion import Quaternion 19 20 21class OtherMolecules(): 22 """ 23 A class for characterizing the relationships between multiple 24 scoria.Molecule objects. 25 """ 26 27 def __init__(self, parent_molecule_object): 28 """ 29 Initializes the scoria.OtherMolecules class. 30 31 :param scoria.Molecule parent_molecule_object: The scoria.Molecule object 32 associated with this class. 33 """ 34 35 self.__parent_molecule = parent_molecule_object 36 37 def get_other_molecules_aligned_to_this(self, other_mol, tethers, 38 weight_mat = None): 39 """ 40 Aligns a molecule to self (this scoria.Molecule object) using a 41 quaternion RMSD alignment. 42 43 Requires the :any:`numpy` library. 44 45 Wrapper function for :meth:`~scoria.Molecule.Molecule.get_other_molecules_aligned_to_this` 46 47 :param scoria.Molecule other_mol: A scoria.Molecule that is to be aligned to 48 this one. 49 :param list[list] tethers: A list of lists, where the inner list is 50 (tether1_index, tether2_index). That inner list can 51 also be a numpy array. Each inner array contains the 52 indices of self and other_mol, respectively, such 53 that equivalent atoms are listed in the same order. 54 So, for example, if (atom 1, self = atom 3, other) 55 and (atom2, self = atom6, other) than the tethers 56 would be (numpy.array([1, 2]), numpy.array([3, 6])), 57 or [(1, 2), (3, 6)]. 58 59 :returns: The new molecule. 60 """ 61 62 if not numpy.class_dependency("align molecules. Missing the dot-product function", "NUMPY"): 63 return 64 65 tethers = numpy.array(tethers).T 66 67 # Adapted from Itzhack Y. Bar-Itzhack. New Method for Extracting the 68 # Quaternion from a Rotation Matrix. Journal of Guidance, Control, and 69 # Dynamics 2000 70 if tethers is None: 71 raise Exception('No tethers specified for RMSD alignment') 72 elif tethers.shape[0] != 2: 73 raise Exception('Tethers should have only 2 columns') # Because see T above, which makes rows at this point. 74 75 # If weight_matrix isn't specified, then treat all atoms equally 76 if weight_mat is None: 77 weight_mat = numpy.identity(tethers.shape[1]) 78 79 # get the atoms corresponding to the tethers, in tether order 80 self_static_atom_coordinates = ( 81 self.__parent_molecule.get_coordinates()[tethers[0]] 82 ) 83 84 other_dynamic_atom_coordinates = ( 85 other_mol.get_coordinates()[tethers[1]] 86 ) 87 88 # translate the tether atoms to the origin 89 center_self = numpy.mean(self_static_atom_coordinates, 0) 90 center_other = numpy.mean(other_dynamic_atom_coordinates, 0) 91 92 self_static_atom_coordinates = (self_static_atom_coordinates - 93 center_self) 94 95 other_dynamic_atom_coordinates = (other_dynamic_atom_coordinates - 96 center_other) 97 98 # get optimal rotation 99 M = numpy.dot( 100 numpy.dot( 101 numpy.transpose(self_static_atom_coordinates), 102 weight_mat 103 ), 104 other_dynamic_atom_coordinates 105 ) 106 107 #Create symmetric 4x4 matrix K from M 108 K = numpy.array([[M[0, 0] + M[1, 1] + M[2, 2], 109 M[1, 2] - M[2, 1], 110 M[2, 0] - M[0, 2], 111 M[0, 1] - M[1, 0] 112 ], 113 [ 114 M[1, 2] - M[2, 1], 115 M[0, 0] - M[1, 1] - M[2, 2], 116 M[1, 0] + M[0, 1], 117 M[2, 0] + M[0, 2] 118 ], 119 [ 120 M[2, 0] - M[0, 2], 121 M[1, 0] + M[0, 1], 122 M[1, 1] - M[0, 0] - M[2, 2], 123 M[1, 2] + M[2, 1] 124 ], 125 [ 126 M[0, 1] - M[1, 0], 127 M[2, 0] + M[0, 2], 128 M[1, 2] + M[2, 1], 129 M[2, 2] - M [0, 0] - M[1, 1] 130 ] 131 ] 132 ) 133 134 # Find eigenvector associated with the most positive eigenvalue of K. 135 # Multiple quaternions can 136 E, V = numpy.linalg.eig(K) 137 index = numpy.argmax(E) 138 eigenvector = V[:, index] 139 rot_quat = Quaternion(eigenvector[0], eigenvector[1], 140 eigenvector[2], eigenvector[3]) 141 142 rot_mat = rot_quat.to_matrix() 143 144 #Apply translation and rotation to the other molecule 145 new_mol = other_mol.copy() 146 147 new_mol.set_coordinates(new_mol.information.get_coordinates() - 148 center_other) 149 150 new_mol.set_coordinates(numpy.dot( 151 new_mol.information.get_coordinates(), 152 rot_mat 153 )) 154 155 new_mol.set_coordinates(new_mol.information.get_coordinates() + 156 center_self) 157 158 return new_mol 159 160 def steric_clash_with_another_molecule(self, other_mol, cutoff, 161 pairwise_comparison = True): 162 """ 163 Detects steric clashes between the scoria.Molecule (self) and 164 another scoria.Molecule. 165 166 Requires the :any:`numpy` and :any:`scipy<scipy.spatial>` libraries. 167 168 Wrapper function for :meth:`~scoria.Molecule.Molecule.steric_clash_with_another_molecule` 169 170 :param scoria.Molecule other_mol: The scoria.Molecule object that will be 171 evaluated for steric clashes. 172 :param float cutoff: A float, the user-defined distance cutoff in 173 Angstroms. 174 :param bool pairwise_comparison: An optional boolean, whether or not to 175 perform a simple pairwise distance comparison (if True) or 176 to use a more sophisitcated method (if False). True by 177 default. 178 179 :returns: A boolean. True if steric clashes are present, False if they 180 are not. 181 """ 182 183 if not numpy.class_dependency("calculate the steric clashes with another molecule", "NUMPY"): 184 return 185 186 if not numpy.class_dependency("calculate the steric clashes with another molecule", "SCIPY"): 187 return 188 189 prnt = self.__parent_molecule 190 191 sel_cls_aatms_frm_diff_mols = ( 192 prnt.select_close_atoms_from_different_molecules 193 ) 194 195 if pairwise_comparison == True: 196 # so use a simple pairwise comparison to find close atoms 197 indices1, indices2 = sel_cls_aatms_frm_diff_mols(other_mol, 198 cutoff, True) 199 else: # so the more sophisticated heirarchical method 200 # terminate early is true because you don't want all close ones 201 indices1, indices2 = sel_cls_aatms_frm_diff_mols(other_mol, cutoff, 202 False, True) 203 204 if len(indices1) == 0 and len(indices2) == 0: 205 return False 206 else: 207 return True 208 209 def merge_with_another_molecule(self, other_molecules): 210 """ 211 Merges two molecular models into a single model. 212 213 Wrapper function for :meth:`~scoria.Molecule.Molecule.merge_with_another_molecule` 214 215 :param scoria.Molecule other_molecules: A molecular model (scoria.Molecule 216 object). 217 218 :returns: A single scoria.Molecule object containing the atoms of 219 this model combined with the atoms of other_molecules. 220 """ 221 222 merged = self.__parent_molecule.copy() 223 224 # if masses have been assigned to either molecule, they must be 225 # assigned to both 226 slf_atm_inf = self.__parent_molecule.get_atom_information() 227 if ('mass' in merged.information.get_atom_information().dtype.names or 228 'mass' in slf_atm_inf.dtype.names): 229 self.__parent_molecule.assign_masses() 230 merged.information.assign_masses() 231 232 merged.filename = "" 233 merged.get_remarks().extend(other_molecules.get_remarks()) 234 235 merged.set_atom_information( 236 numpy.stack_arrays( 237 ( 238 merged.get_atom_information(), 239 other_molecules.get_atom_information() 240 ), 241 usemask = False 242 ) 243 ) 244 245 merged.set_coordinates(numpy.vstack(( 246 merged.get_coordinates(), other_molecules.get_coordinates() 247 ))) 248 249 merged.set_coordinates_undo_point(None) 250 251 # merge the bonds, though note that bonds between the two molecules 252 # will not be set 253 if (not merged.get_bonds() is None and 254 not other_molecules.get_bonds() is None): 255 256 bonds1 = merged.get_bonds().copy() 257 bonds2 = other_molecules.get_bonds().copy() 258 259 bonds1_v2 = numpy.hstack(( 260 bonds1, numpy.zeros((len(bonds1), len(bonds2))) 261 )) 262 263 bonds2_v2 = numpy.hstack(( 264 numpy.zeros((len(bonds2), len(bonds1))), bonds2 265 )) 266 267 merged.set_bonds(numpy.vstack((bonds1_v2, bonds2_v2))) 268 else: 269 merged.set_bonds(None) 270 271 # the molecule center will be redefined, so you might as well start the 272 # hierarchy all over 273 try: 274 del merged.information.hierarchy['spheres'] 275 except: 276 pass 277 278 return merged 279 280 def get_distance_to_another_molecule(self, other_molecules, 281 pairwise_comparison = True): 282 """ 283 Computes the minimum distance between any of the atoms of this 284 molecular model and any of the atoms of a second specified model. 285 286 Requires the :any:`numpy` and :any:`scipy<scipy.spatial>` libraries. 287 288 Wrapper function for :meth:`~scoria.Molecule.Molecule.get_distance_to_another_molecule` 289 290 :param scoria.Molecule other_molecules: a scoria.Molecule, the other molecular 291 model. 292 :param bool pairwise_comparison: An optional boolean, whether or not to 293 perform a simple pairwise distance comparison (if True) or 294 to use a more sophisitcated method (if False). True by 295 default. 296 297 :returns: A float, the minimum distance between any two atoms of the two 298 specified molecular models (self and other_molecules). 299 """ 300 301 if not numpy.class_dependency("calculate the distance to another molecule", "NUMPY"): 302 return 303 304 if not numpy.class_dependency("calculate the distance to another molecule", "SCIPY"): 305 return 306 307 if pairwise_comparison == True: 308 return numpy.amin(numpy.cdist( 309 self.__parent_molecule.get_coordinates(), 310 other_molecules.get_coordinates() 311 )) 312 else: 313 # so use the more sophisticated methods for comparison 314 # note that this is not the fastest way to do this, but it uses 315 # existing functions 316 # and is still pretty fast, so I'm going to stick with it. 317 318 # first, get a cutoff distance. Let's just do a quick survey of the 319 # two molecules to pick a good one. 320 slf_gt_crs = self.__parent_molecule.get_coordinates() 321 oth_gt_crs = other_molecules.get_coordinates() 322 self_tmp = slf_gt_crs[numpy.arange(0, len(slf_gt_crs), 323 len(slf_gt_crs) / 10.0, 324 dtype = int)] 325 326 other_tmp = oth_gt_crs[numpy.arange(0, len(oth_gt_crs), 327 len(oth_gt_crs) / 10.0, 328 dtype = int)] 329 330 cutoff = numpy.amin(numpy.cdist(self_tmp, other_tmp)) 331 332 # now get all the indices that come within that cutoff 333 prnt = self.__parent_molecule 334 335 sel_cls_atms_dif_mols = ( 336 prnt.select_close_atoms_from_different_molecules 337 ) 338 339 self_indices, other_indices = sel_cls_atms_dif_mols(other_molecules, 340 cutoff, False) 341 342 self_coors = self.__parent_molecule.get_coordinates()[self_indices] 343 self_other = other_molecules.get_coordinates()[other_indices] 344 345 return numpy.amin(numpy.cdist(self_coors, self_other)) 346 347 def get_rmsd_equivalent_atoms_specified(self, other_mol, tethers): 348 """ 349 Calculates the RMSD between this scoria.Molecle object and 350 another, where equivalent atoms are explicitly specified. 351 352 Wrapper function for :meth:`~scoria.Molecule.Molecule.get_rmsd_equivalent_atoms_specified` 353 354 :param scoria.Molecule other_mol: The other scoria.Molecule object. 355 :param tuple tethers: A tuple of two numpy.array objects, where each array 356 contains the indices of self and other_mol, respectively, 357 such that equivalent atoms are listed in the same order. 358 So, for example, if (atom 1, self = atom 3, other) and 359 (atom2, self = atom6, other) than the tethers would be 360 (numpy.array([1, 2]), numpy.array([3, 6])). 361 362 :returns: A float, the RMSD between self and other_mol. 363 """ 364 365 tethers = numpy.transpose(tethers) 366 367 slf_gt_crs = self.__parent_molecule.get_coordinates() 368 if (len(slf_gt_crs) != 369 len(other_mol.get_coordinates())): 370 371 print("Cannot calculate RMSD: number of atoms are not equal.") 372 print(("\t" + (str(len(slf_gt_crs)) + 373 " vs. " + str(len(other_mol.get_coordinates())) + 374 " atoms."))) 375 return 99999999.0 376 377 self_coors_in_order = slf_gt_crs[tethers[0]] 378 other_coors_in_order = other_mol.get_coordinates()[tethers[1]] 379 380 delta = self_coors_in_order - other_coors_in_order 381 norm_squared = numpy.sum(delta * delta, axis = -1) 382 rmsd = numpy.power(numpy.sum(norm_squared) / len(norm_squared), 0.5) 383 return rmsd 384 385 def get_rmsd_order_dependent(self, other_mol): 386 """ 387 Calculates the RMSD between two structures, where equivalent atoms 388 are listed in the same order. 389 390 Wrapper function for :meth:`~scoria.Molecule.Molecule.get_rmsd_order_dependent` 391 392 :param scoria.Molecule other_mol: The other scoria.Molecule object. 393 394 :returns: A float, the RMSD between self and other_mol. 395 """ 396 397 self_index_in_order = numpy.arange( 398 0, len(self.__parent_molecule.get_coordinates()), 1, dtype = int 399 ) 400 401 other_index_in_order = numpy.arange( 402 0, len(other_mol.get_coordinates()), 1, dtype = int 403 ) 404 405 return self.get_rmsd_equivalent_atoms_specified( 406 other_mol, 407 list(zip(self_index_in_order, other_index_in_order)) 408 ) 409 410 def get_rmsd_heuristic(self, other_mol): 411 """ 412 Caluclates the RMSD between two identical molecules with different 413 conformations, per the definition given in "AutoDock Vina: Improving 414 the speed and accuracy of docking with a new scoring function, 415 efficient optimization, and multithreading,"" by Oleg Trott and Arthur 416 J. Olson. Note: Identical means the order of the atoms is the same as 417 well. 418 419 Requires the :any:`numpy` and :any:`scipy<scipy.spatial>` libraries. 420 421 Wrapper function for :meth:`~scoria.Molecule.Molecule.get_rmsd_heuristic` 422 423 :param scoria.Molecule other_mol: The other scoria.Molecule object. 424 425 :returns: A float, the RMSD between self and other_mol. 426 """ 427 428 if not numpy.class_dependency("calculate an RMSD using a heuristical algorithm", "NUMPY"): 429 return 430 431 # Group the other_mol atoms by element (atom type in pdbqt speak) 432 atom_inf = self.__parent_molecule.get_atom_information() 433 self_atom_coors = self.__parent_molecule.get_coordinates() 434 other_atom_coors = other_mol.get_coordinates() 435 436 self_atom_grps = {} 437 other_atom_grps = {} 438 439 for i, atm in enumerate(atom_inf): 440 element_stripped = atm["element"] 441 if not element_stripped in self_atom_grps.keys(): 442 self_atom_grps[element_stripped] = [] 443 other_atom_grps[element_stripped] = [] 444 self_atom_grps[element_stripped].append(self_atom_coors[i]) 445 other_atom_grps[element_stripped].append(other_atom_coors[i]) 446 447 for element in self_atom_grps.keys(): 448 self_atom_grps[element] = numpy.array(self_atom_grps[element]) 449 other_atom_grps[element] = numpy.array(other_atom_grps[element]) 450 451 # Calculate the rmsds going both ways 452 rmsd1 = self._get_rmsd_heuristic_helper_func(self_atom_grps, other_atom_grps) 453 rmsd2 = self._get_rmsd_heuristic_helper_func(other_atom_grps, self_atom_grps) 454 455 return numpy.max((rmsd1, rmsd2)) 456 457 def _get_rmsd_heuristic_helper_func(self, atom_grp1, atom_grp2): 458 """ 459 A helper function for calculating heuristic RMSD. 460 461 Requires the :any:`scipy<scipy.spatial>` library. 462 463 :param dict atom_grp1: A dictionary, where the keys are atom types 464 and the values are numpy arrays of the coordinates. 465 :param dict atom_grp2: The same, but now given the atoms of the 466 other molecule. 467 468 :returns: A float, the heuristic RMSD between the two molecules 469 (atom_grp1 to atom_grp2) 470 """ 471 472 if not numpy.class_dependency("use this helper function", "SCIPY"): 473 return 474 475 # Go through each of the element types 476 tot_sum_min_dists_sqrd = 0 477 num_heavy_atms = 0 478 for element in atom_grp1.keys(): 479 if element[:1] != "H": # Because only heavy atoms 480 coor1 = atom_grp1[element] 481 coor2 = atom_grp2[element] 482 483 dists = numpy.cdist(coor1, coor2) 484 dists_sqr = dists * dists 485 min_dists_sqr = numpy.min(dists_sqr, axis = 0) 486 487 tot_sum_min_dists_sqrd = tot_sum_min_dists_sqrd + numpy.sum(min_dists_sqr) 488 num_heavy_atms = num_heavy_atms + len(coor1) 489 rmsd = numpy.sqrt(tot_sum_min_dists_sqrd / num_heavy_atms) 490 return rmsd 491 492