1# -*- coding: utf-8 -*- 2# MolMod is a collection of molecular modelling tools for python. 3# Copyright (C) 2007 - 2019 Toon Verstraelen <Toon.Verstraelen@UGent.be>, Center 4# for Molecular Modeling (CMM), Ghent University, Ghent, Belgium; all rights 5# reserved unless otherwise stated. 6# 7# This file is part of MolMod. 8# 9# MolMod is free software; you can redistribute it and/or 10# modify it under the terms of the GNU General Public License 11# as published by the Free Software Foundation; either version 3 12# of the License, or (at your option) any later version. 13# 14# MolMod is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, see <http://www.gnu.org/licenses/> 21# 22# -- 23"""Representation, analysis and manipulation of molecular systems.""" 24 25 26from __future__ import division 27 28from builtins import range 29import numpy as np 30 31from molmod.periodic import periodic 32from molmod.units import angstrom 33from molmod.utils import cached, ReadOnly, ReadOnlyAttribute 34from molmod.molecular_graphs import MolecularGraph 35from molmod.unit_cells import UnitCell 36from molmod.transformations import fit_rmsd 37from molmod.symmetry import compute_rotsym 38 39 40__all__ = ["Molecule"] 41 42 43class Molecule(ReadOnly): 44 """Extensible class for molecular systems. 45 46 Most attributes of the molecule object are treated as constants. If you 47 want to modify the molecular geometry, just create a modified molecule 48 object with the method :meth:`molmod.utils.ReadOnly.copy_with`. This facilitates the caching of 49 derived quantities such as the distance matrix, while it imposes a 50 cleaner coding style without a signifacant computational overhead. 51 """ 52 def _check_coordinates(self, coordinates): 53 """the number of rows must be the same as the length of the array numbers""" 54 if len(coordinates) != self.size: 55 raise TypeError("The number of coordinates does not match the " 56 "length of the atomic numbers array.") 57 58 def _check_masses(self, masses): 59 """the size must be the same as the length of the array numbers""" 60 if len(masses) != self.size: 61 raise TypeError("The number of masses does not match the length of " 62 "the atomic numbers array.") 63 64 def _check_graph(self, graph): 65 """the atomic numbers must match""" 66 if graph.num_vertices != self.size: 67 raise TypeError("The number of vertices in the graph does not " 68 "match the length of the atomic numbers array.") 69 # In practice these are typically the same arrays using the same piece 70 # of memory. Just checking to be sure. 71 if (self.numbers != graph.numbers).any(): 72 raise TypeError("The atomic numbers in the graph do not match the " 73 "atomic numbers in the molecule.") 74 75 def _check_symbols(self, symbols): 76 """the size must be the same as the length of the array numbers and all elements must be strings""" 77 if len(symbols) != self.size: 78 raise TypeError("The number of symbols in the graph does not " 79 "match the length of the atomic numbers array.") 80 for symbol in symbols: 81 if not isinstance(symbol, str): 82 raise TypeError("All symbols must be strings.") 83 84 numbers = ReadOnlyAttribute(np.ndarray, none=False, npdim=1, 85 npdtype=np.signedinteger, doc="the atomic numbers") 86 coordinates = ReadOnlyAttribute(np.ndarray, npdim=2, npshape=(None,3), 87 npdtype=np.floating, check=_check_coordinates, doc="atomic Cartesian " 88 "coordinates") 89 title = ReadOnlyAttribute(str, doc="a short description of the system") 90 masses = ReadOnlyAttribute(np.ndarray, npdim=1, npdtype=np.floating, 91 check=_check_masses, doc="the atomic masses") 92 graph = ReadOnlyAttribute(MolecularGraph, check=_check_graph, 93 doc="the molecular graph with the atom connectivity") 94 symbols = ReadOnlyAttribute(tuple, _check_symbols, doc="symbols for the " 95 "atoms, which can be element names for force-field atom types") 96 unit_cell = ReadOnlyAttribute(UnitCell, doc="description of the periodic " 97 "boundary conditions") 98 99 def __init__(self, numbers, coordinates=None, title=None, masses=None, graph=None, symbols=None, unit_cell=None): 100 """ 101 Mandatory arguments: 102 | ``numbers`` -- numpy array (1D, N elements) with the atomic numbers 103 104 Optional keyword arguments: 105 | ``coordinates`` -- numpy array (2D, Nx3 elements) Cartesian coordinates 106 | ``title`` -- a string with the name of the molecule 107 | ``massess`` -- a numpy array with atomic masses in atomic units 108 | ``graph`` -- a MolecularGraph instance 109 | ``symbols`` -- atomic elements or force-field atom-types 110 | ``unit_cell`` -- the unit cell in case the system is periodic 111 """ 112 self.numbers = numbers 113 self.coordinates = coordinates 114 self.title = title 115 self.masses = masses 116 self.graph = graph 117 self.symbols = symbols 118 self.unit_cell = unit_cell 119 120 @classmethod 121 def from_file(cls, filename): 122 """Construct a molecule object read from the given file. 123 124 The file format is inferred from the extensions. Currently supported 125 formats are: ``*.cml``, ``*.fchk``, ``*.pdb``, ``*.sdf``, ``*.xyz`` 126 127 If a file contains more than one molecule, only the first one is 128 read. 129 130 Argument: 131 | ``filename`` -- the name of the file containing the molecule 132 133 Example usage:: 134 135 >>> mol = Molecule.from_file("foo.xyz") 136 """ 137 # TODO: many different API's to load files. brrr... 138 if filename.endswith(".cml"): 139 from molmod.io import load_cml 140 return load_cml(filename)[0] 141 elif filename.endswith(".fchk"): 142 from molmod.io import FCHKFile 143 fchk = FCHKFile(filename, field_labels=[]) 144 return fchk.molecule 145 elif filename.endswith(".pdb"): 146 from molmod.io import load_pdb 147 return load_pdb(filename) 148 elif filename.endswith(".sdf"): 149 from molmod.io import SDFReader 150 return next(SDFReader(filename)) 151 elif filename.endswith(".xyz"): 152 from molmod.io import XYZReader 153 xyz_reader = XYZReader(filename) 154 title, coordinates = next(xyz_reader) 155 return Molecule(xyz_reader.numbers, coordinates, title, symbols=xyz_reader.symbols) 156 else: 157 raise ValueError("Could not determine file format for %s." % filename) 158 159 size = property(lambda self: self.numbers.shape[0], 160 doc="*Read-only attribute:* the number of atoms.") 161 162 @cached 163 def distance_matrix(self): 164 """the matrix with all atom pair distances""" 165 from molmod.ext import molecules_distance_matrix 166 return molecules_distance_matrix(self.coordinates) 167 168 @cached 169 def mass(self): 170 """the total mass of the molecule""" 171 return self.masses.sum() 172 173 @cached 174 def com(self): 175 """the center of mass of the molecule""" 176 return (self.coordinates*self.masses.reshape((-1,1))).sum(axis=0)/self.mass 177 178 @cached 179 def inertia_tensor(self): 180 """the intertia tensor of the molecule""" 181 result = np.zeros((3,3), float) 182 for i in range(self.size): 183 r = self.coordinates[i] - self.com 184 # the diagonal term 185 result.ravel()[::4] += self.masses[i]*(r**2).sum() 186 # the outer product term 187 result -= self.masses[i]*np.outer(r,r) 188 return result 189 190 @cached 191 def chemical_formula(self): 192 """the chemical formula of the molecule""" 193 counts = {} 194 for number in self.numbers: 195 counts[number] = counts.get(number, 0)+1 196 items = [] 197 for number, count in sorted(counts.items(), reverse=True): 198 if count == 1: 199 items.append(periodic[number].symbol) 200 else: 201 items.append("%s%i" % (periodic[number].symbol, count)) 202 return "".join(items) 203 204 def set_default_masses(self): 205 """Set self.masses based on self.numbers and periodic table.""" 206 self.masses = np.array([periodic[n].mass for n in self.numbers]) 207 208 def set_default_graph(self): 209 """Set self.graph to the default graph. 210 211 This method is equivalent to:: 212 213 mol.graph = MolecularGraph.from_geometry(mol) 214 215 with the default options, and only works if the graph object is not 216 present yet. 217 See :meth:`molmod.molecular_graphs.MolecularGraph.from_geometry` 218 for more fine-grained control over the assignment of bonds. 219 """ 220 self.graph = MolecularGraph.from_geometry(self) 221 222 def set_default_symbols(self): 223 """Set self.symbols based on self.numbers and the periodic table.""" 224 self.symbols = tuple(periodic[n].symbol for n in self.numbers) 225 226 def write_to_file(self, filename): 227 """Write the molecular geometry to a file. 228 229 The file format is inferred from the extensions. Currently supported 230 formats are: ``*.xyz``, ``*.cml`` 231 232 Argument: 233 | ``filename`` -- a filename 234 """ 235 # TODO: give all file format writers the same API 236 if filename.endswith('.cml'): 237 from molmod.io import dump_cml 238 dump_cml(filename, [self]) 239 elif filename.endswith('.xyz'): 240 from molmod.io import XYZWriter 241 symbols = [] 242 for n in self.numbers: 243 atom = periodic[n] 244 if atom is None: 245 symbols.append("X") 246 else: 247 symbols.append(atom.symbol) 248 xyz_writer = XYZWriter(filename, symbols) 249 xyz_writer.dump(self.title, self.coordinates) 250 del xyz_writer 251 else: 252 raise ValueError("Could not determine file format for %s." % filename) 253 254 def rmsd(self, other): 255 """Compute the RMSD between two molecules. 256 257 Arguments: 258 | ``other`` -- Another molecule with the same atom numbers 259 260 Return values: 261 | ``transformation`` -- the transformation that brings 'self' into 262 overlap with 'other' 263 | ``other_trans`` -- the transformed coordinates of geometry 'other' 264 | ``rmsd`` -- the rmsd of the distances between corresponding atoms in 265 'self' and 'other' 266 267 Make sure the atoms in `self` and `other` are in the same order. 268 269 Usage:: 270 271 >>> print molecule1.rmsd(molecule2)[2]/angstrom 272 """ 273 if self.numbers.shape != other.numbers.shape or \ 274 (self.numbers != other.numbers).all(): 275 raise ValueError("The other molecule does not have the same numbers as this molecule.") 276 return fit_rmsd(self.coordinates, other.coordinates) 277 278 def compute_rotsym(self, threshold=1e-3*angstrom): 279 """Compute the rotational symmetry number. 280 281 Optional argument: 282 | ``threshold`` -- only when a rotation results in an rmsd below the given 283 threshold, the rotation is considered to transform the 284 molecule onto itself. 285 """ 286 # Generate a graph with a more permissive threshold for bond lengths: 287 # (is convenient in case of transition state geometries) 288 graph = MolecularGraph.from_geometry(self, scaling=1.5) 289 try: 290 return compute_rotsym(self, graph, threshold) 291 except ValueError: 292 raise ValueError("The rotational symmetry number can only be computed when the graph is fully connected.") 293