1# -*- coding: utf-8 -*-
2# MolMod is a collection of molecular modelling tools for python.
3# Copyright (C) 2007 - 2019 Toon Verstraelen <Toon.Verstraelen@UGent.be>, Center
4# for Molecular Modeling (CMM), Ghent University, Ghent, Belgium; all rights
5# reserved unless otherwise stated.
6#
7# This file is part of MolMod.
8#
9# MolMod is free software; you can redistribute it and/or
10# modify it under the terms of the GNU General Public License
11# as published by the Free Software Foundation; either version 3
12# of the License, or (at your option) any later version.
13#
14# MolMod is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, see <http://www.gnu.org/licenses/>
21#
22# --
23"""Representation, analysis and manipulation of molecular systems."""
24
25
26from __future__ import division
27
28from builtins import range
29import numpy as np
30
31from molmod.periodic import periodic
32from molmod.units import angstrom
33from molmod.utils import cached, ReadOnly, ReadOnlyAttribute
34from molmod.molecular_graphs import MolecularGraph
35from molmod.unit_cells import UnitCell
36from molmod.transformations import fit_rmsd
37from molmod.symmetry import compute_rotsym
38
39
40__all__ = ["Molecule"]
41
42
43class Molecule(ReadOnly):
44    """Extensible class for molecular systems.
45
46       Most attributes of the molecule object are treated as constants. If you
47       want to modify the molecular geometry, just create a modified molecule
48       object with the method :meth:`molmod.utils.ReadOnly.copy_with`. This facilitates the caching of
49       derived quantities such as the distance matrix, while it imposes a
50       cleaner coding style without a signifacant computational overhead.
51    """
52    def _check_coordinates(self, coordinates):
53        """the number of rows must be the same as the length of the array numbers"""
54        if len(coordinates) != self.size:
55            raise TypeError("The number of coordinates does not match the "
56                "length of the atomic numbers array.")
57
58    def _check_masses(self, masses):
59        """the size must be the same as the length of the array numbers"""
60        if len(masses) != self.size:
61            raise TypeError("The number of masses does not match the length of "
62                "the atomic numbers array.")
63
64    def _check_graph(self, graph):
65        """the atomic numbers must match"""
66        if graph.num_vertices != self.size:
67            raise TypeError("The number of vertices in the graph does not "
68                "match the length of the atomic numbers array.")
69        # In practice these are typically the same arrays using the same piece
70        # of memory. Just checking to be sure.
71        if (self.numbers != graph.numbers).any():
72            raise TypeError("The atomic numbers in the graph do not match the "
73                "atomic numbers in the molecule.")
74
75    def _check_symbols(self, symbols):
76        """the size must be the same as the length of the array numbers and all elements must be strings"""
77        if len(symbols) != self.size:
78            raise TypeError("The number of symbols in the graph does not "
79                "match the length of the atomic numbers array.")
80        for symbol in symbols:
81            if not isinstance(symbol, str):
82                raise TypeError("All symbols must be strings.")
83
84    numbers = ReadOnlyAttribute(np.ndarray, none=False, npdim=1,
85        npdtype=np.signedinteger, doc="the atomic numbers")
86    coordinates = ReadOnlyAttribute(np.ndarray, npdim=2, npshape=(None,3),
87        npdtype=np.floating, check=_check_coordinates, doc="atomic Cartesian "
88        "coordinates")
89    title = ReadOnlyAttribute(str, doc="a short description of the system")
90    masses = ReadOnlyAttribute(np.ndarray, npdim=1, npdtype=np.floating,
91        check=_check_masses, doc="the atomic masses")
92    graph = ReadOnlyAttribute(MolecularGraph, check=_check_graph,
93        doc="the molecular graph with the atom connectivity")
94    symbols = ReadOnlyAttribute(tuple, _check_symbols, doc="symbols for the "
95        "atoms, which can be element names for force-field atom types")
96    unit_cell = ReadOnlyAttribute(UnitCell, doc="description of the periodic "
97        "boundary conditions")
98
99    def __init__(self, numbers, coordinates=None, title=None, masses=None, graph=None, symbols=None, unit_cell=None):
100        """
101           Mandatory arguments:
102            | ``numbers``  --  numpy array (1D, N elements) with the atomic numbers
103
104           Optional keyword arguments:
105            | ``coordinates``  --  numpy array (2D, Nx3 elements) Cartesian coordinates
106            | ``title``  --  a string with the name of the molecule
107            | ``massess``  --  a numpy array with atomic masses in atomic units
108            | ``graph``  --  a MolecularGraph instance
109            | ``symbols``  --  atomic elements or force-field atom-types
110            | ``unit_cell``  --  the unit cell in case the system is periodic
111        """
112        self.numbers = numbers
113        self.coordinates = coordinates
114        self.title = title
115        self.masses = masses
116        self.graph = graph
117        self.symbols = symbols
118        self.unit_cell = unit_cell
119
120    @classmethod
121    def from_file(cls, filename):
122        """Construct a molecule object read from the given file.
123
124           The file format is inferred from the extensions. Currently supported
125           formats are: ``*.cml``, ``*.fchk``, ``*.pdb``, ``*.sdf``, ``*.xyz``
126
127           If a file contains more than one molecule, only the first one is
128           read.
129
130           Argument:
131            | ``filename``  --  the name of the file containing the molecule
132
133           Example usage::
134
135             >>> mol = Molecule.from_file("foo.xyz")
136        """
137        # TODO: many different API's to load files. brrr...
138        if filename.endswith(".cml"):
139            from molmod.io import load_cml
140            return load_cml(filename)[0]
141        elif filename.endswith(".fchk"):
142            from molmod.io import FCHKFile
143            fchk = FCHKFile(filename, field_labels=[])
144            return fchk.molecule
145        elif filename.endswith(".pdb"):
146            from molmod.io import load_pdb
147            return load_pdb(filename)
148        elif filename.endswith(".sdf"):
149            from molmod.io import SDFReader
150            return next(SDFReader(filename))
151        elif filename.endswith(".xyz"):
152            from molmod.io import XYZReader
153            xyz_reader = XYZReader(filename)
154            title, coordinates = next(xyz_reader)
155            return Molecule(xyz_reader.numbers, coordinates, title, symbols=xyz_reader.symbols)
156        else:
157            raise ValueError("Could not determine file format for %s." % filename)
158
159    size = property(lambda self: self.numbers.shape[0],
160        doc="*Read-only attribute:* the number of atoms.")
161
162    @cached
163    def distance_matrix(self):
164        """the matrix with all atom pair distances"""
165        from molmod.ext import molecules_distance_matrix
166        return molecules_distance_matrix(self.coordinates)
167
168    @cached
169    def mass(self):
170        """the total mass of the molecule"""
171        return self.masses.sum()
172
173    @cached
174    def com(self):
175        """the center of mass of the molecule"""
176        return (self.coordinates*self.masses.reshape((-1,1))).sum(axis=0)/self.mass
177
178    @cached
179    def inertia_tensor(self):
180        """the intertia tensor of the molecule"""
181        result = np.zeros((3,3), float)
182        for i in range(self.size):
183            r = self.coordinates[i] - self.com
184            # the diagonal term
185            result.ravel()[::4] += self.masses[i]*(r**2).sum()
186            # the outer product term
187            result -= self.masses[i]*np.outer(r,r)
188        return result
189
190    @cached
191    def chemical_formula(self):
192        """the chemical formula of the molecule"""
193        counts = {}
194        for number in self.numbers:
195            counts[number] = counts.get(number, 0)+1
196        items = []
197        for number, count in sorted(counts.items(), reverse=True):
198            if count == 1:
199                items.append(periodic[number].symbol)
200            else:
201                items.append("%s%i" % (periodic[number].symbol, count))
202        return "".join(items)
203
204    def set_default_masses(self):
205        """Set self.masses based on self.numbers and periodic table."""
206        self.masses = np.array([periodic[n].mass for n in self.numbers])
207
208    def set_default_graph(self):
209        """Set self.graph to the default graph.
210
211           This method is equivalent to::
212
213              mol.graph = MolecularGraph.from_geometry(mol)
214
215           with the default options, and only works if the graph object is not
216           present yet.
217           See :meth:`molmod.molecular_graphs.MolecularGraph.from_geometry`
218           for more fine-grained control over the assignment of bonds.
219        """
220        self.graph = MolecularGraph.from_geometry(self)
221
222    def set_default_symbols(self):
223        """Set self.symbols based on self.numbers and the periodic table."""
224        self.symbols = tuple(periodic[n].symbol for n in self.numbers)
225
226    def write_to_file(self, filename):
227        """Write the molecular geometry to a file.
228
229           The file format is inferred from the extensions. Currently supported
230           formats are: ``*.xyz``, ``*.cml``
231
232           Argument:
233            | ``filename``  --  a filename
234        """
235        # TODO: give all file format writers the same API
236        if filename.endswith('.cml'):
237            from molmod.io import dump_cml
238            dump_cml(filename, [self])
239        elif filename.endswith('.xyz'):
240            from molmod.io import XYZWriter
241            symbols = []
242            for n in self.numbers:
243                atom = periodic[n]
244                if atom is None:
245                    symbols.append("X")
246                else:
247                    symbols.append(atom.symbol)
248            xyz_writer = XYZWriter(filename, symbols)
249            xyz_writer.dump(self.title, self.coordinates)
250            del xyz_writer
251        else:
252            raise ValueError("Could not determine file format for %s." % filename)
253
254    def rmsd(self, other):
255        """Compute the RMSD between two molecules.
256
257           Arguments:
258            | ``other``  --  Another molecule with the same atom numbers
259
260           Return values:
261            | ``transformation``  --  the transformation that brings 'self' into
262                                  overlap with 'other'
263            | ``other_trans``  --  the transformed coordinates of geometry 'other'
264            | ``rmsd``  --  the rmsd of the distances between corresponding atoms in
265                            'self' and 'other'
266
267           Make sure the atoms in `self` and `other` are in the same order.
268
269           Usage::
270
271             >>> print molecule1.rmsd(molecule2)[2]/angstrom
272        """
273        if self.numbers.shape != other.numbers.shape or \
274           (self.numbers != other.numbers).all():
275            raise ValueError("The other molecule does not have the same numbers as this molecule.")
276        return fit_rmsd(self.coordinates, other.coordinates)
277
278    def compute_rotsym(self, threshold=1e-3*angstrom):
279        """Compute the rotational symmetry number.
280
281           Optional argument:
282            | ``threshold``  --  only when a rotation results in an rmsd below the given
283                                 threshold, the rotation is considered to transform the
284                                 molecule onto itself.
285        """
286        # Generate a graph with a more permissive threshold for bond lengths:
287        # (is convenient in case of transition state geometries)
288        graph = MolecularGraph.from_geometry(self, scaling=1.5)
289        try:
290            return compute_rotsym(self, graph, threshold)
291        except ValueError:
292            raise ValueError("The rotational symmetry number can only be computed when the graph is fully connected.")
293