1 //
2 //  Copyright (C) 2003-2017 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RDDEPICTOR_H
13 #define RDDEPICTOR_H
14 
15 #include <GraphMol/Substruct/SubstructMatch.h>
16 #include <RDGeneral/types.h>
17 #include <Geometry/point.h>
18 #include <boost/smart_ptr.hpp>
19 
// Forward declaration of the molecule type that appears in the signatures
// declared in this header.
namespace RDKit {
class ROMol;
}  // namespace RDKit
23 
24 namespace RDDepict {
25 
26 RDKIT_DEPICTOR_EXPORT extern bool
27     preferCoordGen;  // Ignored if coordgen support isn't active
28 
29 typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30 
//! \brief Exception type used by the depiction code
/*!
  The generateDepictionMatching2DStructure() and
  generateDepictionMatching3DStructure() functions are documented to throw
  this when acceptFailure is false and the molecule does not match the
  reference.

  The exception keeps its own copy of the message it was constructed with
  and hands it back from what().
*/
class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
 public:
  //! Construct from a C string.
  /*!
    \param msg the message to report; a null pointer is treated as an
               empty message (building a std::string from a null pointer
               would be undefined behaviour)
  */
  DepictException(const char *msg) : _msg(msg ? msg : "") {}

  //! Construct from a std::string.
  /*!
    \param msg the message to report; it is copied into the exception
  */
  DepictException(const std::string &msg) : _msg(msg) {}

  //! \return the stored message as a null-terminated string owned by this
  //!  exception object
  const char *what() const noexcept override { return _msg.c_str(); }

  ~DepictException() noexcept override = default;

 private:
  std::string _msg;  //!< the text returned by what()
};
41 
42 //! \brief Generate 2D coordinates (a depiction) for a molecule
43 /*!
44 
45   \param mol the molecule were are interested in
46 
47   \param coordMap a map of int to Point2D, between atom IDs and
48   their locations.  This is the container the user needs to fill if
49   he/she wants to specify coordinates for a portion of the molecule,
50   defaults to 0
51 
52   \param canonOrient canonicalize the orientation so that the long
53   axes align with the x-axis etc.
54 
55   \param clearConfs clear all existing conformations on the molecule
56   before adding the 2D coordinates instead of simply adding to the
57   list
58 
59   \param nFlipsPerSample - the number of rotatable bonds that are
60   flipped at random for each sample
61 
62   \param nSamples - the number of samples
63 
64   \param sampleSeed - seed for the random sampling process
65 
66   \param permuteDeg4Nodes - try permuting the drawing order of bonds around
67         atoms with four neighbors in order to improve the depiction
68 
69   \param forceRDKit - use RDKit to generate coordinates even if
70         preferCoordGen is set to true
71 
72   \return ID of the conformation added to the molecule containing the
73   2D coordinates
74 
75 */
76 RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(
77     RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
78     bool canonOrient = false, bool clearConfs = true,
79     unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
80     int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false);
81 
82 //! \brief Compute the 2D coordinates such the interatom distances
83 //   mimic those in a distance matrix
84 /*!
85 
86   This function generates 2D coordinates such that the inter-atom
87   distances mimic those specified via dmat. This is done by randomly
88   sampling(flipping) the rotatable bonds in the molecule and
89   evaluating a cost function which contains two components. The
90   first component is the sum of inverse of the squared inter-atom
91   distances, this helps in spreading the atoms far from each
92   other. The second component is the sum of squares of the
93   difference in distance between those in dmat and the generated
94   structure.  The user can adjust the relative importance of the two
95   components via a adjustable parameter (see below)
96 
97   ARGUMENTS:
98 
99   \param mol - molecule to generate coordinates for
100 
101   \param dmat - the distance matrix we want to mimic, this is a
102   symmetric N by N matrix where N is the number of atoms in mol. All
103   negative entries in dmat are ignored.
104 
105   \param canonOrient - canonicalize the orientation after the 2D
106   embedding is done
107 
108   \param clearConfs - clear any previously existing conformations on
109   mol before adding a conformation
110 
111   \param weightDistMat - A value between 0.0 and 1.0, this
112   determines the importance of mimicing the inter atoms
113   distances in dmat. (1.0 - weightDistMat) is the weight associated
114   to spreading out the structure (density) in the cost function
115 
116   \param nFlipsPerSample - the number of rotatable bonds that are
117   flipped at random for each sample
118 
119   \param nSamples - the number of samples
120 
121   \param sampleSeed - seed for the random sampling process
122 
123   \param permuteDeg4Nodes - try permuting the drawing order of bonds around
124         atoms with four neighbors in order to improve the depiction
125 
126   \param forceRDKit - use RDKit to generate coordinates even if
127         preferCoordGen is set to true
128 
129   \return ID of the conformation added to the molecule containing the
130   2D coordinates
131 
132 
133 */
134 RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(
135     RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
136     bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
137     unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
138     int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
139 
140 //! \brief Compute 2D coordinates where a piece of the molecule is
141 //   constrained to have the same coordinates as a reference.
142 /*!
143   This function generates a depiction for a molecule where a piece of the
144   molecule is constrained to have the same coordinates as a reference.
145 
146   This is useful for, for example, generating depictions of SAR data
147   sets so that the cores of the molecules are all oriented the same way.
148 
149   ARGUMENTS:
150 
151   \param mol -    the molecule to be aligned, this will come back
152                   with a single conformer.
153   \param reference -    a molecule with the reference atoms to align to;
154                         this should have a depiction.
155   \param confId -       (optional) the id of the reference conformation to use
156   \param referencePattern -  (optional) a query molecule to be used to
157                              generate the atom mapping between the molecule
158                              and the reference.
159   \param acceptFailure - (optional) if true, standard depictions will be
160                          generated for molecules that don't have a substructure
161                          match to the reference; if false, throws a
162                          DepictException.
163   \param forceRDKit - (optional) use RDKit to generate coordinates even if
164                       preferCoordGen is set to true
165   \param allowOptionalAttachments -  (optional) if true, terminal dummy atoms in
166                          the reference are ignored if they match an implicit
167                          hydrogen in the molecule, and a constrained
168                          depiction is still attempted
169   RETURNS:
170 
171   \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
172           the constrained depiction
173 */
174 RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(
175     RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
176     const RDKit::ROMol *referencePattern =
177         static_cast<const RDKit::ROMol *>(nullptr),
178     bool acceptFailure = false, bool forceRDKit = false,
179     bool allowOptionalAttachments = false);
180 
181 //! \brief Compute 2D coordinates where a piece of the molecule is
182 //   constrained to have the same coordinates as a reference.
183 /*!
184   This function generates a depiction for a molecule where a piece of the
185   molecule is constrained to have the same coordinates as a reference.
186 
187   This is useful for, for example, generating depictions of SAR data
188   sets so that the cores of the molecules are all oriented the same way.
189   This overload allow to specify the (referenceAtom, molAtom) index pairs
190   which should be matched as MatchVectType. Please note that the
191   vector can be shorter than the number of atoms in the reference.
192 
193   ARGUMENTS:
194 
195   \param mol -    the molecule to be aligned, this will come back
196                   with a single conformer.
197   \param reference -    a molecule with the reference atoms to align to;
198                         this should have a depiction.
199   \param refMatchVect -  a MatchVectType that will be used to
200                          generate the atom mapping between the molecule
201                          and the reference.
202   \param confId -       (optional) the id of the reference conformation to use
203   \param forceRDKit - (optional) use RDKit to generate coordinates even if
204                       preferCoordGen is set to true
205 */
206 RDKIT_DEPICTOR_EXPORT void generateDepictionMatching2DStructure(
207     RDKit::ROMol &mol, const RDKit::ROMol &reference,
208     const RDKit::MatchVectType &refMatchVect, int confId = -1,
209     bool forceRDKit = false);
210 
211 //! \brief Generate a 2D depiction for a molecule where all or part of
212 //   it mimics the coordinates of a 3D reference structure.
213 /*!
214   Generates a depiction for a molecule where a piece of the molecule
215   is constrained to have coordinates similar to those of a 3D reference
216   structure.
217 
218   ARGUMENTS:
219   \param mol - the molecule to be aligned, this will come back
220                with a single conformer containing 2D coordinates
221   \param reference - a molecule with the reference atoms to align to.
222                      By default this should be the same as mol, but with
223                      3D coordinates
224   \param confId - (optional) the id of the reference conformation to use
225   \param refPattern - (optional) a query molecule to map a subset of
226                       the reference onto the mol, so that only some of the
227                       atoms are aligned.
228   \param acceptFailure - (optional) if true, standard depictions will be
229                          generated
230                          for molecules that don't match the reference or the
231                          referencePattern; if false, throws a DepictException.
232   \param forceRDKit - (optional) use RDKit to generate coordinates even if
233                       preferCoordGen is set to true
234 */
235 RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(
236     RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
237     RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
238     bool forceRDKit = false);
239 };  // namespace RDDepict
240 
241 #endif
242