1 //
2 //  Copyright (C) 2002-2020 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESWRITE_H_012020
12 #define RD_SMILESWRITE_H_012020
13 
14 #include <string>
15 #include <vector>
16 #include <memory>
17 
18 namespace RDKit {
19 class Atom;   // forward declaration: only used via pointer in this header
20 class Bond;   // forward declaration: only used via pointer in this header
21 class ROMol;  // forward declaration: only used via const reference here
22 namespace SmilesWrite {
23 
24 //! \brief returns the CXSMILES extension data for molecule \c mol as a string
25 RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol);
26 
27 //! \brief returns true if \c atomicNumber is in the SMILES organic subset
28 RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber);
29 
30 //! \brief returns the SMILES for an atom
31 /*!
32   \param atom : the atom to work with
33   \param doKekule : we're doing kekulized smiles (e.g. don't use
34       lower case for the atom label) [default false]
35   \param bondIn : the bond we came into the atom on (unused) [default nullptr]
36   \param allHsExplicit : if true, hydrogen counts will be provided for every
37       atom. [default false]
38   \param isomericSmiles : if true, generate isomeric SMILES [default true]
39 */
40 RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom,
41                                                    bool doKekule = false,
42                                                    const Bond *bondIn = nullptr,
43                                                    bool allHsExplicit = false,
44                                                    bool isomericSmiles = true);
45 
46 //! \brief returns the SMILES for a bond
47 /*!
48   \param bond : the bond to work with
49   \param atomToLeftIdx : the index of the atom preceding \c bond
50       in the SMILES [default -1]
51   \param doKekule : we're doing kekulized smiles (e.g. write out
52       bond orders for aromatic bonds) [default false]
53   \param allBondsExplicit : if true, write symbols for all bonds [default false]
54 */
55 RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(
56     const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false,
57     bool allBondsExplicit = false);
58 }  // namespace SmilesWrite
59 
60 //! \brief returns canonical SMILES for a molecule
61 /*!
62   \param mol : the molecule in question.
63   \param doIsomericSmiles : include stereochemistry and isotope information
64       in the SMILES
65   \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that
66       this will throw an exception if the molecule cannot be kekulized.
67   \param rootedAtAtom : make sure the SMILES starts at the specified atom.
68       The resulting SMILES is not, of course, canonical.
69   \param canonical : if false, no attempt will be made to canonicalize the
70       SMILES
71   \param allBondsExplicit : if true, symbols will be included for all bonds.
72   \param allHsExplicit : if true, hydrogen counts will be provided for every
73       atom.
74   \param doRandom : generate a randomized smiles string by randomly choosing
75       the priority to follow in the DFS traversal. [default false]
76  */
77 RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
78     const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false,
79     int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false,
80     bool allHsExplicit = false, bool doRandom = false);
81 
82 //! \brief returns a vector of random SMILES for a molecule (may contain
83 //! duplicates)
84 /*!
85   \param mol : the molecule in question.
86   \param numSmiles : the number of SMILES to return
87   \param randomSeed : if >0, will be used to seed the random number generator
88       [default 0]
89   \param doIsomericSmiles : include stereochemistry and isotope information
90       in the SMILES
91   \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
92   \param allBondsExplicit : if true, symbols will be included for all bonds.
93   \param allHsExplicit : if true, hydrogen counts are provided for every atom.
94  */
95 RDKIT_SMILESPARSE_EXPORT std::vector<std::string> MolToRandomSmilesVect(
96     const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
97     bool doIsomericSmiles = true, bool doKekule = false,
98     bool allBondsExplicit = false, bool allHsExplicit = false);
99 
100 //! \brief returns canonical SMILES for part of a molecule
101 /*!
102   \param mol : the molecule in question.
103   \param atomsToUse : indices of the atoms in the fragment
104   \param bondsToUse : indices of the bonds in the fragment. If this is not
105       provided (nullptr, the default), all bonds between the atoms in
106       atomsToUse will be included
107   \param atomSymbols : symbols to use for the atoms in the output SMILES
108   \param bondSymbols : symbols to use for the bonds in the output SMILES
109   \param doIsomericSmiles : include stereochemistry and isotope information
110       in the SMILES
111   \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
112   \param rootedAtAtom : make sure the SMILES starts at the specified atom.
113       The resulting SMILES is not, of course, canonical.
114   \param canonical : if false, no attempt will be made to canonicalize the
115       SMILES
116   \param allBondsExplicit : if true, symbols will be included for all bonds.
117   \param allHsExplicit : if true, hydrogen counts will be provided for every
118       atom.
119
120   \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
121
122   \b NOTE: unlike MolToSmiles(), this function does not take a \c doRandom
123   argument.
124  */
125 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(
126     const ROMol &mol, const std::vector<int> &atomsToUse,
127     const std::vector<int> *bondsToUse = nullptr,
128     const std::vector<std::string> *atomSymbols = nullptr,
129     const std::vector<std::string> *bondSymbols = nullptr,
130     bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
131     bool canonical = true, bool allBondsExplicit = false,
132     bool allHsExplicit = false);
133 
134 //! \brief returns canonical CXSMILES for a molecule
135 /*!
136   \param mol : the molecule in question.
137   \param doIsomericSmiles : include stereochemistry and isotope information
138       in the SMILES
139   \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
140   \param rootedAtAtom : make sure the SMILES starts at the specified atom.
141       The resulting SMILES is not, of course, canonical.
142   \param canonical : if false, no attempt is made to canonicalize the SMILES
143   \param allBondsExplicit : if true, symbols will be included for all bonds.
144   \param allHsExplicit : if true, hydrogen counts are provided for every atom.
145   \param doRandom : generate a randomized smiles string by randomly choosing
146       the priority to follow in the DFS traversal. [default false]
147  */
148 RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(
149     const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false,
150     int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false,
151     bool allHsExplicit = false, bool doRandom = false);
152 
153 //! \brief returns canonical CXSMILES for part of a molecule
154 /*!
155   \param mol : the molecule in question.
156   \param atomsToUse : indices of the atoms in the fragment
157   \param bondsToUse : indices of the bonds in the fragment. If this is not
158       provided (nullptr, the default), all bonds between the atoms in
159       atomsToUse will be included
160   \param atomSymbols : symbols to use for the atoms in the output SMILES
161   \param bondSymbols : symbols to use for the bonds in the output SMILES
162   \param doIsomericSmiles : include stereochemistry and isotope information
163       in the SMILES
164   \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
165   \param rootedAtAtom : make sure the SMILES starts at the specified atom.
166       The resulting SMILES is not, of course, canonical.
167   \param canonical : if false, no attempt will be made to canonicalize the
168       SMILES
169   \param allBondsExplicit : if true, symbols will be included for all bonds.
170   \param allHsExplicit : if true, hydrogen counts will be provided for every
171       atom.
172
173   \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
174
175   \b NOTE: unlike MolToCXSmiles(), this function does not take a \c doRandom
176   argument.
177  */
178 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(
179     const ROMol &mol, const std::vector<int> &atomsToUse,
180     const std::vector<int> *bondsToUse = nullptr,
181     const std::vector<std::string> *atomSymbols = nullptr,
182     const std::vector<std::string> *bondSymbols = nullptr,
183     bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
184     bool canonical = true, bool allBondsExplicit = false,
185     bool allHsExplicit = false);
186 
187 }  // namespace RDKit
188 #endif
189