1 // 2 // Copyright (C) 2002-2020 Greg Landrum and Rational Discovery LLC 3 // 4 // @@ All Rights Reserved @@ 5 // This file is part of the RDKit. 6 // The contents are covered by the terms of the BSD license 7 // which is included in the file license.txt, found at the root 8 // of the RDKit source tree. 9 // 10 #include <RDGeneral/export.h> 11 #ifndef RD_SMILESWRITE_H_012020 12 #define RD_SMILESWRITE_H_012020 13 14 #include <string> 15 #include <vector> 16 #include <memory> 17 18 namespace RDKit { 19 class Atom; 20 class Bond; 21 class ROMol; 22 namespace SmilesWrite { 23 24 //! \brief returns the cxsmiles data for a molecule 25 RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol); 26 27 //! \brief returns true if the atom number is in the SMILES organic subset 28 RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber); 29 30 //! \brief returns the SMILES for an atom 31 /*! 32 \param atom : the atom to work with 33 \param doKekule : we're doing kekulized smiles (e.g. don't use 34 lower case for the atom label) 35 \param bondIn : the bond we came into the atom on (unused) 36 \param allHsExplicit : if true, hydrogen counts will be provided for every 37 atom. 38 \param isomericSmiles : if true, isomeric SMILES will be generated 39 */ 40 RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, 41 bool doKekule = false, 42 const Bond *bondIn = nullptr, 43 bool allHsExplicit = false, 44 bool isomericSmiles = true); 45 46 //! \brief returns the SMILES for a bond 47 /*! 48 \param bond : the bond to work with 49 \param atomToLeftIdx : the index of the atom preceding \c bond 50 in the SMILES 51 \param doKekule : we're doing kekulized smiles (e.g. write out 52 bond orders for aromatic bonds) 53 \param allBondsExplicit : if true, symbols will be included for all bonds. 54 */ 55 RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles( 56 const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false, 57 bool allBondsExplicit = false); 58 } // namespace SmilesWrite 59 60 //! \brief returns canonical SMILES for a molecule 61 /*! 62 \param mol : the molecule in question. 63 \param doIsomericSmiles : include stereochemistry and isotope information 64 in the SMILES 65 66 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) NOTE that 67 this will throw an exception if the molecule cannot be kekulized. 68 69 \param rootedAtAtom : make sure the SMILES starts at the specified atom. 70 The resulting SMILES is not, of course, canonical. 71 \param canonical : if false, no attempt will be made to canonicalize the 72 SMILES 73 \param allBondsExplicit : if true, symbols will be included for all bonds. 74 \param allHsExplicit : if true, hydrogen counts will be provided for every 75 atom. 76 */ 77 RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles( 78 const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false, 79 int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false, 80 bool allHsExplicit = false, bool doRandom = false); 81 82 //! \brief returns a vector of random SMILES for a molecule (may contain 83 //! duplicates) 84 /*! 85 \param mol : the molecule in question. 86 \param numSmiles : the number of SMILES to return 87 \param randomSeed : if >0, will be used to seed the random number generator 88 \param doIsomericSmiles : include stereochemistry and isotope information 89 in the SMILES 90 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) 91 \param allBondsExplicit : if true, symbols will be included for all bonds. 92 \param allHsExplicit : if true, hydrogen counts will be provided for every 93 atom. 94 */ 95 RDKIT_SMILESPARSE_EXPORT std::vector<std::string> MolToRandomSmilesVect( 96 const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0, 97 bool doIsomericSmiles = true, bool doKekule = false, 98 bool allBondsExplicit = false, bool allHsExplicit = false); 99 100 //! \brief returns canonical SMILES for part of a molecule 101 /*! 102 \param mol : the molecule in question. 103 \param atomsToUse : indices of the atoms in the fragment 104 \param bondsToUse : indices of the bonds in the fragment. If this is not 105 provided, 106 all bonds between the atoms in atomsToUse will be included 107 \param atomSymbols : symbols to use for the atoms in the output SMILES 108 \param bondSymbols : symbols to use for the bonds in the output SMILES 109 \param doIsomericSmiles : include stereochemistry and isotope information 110 in the SMILES 111 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) 112 \param rootedAtAtom : make sure the SMILES starts at the specified atom. 113 The resulting SMILES is not, of course, canonical. 114 \param canonical : if false, no attempt will be made to canonicalize the 115 SMILES 116 \param allBondsExplicit : if true, symbols will be included for all bonds. 117 \param allHsExplicit : if true, hydrogen counts will be provided for every 118 atom. 119 \param doRandom : generate a randomized smiles string by randomly choosing 120 the priority to follow in the DFS traversal. [default false] 121 122 \b NOTE: the bondSymbols are *not* currently used in the canonicalization. 123 124 */ 125 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles( 126 const ROMol &mol, const std::vector<int> &atomsToUse, 127 const std::vector<int> *bondsToUse = nullptr, 128 const std::vector<std::string> *atomSymbols = nullptr, 129 const std::vector<std::string> *bondSymbols = nullptr, 130 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1, 131 bool canonical = true, bool allBondsExplicit = false, 132 bool allHsExplicit = false); 133 134 //! \brief returns canonical CXSMILES for a molecule 135 /*! 136 \param mol : the molecule in question. 137 \param doIsomericSmiles : include stereochemistry and isotope information 138 in the SMILES 139 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) 140 \param rootedAtAtom : make sure the SMILES starts at the specified atom. 141 The resulting SMILES is not, of course, canonical. 142 \param canonical : if false, no attempt will be made to canonicalize the 143 SMILES 144 \param allBondsExplicit : if true, symbols will be included for all bonds. 145 \param allHsExplicit : if true, hydrogen counts will be provided for every 146 atom. 147 */ 148 RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles( 149 const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false, 150 int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false, 151 bool allHsExplicit = false, bool doRandom = false); 152 153 //! \brief returns canonical CXSMILES for part of a molecule 154 /*! 155 \param mol : the molecule in question. 156 \param atomsToUse : indices of the atoms in the fragment 157 \param bondsToUse : indices of the bonds in the fragment. If this is not 158 provided, 159 all bonds between the atoms in atomsToUse will be included 160 \param atomSymbols : symbols to use for the atoms in the output SMILES 161 \param bondSymbols : symbols to use for the bonds in the output SMILES 162 \param doIsomericSmiles : include stereochemistry and isotope information 163 in the SMILES 164 \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds) 165 \param rootedAtAtom : make sure the SMILES starts at the specified atom. 166 The resulting SMILES is not, of course, canonical. 167 \param canonical : if false, no attempt will be made to canonicalize the 168 SMILES 169 \param allBondsExplicit : if true, symbols will be included for all bonds. 170 \param allHsExplicit : if true, hydrogen counts will be provided for every 171 atom. 172 \param doRandom : generate a randomized smiles string by randomly choosing 173 the priority to follow in the DFS traversal. [default false] 174 175 \b NOTE: the bondSymbols are *not* currently used in the canonicalization. 176 177 */ 178 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles( 179 const ROMol &mol, const std::vector<int> &atomsToUse, 180 const std::vector<int> *bondsToUse = nullptr, 181 const std::vector<std::string> *atomSymbols = nullptr, 182 const std::vector<std::string> *bondSymbols = nullptr, 183 bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1, 184 bool canonical = true, bool allBondsExplicit = false, 185 bool allHsExplicit = false); 186 187 } // namespace RDKit 188 #endif 189