1 // 2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC 3 // 4 // @@ All Rights Reserved @@ 5 // This file is part of the RDKit. 6 // The contents are covered by the terms of the BSD license 7 // which is included in the file license.txt, found at the root 8 // of the RDKit source tree. 9 // 10 #include <RDGeneral/export.h> 11 #ifndef RD_SMILESPARSE_H 12 #define RD_SMILESPARSE_H 13 14 #include <GraphMol/RWMol.h> 15 #include <GraphMol/SanitException.h> 16 #include <string> 17 #include <exception> 18 #include <map> 19 20 namespace RDKit { 21 22 struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams { 23 int debugParse = 0; /**< enable debugging in the SMILES parser*/ 24 bool sanitize = true; /**< sanitize the molecule after building it */ 25 std::map<std::string, std::string> *replacements = 26 nullptr; /**< allows SMILES "macros" */ 27 bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/ 28 bool strictCXSMILES = 29 true; /**< throw an exception if the CXSMILES parsing fails */ 30 bool parseName = false; /**< parse (and set) the molecule name as well */ 31 bool removeHs = true; /**< remove Hs after constructing the molecule */ 32 bool useLegacyStereo = 33 true; /**< use the legacy stereochemistry perception code */ 34 }; 35 RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi, 36 const SmilesParserParams ¶ms); 37 38 RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi); 39 RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi); 40 41 //! Construct a molecule from a SMILES string 42 /*! 43 \param smi the SMILES to convert 44 \param debugParse toggles verbose debugging information from the parser 45 \param sanitize toggles H removal and sanitization of the molecule 46 \param replacements a string->string map of replacement strings. See below 47 for more information about replacements. 48 49 \return a pointer to the new molecule; the caller is responsible for free'ing 50 this. 51 52 The optional replacements map can be used to do string substitution of 53 abbreviations 54 in the input SMILES. The set of substitutions is repeatedly looped through 55 until 56 the string no longer changes. It is the responsibility of the caller to make 57 sure 58 that substitutions results in legal and sensible SMILES. 59 60 Examples of substitutions: 61 \code 62 CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC 63 C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC 64 C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC 65 \endcode 66 67 */ 68 inline RWMol *SmilesToMol( 69 const std::string &smi, int debugParse = 0, bool sanitize = true, 70 std::map<std::string, std::string> *replacements = nullptr) { 71 SmilesParserParams params; 72 params.debugParse = debugParse; 73 params.replacements = replacements; 74 if (sanitize) { 75 params.sanitize = true; 76 params.removeHs = true; 77 } else { 78 params.sanitize = false; 79 params.removeHs = false; 80 } 81 return SmilesToMol(smi, params); 82 }; 83 84 //! Construct a molecule from a SMARTS string 85 /*! 86 \param sma the SMARTS to convert 87 \param debugParse toggles verbose debugging information from the parser 88 \param mergeHs toggles merging H atoms in the SMARTS into neighboring 89 atoms 90 \param replacements a string->string map of replacement strings. 91 \see SmilesToMol for more information about replacements 92 93 \return a pointer to the new molecule; the caller is responsible for free'ing 94 this. 95 */ 96 RDKIT_SMILESPARSE_EXPORT RWMol *SmartsToMol( 97 const std::string &sma, int debugParse = 0, bool mergeHs = false, 98 std::map<std::string, std::string> *replacements = nullptr); 99 100 RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma); 101 RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma); 102 103 class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception { 104 public: SmilesParseException(const char * msg)105 SmilesParseException(const char *msg) : _msg(msg){}; SmilesParseException(const std::string msg)106 SmilesParseException(const std::string msg) : _msg(msg){}; what()107 const char *what() const noexcept override { return _msg.c_str(); }; ~SmilesParseException()108 ~SmilesParseException() noexcept {}; 109 110 private: 111 std::string _msg; 112 }; 113 114 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text, 115 size_t len) { 116 std::string smi(text, len); 117 RWMol *ptr = nullptr; 118 try { 119 ptr = SmilesToMol(smi); catch(const RDKit::MolSanitizeException &)120 } catch (const RDKit::MolSanitizeException &) { 121 ptr = nullptr; 122 } 123 return std::unique_ptr<RWMol>(ptr); 124 } 125 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text, 126 size_t len) { 127 std::string smi(text, len); 128 // no need for exception handling here: SmartsToMol() doesn't do 129 // sanitization 130 RWMol *ptr = SmartsToMol(smi); 131 return std::unique_ptr<RWMol>(ptr); 132 } 133 134 } // namespace RDKit 135 136 #endif 137