1 //
2 //  Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESPARSE_H
12 #define RD_SMILESPARSE_H
13 
14 #include <GraphMol/RWMol.h>
15 #include <GraphMol/SanitException.h>
16 #include <string>
17 #include <exception>
18 #include <map>
19 
20 namespace RDKit {
21 
//! Options that control how a SMILES string is converted into a molecule
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  //! enable debugging in the SMILES parser
  int debugParse{0};
  //! sanitize the molecule after building it
  bool sanitize{true};
  //! string->string map that allows SMILES "macros" (defaults to nullptr)
  std::map<std::string, std::string> *replacements{nullptr};
  //! recognize and parse CXSMILES
  bool allowCXSMILES{true};
  //! throw an exception if the CXSMILES parsing fails
  bool strictCXSMILES{true};
  //! parse (and set) the molecule name as well
  bool parseName{false};
  //! remove Hs after constructing the molecule
  bool removeHs{true};
  //! use the legacy stereochemistry perception code
  bool useLegacyStereo{true};
};
//! Construct a molecule from a SMILES string using explicit parser options
/*!
 \param smi     the SMILES to convert
 \param params  the options used while parsing; see SmilesParserParams

 \return a pointer to the new molecule.
 NOTE(review): the convenience overload of SmilesToMol() documents the caller
 as responsible for freeing the result; presumably the same applies here, but
 the implementation is not visible in this header - confirm.
 */
RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
                                            const SmilesParserParams &params);
37 
//! Build an Atom from a string.
//! NOTE(review): presumably \c smi is an atom-level SMILES; the failure
//! behavior and the ownership of the returned pointer are not visible in this
//! header - confirm against the implementation.
RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
//! Build a Bond from a string.
//! NOTE(review): presumably \c smi is a bond-level SMILES; the failure
//! behavior and the ownership of the returned pointer are not visible in this
//! header - confirm against the implementation.
RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
40 
41 //! Construct a molecule from a SMILES string
42 /*!
43  \param smi           the SMILES to convert
44  \param debugParse    toggles verbose debugging information from the parser
45  \param sanitize      toggles H removal and sanitization of the molecule
46  \param replacements  a string->string map of replacement strings. See below
47                       for more information about replacements.
48 
49  \return a pointer to the new molecule; the caller is responsible for free'ing
50  this.
51 
52  The optional replacements map can be used to do string substitution of
53  abbreviations
54  in the input SMILES. The set of substitutions is repeatedly looped through
55  until
56  the string no longer changes. It is the responsibility of the caller to make
57  sure
58  that substitutions results in legal and sensible SMILES.
59 
60  Examples of substitutions:
61  \code
62    CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
63    C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
64    C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
65  \endcode
66 
67  */
68 inline RWMol *SmilesToMol(
69     const std::string &smi, int debugParse = 0, bool sanitize = true,
70     std::map<std::string, std::string> *replacements = nullptr) {
71   SmilesParserParams params;
72   params.debugParse = debugParse;
73   params.replacements = replacements;
74   if (sanitize) {
75     params.sanitize = true;
76     params.removeHs = true;
77   } else {
78     params.sanitize = false;
79     params.removeHs = false;
80   }
81   return SmilesToMol(smi, params);
82 };
83 
//! Construct a molecule from a SMARTS string
/*!
 \param sma           the SMARTS to convert
 \param debugParse    toggles verbose debugging information from the parser
 \param mergeHs       toggles merging H atoms in the SMARTS into neighboring
                      atoms
 \param replacements  a string->string map of replacement strings.
                      \see SmilesToMol for more information about replacements

 \return a pointer to the new molecule; the caller is responsible for freeing
 it.
 */
RDKIT_SMILESPARSE_EXPORT RWMol *SmartsToMol(
    const std::string &sma, int debugParse = 0, bool mergeHs = false,
    std::map<std::string, std::string> *replacements = nullptr);
99 
//! Build an Atom from a string.
//! NOTE(review): presumably \c sma is an atom-level SMARTS; the failure
//! behavior and the ownership of the returned pointer are not visible in this
//! header - confirm against the implementation.
RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
//! Build a Bond from a string.
//! NOTE(review): presumably \c sma is a bond-level SMARTS; the failure
//! behavior and the ownership of the returned pointer are not visible in this
//! header - confirm against the implementation.
RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
102 
//! Exception type that carries a descriptive message
/*!
 The message is stored by value inside the exception object and is exposed
 through what().
 */
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! construct from a C string; a null pointer produces an empty message
  //! instead of invoking undefined behavior in the std::string constructor
  SmilesParseException(const char *msg) : _msg(msg ? msg : "") {}
  //! construct from a std::string; taken by const reference so the message
  //! is copied exactly once
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return a pointer to the stored, null-terminated message
  const char *what() const noexcept override { return _msg.c_str(); }
  // defaulted (not user-provided) so the implicit copy operations are not
  // deprecated; `override` ties it to the virtual destructor of std::exception
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;
};
113 
114 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
115                                                         size_t len) {
116   std::string smi(text, len);
117   RWMol *ptr = nullptr;
118   try {
119     ptr = SmilesToMol(smi);
catch(const RDKit::MolSanitizeException &)120   } catch (const RDKit::MolSanitizeException &) {
121     ptr = nullptr;
122   }
123   return std::unique_ptr<RWMol>(ptr);
124 }
125 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
126                                                         size_t len) {
127   std::string smi(text, len);
128   // no need for exception handling here: SmartsToMol() doesn't do
129   // sanitization
130   RWMol *ptr = SmartsToMol(smi);
131   return std::unique_ptr<RWMol>(ptr);
132 }
133 
134 }  // namespace RDKit
135 
136 #endif
137