1 //
2 //  Copyright (C) 2018 Susan H. Leung
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
#include "MolStandardize.h"
#include "Charge.h"
#include "Fragment.h"
#include "Metal.h"
#include "Normalize.h"
#include "Tautomer.h"
#include <GraphMol/MolOps.h>
#include <GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/ROMol.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <iostream>
#include <memory>
23 
24 using namespace std;
25 namespace RDKit {
26 namespace MolStandardize {
// Default-constructed CleanupParameters instance at namespace scope.
// NOTE(review): presumably the default argument for the functions declared in
// MolStandardize.h -- confirm against the header.
const CleanupParameters defaultCleanupParameters;
28 
// Runs the standardization pipeline on a copy of `mol` and returns the result.
//
// Steps, in order: remove hydrogens, disconnect metals, normalize, reionize,
// assign stereochemistry, then copy the properties of the working copy onto
// the result. `mol` itself is never modified.
//
// Returns a newly allocated molecule; the caller takes ownership.
RWMol *cleanup(const RWMol &mol, const CleanupParameters &params) {
  RWMol m(mol);
  MolOps::removeHs(m);

  MolStandardize::MetalDisconnector md;
  md.disconnect(m);
  RWMOL_SPTR normalized(MolStandardize::normalize(&m, params));

  // Hold the reionized molecule in an owning pointer so that it is freed if
  // any of the remaining steps throws; ownership is handed to the caller only
  // once everything has succeeded.
  std::unique_ptr<RWMol> reionized(
      MolStandardize::reionize(normalized.get(), params));
  MolOps::assignStereochemistry(*reionized);

  // update properties of reionized using m.
  reionized->updateProps(m);

  return reionized.release();
}
44 
tautomerParent(RWMol & mol,const CleanupParameters & params)45 void tautomerParent(RWMol &mol, const CleanupParameters &params) {
46   RDUNUSED_PARAM(mol);
47   RDUNUSED_PARAM(params);
48   UNDER_CONSTRUCTION("Not yet implemented");
49 }
50 
51 // Return the fragment parent of a given molecule.
52 // The fragment parent is the largest organic covalent unit in the molecule.
53 //
fragmentParent(const RWMol & mol,const CleanupParameters & params,bool skip_standardize)54 RWMol *fragmentParent(const RWMol &mol, const CleanupParameters &params,
55                       bool skip_standardize) {
56   const RWMol *cleaned = nullptr;
57 
58   if (!skip_standardize) {
59     cleaned = cleanup(mol, params);
60   } else {
61     cleaned = &mol;
62   }
63 
64   LargestFragmentChooser lfragchooser(params.preferOrganic);
65   ROMol nm(*cleaned);
66   ROMOL_SPTR lfrag(lfragchooser.choose(nm));
67 
68   if (!skip_standardize) {
69     delete cleaned;
70   }
71 
72   return new RWMol(*lfrag);
73 }
74 
stereoParent(RWMol & mol,const CleanupParameters & params)75 void stereoParent(RWMol &mol, const CleanupParameters &params) {
76   RDUNUSED_PARAM(mol);
77   RDUNUSED_PARAM(params);
78   UNDER_CONSTRUCTION("Not yet implemented");
79 }
80 
isotopeParent(RWMol & mol,const CleanupParameters & params)81 void isotopeParent(RWMol &mol, const CleanupParameters &params) {
82   RDUNUSED_PARAM(mol);
83   RDUNUSED_PARAM(params);
84   UNDER_CONSTRUCTION("Not yet implemented");
85 }
86 
// Return the charge parent of a given molecule.
// The charge parent is the uncharged version of the fragment parent.
//
// The fragment parent is obtained via fragmentParent() (forwarding
// `skip_standardize`), run through an Uncharger configured with
// `params.doCanonical`, and the uncharged molecule is passed through
// cleanup() once more.
//
// Returns the molecule produced by cleanup(); the caller takes ownership.
RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params,
                    bool skip_standardize) {
  RWMOL_SPTR largestFragment(fragmentParent(mol, params, skip_standardize));
  ROMol fragmentCopy(*largestFragment);

  Uncharger neutralizer(params.doCanonical);
  ROMOL_SPTR neutral(neutralizer.uncharge(fragmentCopy));

  // cleanup() takes an RWMol, so build one from the uncharged ROMol.
  RWMol neutralEditable(*neutral);
  return cleanup(neutralEditable, params);
}
102 
superParent(RWMol & mol,const CleanupParameters & params)103 void superParent(RWMol &mol, const CleanupParameters &params) {
104   RDUNUSED_PARAM(mol);
105   RDUNUSED_PARAM(params);
106   UNDER_CONSTRUCTION("Not yet implemented");
107 }
108 
// Normalizes a copy of `*mol` with a Normalizer built from
// `params.normalizations` and `params.maxRestarts`.
//
// `mol` must not be null. Returns a newly allocated molecule owned by the
// caller, or nullptr if the Normalizer returned nullptr.
RWMol *normalize(const RWMol *mol, const CleanupParameters &params) {
  Normalizer normalizer(params.normalizations, params.maxRestarts);

  ROMol m(*mol);
  // Normalizer::normalize() hands back an ROMol*. Down-casting that pointer to
  // RWMol* is only valid if the object really is an RWMol, which cannot be
  // verified from here, so build a genuine RWMol from the result instead and
  // let the smart pointer free the intermediate.
  ROMOL_SPTR normalized(normalizer.normalize(m));
  if (!normalized) {
    return nullptr;
  }

  return new RWMol(*normalized);
}
117 
// Reionizes a copy of `*mol` with a Reionizer built from
// `params.acidbaseFile`.
//
// `mol` must not be null. Returns a newly allocated molecule owned by the
// caller, or nullptr if the Reionizer returned nullptr.
RWMol *reionize(const RWMol *mol, const CleanupParameters &params) {
  Reionizer reionizer(params.acidbaseFile);
  ROMol m(*mol);
  // Reionizer::reionize() hands back an ROMol*. Down-casting that pointer to
  // RWMol* is only valid if the object really is an RWMol, which cannot be
  // verified from here, so build a genuine RWMol from the result instead and
  // let the smart pointer free the intermediate.
  ROMOL_SPTR reionized(reionizer.reionize(m));
  if (!reionized) {
    return nullptr;
  }

  return new RWMol(*reionized);
}
125 
standardizeSmiles(const std::string & smiles)126 std::string standardizeSmiles(const std::string &smiles) {
127   RWMOL_SPTR mol(SmilesToMol(smiles, 0, false));
128   if (!mol) {
129     std::string message =
130         "SMILES Parse Error: syntax error for input: " + smiles;
131     throw ValueErrorException(message);
132   }
133 
134   CleanupParameters params;
135   RWMOL_SPTR cleaned(cleanup(*mol, params));
136   return MolToSmiles(*cleaned);
137 }
138 
enumerateTautomerSmiles(const std::string & smiles,const CleanupParameters & params)139 std::vector<std::string> enumerateTautomerSmiles(
140     const std::string &smiles, const CleanupParameters &params) {
141   std::shared_ptr<RWMol> mol(SmilesToMol(smiles, 0, false));
142   cleanup(*mol, params);
143   MolOps::sanitizeMol(*mol);
144 
145   TautomerEnumerator te(params);
146 
147   auto res = te.enumerate(*mol);
148 
149   return res.smiles();
150 }
151 
152 }  // end of namespace MolStandardize
153 }  // end of namespace RDKit
154