1 //
2 // Copyright (C) 2018 Susan H. Leung
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
#include "MolStandardize.h"
#include "Metal.h"
#include "Normalize.h"
#include "Tautomer.h"
#include "Fragment.h"
#include <GraphMol/RDKitBase.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <GraphMol/ROMol.h>
#include <GraphMol/MolOps.h>
#include <GraphMol/MolStandardize/TransformCatalog/TransformCatalogParams.h>
#include "Charge.h"
#include <GraphMol/SmilesParse/SmilesWrite.h>
#include <GraphMol/SmilesParse/SmilesParse.h>
23
24 using namespace std;
25 namespace RDKit {
26 namespace MolStandardize {
// A default-constructed, immutable CleanupParameters instance.
// NOTE(review): presumably declared in MolStandardize.h and used as the
// default argument of the standardization functions - confirm there.
const CleanupParameters defaultCleanupParameters;
28
// Runs the standardization pipeline on a copy of `mol` and returns the result
// as a newly allocated molecule; `mol` itself is never modified.
//
// Pipeline, in order: MolOps::removeHs, MetalDisconnector::disconnect,
// normalize(), reionize(), MolOps::assignStereochemistry, and finally
// updateProps() from the working copy onto the result.
//
// \param mol     the molecule to standardize
// \param params  settings forwarded to normalize() and reionize()
// \return a heap-allocated molecule that the caller must delete (or hand to a
//         smart pointer)
RWMol *cleanup(const RWMol &mol, const CleanupParameters &params) {
  // Work on a private copy so the caller's molecule stays untouched.
  RWMol m(mol);
  MolOps::removeHs(m);

  MolStandardize::MetalDisconnector md;
  md.disconnect(m);

  // The normalized intermediate is only needed as input to reionize().
  RWMOL_SPTR normalized(MolStandardize::normalize(&m, params));

  // Own the result while the remaining steps run: if one of them throws, the
  // molecule is released instead of being leaked.
  std::unique_ptr<RWMol> reionized(
      MolStandardize::reionize(normalized.get(), params));
  MolOps::assignStereochemistry(*reionized);

  // update properties of reionized using m.
  reionized->updateProps(m);

  // Every step succeeded: transfer ownership to the caller.
  return reionized.release();
}
44
tautomerParent(RWMol & mol,const CleanupParameters & params)45 void tautomerParent(RWMol &mol, const CleanupParameters ¶ms) {
46 RDUNUSED_PARAM(mol);
47 RDUNUSED_PARAM(params);
48 UNDER_CONSTRUCTION("Not yet implemented");
49 }
50
51 // Return the fragment parent of a given molecule.
52 // The fragment parent is the largest organic covalent unit in the molecule.
53 //
fragmentParent(const RWMol & mol,const CleanupParameters & params,bool skip_standardize)54 RWMol *fragmentParent(const RWMol &mol, const CleanupParameters ¶ms,
55 bool skip_standardize) {
56 const RWMol *cleaned = nullptr;
57
58 if (!skip_standardize) {
59 cleaned = cleanup(mol, params);
60 } else {
61 cleaned = &mol;
62 }
63
64 LargestFragmentChooser lfragchooser(params.preferOrganic);
65 ROMol nm(*cleaned);
66 ROMOL_SPTR lfrag(lfragchooser.choose(nm));
67
68 if (!skip_standardize) {
69 delete cleaned;
70 }
71
72 return new RWMol(*lfrag);
73 }
74
stereoParent(RWMol & mol,const CleanupParameters & params)75 void stereoParent(RWMol &mol, const CleanupParameters ¶ms) {
76 RDUNUSED_PARAM(mol);
77 RDUNUSED_PARAM(params);
78 UNDER_CONSTRUCTION("Not yet implemented");
79 }
80
isotopeParent(RWMol & mol,const CleanupParameters & params)81 void isotopeParent(RWMol &mol, const CleanupParameters ¶ms) {
82 RDUNUSED_PARAM(mol);
83 RDUNUSED_PARAM(params);
84 UNDER_CONSTRUCTION("Not yet implemented");
85 }
86
// Return the charge parent of a given molecule.
// The charge parent is the uncharged version of the fragment parent.
//
// Steps: compute fragmentParent(), run an Uncharger (configured with
// params.doCanonical) on a copy of it, then pass the result through cleanup().
// The returned molecule is heap-allocated and owned by the caller.
RWMol *chargeParent(const RWMol &mol, const CleanupParameters &params,
                    bool skip_standardize) {
  RWMOL_SPTR largestFragment(fragmentParent(mol, params, skip_standardize));
  ROMol fragmentCopy(*largestFragment);

  Uncharger neutralizer(params.doCanonical);
  ROMOL_SPTR neutral(neutralizer.uncharge(fragmentCopy));

  // cleanup() takes an RWMol, so build one from the uncharged ROMol.
  const RWMol neutralEditable(*neutral);
  return cleanup(neutralEditable, params);
}
102
superParent(RWMol & mol,const CleanupParameters & params)103 void superParent(RWMol &mol, const CleanupParameters ¶ms) {
104 RDUNUSED_PARAM(mol);
105 RDUNUSED_PARAM(params);
106 UNDER_CONSTRUCTION("Not yet implemented");
107 }
108
// Applies a Normalizer, built from params.normalizations and
// params.maxRestarts, to a copy of `*mol`.
//
// \param mol     the molecule to normalize; must not be null
// \param params  supplies the normalization settings
// \return a heap-allocated molecule owned by the caller, or nullptr if the
//         Normalizer returned nullptr
// \throws ValueErrorException if `mol` is null
RWMol *normalize(const RWMol *mol, const CleanupParameters &params) {
  if (!mol) {
    throw ValueErrorException("normalize: molecule pointer is null");
  }

  Normalizer normalizer(params.normalizations, params.maxRestarts);

  ROMol m(*mol);
  ROMOL_SPTR normalized(normalizer.normalize(m));
  if (!normalized) {
    return nullptr;
  }

  // Normalizer::normalize() hands back an ROMol*. Downcasting that pointer with
  // static_cast is only defined when the object really is an RWMol, so build a
  // genuine RWMol from the result instead; the intermediate is freed by the
  // smart pointer.
  return new RWMol(*normalized);
}
117
// Applies a Reionizer, built from params.acidbaseFile, to a copy of `*mol`.
//
// \param mol     the molecule to reionize; must not be null
// \param params  supplies the acid/base definition file
// \return a heap-allocated molecule owned by the caller, or nullptr if the
//         Reionizer returned nullptr
// \throws ValueErrorException if `mol` is null
RWMol *reionize(const RWMol *mol, const CleanupParameters &params) {
  if (!mol) {
    throw ValueErrorException("reionize: molecule pointer is null");
  }

  Reionizer reionizer(params.acidbaseFile);

  ROMol m(*mol);
  ROMOL_SPTR reionized(reionizer.reionize(m));
  if (!reionized) {
    return nullptr;
  }

  // Reionizer::reionize() hands back an ROMol*. Downcasting that pointer with
  // static_cast is only defined when the object really is an RWMol, so build a
  // genuine RWMol from the result instead; the intermediate is freed by the
  // smart pointer.
  return new RWMol(*reionized);
}
125
standardizeSmiles(const std::string & smiles)126 std::string standardizeSmiles(const std::string &smiles) {
127 RWMOL_SPTR mol(SmilesToMol(smiles, 0, false));
128 if (!mol) {
129 std::string message =
130 "SMILES Parse Error: syntax error for input: " + smiles;
131 throw ValueErrorException(message);
132 }
133
134 CleanupParameters params;
135 RWMOL_SPTR cleaned(cleanup(*mol, params));
136 return MolToSmiles(*cleaned);
137 }
138
enumerateTautomerSmiles(const std::string & smiles,const CleanupParameters & params)139 std::vector<std::string> enumerateTautomerSmiles(
140 const std::string &smiles, const CleanupParameters ¶ms) {
141 std::shared_ptr<RWMol> mol(SmilesToMol(smiles, 0, false));
142 cleanup(*mol, params);
143 MolOps::sanitizeMol(*mol);
144
145 TautomerEnumerator te(params);
146
147 auto res = te.enumerate(*mol);
148
149 return res.smiles();
150 }
151
152 } // end of namespace MolStandardize
153 } // end of namespace RDKit
154