1 //
2 //  Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
3 //  All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 //       notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 //       copyright notice, this list of conditions and the following
13 //       disclaimer in the documentation and/or other materials provided
14 //       with the distribution.
15 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 //       nor the names of its contributors may be used to endorse or promote
17 //       products derived from this software without specific prior written
18 //       permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 
33 #include <GraphMol/ChemReactions/Reaction.h>
34 #include <GraphMol/ChemReactions/ReactionUtils.h>
35 #include <GraphMol/Substruct/SubstructMatch.h>
36 #include <GraphMol/ROMol.h>
37 #include <GraphMol/Descriptors/MolDescriptors.h>
38 #include <cmath>
39 
40 namespace RDKit {
41 
getStartIterator(const ChemicalReaction & rxn,ReactionMoleculeType t)42 MOL_SPTR_VECT::const_iterator getStartIterator(const ChemicalReaction &rxn,
43                                                ReactionMoleculeType t) {
44   MOL_SPTR_VECT::const_iterator begin;
45   if (t == Reactant) {
46     begin = rxn.beginReactantTemplates();
47   }
48   if (t == Product) {
49     begin = rxn.beginProductTemplates();
50     ;
51   }
52   if (t == Agent) {
53     begin = rxn.beginAgentTemplates();
54   }
55   return begin;
56 }
57 
getEndIterator(const ChemicalReaction & rxn,ReactionMoleculeType t)58 MOL_SPTR_VECT::const_iterator getEndIterator(const ChemicalReaction &rxn,
59                                              ReactionMoleculeType t) {
60   MOL_SPTR_VECT::const_iterator end;
61   if (t == Reactant) {
62     end = rxn.endReactantTemplates();
63   }
64   if (t == Product) {
65     end = rxn.endProductTemplates();
66     ;
67   }
68   if (t == Agent) {
69     end = rxn.endAgentTemplates();
70   }
71   return end;
72 }
73 
74 namespace {
75 
hasReactionMoleculeTemplateSubstructMatch(const RDKit::ChemicalReaction & rxn,const RDKit::ChemicalReaction & query_rxn,RDKit::ReactionMoleculeType t)76 bool hasReactionMoleculeTemplateSubstructMatch(
77     const RDKit::ChemicalReaction &rxn,
78     const RDKit::ChemicalReaction &query_rxn, RDKit::ReactionMoleculeType t) {
79   for (auto begin = getStartIterator(rxn, t); begin != getEndIterator(rxn, t);
80        ++begin) {
81     for (auto begin_query = getStartIterator(query_rxn, t);
82          begin_query != getEndIterator(query_rxn, t); ++begin_query) {
83       MatchVectType tvect;
84       if (SubstructMatch(*begin->get(), *begin_query->get(), tvect)) {
85         return true;
86       }
87     }
88   }
89   return false;
90 }
91 }  // namespace
92 
hasReactantTemplateSubstructMatch(const ChemicalReaction & rxn,const ChemicalReaction & query_rxn)93 bool hasReactantTemplateSubstructMatch(const ChemicalReaction &rxn,
94                                        const ChemicalReaction &query_rxn) {
95   if (rxn.getNumReactantTemplates() < query_rxn.getNumReactantTemplates()) {
96     return false;
97   }
98   if (query_rxn.getNumReactantTemplates() == 0) {
99     return true;
100   }
101   return hasReactionMoleculeTemplateSubstructMatch(rxn, query_rxn, Reactant);
102 }
103 
hasProductTemplateSubstructMatch(const ChemicalReaction & rxn,const ChemicalReaction & query_rxn)104 bool hasProductTemplateSubstructMatch(const ChemicalReaction &rxn,
105                                       const ChemicalReaction &query_rxn) {
106   if (rxn.getNumProductTemplates() < query_rxn.getNumProductTemplates()) {
107     return false;
108   }
109   if (query_rxn.getNumProductTemplates() == 0) {
110     return true;
111   }
112   return hasReactionMoleculeTemplateSubstructMatch(rxn, query_rxn, Product);
113 }
114 
hasAgentTemplateSubstructMatch(const ChemicalReaction & rxn,const ChemicalReaction & query_rxn)115 bool hasAgentTemplateSubstructMatch(const ChemicalReaction &rxn,
116                                     const ChemicalReaction &query_rxn) {
117   if (rxn.getNumAgentTemplates() < query_rxn.getNumAgentTemplates()) {
118     return false;
119   }
120   if (query_rxn.getNumAgentTemplates() == 0) {
121     return true;
122   }
123   return hasReactionMoleculeTemplateSubstructMatch(rxn, query_rxn, Agent);
124 }
125 
hasReactionSubstructMatch(const ChemicalReaction & rxn,const ChemicalReaction & query_rxn,bool includeAgents)126 bool hasReactionSubstructMatch(const ChemicalReaction &rxn,
127                                const ChemicalReaction &query_rxn,
128                                bool includeAgents) {
129   if (includeAgents) {
130     return (hasReactantTemplateSubstructMatch(rxn, query_rxn) &&
131             hasProductTemplateSubstructMatch(rxn, query_rxn) &&
132             hasAgentTemplateSubstructMatch(rxn, query_rxn));
133   }
134   return (hasReactantTemplateSubstructMatch(rxn, query_rxn) &&
135           hasProductTemplateSubstructMatch(rxn, query_rxn));
136 }
137 
hasReactionAtomMapping(const ChemicalReaction & rxn)138 bool hasReactionAtomMapping(const ChemicalReaction &rxn) {
139   auto begin = getStartIterator(rxn, Reactant);
140   auto end = getEndIterator(rxn, Reactant);
141   for (; begin != end; ++begin) {
142     const ROMol &reactant = *begin->get();
143     if (MolOps::getNumAtomsWithDistinctProperty(
144             reactant, common_properties::molAtomMapNumber)) {
145       return true;
146     }
147   }
148   begin = getStartIterator(rxn, Product);
149   end = getEndIterator(rxn, Product);
150   for (; begin != end; ++begin) {
151     const ROMol &reactant = *begin->get();
152     if (MolOps::getNumAtomsWithDistinctProperty(
153             reactant, common_properties::molAtomMapNumber)) {
154       return true;
155     }
156   }
157   return false;
158 }
159 
isReactionTemplateMoleculeAgent(const ROMol & mol,double agentThreshold)160 bool isReactionTemplateMoleculeAgent(const ROMol &mol, double agentThreshold) {
161   unsigned numMappedAtoms = MolOps::getNumAtomsWithDistinctProperty(
162       mol, common_properties::molAtomMapNumber);
163   unsigned numAtoms = mol.getNumHeavyAtoms();
164   if (numAtoms > 0 &&
165       static_cast<double>(numMappedAtoms) / static_cast<double>(numAtoms) >=
166           agentThreshold) {
167     return false;
168   }
169   return true;
170 }
171 
172 namespace {
173 
getMappingNumAtomIdxMapReactants(const ChemicalReaction & rxn,std::map<int,Atom * > & reactantAtomMapping)174 void getMappingNumAtomIdxMapReactants(
175     const ChemicalReaction &rxn, std::map<int, Atom *> &reactantAtomMapping) {
176   for (auto reactIt = rxn.beginReactantTemplates();
177        reactIt != rxn.endReactantTemplates(); ++reactIt) {
178     for (const auto atom : (*reactIt)->atoms()) {
179       int reactMapNum;
180       if (atom->getPropIfPresent(common_properties::molAtomMapNumber,
181                                  reactMapNum)) {
182         reactantAtomMapping[reactMapNum] = atom;
183       }
184     }
185   }
186 }
187 
188 // returns the atom map numbers of the neighbors of atom1 in the order in which
189 // the neighbors are attached. -1 in the vector for unmapped atoms,
190 // -1 at the end of the vector if the degree of atom1 < the degree of atom 2
getNbrOrder(const Atom * atom1,const Atom * atom2)191 std::pair<unsigned int, std::vector<int>> getNbrOrder(const Atom *atom1,
192                                                       const Atom *atom2) {
193   std::vector<int> order;
194   order.reserve(atom1->getDegree());
195   unsigned nUnmapped = 0;
196   for (const auto &nbri : boost::make_iterator_range(
197            atom1->getOwningMol().getAtomNeighbors(atom1))) {
198     const auto &nbrAtom = atom1->getOwningMol()[nbri];
199     if (nbrAtom->getAtomMapNum() > 0) {
200       order.push_back(nbrAtom->getAtomMapNum());
201     } else {
202       order.push_back(-1);
203       ++nUnmapped;
204     }
205   }
206   if (atom1->getDegree() < atom2->getDegree()) {
207     order.push_back(-1);
208     ++nUnmapped;
209   }
210   return {nUnmapped, order};
211 }
212 
// Verifies that every non-negative entry of `refOrder` can be accounted for in
// `order`. An entry already present in `order` is fine as is. A missing entry
// is written into the first -1 slot of `order`, but only when `nUnmapped` is
// non-zero and such a slot still exists; otherwise the check fails.
//
// Negative entries of `refOrder` are ignored. `order` may be modified even
// when the function returns false. Returns true if all entries were found or
// placed.
bool checkOrderOverlap(std::vector<int> &order, unsigned int nUnmapped,
                       const std::vector<int> &refOrder) {
  for (const int wanted : refOrder) {
    if (wanted < 0) {
      continue;  // unmapped reference entry: imposes no constraint
    }
    if (std::find(order.begin(), order.end(), wanted) != order.end()) {
      continue;  // already present
    }
    if (!nUnmapped) {
      return false;  // missing and there never was a free slot
    }
    const auto freeSlot = std::find(order.begin(), order.end(), -1);
    if (freeSlot == order.end()) {
      return false;  // missing and all free slots are used up
    }
    *freeSlot = wanted;
  }
  return true;
}
238 
239 }  // namespace
240 
241 // returns -1 if we don't find a good match
countSwapsBetweenReactantAndProduct(const Atom * reactAtom,const Atom * prodAtom)242 int countSwapsBetweenReactantAndProduct(const Atom *reactAtom,
243                                         const Atom *prodAtom) {
244   PRECONDITION(reactAtom, "bad atom");
245   PRECONDITION(prodAtom, "bad atom");
246   if (reactAtom->getDegree() >= 3 && prodAtom->getDegree() >= 3 &&
247       std::abs(static_cast<int>(prodAtom->getDegree()) -
248                static_cast<int>(reactAtom->getDegree())) <= 1) {
249     std::vector<int> reactOrder;
250     unsigned int nReactUnmapped;
251     std::tie(nReactUnmapped, reactOrder) = getNbrOrder(reactAtom, prodAtom);
252     if (nReactUnmapped <= 1) {
253       std::vector<int> prodOrder;
254       unsigned int nProdUnmapped;
255       std::tie(nProdUnmapped, prodOrder) = getNbrOrder(prodAtom, reactAtom);
256       if (nProdUnmapped <= 1) {
257         // check that each element of the product mappings is
258         // in the reactant mappings
259         if (checkOrderOverlap(reactOrder, nReactUnmapped, prodOrder)) {
260           // found a match for all the product atoms, what about all
261           // the reactant atoms?
262           if (checkOrderOverlap(prodOrder, nProdUnmapped, reactOrder)) {
263             return countSwapsToInterconvert(reactOrder, prodOrder);
264           }
265         }
266       }
267     }
268   }
269   return -1;
270 }
271 
updateProductsStereochem(ChemicalReaction * rxn)272 void updateProductsStereochem(ChemicalReaction *rxn) {
273   std::map<int, Atom *> reactantMapping;
274   getMappingNumAtomIdxMapReactants(*rxn, reactantMapping);
275   for (MOL_SPTR_VECT::const_iterator prodIt = rxn->beginProductTemplates();
276        prodIt != rxn->endProductTemplates(); ++prodIt) {
277     for (auto prodAtom : (*prodIt)->atoms()) {
278       if (prodAtom->hasProp(common_properties::molInversionFlag)) {
279         continue;
280       }
281       if (!prodAtom->hasProp(common_properties::molAtomMapNumber)) {
282         // if we have stereochemistry specified, it's automatically
283         // creating stereochem:
284         prodAtom->setProp(common_properties::molInversionFlag, 4);
285         continue;
286       }
287       int mapNum;
288       prodAtom->getProp(common_properties::molAtomMapNumber, mapNum);
289       if (reactantMapping.find(mapNum) != reactantMapping.end()) {
290         const auto reactAtom = reactantMapping[mapNum];
291         if (prodAtom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
292             prodAtom->getChiralTag() != Atom::CHI_OTHER) {
293           if (reactAtom->getChiralTag() != Atom::CHI_UNSPECIFIED &&
294               reactAtom->getChiralTag() != Atom::CHI_OTHER) {
295             // both have stereochem specified, we're either preserving
296             // or inverting
297             if (reactAtom->getChiralTag() == prodAtom->getChiralTag()) {
298               prodAtom->setProp(common_properties::molInversionFlag, 2);
299             } else {
300               // FIX: this is technically fragile: it should be checking
301               // if the atoms both have tetrahedral chirality. However,
302               // at the moment that's the only chirality available, so
303               // there's no need to go monkeying around.
304               prodAtom->setProp(common_properties::molInversionFlag, 1);
305             }
306 
307             // FIX this should move out into a separate function
308             // last thing to check here: if the ordering of the bonds
309             // around the atom changed from reactants->products then we
310             // may need to adjust the inversion flag
311             int nSwaps =
312                 countSwapsBetweenReactantAndProduct(reactAtom, prodAtom);
313             if (nSwaps >= 0 && nSwaps % 2) {
314               auto mival =
315                   prodAtom->getProp<int>(common_properties::molInversionFlag);
316               if (mival == 1) {
317                 prodAtom->setProp(common_properties::molInversionFlag, 2);
318               } else if (mival == 2) {
319                 prodAtom->setProp(common_properties::molInversionFlag, 1);
320               } else {
321                 CHECK_INVARIANT(false, "inconsistent molInversionFlag");
322               }
323             }
324           } else {
325             // stereochem in the product, but not in the reactant
326             prodAtom->setProp(common_properties::molInversionFlag, 4);
327           }
328         } else if (reactantMapping[mapNum]->getChiralTag() !=
329                        Atom::CHI_UNSPECIFIED &&
330                    reactantMapping[mapNum]->getChiralTag() != Atom::CHI_OTHER) {
331           // stereochem in the reactant, but not the product:
332           prodAtom->setProp(common_properties::molInversionFlag, 3);
333         }
334       } else {
335         // introduction of new stereocenter by the reaction
336         prodAtom->setProp(common_properties::molInversionFlag, 4);
337       }
338     }
339   }
340 }
341 
342 namespace {
343 
removeMappingNumbersFromReactionMoleculeTemplate(const MOL_SPTR_VECT & molVec)344 void removeMappingNumbersFromReactionMoleculeTemplate(
345     const MOL_SPTR_VECT &molVec) {
346   for (const auto &begin : molVec) {
347     ROMol &mol = *begin.get();
348     for (ROMol::AtomIterator atomIt = mol.beginAtoms();
349          atomIt != mol.endAtoms(); ++atomIt) {
350       if ((*atomIt)->hasProp(common_properties::molAtomMapNumber)) {
351         (*atomIt)->clearProp(common_properties::molAtomMapNumber);
352       }
353     }
354   }
355 }
356 
357 }  // namespace
358 
removeMappingNumbersFromReactions(const ChemicalReaction & rxn)359 void removeMappingNumbersFromReactions(const ChemicalReaction &rxn) {
360   removeMappingNumbersFromReactionMoleculeTemplate(rxn.getAgents());
361   removeMappingNumbersFromReactionMoleculeTemplate(rxn.getProducts());
362   removeMappingNumbersFromReactionMoleculeTemplate(rxn.getReactants());
363 }
364 
365 }  // namespace RDKit
366