1 // $Id$
2 //
3 //  Copyright (C) 2005-2006 Rational Discovery LLC
4 //
5 //   @@ All Rights Reserved @@
6 //  This file is part of the RDKit.
7 //  The contents are covered by the terms of the BSD license
8 //  which is included in the file license.txt, found at the root
9 //  of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/Invariant.h>
13 #include <RDGeneral/RDLog.h>
14 #include <GraphMol/RDKitBase.h>
15 
16 #include "FeatTree.h"
17 
18 #include <boost/graph/biconnected_components.hpp>
19 #include <boost/property_map.hpp>
20 #include <map>
21 #include <set>
22 
23 namespace RDKit {
24 namespace FeatTrees {
25 typedef std::vector<boost::graph_traits<FeatTreeGraph>::vertex_descriptor>
26     NodeVect;
27 typedef std::set<boost::graph_traits<FeatTreeGraph>::vertex_descriptor> NodeSet;
28 typedef std::set<boost::graph_traits<FeatTreeGraph>::edge_descriptor> EdgeSet;
29 
30 // local utility namespace
31 namespace {
32 /*!
33    This function replaces the elements of a system of
34    fused rings that form a cycle in the feature tree with
35    a single node. This is, in essence, a merge operation.
36 
37    \param featGraph: the graph to be modified
38    \param featGraphCopy: a copy of the modified graph
39    \param components: a property map from edge->component id,
40           this property map refers to featGraphCopy
41    \param componentIdx: component id to be replaced
42 
43 */
mergeRingCycle(FeatTreeGraph & featGraph,FeatTreeGraph & featGraphCopy,FeatTreeEdgePMap & components,unsigned int componentIdx)44 void mergeRingCycle(FeatTreeGraph &featGraph, FeatTreeGraph &featGraphCopy,
45                     FeatTreeEdgePMap &components, unsigned int componentIdx) {
46   UINT_SET atomIndices;
47   FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), featGraph);
48   NodeVect nodeVect;
49 
50   boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
51   for (boost::tie(edge, endEdges) = boost::edges(featGraphCopy);
52        edge != endEdges; ++edge) {
53     if (components[*edge] == componentIdx) {
54       boost::graph_traits<FeatTreeGraph>::vertex_descriptor node;
55 
56       node = boost::source(*edge, featGraphCopy);
57       atomIndices.insert(nodeMap[node].begin(), nodeMap[node].end());
58       if (std::find(nodeVect.begin(), nodeVect.end(), node) == nodeVect.end()) {
59         nodeVect.push_back(node);
60       }
61 
62       node = boost::target(*edge, featGraphCopy);
63       atomIndices.insert(nodeMap[node].begin(), nodeMap[node].end());
64       if (std::find(nodeVect.begin(), nodeVect.end(), node) == nodeVect.end()) {
65         nodeVect.push_back(node);
66       }
67     }
68   }
69   // ------ ------ ------ ------ ------ ------ ------
70   // We now have a set of all the nodes involved in the cycle
71   // as well as the full set of atom indices that are going
72   // to be associated with the new node.
73   // We're going to:
74   //   1) construct the new node and add it to the graph
75   //   2) loop over each of the cycle nodes and:
76   //       2.1) attach each of their outer edges to the new node
77   //       2.2) remove the cycle node from the graph
78   boost::graph_traits<FeatTreeGraph>::vertex_descriptor newNode;
79   newNode = boost::add_vertex(FeatTreeNode(atomIndices), featGraph);
80 
81   // we need to have the vertices sorted in inverse order (from
82   // highest to lowest) before we start removing so that we don't
83   // invalidate any of the "descriptors" that we have:
84   std::sort(nodeVect.begin(), nodeVect.end());
85   std::reverse(nodeVect.begin(), nodeVect.end());
86 
87   for (NodeVect::const_iterator nodeIt = nodeVect.begin();
88        nodeIt != nodeVect.end(); ++nodeIt) {
89     FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
90     boost::graph_traits<FeatTreeGraph>::out_edge_iterator edge, endEdges;
91     for (boost::tie(edge, endEdges) = boost::out_edges(*nodeIt, featGraph);
92          edge != endEdges; ++edge) {
93       // figure out which of the edge's nodes is the neighbor:
94       if (boost::source(*edge, featGraph) == *nodeIt) {
95         // make sure the neighbor isn't in the nodeVect:
96         if (!std::binary_search(nodeVect.rbegin(), nodeVect.rend(),
97                                 boost::target(*edge, featGraph))) {
98           boost::add_edge(newNode, boost::target(*edge, featGraph),
99                           FeatTreeEdge(0), featGraph);
100         }
101       } else if (boost::target(*edge, featGraph) == *nodeIt) {
102         // make sure the neighbor isn't in the nodeSet:
103         if (!std::binary_search(nodeVect.rbegin(), nodeVect.rend(),
104                                 boost::source(*edge, featGraph))) {
105           boost::add_edge(boost::source(*edge, featGraph), newNode,
106                           FeatTreeEdge(0), featGraph);
107         }
108       } else {
109         CHECK_INVARIANT(false,
110                         "inconsistent state encounted when replacing a cycle");
111       }
112     }
113 
114     // now remove the node from the graph:
115     boost::clear_vertex(*nodeIt, featGraph);
116     boost::remove_vertex(*nodeIt, featGraph);
117   }
118 }
119 
120 }  // end of local utility namespace
121 
122 // -----------------------------------------------------------------------
123 //
124 // -----------------------------------------------------------------------
addRingsAndConnectors(const ROMol & mol,FeatTreeGraph & resGraph)125 void addRingsAndConnectors(const ROMol &mol, FeatTreeGraph &resGraph) {
126   RingInfo *ringInfo = mol.getRingInfo();
127   unsigned int ringIdxI = 0;
128   for (VECT_INT_VECT::const_iterator ringItI = ringInfo->atomRings().begin();
129        ringItI != ringInfo->atomRings().end(); ++ringItI, ++ringIdxI) {
130     UINT_SET s;
131     s.insert(ringItI->begin(), ringItI->end());
132     boost::add_vertex(FeatTreeNode(s), resGraph);
133 
134     // ------ ------ ------ ------
135     // Add any relevant ring-ring connectors:
136     // ------ ------ ------ ------
137     // sort a copy of this ring's atoms so that it's easier to
138     // search for overlaps:
139     INT_VECT ringI = *ringItI;
140     std::sort(ringI.begin(), ringI.end());
141     unsigned int ringIdxJ = 0;
142     for (VECT_INT_VECT::const_iterator ringItJ = ringInfo->atomRings().begin();
143          ringItJ != ringItI; ++ringItJ, ++ringIdxJ) {
144       for (INT_VECT::const_iterator ringJElem = ringItJ->begin();
145            ringJElem != ringItJ->end(); ++ringJElem) {
146         if (std::binary_search(ringI.begin(), ringI.end(), *ringJElem)) {
147           // these two rings share a common atom, so set up an
148           // edge between them in the feature tree:
149           boost::add_edge(ringIdxI, ringIdxJ, FeatTreeEdge(2), resGraph);
150 
151           // no point continuing the search for ringJ:
152           break;
153         }
154       }
155     }
156   }
157 }
158 
159 // -----------------------------------------------------------------------
160 //
161 // -----------------------------------------------------------------------
addRingRingBonds(const ROMol & mol,FeatTreeGraph & resGraph)162 void addRingRingBonds(const ROMol &mol, FeatTreeGraph &resGraph) {
163   FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), resGraph);
164 
165   boost::graph_traits<FeatTreeGraph>::vertex_iterator nodeI, endNodes;
166   for (boost::tie(nodeI, endNodes) = boost::vertices(resGraph);
167        nodeI != endNodes; ++nodeI) {
168     boost::graph_traits<FeatTreeGraph>::vertex_iterator nodeJ;
169     nodeJ = nodeI;
170     while (++nodeJ != endNodes) {
171       boost::graph_traits<FeatTreeGraph>::edge_descriptor tmp;
172       bool found = false;
173       boost::tie(tmp, found) = boost::edge(*nodeI, *nodeJ, resGraph);
174       for (UINT_SET::const_iterator setI = nodeMap[*nodeI].begin();
175            setI != nodeMap[*nodeI].end() && !found; setI++) {
176         for (UINT_SET::const_iterator setJ = nodeMap[*nodeJ].begin();
177              setJ != nodeMap[*nodeJ].end() && !found; setJ++) {
178           if (mol.getBondBetweenAtoms(*setI, *setJ)) {
179             // set up the bond:
180             boost::add_edge(*nodeI, *nodeJ, FeatTreeEdge(1), resGraph);
181             found = true;
182             break;
183           }
184         }
185       }
186     }
187   }
188 }
189 
190 // -----------------------------------------------------------------------
191 //
192 // -----------------------------------------------------------------------
addNonringAtoms(const ROMol & mol,FeatTreeGraph & resGraph)193 std::vector<unsigned int> addNonringAtoms(const ROMol &mol,
194                                           FeatTreeGraph &resGraph) {
195   RingInfo *ringInfo = mol.getRingInfo();
196   unsigned int nAts = mol.getNumAtoms();
197   std::vector<unsigned int> atomIndices(nAts, nAts + 1);
198   for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
199        atomIt != mol.endAtoms(); ++atomIt) {
200     const Atom *atom = *atomIt;
201     if ((atom->getDegree() > 1 ||
202          atom->getDegree() + atom->getTotalNumHs() > 1) &&
203         !ringInfo->numAtomRings(atom->getIdx())) {
204       UINT_SET s;
205       s.insert(atom->getIdx());
206       unsigned int idx = boost::add_vertex(FeatTreeNode(s), resGraph);
207       atomIndices[atom->getIdx()] = idx;
208     }
209   }
210   return atomIndices;
211 }
212 
213 // -----------------------------------------------------------------------
214 //
215 // -----------------------------------------------------------------------
addBondsFromNonringAtoms(const ROMol & mol,FeatTreeGraph & resGraph,std::vector<unsigned int> & atomIndices)216 void addBondsFromNonringAtoms(const ROMol &mol, FeatTreeGraph &resGraph,
217                               std::vector<unsigned int> &atomIndices) {
218   FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), resGraph);
219   unsigned int nAts = mol.getNumAtoms();
220   for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
221        atomIt != mol.endAtoms(); ++atomIt) {
222     const Atom *atom = *atomIt;
223     if (atomIndices[atom->getIdx()] < nAts) {
224       // this atom has already been added as a free-standing atom:
225       ROMol::ADJ_ITER nbrIdx, endNbrs;
226       for (boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
227            nbrIdx != endNbrs; ++nbrIdx) {
228         if (atomIndices[*nbrIdx] < nAts) {
229           if (*nbrIdx > atom->getIdx()) {
230             // the neighbor has already been added, and it has a higher
231             // index than ours (we make this check to avoid duplicated
232             // bonds), so add a bond to it:
233             boost::add_edge(atomIndices[atom->getIdx()], atomIndices[*nbrIdx],
234                             FeatTreeEdge(0), resGraph);
235           }
236         } else if (mol.getAtomWithIdx(*nbrIdx)->getDegree() > 1) {
237           // the neighbor hasn't been added to the graph
238           // on its own and the degree is above 1, so it's
239           // got to be a ring atom, find the rings it's present in by
240           // looping over nodes already in the graph.  Note that we can't
241           // use the ringInfo structure anymore because we may have combined
242           // some rings in the call to replaceCycles() above:
243           boost::graph_traits<FeatTreeGraph>::vertex_iterator node, endNodes;
244           for (boost::tie(node, endNodes) = boost::vertices(resGraph);
245                node != endNodes; ++node) {
246             if (atomIndices[*node] != *nbrIdx &&
247                 nodeMap[*node].count(*nbrIdx)) {
248               boost::add_edge(atomIndices[atom->getIdx()], *node,
249                               FeatTreeEdge(1), resGraph);
250             }
251           }
252         }
253       }
254     }
255   }
256 }
257 
258 // -----------------------------------------------------------------------
259 //
260 // -----------------------------------------------------------------------
replaceCycles(FeatTreeGraph & featGraph)261 void replaceCycles(FeatTreeGraph &featGraph) {
262   FeatTreeGraph featGraphCopy = featGraph;
263 
264   FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
265   FeatTreeEdgePMap componentMap = boost::get(FeatTreeEdge_t(), featGraphCopy);
266 
267   // ------ ------ ------ ------ ------ ------ ------
268   // Start by finding the biconnected components:
269   unsigned int numComponents =
270       boost::biconnected_components(featGraphCopy, componentMap);
271   if (numComponents == boost::num_vertices(featGraph)) {
272     // no cycles, our work here is done, so go ahead and return
273     return;
274   }
275 
276   // ------ ------ ------ ------ ------ ------ ------
277   // loop over the elements of the biconnected-components map and count:
278   //    - how many times each index occurs
279   std::vector<unsigned int> memberCount(numComponents, 0);
280   boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
281   boost::graph_traits<FeatTreeGraph>::edge_iterator cpyEdge, endCpyEdges;
282   for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
283        (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
284        edge != endEdges; ++edge, ++cpyEdge) {
285     memberCount[componentMap[*cpyEdge]] += 1;
286     CHECK_INVARIANT(edgeMap[*edge] == 2,
287                     "replaceCycles() should be called with only ring-ring "
288                     "edges in the graph");
289   }
290 
291   // ------ ------ ------ ------ ------ ------ ------
292   // Replace any cycles that exist in the graph:
293   for (unsigned int i = 0; i < numComponents; ++i) {
294     if (memberCount[i] > 1) {
295       mergeRingCycle(featGraph, featGraphCopy, componentMap, i);
296     }
297   }
298 }
299 
300 // -----------------------------------------------------------------------
301 //
302 // -----------------------------------------------------------------------
addZeroNodes(FeatTreeGraph & featGraph)303 void addZeroNodes(FeatTreeGraph &featGraph) {
304   FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
305   FeatTreeGraph featGraphCopy = featGraph;
306   FeatTreeEdgePMap componentMap = boost::get(FeatTreeEdge_t(), featGraphCopy);
307 
308   // ------ ------ ------ ------ ------ ------ ------
309   // Start by finding the biconnected components:
310   unsigned int numComponents =
311       boost::biconnected_components(featGraphCopy, componentMap);
312 
313   if (numComponents == boost::num_edges(featGraph)) {
314     // no cycles, our work here is done, so go ahead and return
315     return;
316   }
317 
318   // ------ ------ ------ ------ ------ ------ ------
319   // loop over the elements of the biconnected-components map and:
320   //    1) count how many times each index occurs
321   //    2) determine if each component has a non-ring->ring bond
322   std::vector<unsigned int> memberCount(numComponents, 0);
323   std::vector<bool> weight1EdgeObserved(numComponents, false);
324   boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
325   boost::graph_traits<FeatTreeGraph>::edge_iterator cpyEdge, endCpyEdges;
326   for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
327        (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
328        edge != endEdges; ++edge, ++cpyEdge) {
329     memberCount[componentMap[*cpyEdge]] += 1;
330     if (edgeMap[*edge] == 1) {
331       weight1EdgeObserved[componentMap[*cpyEdge]] = true;
332     }
333   }
334 
335   for (unsigned int i = 0; i < numComponents; i++) {
336     if (memberCount[i] > 2) {
337       CHECK_INVARIANT(weight1EdgeObserved[i], "internal inconsistency");
338       // add the zero node:
339       unsigned int newIdx;
340       UINT_SET emptySet;
341       newIdx = boost::add_vertex(FeatTreeNode(emptySet), featGraph);
342 
343       // figure out who it needs to be connected to:
344       NodeSet nodesToConnect;
345       for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
346            (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
347            cpyEdge != endCpyEdges; ++edge, ++cpyEdge) {
348         if (componentMap[*cpyEdge] == i) {
349           nodesToConnect.insert(boost::source(*edge, featGraph));
350           nodesToConnect.insert(boost::target(*edge, featGraph));
351           // we can't actually remove edges at this stage, because we need
352           // the iterators for featGraph edges and featGraphCopy edges to
353           // stay in sync. So just mark this edge as one that should be
354           // removed and we'll take care of it later.
355           edgeMap[*edge] = 23;
356         }
357       }
358       for (NodeSet::iterator nodeI = nodesToConnect.begin();
359            nodeI != nodesToConnect.end(); nodeI++) {
360         boost::add_edge(newIdx, *nodeI, 4, featGraph);
361       }
362     }
363   }
364   edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
365   boost::tie(edge, endEdges) = boost::edges(featGraph);
366   while (edge != endEdges) {
367     if (edgeMap[*edge] == 23) {
368       boost::graph_traits<FeatTreeGraph>::edge_iterator nextEdge = edge;
369       ++nextEdge;
370       boost::remove_edge(*edge, featGraph);
371       edge = nextEdge;
372     } else {
373       ++edge;
374     }
375   }
376 }
377 
378 }  // end of namespace FeatTrees
379 }  // end of namespace RDKit
380