1 // $Id$
2 //
3 // Copyright (C) 2005-2006 Rational Discovery LLC
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11
12 #include <RDGeneral/Invariant.h>
13 #include <RDGeneral/RDLog.h>
14 #include <GraphMol/RDKitBase.h>
15
16 #include "FeatTree.h"
17
18 #include <boost/graph/biconnected_components.hpp>
19 #include <boost/property_map.hpp>
20 #include <map>
21 #include <set>
22
23 namespace RDKit {
24 namespace FeatTrees {
25 typedef std::vector<boost::graph_traits<FeatTreeGraph>::vertex_descriptor>
26 NodeVect;
27 typedef std::set<boost::graph_traits<FeatTreeGraph>::vertex_descriptor> NodeSet;
28 typedef std::set<boost::graph_traits<FeatTreeGraph>::edge_descriptor> EdgeSet;
29
30 // local utility namespace
31 namespace {
32 /*!
33 This function replaces the elements of a system of
34 fused rings that form a cycle in the feature tree with
35 a single node. This is, in essence, a merge operation.
36
37 \param featGraph: the graph to be modified
38 \param featGraphCopy: a copy of the modified graph
39 \param components: a property map from edge->component id,
40 this property map refers to featGraphCopy
41 \param componentIdx: component id to be replaced
42
43 */
mergeRingCycle(FeatTreeGraph & featGraph,FeatTreeGraph & featGraphCopy,FeatTreeEdgePMap & components,unsigned int componentIdx)44 void mergeRingCycle(FeatTreeGraph &featGraph, FeatTreeGraph &featGraphCopy,
45 FeatTreeEdgePMap &components, unsigned int componentIdx) {
46 UINT_SET atomIndices;
47 FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), featGraph);
48 NodeVect nodeVect;
49
50 boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
51 for (boost::tie(edge, endEdges) = boost::edges(featGraphCopy);
52 edge != endEdges; ++edge) {
53 if (components[*edge] == componentIdx) {
54 boost::graph_traits<FeatTreeGraph>::vertex_descriptor node;
55
56 node = boost::source(*edge, featGraphCopy);
57 atomIndices.insert(nodeMap[node].begin(), nodeMap[node].end());
58 if (std::find(nodeVect.begin(), nodeVect.end(), node) == nodeVect.end()) {
59 nodeVect.push_back(node);
60 }
61
62 node = boost::target(*edge, featGraphCopy);
63 atomIndices.insert(nodeMap[node].begin(), nodeMap[node].end());
64 if (std::find(nodeVect.begin(), nodeVect.end(), node) == nodeVect.end()) {
65 nodeVect.push_back(node);
66 }
67 }
68 }
69 // ------ ------ ------ ------ ------ ------ ------
70 // We now have a set of all the nodes involved in the cycle
71 // as well as the full set of atom indices that are going
72 // to be associated with the new node.
73 // We're going to:
74 // 1) construct the new node and add it to the graph
75 // 2) loop over each of the cycle nodes and:
76 // 2.1) attach each of their outer edges to the new node
77 // 2.2) remove the cycle node from the graph
78 boost::graph_traits<FeatTreeGraph>::vertex_descriptor newNode;
79 newNode = boost::add_vertex(FeatTreeNode(atomIndices), featGraph);
80
81 // we need to have the vertices sorted in inverse order (from
82 // highest to lowest) before we start removing so that we don't
83 // invalidate any of the "descriptors" that we have:
84 std::sort(nodeVect.begin(), nodeVect.end());
85 std::reverse(nodeVect.begin(), nodeVect.end());
86
87 for (NodeVect::const_iterator nodeIt = nodeVect.begin();
88 nodeIt != nodeVect.end(); ++nodeIt) {
89 FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
90 boost::graph_traits<FeatTreeGraph>::out_edge_iterator edge, endEdges;
91 for (boost::tie(edge, endEdges) = boost::out_edges(*nodeIt, featGraph);
92 edge != endEdges; ++edge) {
93 // figure out which of the edge's nodes is the neighbor:
94 if (boost::source(*edge, featGraph) == *nodeIt) {
95 // make sure the neighbor isn't in the nodeVect:
96 if (!std::binary_search(nodeVect.rbegin(), nodeVect.rend(),
97 boost::target(*edge, featGraph))) {
98 boost::add_edge(newNode, boost::target(*edge, featGraph),
99 FeatTreeEdge(0), featGraph);
100 }
101 } else if (boost::target(*edge, featGraph) == *nodeIt) {
102 // make sure the neighbor isn't in the nodeSet:
103 if (!std::binary_search(nodeVect.rbegin(), nodeVect.rend(),
104 boost::source(*edge, featGraph))) {
105 boost::add_edge(boost::source(*edge, featGraph), newNode,
106 FeatTreeEdge(0), featGraph);
107 }
108 } else {
109 CHECK_INVARIANT(false,
110 "inconsistent state encounted when replacing a cycle");
111 }
112 }
113
114 // now remove the node from the graph:
115 boost::clear_vertex(*nodeIt, featGraph);
116 boost::remove_vertex(*nodeIt, featGraph);
117 }
118 }
119
120 } // end of local utility namespace
121
122 // -----------------------------------------------------------------------
123 //
124 // -----------------------------------------------------------------------
addRingsAndConnectors(const ROMol & mol,FeatTreeGraph & resGraph)125 void addRingsAndConnectors(const ROMol &mol, FeatTreeGraph &resGraph) {
126 RingInfo *ringInfo = mol.getRingInfo();
127 unsigned int ringIdxI = 0;
128 for (VECT_INT_VECT::const_iterator ringItI = ringInfo->atomRings().begin();
129 ringItI != ringInfo->atomRings().end(); ++ringItI, ++ringIdxI) {
130 UINT_SET s;
131 s.insert(ringItI->begin(), ringItI->end());
132 boost::add_vertex(FeatTreeNode(s), resGraph);
133
134 // ------ ------ ------ ------
135 // Add any relevant ring-ring connectors:
136 // ------ ------ ------ ------
137 // sort a copy of this ring's atoms so that it's easier to
138 // search for overlaps:
139 INT_VECT ringI = *ringItI;
140 std::sort(ringI.begin(), ringI.end());
141 unsigned int ringIdxJ = 0;
142 for (VECT_INT_VECT::const_iterator ringItJ = ringInfo->atomRings().begin();
143 ringItJ != ringItI; ++ringItJ, ++ringIdxJ) {
144 for (INT_VECT::const_iterator ringJElem = ringItJ->begin();
145 ringJElem != ringItJ->end(); ++ringJElem) {
146 if (std::binary_search(ringI.begin(), ringI.end(), *ringJElem)) {
147 // these two rings share a common atom, so set up an
148 // edge between them in the feature tree:
149 boost::add_edge(ringIdxI, ringIdxJ, FeatTreeEdge(2), resGraph);
150
151 // no point continuing the search for ringJ:
152 break;
153 }
154 }
155 }
156 }
157 }
158
159 // -----------------------------------------------------------------------
160 //
161 // -----------------------------------------------------------------------
addRingRingBonds(const ROMol & mol,FeatTreeGraph & resGraph)162 void addRingRingBonds(const ROMol &mol, FeatTreeGraph &resGraph) {
163 FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), resGraph);
164
165 boost::graph_traits<FeatTreeGraph>::vertex_iterator nodeI, endNodes;
166 for (boost::tie(nodeI, endNodes) = boost::vertices(resGraph);
167 nodeI != endNodes; ++nodeI) {
168 boost::graph_traits<FeatTreeGraph>::vertex_iterator nodeJ;
169 nodeJ = nodeI;
170 while (++nodeJ != endNodes) {
171 boost::graph_traits<FeatTreeGraph>::edge_descriptor tmp;
172 bool found = false;
173 boost::tie(tmp, found) = boost::edge(*nodeI, *nodeJ, resGraph);
174 for (UINT_SET::const_iterator setI = nodeMap[*nodeI].begin();
175 setI != nodeMap[*nodeI].end() && !found; setI++) {
176 for (UINT_SET::const_iterator setJ = nodeMap[*nodeJ].begin();
177 setJ != nodeMap[*nodeJ].end() && !found; setJ++) {
178 if (mol.getBondBetweenAtoms(*setI, *setJ)) {
179 // set up the bond:
180 boost::add_edge(*nodeI, *nodeJ, FeatTreeEdge(1), resGraph);
181 found = true;
182 break;
183 }
184 }
185 }
186 }
187 }
188 }
189
190 // -----------------------------------------------------------------------
191 //
192 // -----------------------------------------------------------------------
addNonringAtoms(const ROMol & mol,FeatTreeGraph & resGraph)193 std::vector<unsigned int> addNonringAtoms(const ROMol &mol,
194 FeatTreeGraph &resGraph) {
195 RingInfo *ringInfo = mol.getRingInfo();
196 unsigned int nAts = mol.getNumAtoms();
197 std::vector<unsigned int> atomIndices(nAts, nAts + 1);
198 for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
199 atomIt != mol.endAtoms(); ++atomIt) {
200 const Atom *atom = *atomIt;
201 if ((atom->getDegree() > 1 ||
202 atom->getDegree() + atom->getTotalNumHs() > 1) &&
203 !ringInfo->numAtomRings(atom->getIdx())) {
204 UINT_SET s;
205 s.insert(atom->getIdx());
206 unsigned int idx = boost::add_vertex(FeatTreeNode(s), resGraph);
207 atomIndices[atom->getIdx()] = idx;
208 }
209 }
210 return atomIndices;
211 }
212
213 // -----------------------------------------------------------------------
214 //
215 // -----------------------------------------------------------------------
addBondsFromNonringAtoms(const ROMol & mol,FeatTreeGraph & resGraph,std::vector<unsigned int> & atomIndices)216 void addBondsFromNonringAtoms(const ROMol &mol, FeatTreeGraph &resGraph,
217 std::vector<unsigned int> &atomIndices) {
218 FeatTreeNodePMap nodeMap = boost::get(FeatTreeNode_t(), resGraph);
219 unsigned int nAts = mol.getNumAtoms();
220 for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
221 atomIt != mol.endAtoms(); ++atomIt) {
222 const Atom *atom = *atomIt;
223 if (atomIndices[atom->getIdx()] < nAts) {
224 // this atom has already been added as a free-standing atom:
225 ROMol::ADJ_ITER nbrIdx, endNbrs;
226 for (boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(atom);
227 nbrIdx != endNbrs; ++nbrIdx) {
228 if (atomIndices[*nbrIdx] < nAts) {
229 if (*nbrIdx > atom->getIdx()) {
230 // the neighbor has already been added, and it has a higher
231 // index than ours (we make this check to avoid duplicated
232 // bonds), so add a bond to it:
233 boost::add_edge(atomIndices[atom->getIdx()], atomIndices[*nbrIdx],
234 FeatTreeEdge(0), resGraph);
235 }
236 } else if (mol.getAtomWithIdx(*nbrIdx)->getDegree() > 1) {
237 // the neighbor hasn't been added to the graph
238 // on its own and the degree is above 1, so it's
239 // got to be a ring atom, find the rings it's present in by
240 // looping over nodes already in the graph. Note that we can't
241 // use the ringInfo structure anymore because we may have combined
242 // some rings in the call to replaceCycles() above:
243 boost::graph_traits<FeatTreeGraph>::vertex_iterator node, endNodes;
244 for (boost::tie(node, endNodes) = boost::vertices(resGraph);
245 node != endNodes; ++node) {
246 if (atomIndices[*node] != *nbrIdx &&
247 nodeMap[*node].count(*nbrIdx)) {
248 boost::add_edge(atomIndices[atom->getIdx()], *node,
249 FeatTreeEdge(1), resGraph);
250 }
251 }
252 }
253 }
254 }
255 }
256 }
257
258 // -----------------------------------------------------------------------
259 //
260 // -----------------------------------------------------------------------
replaceCycles(FeatTreeGraph & featGraph)261 void replaceCycles(FeatTreeGraph &featGraph) {
262 FeatTreeGraph featGraphCopy = featGraph;
263
264 FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
265 FeatTreeEdgePMap componentMap = boost::get(FeatTreeEdge_t(), featGraphCopy);
266
267 // ------ ------ ------ ------ ------ ------ ------
268 // Start by finding the biconnected components:
269 unsigned int numComponents =
270 boost::biconnected_components(featGraphCopy, componentMap);
271 if (numComponents == boost::num_vertices(featGraph)) {
272 // no cycles, our work here is done, so go ahead and return
273 return;
274 }
275
276 // ------ ------ ------ ------ ------ ------ ------
277 // loop over the elements of the biconnected-components map and count:
278 // - how many times each index occurs
279 std::vector<unsigned int> memberCount(numComponents, 0);
280 boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
281 boost::graph_traits<FeatTreeGraph>::edge_iterator cpyEdge, endCpyEdges;
282 for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
283 (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
284 edge != endEdges; ++edge, ++cpyEdge) {
285 memberCount[componentMap[*cpyEdge]] += 1;
286 CHECK_INVARIANT(edgeMap[*edge] == 2,
287 "replaceCycles() should be called with only ring-ring "
288 "edges in the graph");
289 }
290
291 // ------ ------ ------ ------ ------ ------ ------
292 // Replace any cycles that exist in the graph:
293 for (unsigned int i = 0; i < numComponents; ++i) {
294 if (memberCount[i] > 1) {
295 mergeRingCycle(featGraph, featGraphCopy, componentMap, i);
296 }
297 }
298 }
299
300 // -----------------------------------------------------------------------
301 //
302 // -----------------------------------------------------------------------
addZeroNodes(FeatTreeGraph & featGraph)303 void addZeroNodes(FeatTreeGraph &featGraph) {
304 FeatTreeEdgePMap edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
305 FeatTreeGraph featGraphCopy = featGraph;
306 FeatTreeEdgePMap componentMap = boost::get(FeatTreeEdge_t(), featGraphCopy);
307
308 // ------ ------ ------ ------ ------ ------ ------
309 // Start by finding the biconnected components:
310 unsigned int numComponents =
311 boost::biconnected_components(featGraphCopy, componentMap);
312
313 if (numComponents == boost::num_edges(featGraph)) {
314 // no cycles, our work here is done, so go ahead and return
315 return;
316 }
317
318 // ------ ------ ------ ------ ------ ------ ------
319 // loop over the elements of the biconnected-components map and:
320 // 1) count how many times each index occurs
321 // 2) determine if each component has a non-ring->ring bond
322 std::vector<unsigned int> memberCount(numComponents, 0);
323 std::vector<bool> weight1EdgeObserved(numComponents, false);
324 boost::graph_traits<FeatTreeGraph>::edge_iterator edge, endEdges;
325 boost::graph_traits<FeatTreeGraph>::edge_iterator cpyEdge, endCpyEdges;
326 for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
327 (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
328 edge != endEdges; ++edge, ++cpyEdge) {
329 memberCount[componentMap[*cpyEdge]] += 1;
330 if (edgeMap[*edge] == 1) {
331 weight1EdgeObserved[componentMap[*cpyEdge]] = true;
332 }
333 }
334
335 for (unsigned int i = 0; i < numComponents; i++) {
336 if (memberCount[i] > 2) {
337 CHECK_INVARIANT(weight1EdgeObserved[i], "internal inconsistency");
338 // add the zero node:
339 unsigned int newIdx;
340 UINT_SET emptySet;
341 newIdx = boost::add_vertex(FeatTreeNode(emptySet), featGraph);
342
343 // figure out who it needs to be connected to:
344 NodeSet nodesToConnect;
345 for ((boost::tie(edge, endEdges) = boost::edges(featGraph)),
346 (boost::tie(cpyEdge, endCpyEdges) = boost::edges(featGraphCopy));
347 cpyEdge != endCpyEdges; ++edge, ++cpyEdge) {
348 if (componentMap[*cpyEdge] == i) {
349 nodesToConnect.insert(boost::source(*edge, featGraph));
350 nodesToConnect.insert(boost::target(*edge, featGraph));
351 // we can't actually remove edges at this stage, because we need
352 // the iterators for featGraph edges and featGraphCopy edges to
353 // stay in sync. So just mark this edge as one that should be
354 // removed and we'll take care of it later.
355 edgeMap[*edge] = 23;
356 }
357 }
358 for (NodeSet::iterator nodeI = nodesToConnect.begin();
359 nodeI != nodesToConnect.end(); nodeI++) {
360 boost::add_edge(newIdx, *nodeI, 4, featGraph);
361 }
362 }
363 }
364 edgeMap = boost::get(FeatTreeEdge_t(), featGraph);
365 boost::tie(edge, endEdges) = boost::edges(featGraph);
366 while (edge != endEdges) {
367 if (edgeMap[*edge] == 23) {
368 boost::graph_traits<FeatTreeGraph>::edge_iterator nextEdge = edge;
369 ++nextEdge;
370 boost::remove_edge(*edge, featGraph);
371 edge = nextEdge;
372 } else {
373 ++edge;
374 }
375 }
376 }
377
378 } // end of namespace FeatTrees
379 } // end of namespace RDKit
380