1 /*
2 * This file is part of the RingDecomposerLib, licensed
3 * under BSD New license (see LICENSE in the root directory).
4 * Copyright (c) 2016
5 * University of Hamburg, ZBH - Center for Bioinformatics
6 * Niek Andresen, Florian Flachsenberg, Matthias Rarey
7 *
8 * Please cite:
9 *
10 * Kolodzik, A.; Urbaczek, S.; Rarey, M.
11 * Unique Ring Families: A Chemically Meaningful Description
12 * of Molecular Ring Topologies.
13 * J. Chem. Inf. Model., 2012, 52 (8), pp 2013-2021
14 *
15 * Flachsenberg, F.; Andresen, N.; Rarey, M.
16 * RingDecomposerLib: An Open-Source Implementation of
17 * Unique Ring Families and Other Cycle Bases.
18 * J. Chem. Inf. Model., 2017, 57 (2), pp 122-126
19 */
20
21 #include "RDLcycleFams.h"
22
23 #include <stdlib.h>
24 #include <limits.h>
25 #include <stdio.h>
26
27 #include "RDLapsp.h"
28 #include "RDLgraph.h"
29
30 /** returns 1 if the intersection of P(r,y) and P(r,z) is equal to {r};
31 0 otherwise */
RDL_pathsShareOnlyStart(unsigned r,unsigned y,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi)32 static int RDL_pathsShareOnlyStart(unsigned r, unsigned y, unsigned z, RDL_graph *gra, RDL_sPathInfo *spi)
33 {
34 unsigned result = 0, i, pnt, count=0;
35 unsigned *vertInRY, *vertInRZ; /*edges in P(r,y) and P(r,z)*/
36 vertInRY = malloc(gra->V * sizeof(*vertInRY));
37 vertInRZ = malloc(gra->V * sizeof(*vertInRZ));
38
39 for(i=0; i<gra->V; ++i)
40 {
41 vertInRY[i] = 0;
42 vertInRZ[i] = 0;
43 }
44 vertInRY[y] = 1;
45 vertInRZ[z] = 1;
46 /*find out which vertices are on the path P(r,y)*/
47 pnt = y;
48 do
49 {
50 pnt = spi->pred[r][pnt];
51 vertInRY[pnt] = 1;
52 }while(pnt != r);
53 /*find out which vertices are on the path P(r,z)*/
54 pnt = z;
55 do
56 {
57 pnt = spi->pred[r][pnt];
58 vertInRZ[pnt] = 1;
59 }while(pnt != r);
60 /*find out if more than one vertex is shared by the paths*/
61 for(i=0; i<gra->V && count<2; ++i)
62 {
63 if(vertInRY[i] == 1 && vertInRZ[i] == 1)
64 {
65 ++count;
66 }
67 }
68 /*if r is the only shared vertex of the two paths return 1*/
69 if(count == 1 && (vertInRY[r] == 1) && (vertInRZ[r] == 1))
70 {
71 result = 1;
72 }
73
74 free(vertInRY);
75 free(vertInRZ);
76 return result;
77 }
78
79 /** returns a cycle vector (element of {0,1}^m). odd (x=UINT_MAX) or even cycle.*/
RDL_findPrototype(unsigned r,unsigned y,unsigned z,unsigned x,RDL_graph * gra,RDL_sPathInfo * spi)80 static char *RDL_findPrototype(unsigned r, unsigned y, unsigned z, unsigned x,
81 RDL_graph *gra, RDL_sPathInfo *spi)
82 {
83 unsigned i, vert1, vert2;
84 char *proto;
85
86 proto = malloc(gra->E * sizeof(*proto));
87 for(i=0; i<gra->E; ++i)
88 {
89 proto[i] = 0;
90 }
91 /*path from r to y*/
92 vert1 = y;
93 do
94 {
95 vert2 = vert1;
96 vert1 = spi->pred[r][vert1];
97 proto[RDL_edgeId(gra, vert1, vert2)] = 1;
98 }while(vert1 != r);
99 /*path from r to z*/
100 vert1 = z;
101 do
102 {
103 vert2 = vert1;
104 vert1 = spi->pred[r][vert1];
105 proto[RDL_edgeId(gra, vert1, vert2)] = 1;
106 }while(vert1 != r);
107 if(x == UINT_MAX)/*odd cycle*/
108 {
109 proto[RDL_edgeId(gra,y,z)] = 1;
110 }
111 else /*even cycle*/
112 {
113 proto[RDL_edgeId(gra,y,x)] = 1;
114 proto[RDL_edgeId(gra,z,x)] = 1;
115 }
116 return proto;
117 }
118
119 /** fills the rc datastructure with the odd cycle r-y-z-r */
RDL_addOdd(unsigned r,unsigned y,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi,RDL_cfURF * rc)120 static void RDL_addOdd(unsigned r, unsigned y, unsigned z,
121 RDL_graph *gra, RDL_sPathInfo *spi, RDL_cfURF *rc)
122 {
123 RDL_cfam *new;
124 if (rc->alloced == rc->nofFams) {
125 rc->alloced *= 2;
126 rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
127 }
128 /* skip if we're out of memory... */
129 if (rc->fams) {
130 new = malloc(sizeof(*new));
131 new->r = r;
132 new->p = y;
133 new->q = z;
134 new->x = UINT_MAX; /*odd cycle*/
135 new->mark = 0;
136 new->prototype = RDL_findPrototype(r, y, z, UINT_MAX, gra, spi);
137 new->weight = spi->dist[r][y] + spi->dist[r][z] + 1;
138 rc->fams[rc->nofFams++] = new;
139 }
140 }
141
142 /** fills the rc datastructure with the even cycle r-y-x-z-r */
RDL_addEven(unsigned r,unsigned y,unsigned x,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi,RDL_cfURF * rc)143 static void RDL_addEven(unsigned r, unsigned y, unsigned x,
144 unsigned z, RDL_graph *gra, RDL_sPathInfo *spi, RDL_cfURF *rc)
145 {
146 RDL_cfam *new;
147 if (rc->alloced == rc->nofFams) {
148 rc->alloced *= 2;
149 rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
150 }
151 /* skip if we're out of memory... */
152 if (rc->fams) {
153 new = malloc(sizeof(**rc->fams));
154 new->r = r;
155 new->p = y;
156 new->q = z;
157 new->x = x; /*even cycle*/
158 new->mark = 0;
159 new->prototype = RDL_findPrototype(r, y, z, x, gra, spi);
160 new->weight = spi->dist[r][y] + spi->dist[r][z] + 2;
161 rc->fams[rc->nofFams++] = new;
162 }
163 }
164
165 /** finds a number of cycle families that contain at least all RELEVANT cycle
166 families - just like in Vismara's pseudocode */
RDL_vismara(RDL_cfURF * rc,RDL_graph * gra,RDL_sPathInfo * spi)167 static void RDL_vismara(RDL_cfURF *rc, RDL_graph *gra, RDL_sPathInfo *spi)
168 {
169 unsigned i,j;
170 unsigned rv,yv,zv,pv,qv; /*variables as in Vismara's algorithm, extended by a 'v'*/
171 unsigned *evenCand; /*'S' in Vismara's algorithm*/
172 unsigned nofCandidates = 0;
173 evenCand = malloc(gra->V * sizeof(*evenCand));
174
175 for(rv = 0; rv < gra->V; ++rv) {
176 for(yv = 0; yv < gra->V; ++yv) {
177 /*all yv reachable from rv respecting the ordering*/
178 if(spi->reachable[rv][yv] == 1) {
179 nofCandidates = 0;
180 for(i = 0; i < gra->degree[yv]; ++i) {
181 zv = gra->adjList[yv][i][0];
182 /*all zv reachable from rv respecting the ordering and adjacent to yv*/
183 if(spi->reachable[rv][zv] == 1) {
184 if(spi->dist[rv][zv] + 1 == spi->dist[rv][yv]) {
185 evenCand[nofCandidates] = zv;
186 ++nofCandidates;
187 }
188 else if(spi->dist[rv][zv] != spi->dist[rv][yv] + 1
189 && (gra->degree[zv] < gra->degree[yv] ||
190 (gra->degree[zv] == gra->degree[yv] && zv<yv))
191 && RDL_pathsShareOnlyStart(rv, yv, zv, gra, spi) == 1) {
192 /*add odd cycle rv-yv rv-zv zv-yv*/
193 RDL_addOdd(rv, yv, zv, gra, spi, rc);
194 }
195 /*to fill dPaths in sPathInfo with the edges to r*/
196 if(spi->dist[rv][zv] == 1) {
197 RDL_addEdge(spi->dPaths[rv], zv, rv);
198 }
199 }
200 }
201 /*any pair in evenCand*/
202 for(i = 0; i < nofCandidates; ++i) {
203 pv = evenCand[i];
204 for(j = i+1; j < nofCandidates; ++j) {
205 qv = evenCand[j];
206 if((RDL_pathsShareOnlyStart(rv, pv, qv, gra, spi) == 1)) {
207 /*add even cycle rv-pv rv-qv pv-yv-qv*/
208 RDL_addEven(rv, pv, yv, qv, gra, spi, rc);
209 }
210 }
211 }
212 /*to fill dPaths in sPathInfo/fill U_r (see Vismara)*/
213 for(i = 0; i < nofCandidates; ++i) {
214 pv = evenCand[i];
215 RDL_addEdge(spi->dPaths[rv], yv, pv);
216 }
217 }
218 }
219 }
220
221 free(evenCand);
222 }
223
RDL_cycleFamsComp(const void * cf1,const void * cf2)224 static int RDL_cycleFamsComp(const void *cf1, const void *cf2)
225 {
226 if((*((RDL_cfam **)cf1))->weight < (*((RDL_cfam **)cf2))->weight)
227 {
228 return -1;
229 }
230 else if((*((RDL_cfam **)cf1))->weight > (*((RDL_cfam **)cf2))->weight)
231 {
232 return 1;
233 }
234 else return 0;
235 }
236
RDL_findCycleFams(RDL_graph * gra,RDL_sPathInfo * spi)237 RDL_cfURF *RDL_findCycleFams(RDL_graph *gra, RDL_sPathInfo *spi)
238 {
239 RDL_cfURF *rc = malloc(sizeof(*rc));
240 /*number of CFs is at most 2m^2+vn (Vismara Lemma 3)*/
241
242 rc->alloced = 64;
243 rc->fams = malloc(rc->alloced * sizeof(*rc->fams));
244 rc->nofFams = 0;
245 RDL_vismara(rc, gra, spi);
246
247 if (!rc->fams) {
248 RDL_outputFunc(RDL_ERROR, "Graph is too large, can't allocate memory!\n");
249 free(rc);
250 return NULL;
251 }
252
253 rc->alloced = rc->nofFams;
254 rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
255
256 /* sort by size */
257 qsort(rc->fams, rc->nofFams, sizeof(*rc->fams), RDL_cycleFamsComp);
258 return rc;
259 }
260
RDL_deleteCycleFams(RDL_cfURF * rc)261 void RDL_deleteCycleFams(RDL_cfURF *rc)
262 {
263 unsigned i;
264 for(i=0; i<rc->nofFams; ++i)
265 {
266 free(rc->fams[i]->prototype);
267 free(rc->fams[i]);
268 }
269 free(rc->fams);
270 free(rc);
271 }
272
273