1 /*
2  * This file is part of the RingDecomposerLib, licensed
3  * under BSD New license (see LICENSE in the root directory).
4  * Copyright (c) 2016
5  * University of Hamburg, ZBH - Center for Bioinformatics
6  * Niek Andresen, Florian Flachsenberg, Matthias Rarey
7  *
8  * Please cite:
9  *
10  * Kolodzik, A.; Urbaczek, S.; Rarey, M.
11  * Unique Ring Families: A Chemically Meaningful Description
12  * of Molecular Ring Topologies.
13  * J. Chem. Inf. Model., 2012, 52 (8), pp 2013-2021
14  *
15  * Flachsenberg, F.; Andresen, N.; Rarey, M.
16  * RingDecomposerLib: An Open-Source Implementation of
17  * Unique Ring Families and Other Cycle Bases.
18  * J. Chem. Inf. Model., 2017, 57 (2), pp 122-126
19  */
20 
21 #include "RDLcycleFams.h"
22 
23 #include <stdlib.h>
24 #include <limits.h>
25 #include <stdio.h>
26 
27 #include "RDLapsp.h"
28 #include "RDLgraph.h"
29 
30 /** returns 1 if the intersection of P(r,y) and P(r,z) is equal to {r};
31 0 otherwise */
RDL_pathsShareOnlyStart(unsigned r,unsigned y,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi)32 static int RDL_pathsShareOnlyStart(unsigned r, unsigned y, unsigned z, RDL_graph *gra, RDL_sPathInfo *spi)
33 {
34   unsigned result = 0, i, pnt, count=0;
35   unsigned *vertInRY, *vertInRZ; /*edges in P(r,y) and P(r,z)*/
36   vertInRY = malloc(gra->V * sizeof(*vertInRY));
37   vertInRZ = malloc(gra->V * sizeof(*vertInRZ));
38 
39   for(i=0; i<gra->V; ++i)
40   {
41     vertInRY[i] = 0;
42     vertInRZ[i] = 0;
43   }
44   vertInRY[y] = 1;
45   vertInRZ[z] = 1;
46   /*find out which vertices are on the path P(r,y)*/
47   pnt = y;
48   do
49   {
50     pnt = spi->pred[r][pnt];
51     vertInRY[pnt] = 1;
52   }while(pnt != r);
53   /*find out which vertices are on the path P(r,z)*/
54   pnt = z;
55   do
56   {
57     pnt = spi->pred[r][pnt];
58     vertInRZ[pnt] = 1;
59   }while(pnt != r);
60   /*find out if more than one vertex is shared by the paths*/
61   for(i=0; i<gra->V && count<2; ++i)
62   {
63     if(vertInRY[i] == 1 && vertInRZ[i] == 1)
64     {
65       ++count;
66     }
67   }
68   /*if r is the only shared vertex of the two paths return 1*/
69   if(count == 1 && (vertInRY[r] == 1) && (vertInRZ[r] == 1))
70   {
71     result = 1;
72   }
73 
74   free(vertInRY);
75   free(vertInRZ);
76   return result;
77 }
78 
79 /** returns a cycle vector (element of {0,1}^m). odd (x=UINT_MAX) or even cycle.*/
RDL_findPrototype(unsigned r,unsigned y,unsigned z,unsigned x,RDL_graph * gra,RDL_sPathInfo * spi)80 static char *RDL_findPrototype(unsigned r, unsigned y, unsigned z, unsigned x,
81     RDL_graph *gra, RDL_sPathInfo *spi)
82 {
83   unsigned i, vert1, vert2;
84   char *proto;
85 
86   proto = malloc(gra->E * sizeof(*proto));
87   for(i=0; i<gra->E; ++i)
88   {
89     proto[i] = 0;
90   }
91   /*path from r to y*/
92   vert1 = y;
93   do
94   {
95     vert2 = vert1;
96     vert1 = spi->pred[r][vert1];
97     proto[RDL_edgeId(gra, vert1, vert2)] = 1;
98   }while(vert1 != r);
99   /*path from r to z*/
100   vert1 = z;
101   do
102   {
103     vert2 = vert1;
104     vert1 = spi->pred[r][vert1];
105     proto[RDL_edgeId(gra, vert1, vert2)] = 1;
106   }while(vert1 != r);
107   if(x == UINT_MAX)/*odd cycle*/
108   {
109     proto[RDL_edgeId(gra,y,z)] = 1;
110   }
111   else /*even cycle*/
112   {
113     proto[RDL_edgeId(gra,y,x)] = 1;
114     proto[RDL_edgeId(gra,z,x)] = 1;
115   }
116   return proto;
117 }
118 
119 /** fills the rc datastructure with the odd cycle r-y-z-r */
RDL_addOdd(unsigned r,unsigned y,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi,RDL_cfURF * rc)120 static void RDL_addOdd(unsigned r, unsigned y, unsigned z,
121     RDL_graph *gra, RDL_sPathInfo *spi, RDL_cfURF *rc)
122 {
123   RDL_cfam *new;
124   if (rc->alloced == rc->nofFams) {
125     rc->alloced *= 2;
126     rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
127   }
128   /* skip if we're out of memory... */
129   if (rc->fams) {
130     new = malloc(sizeof(*new));
131     new->r = r;
132     new->p = y;
133     new->q = z;
134     new->x = UINT_MAX; /*odd cycle*/
135     new->mark = 0;
136     new->prototype = RDL_findPrototype(r, y, z, UINT_MAX, gra, spi);
137     new->weight = spi->dist[r][y] + spi->dist[r][z] + 1;
138     rc->fams[rc->nofFams++] = new;
139   }
140 }
141 
142 /** fills the rc datastructure with the even cycle r-y-x-z-r */
RDL_addEven(unsigned r,unsigned y,unsigned x,unsigned z,RDL_graph * gra,RDL_sPathInfo * spi,RDL_cfURF * rc)143 static void RDL_addEven(unsigned r, unsigned y, unsigned x,
144     unsigned z, RDL_graph *gra, RDL_sPathInfo *spi, RDL_cfURF *rc)
145 {
146   RDL_cfam *new;
147   if (rc->alloced == rc->nofFams) {
148     rc->alloced *= 2;
149     rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
150   }
151   /* skip if we're out of memory... */
152   if (rc->fams) {
153     new = malloc(sizeof(**rc->fams));
154     new->r = r;
155     new->p = y;
156     new->q = z;
157     new->x = x; /*even cycle*/
158     new->mark = 0;
159     new->prototype = RDL_findPrototype(r, y, z, x, gra, spi);
160     new->weight = spi->dist[r][y] + spi->dist[r][z] + 2;
161     rc->fams[rc->nofFams++] = new;
162   }
163 }
164 
165 /** finds a number of cycle families that contain at least all RELEVANT cycle
166 families - just like in Vismara's pseudocode */
RDL_vismara(RDL_cfURF * rc,RDL_graph * gra,RDL_sPathInfo * spi)167 static void RDL_vismara(RDL_cfURF *rc, RDL_graph *gra, RDL_sPathInfo *spi)
168 {
169   unsigned i,j;
170   unsigned rv,yv,zv,pv,qv; /*variables as in Vismara's algorithm, extended by a 'v'*/
171   unsigned *evenCand; /*'S' in Vismara's algorithm*/
172   unsigned nofCandidates = 0;
173   evenCand = malloc(gra->V * sizeof(*evenCand));
174 
175   for(rv = 0; rv < gra->V; ++rv) {
176     for(yv = 0; yv < gra->V; ++yv) {
177       /*all yv reachable from rv respecting the ordering*/
178       if(spi->reachable[rv][yv] == 1) {
179         nofCandidates = 0;
180         for(i = 0; i < gra->degree[yv]; ++i) {
181           zv = gra->adjList[yv][i][0];
182           /*all zv reachable from rv respecting the ordering and adjacent to yv*/
183           if(spi->reachable[rv][zv] == 1) {
184             if(spi->dist[rv][zv] + 1 == spi->dist[rv][yv]) {
185               evenCand[nofCandidates] = zv;
186               ++nofCandidates;
187             }
188             else if(spi->dist[rv][zv] != spi->dist[rv][yv] + 1
189                 && (gra->degree[zv] < gra->degree[yv] ||
190                     (gra->degree[zv] == gra->degree[yv] && zv<yv))
191                 && RDL_pathsShareOnlyStart(rv, yv, zv, gra, spi) == 1) {
192               /*add odd cycle rv-yv rv-zv zv-yv*/
193               RDL_addOdd(rv, yv, zv, gra, spi, rc);
194             }
195             /*to fill dPaths in sPathInfo with the edges to r*/
196             if(spi->dist[rv][zv] == 1) {
197               RDL_addEdge(spi->dPaths[rv], zv, rv);
198             }
199           }
200         }
201         /*any pair in evenCand*/
202         for(i = 0; i < nofCandidates; ++i) {
203           pv = evenCand[i];
204           for(j = i+1; j < nofCandidates; ++j) {
205             qv = evenCand[j];
206             if((RDL_pathsShareOnlyStart(rv, pv, qv, gra, spi) == 1)) {
207               /*add even cycle rv-pv rv-qv pv-yv-qv*/
208               RDL_addEven(rv, pv, yv, qv, gra, spi, rc);
209             }
210           }
211         }
212         /*to fill dPaths in sPathInfo/fill U_r (see Vismara)*/
213         for(i = 0; i < nofCandidates; ++i) {
214           pv = evenCand[i];
215           RDL_addEdge(spi->dPaths[rv], yv, pv);
216         }
217       }
218     }
219   }
220 
221   free(evenCand);
222 }
223 
RDL_cycleFamsComp(const void * cf1,const void * cf2)224 static int RDL_cycleFamsComp(const void *cf1, const void *cf2)
225 {
226     if((*((RDL_cfam **)cf1))->weight < (*((RDL_cfam **)cf2))->weight)
227     {
228         return -1;
229     }
230     else if((*((RDL_cfam **)cf1))->weight > (*((RDL_cfam **)cf2))->weight)
231     {
232         return 1;
233     }
234     else return 0;
235 }
236 
RDL_findCycleFams(RDL_graph * gra,RDL_sPathInfo * spi)237 RDL_cfURF *RDL_findCycleFams(RDL_graph *gra, RDL_sPathInfo *spi)
238 {
239   RDL_cfURF *rc = malloc(sizeof(*rc));
240   /*number of CFs is at most 2m^2+vn (Vismara Lemma 3)*/
241 
242   rc->alloced = 64;
243   rc->fams = malloc(rc->alloced * sizeof(*rc->fams));
244   rc->nofFams = 0;
245   RDL_vismara(rc, gra, spi);
246 
247   if (!rc->fams) {
248     RDL_outputFunc(RDL_ERROR, "Graph is too large, can't allocate memory!\n");
249     free(rc);
250     return NULL;
251   }
252 
253   rc->alloced = rc->nofFams;
254   rc->fams = realloc(rc->fams, rc->alloced * sizeof(*rc->fams));
255 
256   /* sort by size */
257   qsort(rc->fams, rc->nofFams, sizeof(*rc->fams), RDL_cycleFamsComp);
258   return rc;
259 }
260 
RDL_deleteCycleFams(RDL_cfURF * rc)261 void RDL_deleteCycleFams(RDL_cfURF *rc)
262 {
263   unsigned i;
264   for(i=0; i<rc->nofFams; ++i)
265   {
266     free(rc->fams[i]->prototype);
267     free(rc->fams[i]);
268   }
269   free(rc->fams);
270   free(rc);
271 }
272 
273