1 /* -*- tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2 vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3
4 MDAnalysis --- https://www.mdanalysis.org
5 Copyright (c) 2006-2016 The MDAnalysis Development Team and contributors
6 (see the file AUTHORS for the full list of names)
7
8 Released under the GNU Public Licence, v2 or any higher version
9
10 Please cite your use of MDAnalysis in published work:
11
12 R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13 D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14 MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15 simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16 Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17
18 N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19 MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20 J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <float.h>
25
26 /* Helper functions */
27
trmIndex(int row,int col)28 inline int trmIndex(int row, int col) { // array index for triangular matrix
29 return (row) > (col) ? ((row)+1)*(row)/2+(col) : ((col)+1)*(col)/2+(row);
30 }
31
sqmIndex(int colsn,int row,int col)32 inline int sqmIndex(int colsn, int row, int col) { // array index for square matrix
33 return row*colsn + col;
34 }
35
36 #ifndef _WIN32
min(float * values,int length)37 float min(float * values, int length) { //array min
38 float min = values[0];
39 for (int i=1;i<length;i++) {
40 if (values[i] < min) {
41 min = values[i];
42 }
43 }
44 return min;
45 }
46
max(float * values,int length)47 float max(float * values, int length) { //array max
48 float max = values[0];
49 for (int i=1;i<length;i++) {
50 if (values[i] > max) {
51 max = values[i];
52 }
53 }
54 return max;
55 }
56 #endif
57
printarray(float * array,int lenarray)58 void printarray(float* array, int lenarray) { //print an array, for debug purposes
59 for (int i=0;i<lenarray;i++) {
60 printf("%1.2f\n",array[i]);
61 }
62 }
63
printsqmatrix(float * array,int lenarray)64 void printsqmatrix(float* array, int lenarray) { //print a square matrix, for debug purposes
65 //printf("%s","\n");
66 for (int i=0;i<lenarray;i++) {
67 for (int j=0;j<lenarray;j++) {
68 printf("%1.2f\t",array[sqmIndex(lenarray,i,j)]);
69 }
70 printf("%s","\n");
71 }
72 }
73
printtrmatrix(float * array,int lenarray)74 void printtrmatrix(float* array, int lenarray) { //print a triangular matrix, for debug purposes
75 //printf("%s","\n");
76 for (int i=0; i<lenarray; i++) {
77 for (int j=0; j<=i; j++) {
78 //printf("%d\t",trmIndex(i,j));
79 printf("%1.2f\t",array[trmIndex(i,j)]);
80 }
81 printf("%s","\n");
82 }
83 }
84
CAffinityPropagation(float * s,int n,float lambda,int max_iterations,int convergence,int noise,long * clusters)85 int CAffinityPropagation(float *s, int n, float lambda, int max_iterations, int convergence, int noise, long* clusters) { // Affinity Propagation clustering algorithm
86
87 /* n: number of elements
88 s: similarity matrix
89 lambda: damping parameter ([0.5;1.0[)
90 max_iterations: maximum number of iterations
91 convergence: convergence reached when centroids are stable for convergence iterations
92 noise: apply noise to input similarities to eliminate redundancy */
93
94 float *r = (float *) calloc( n*n , sizeof(float)); // N*N responsibilities matrix
95 float *a = (float *) calloc( n*n , sizeof(float)); // N*N availabilities matrix
96 int *exemplars = (int *) malloc( n * sizeof(int)); // N array of exemplars
97 int *old_exemplars = (int *) malloc( n * sizeof(int)); // N array of old exemplars, for convergence checking
98
99
100 int i = 0; // index i over elements
101 int k = 0; // index k over elements
102 int j = 0; // generic index
103 int idx = 0; // index for triangular matrix
104 int sqm_idx = 0; // index for square matrix
105 int currit = 0; // current iteration number
106 int conv_count = 0; // number of iterations with constant centroids so far
107 float tmpsum = 0.0, maxsim = 0.0, this_tmpsum = 0.0; // accumulators
108 float tmp = 0.0; // temporary value
109 float max1 = 0;
110 float max2 = 0;
111 int conv_reached = 0; // convergence flag
112 int has_cluster = 0; // found clusters flag
113 float lamprev = 1.0 - lambda; // 1-lambda
114 int n_clusters = 0; // number of clusters
115
116
117 if (noise != 0) { // Add noise to data
118 for (int i=0;i<n*(n+1)/2;i++) {
119 s[i] = s[i] + (1e-16*s[i] )*(rand()/((float)RAND_MAX+1));
120 }
121 }
122
123 for (int i=0;i<n*n;i++) {
124 r[i] = 0.0;
125 a[i] = 0.0;
126 }
127
128 for (i=0;i<n;i++) { // Initialize exemplars
129 exemplars[i] = -1;
130 }
131
132 //printtrmatrix(s,n);
133 //printf("\n");
134
135 //printf("Preference %3.2f: running Affinity Propagation.\n",s[0]);
136
137 while (currit < max_iterations && conv_reached == 0) { // Start iterations
138
139 // Update r
140
141 for (int i=0;i<n;i++) {
142 max1 = -FLT_MAX;
143 max2 = -FLT_MAX;
144 for (int j=0;j<n;j++) {
145 sqm_idx = sqmIndex(n,i,j);
146 idx = trmIndex(i,j);
147 tmp = s[idx]+a[sqm_idx];
148 if (tmp > max1) {
149 max2 = max1;
150 max1 = tmp;
151 }
152 else if (tmp > max2) {
153 max2 = tmp;
154 }
155 }
156 for (int k=0;k<n;k++) {
157 idx = trmIndex(i,k);
158 sqm_idx = sqmIndex(n,i,k);
159 if (a[sqm_idx]+s[idx] == max1)
160 r[sqm_idx] = lambda*r[sqm_idx] + lamprev*(s[idx] - max2);
161 else
162 r[sqm_idx] = lambda*r[sqm_idx] + lamprev*(s[idx] - max1);
163 }
164 }
165
166 //printf("%s","r\n");
167 //printsqmatrix(r, n);
168
169 // Update a
170
171 for (int k=0;k<n;k++) {
172 tmpsum = 0.0;
173 for (int j=0;j<n;j++) { //sum all the elements > 0 of column k
174 tmp = r[sqmIndex(n,j,k)];
175 if (j!=k) {
176 if (tmp > 0.0)
177 tmpsum = tmpsum + tmp; // if j!=k (r(j,k)): if > 0, sum it
178 }
179 else
180 tmpsum = tmpsum + tmp; // if j == k (r(k,k)): always sum it.
181 // we will have to remove the j==i (r(i,k)) case, for every i.
182 }
183 //printf("tempsum %1.2f\n",tmpsum);
184 for (int i=0;i<n;i++) {
185 this_tmpsum = tmpsum;
186 sqm_idx = sqmIndex(n,i,k);
187 if (i != k) {
188 tmp = r[sqm_idx];
189 //printf("tmp %1.2f\n",tmp);
190 if (tmp > 0.0)
191 this_tmpsum = this_tmpsum - tmp; //subtract r(i,k)
192 //printf("tmpsum2 %1.2f\n",tmpsum);
193 if (this_tmpsum < 0.0)
194 a[sqm_idx] = lambda*a[sqm_idx] + lamprev*this_tmpsum;
195 else
196 a[sqm_idx] = lambda*a[sqm_idx];
197 }
198 else // we don't need to remove the r(i,k) case, BUT wee need to remove to remove the r(k,k) case
199 a[sqm_idx] = lambda*a[sqm_idx] + lamprev*(this_tmpsum - r[sqmIndex(n,k,k)]);
200 }
201 }
202
203 //printf("%s","a\n");
204 //printsqmatrix(a,n);
205
206 //Check for convergence
207
208 int* tmp_exemplars = old_exemplars; // current exemplars become old, before calculating the new ones. Pointer swap - fast and convenient
209 old_exemplars = exemplars;
210 exemplars = tmp_exemplars;
211
212 has_cluster = 0;
213 for (int i=0;i<n;i++) { // identify exemplars
214 idx = sqmIndex(n,i,i);
215 if (r[idx] + a[idx] > 0.0) {
216 exemplars[i] = 1;
217 has_cluster = 1;
218 }
219 else
220 exemplars[i] = 0;
221 }
222
223 if (has_cluster != 0) {
224 conv_count++;
225 for (j=0;j<n;j++) { // check if exemplars have changed. If they have changed, or if no clusters have been identified, reset convergence counter.
226 if (exemplars[j] != old_exemplars[j]) {
227 conv_count = 0;
228 break;
229 }
230 }
231 }
232 else conv_count = 0;
233
234 if (conv_count == convergence) conv_reached = 1; // check convergence
235
236 currit++; // increment iteration number
237 } // start a new iteration. If convergence or max_iterations reached
238
239 //printf("Preference %3.2f: Convergence reached at iteration %d!\n",currit); // print convergence info
240
241 // find number of clusters
242 for (int i=0;i<n;i++) {
243 idx = sqmIndex(n,i,i);
244 if (r[idx]+a[idx] > 0) {
245 exemplars[n_clusters] = i;
246 n_clusters++;
247 }
248 }
249
250 for (int i=0;i<n;i++) { // assign elements to clusters
251 idx = sqmIndex(n,i,0);
252 maxsim = r[idx]+a[idx];
253 k=0;
254 //printf("%3.1f, ",maxsim);
255 for (k=1;k<n;k++) {
256 idx = sqmIndex(n,i,k);
257 tmpsum = r[idx]+a[idx];
258 if (tmpsum >= maxsim) {
259 clusters[i] = k;
260 maxsim = tmpsum;
261 }
262 }
263 }
264
265 for (int i=0;i<n_clusters;i++) {
266 clusters[exemplars[i]] = exemplars[i];
267 }
268
269
270
271
272
273 //Free memory anyway
274 free(r);
275 free(a);
276 free(exemplars);
277 free(old_exemplars);
278
279 return conv_reached == 1 ? currit : -currit;
280 }
281