1 /* -*- tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
2   vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
3 
4   MDAnalysis --- https://www.mdanalysis.org
5   Copyright (c) 2006-2016 The MDAnalysis Development Team and contributors
6   (see the file AUTHORS for the full list of names)
7 
8   Released under the GNU Public Licence, v2 or any higher version
9 
10   Please cite your use of MDAnalysis in published work:
11 
12   R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
13   D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
14   MDAnalysis: A Python package for the rapid analysis of molecular dynamics
15   simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
16   Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
17 
18   N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
19   MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
20   J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <float.h>
25 
26 /* Helper functions */
27 
trmIndex(int row,int col)28 inline int trmIndex(int row, int col) {  // array index for triangular matrix
29 	return (row) > (col) ? ((row)+1)*(row)/2+(col) : ((col)+1)*(col)/2+(row);
30 }
31 
sqmIndex(int colsn,int row,int col)32 inline int sqmIndex(int colsn, int row, int col) { // array index for square matrix
33 	return row*colsn + col;
34 }
35 
36 #ifndef _WIN32
min(float * values,int length)37 float min(float * values, int length) { //array min
38 	float min = values[0];
39 	for (int i=1;i<length;i++) {
40 		if (values[i] < min) {
41 			min = values[i];
42 		}
43 	}
44 	return min;
45 }
46 
max(float * values,int length)47 float max(float * values, int length) { //array max
48 	float max = values[0];
49 	for (int i=1;i<length;i++) {
50 		if (values[i] > max) {
51 			max = values[i];
52 		}
53 	}
54 	return max;
55 }
56 #endif
57 
printarray(float * array,int lenarray)58 void printarray(float* array, int lenarray) { //print an array, for debug purposes
59 	for (int i=0;i<lenarray;i++) {
60 		printf("%1.2f\n",array[i]);
61 	}
62 }
63 
printsqmatrix(float * array,int lenarray)64 void printsqmatrix(float* array, int lenarray) { //print a square matrix, for debug purposes
65 	//printf("%s","\n");
66 	for (int i=0;i<lenarray;i++) {
67 		for (int j=0;j<lenarray;j++) {
68 			printf("%1.2f\t",array[sqmIndex(lenarray,i,j)]);
69 		}
70 		printf("%s","\n");
71 	}
72 }
73 
printtrmatrix(float * array,int lenarray)74 void printtrmatrix(float* array, int lenarray) { //print a triangular matrix, for debug purposes
75 	//printf("%s","\n");
76 	for (int i=0; i<lenarray; i++) {
77 		for (int j=0; j<=i; j++) {
78 			//printf("%d\t",trmIndex(i,j));
79 			printf("%1.2f\t",array[trmIndex(i,j)]);
80 		}
81 	printf("%s","\n");
82 	}
83 }
84 
CAffinityPropagation(float * s,int n,float lambda,int max_iterations,int convergence,int noise,long * clusters)85 int CAffinityPropagation(float *s, int n, float lambda, int max_iterations, int convergence, int noise, long* clusters) { // Affinity Propagation clustering algorithm
86 
87     /* n: number of elements
88        s: similarity matrix
89        lambda: damping parameter ([0.5;1.0[)
90        max_iterations: maximum number of iterations
91        convergence: convergence reached when centroids are stable for convergence iterations
92        noise: apply noise to input similarities to eliminate redundancy */
93 
94 	float *r =              (float *)  calloc( n*n , sizeof(float));  // N*N responsibilities matrix
95 	float *a  =             (float *)  calloc( n*n , sizeof(float));  // N*N availabilities matrix
96     int *exemplars =          (int *)  malloc( n   * sizeof(int));        // N array of exemplars
97     int *old_exemplars =      (int *)  malloc( n   * sizeof(int));        // N array of old exemplars, for convergence checking
98 
99 
100 	int i = 0;                        // index i over elements
101 	int k = 0;                        // index k over elements
102 	int j = 0;                        // generic index
103     int idx = 0;                      // index for triangular matrix
104     int sqm_idx = 0;                  // index for square matrix
105     int currit = 0;                   // current iteration number
106     int conv_count = 0;               // number of iterations with constant centroids so far
107 	float tmpsum = 0.0, maxsim = 0.0, this_tmpsum = 0.0;      // accumulators
108 	float tmp = 0.0;                  // temporary value
109 	float max1 = 0;
110 	float max2 = 0;
111 	int conv_reached = 0;        // convergence flag
112 	int has_cluster = 0;         // found clusters flag
113 	float lamprev = 1.0 - lambda;     // 1-lambda
114 	int n_clusters = 0; 			// number of clusters
115 
116 
117     if (noise != 0) { // Add noise to data
118         for (int i=0;i<n*(n+1)/2;i++) {
119             s[i] = s[i] + (1e-16*s[i] )*(rand()/((float)RAND_MAX+1));
120         }
121      }
122 
123     for (int i=0;i<n*n;i++) {
124         r[i] = 0.0;
125         a[i] = 0.0;
126     }
127 
128     for (i=0;i<n;i++) { // Initialize exemplars
129 		exemplars[i] = -1;
130 	}
131 
132     //printtrmatrix(s,n);
133     //printf("\n");
134 
135     //printf("Preference %3.2f: running Affinity Propagation.\n",s[0]);
136 
137 	while (currit < max_iterations && conv_reached == 0) { // Start iterations
138 
139 	// Update r
140 
141 		for (int i=0;i<n;i++) {
142 			max1 = -FLT_MAX;
143 			max2 = -FLT_MAX;
144 			for (int j=0;j<n;j++) {
145 				sqm_idx = sqmIndex(n,i,j);
146 				idx = trmIndex(i,j);
147 				tmp = s[idx]+a[sqm_idx];
148 				if (tmp > max1) {
149 					max2 = max1;
150 					max1 = tmp;
151 				}
152 				else if (tmp > max2) {
153 					max2 = tmp;
154 				}
155 			}
156 			for (int k=0;k<n;k++) {
157 				idx = trmIndex(i,k);
158 				sqm_idx = sqmIndex(n,i,k);
159 				if (a[sqm_idx]+s[idx] == max1)
160 					r[sqm_idx] = lambda*r[sqm_idx] + lamprev*(s[idx] - max2);
161 				else
162 					r[sqm_idx] = lambda*r[sqm_idx] + lamprev*(s[idx] - max1);
163 			}
164 		}
165 
166 		//printf("%s","r\n");
167 		//printsqmatrix(r, n);
168 
169 	// Update a
170 
171 		for (int k=0;k<n;k++) {
172 			tmpsum = 0.0;
173 			for (int j=0;j<n;j++) { //sum all the elements > 0 of column k
174 				tmp = r[sqmIndex(n,j,k)];
175 				if (j!=k) {
176                     if (tmp > 0.0)
177                         tmpsum = tmpsum + tmp; // if j!=k (r(j,k)): if > 0, sum it
178                 }
179                 else
180                     tmpsum = tmpsum + tmp; // if j == k (r(k,k)): always sum it.
181                     // we will have to remove the j==i (r(i,k)) case, for every i.
182             }
183             //printf("tempsum %1.2f\n",tmpsum);
184 			for (int i=0;i<n;i++) {
185 			    this_tmpsum = tmpsum;
186 				sqm_idx = sqmIndex(n,i,k);
187 				if (i != k) {
188                     tmp = r[sqm_idx];
189                     //printf("tmp %1.2f\n",tmp);
190 					if (tmp > 0.0)
191 						this_tmpsum = this_tmpsum - tmp; //subtract r(i,k)
192                         //printf("tmpsum2 %1.2f\n",tmpsum);
193 					if (this_tmpsum < 0.0)
194 						a[sqm_idx] = lambda*a[sqm_idx] + lamprev*this_tmpsum;
195 					else
196 						a[sqm_idx] = lambda*a[sqm_idx];
197 				}
198 				else  // we don't need to remove the r(i,k) case, BUT wee need to remove to remove the r(k,k) case
199 					a[sqm_idx] = lambda*a[sqm_idx] + lamprev*(this_tmpsum - r[sqmIndex(n,k,k)]);
200 			}
201 		}
202 
203 		//printf("%s","a\n");
204 		//printsqmatrix(a,n);
205 
206     //Check for convergence
207 
208         int* tmp_exemplars = old_exemplars; // current exemplars become old, before calculating the new ones. Pointer swap - fast and convenient
209         old_exemplars = exemplars;
210         exemplars = tmp_exemplars;
211 
212         has_cluster = 0;
213         for (int i=0;i<n;i++) { // identify exemplars
214             idx = sqmIndex(n,i,i);
215             if (r[idx] + a[idx] > 0.0) {
216                 exemplars[i] = 1;
217                 has_cluster = 1;
218             }
219             else
220                 exemplars[i] = 0;
221         }
222 
223         if (has_cluster != 0) {
224             conv_count++;
225             for (j=0;j<n;j++) { // check if exemplars have changed. If they have changed, or if no clusters have been identified, reset convergence counter.
226                 if (exemplars[j] != old_exemplars[j]) {
227                     conv_count = 0;
228                     break;
229                 }
230             }
231         }
232         else conv_count = 0;
233 
234         if (conv_count == convergence) conv_reached = 1; // check convergence
235 
236         currit++; // increment iteration number
237     } // start a new iteration. If convergence or max_iterations reached
238 
239     //printf("Preference %3.2f: Convergence reached at iteration %d!\n",currit); // print convergence info
240 
241     // find number of clusters
242 	for (int i=0;i<n;i++) {
243 		idx = sqmIndex(n,i,i);
244 		if (r[idx]+a[idx] > 0) {
245 			exemplars[n_clusters] = i;
246 			n_clusters++;
247 		}
248 	}
249 
250     for (int i=0;i<n;i++) { // assign elements to clusters
251         idx = sqmIndex(n,i,0);
252         maxsim = r[idx]+a[idx];
253         k=0;
254 	//printf("%3.1f, ",maxsim);
255         for (k=1;k<n;k++) {
256             idx = sqmIndex(n,i,k);
257             tmpsum = r[idx]+a[idx];
258             if (tmpsum >= maxsim) {
259                 clusters[i] = k;
260                 maxsim = tmpsum;
261             }
262         }
263     }
264 
265 	for (int i=0;i<n_clusters;i++) {
266 		clusters[exemplars[i]] = exemplars[i];
267 	}
268 
269 
270 
271 
272 
273     //Free memory anyway
274     free(r);
275     free(a);
276     free(exemplars);
277     free(old_exemplars);
278 
279     return conv_reached == 1 ? currit : -currit;
280 }
281