1 #include <stdio.h>
2 #include <gsl/gsl_multifit.h>
3
4 #include "../mrilib.h"
5 #include "cluster_floatNOMASK.h"
6 #include "thd_segtools_fNM.h"
7
8 static int verb=0;
9 static int writedists=0;
segtools_verb(int ii)10 void segtools_verb( int ii ){ verb = ii ; if (verb > 1) writedists = 1;}
segtools_writedists(int ii)11 void segtools_writedists (int ii) {
12 /* The files you get with writedists
13 .kgg.1D : Cluster assignments
14 .dis.1D : Distance between clusters
15 .cen.1D : Cluster centroids
16 .info1.1D: Within cluster sum of distances
17 .info2.1D: Maximum distance within each cluster
18 .vcd.1D: Voxel to centroids distnaces
19 */
20 writedists = ii;
21 }
22
new_kmeans_oc(void)23 OPT_KMEANS new_kmeans_oc(void)
24 {
25 OPT_KMEANS oc;
26 int i;
27 memset(&(oc), 0, sizeof(OPT_KMEANS));
28
29 oc.r = 1;
30 oc.k = 0;
31 oc.kh = 0;
32 oc.jobname = NULL;
33 oc.distmetric = 'u';
34 oc.verb = 0;
35 oc.writedists = 0;
36 oc.rand_seed = 1234567;
37 oc.remap = NONE;
38 oc.user_labeltable=NULL;
39 for (i=0; i<400; ++i) oc.clabels[i] = NULL;
40 oc.nclabels=0;
41 oc.voxdebug[0]=oc.voxdebug[1]=oc.voxdebug[2]=oc.voxdebug[3]=-1;
42 return(oc);
43 }
44
45 /*!
46 \brief out_set = thd_polyfit( in_set,
47 mask, polorder,
48 prefix, verb);
49 fits a polynomial model of order polorder to
50 the time series of voxels in in_set
51 \param in_set (THD_3dim_dataset* ) An AFNI dset pointer to input data
52 \param mask (byte *) if mask is not NULL then voxel i will be processed
53 if mask[i] != 0.
54 if mask is NULL then all voxels are processed.
55 \param polorder (int) polynomial order
56 \param prefix (char *) prefix of output dset
57 \param verb (int) verbosity flag
58 \return out_set (THD_3dim_dataset* ) Dset containing polynomial fits.
59 */
thd_polyfit(THD_3dim_dataset * in_set,byte * mask,int polorder,char * prefix,int verb)60 THD_3dim_dataset *thd_polyfit(THD_3dim_dataset *in_set,
61 byte *mask, int polorder,
62 char *prefix, int verb)
63 {
64 int i=0, j=0, nl=0, k=0,
65 posi=0, posj=0, posk=0, nrow=0,
66 ncol = 0;
67 double xi=0.0, yi=0.0, yy=0.0, ei=0.0, sumsq=0.0, med=0.0;
68 gsl_matrix *X=NULL, *cov=NULL;
69 gsl_vector *y=NULL, *w=NULL, *c=NULL;
70 MRI_IMAGE *im = NULL;
71 THD_3dim_dataset *out_set=NULL;
72 double *dar = NULL;
73 float *cbuf=NULL;
74 float *dvec = NULL;
75 gsl_multifit_linear_workspace *work=NULL;
76
77 ENTRY("thd_polyfit");
78
79 /* prepare output */
80 out_set = EDIT_empty_copy(in_set) ;
81 EDIT_dset_items( out_set ,
82 ADN_nvals , polorder ,
83 ADN_ntt , polorder ,
84 ADN_datum_all , MRI_float ,
85 ADN_brick_fac , NULL ,
86 ADN_prefix , prefix ? prefix : "OMG!" ,
87 ADN_none ) ;
88
89 for( j=0 ; j < polorder ; j++ ) /* create empty bricks to be filled below */
90 EDIT_substitute_brick( out_set , j , MRI_float , NULL ) ;
91
92
93 /* do the fitting */
94 if (verb) fprintf (stderr,"Now fitting...\n");
95
96 ncol = DSET_NVALS(in_set);
97 nrow = DSET_NVOX(in_set);
98
99 X = gsl_matrix_alloc (ncol, polorder);
100 y = gsl_vector_alloc (ncol);
101
102 c = gsl_vector_alloc (polorder);
103 cov = gsl_matrix_alloc (polorder, polorder);
104
105 for (i = 0; i < ncol; i++) {
106 xi = i+1;
107 gsl_matrix_set (X, i, 0, 1.0);
108 gsl_matrix_set (X, i, 1, xi);
109 gsl_matrix_set (X, i, 2, xi*xi);
110
111 gsl_matrix_set (X, i, 3, xi*xi*xi);
112 gsl_matrix_set (X, i, 4, xi*xi*xi*xi);
113 // printf ("%lg ",xi);
114 }
115
116
117 /*make header
118 printf ("matrvola\n"); */
119 if (verb > 1)
120 fprintf(stdout, "#%s_0\t%s_1\t%s_2\t%s_3\t%s_4\n",
121 DSET_PREFIX(in_set),DSET_PREFIX(in_set),
122 DSET_PREFIX(in_set),DSET_PREFIX(in_set),
123 DSET_PREFIX(in_set));
124
125 /* go by lines - signatures
126 pre-allocate, I think this should be just fine,
127 there should be no need to reinitialize work
128 all the time */
129 work = gsl_multifit_linear_alloc (ncol, polorder);
130
131 dvec = (float * )malloc(sizeof(float)*ncol) ; /* array to hold signature */
132 cbuf = (float *)malloc(sizeof(float)*polorder) ;
133 /* array to hold fit */
134 for (nl=0; nl<nrow; ++nl) {
135 if (!mask || mask[nl]) {
136 posi = -1;
137 posj = -1;
138 posk = -1;
139
140 THD_extract_array( nl , in_set , 0 , dvec ) ;
141 /*get signature from voxel */
142
143 for (k = 0; k < ncol; k++) {
144 gsl_vector_set (y, k, dvec[k]);
145 }
146
147 gsl_multifit_linear (X, y, c, cov,
148 &sumsq, work);
149
150 /* printf ( "\n # best fit: Y = %g + %g X + %g X^2 +%g X^3 + %g X^4\n",
151 C(0), C(1), C(2), C(3), C(4));
152 printf ("# sumsq = %g\n", sumsq); */
153
154 for (i=0;i<polorder;++i) cbuf[i] = (float)C(i);
155 THD_insert_series( nl , out_set , polorder , MRI_float , cbuf , 1 ) ;
156 /* stick result in output */
157 if (verb > 1)
158 fprintf (stdout, "%11g\t%11g\t%11g\t%11g\t%11g\n",
159 C(0), C(1), C(2), C(3), C(4));
160
161 /*
162 printf ("# covariance matrix:\n");
163 printf ("[ %+.5e, %+.5e, %+.5e \n",
164 COV(0,0), COV(0,1), COV(0,2));
165 printf (" %+.5e, %+.5e, %+.5e \n",
166 COV(1,0), COV(1,1), COV(1,2));
167 printf (" %+.5e, %+.5e, %+.5e ]\n",
168 COV(2,0), COV(2,1), COV(2,2));
169 printf ("# chisq = %g\n", chisq);
170 */
171
172 }
173 }
174 gsl_multifit_linear_free (work); work = NULL;
175 free(dvec); dvec = NULL;
176 free(cbuf); cbuf = NULL;
177 gsl_vector_free (y);
178 gsl_vector_free (c);
179 gsl_matrix_free (cov);
180 gsl_matrix_free (X);
181 //gsl_vector_free (w);
182 free(dvec); dvec = NULL;
183
184 RETURN(out_set);
185 }
186
187
188 /**********************************************************************
189 BEGIN: functions based on command.c code from The C clustering library.
190 ANDREJ: If we end up using this, we must include copyright notice
191 in AFNI prominently
192 **********************************************************************/
193
clusterlib_display_version(void)194 void clusterlib_display_version(void)
195 { printf ("\n"
196 "Clustering for segmentation, command line version ,\n"
197 "using the C Clustering Library version 1.38.\n"
198 "\n"
199 "Cluster was originally written by Michael Eisen (eisen 'AT' rana.lbl.gov)\n"
200 "Copyright 1998-99 Stanford University.\n");
201 printf ("\n"
202 "The command line version of Clustering version was created by Andrej Vovk\n"
203 "and will be surely corrected by Ziad Saad ;),\n"
204 "\n");
205 return;
206 }
207
clusterlib_setjobname(const char * basename,int strip)208 char* clusterlib_setjobname(const char* basename, int strip)
209 { char* jobname=NULL; char* extension=NULL;
210 int n = strlen(basename);
211 if (strip)
212 { extension = strrchr(basename, '.');
213 if (extension) n -= strlen(extension);
214 }
215 jobname = malloc((n+1)*sizeof(char));
216 strncpy (jobname, basename, n);
217 jobname[n] = '\0';
218
219 if (strip) { /* ZSS, also remove +orig? */
220 n = strlen(jobname);
221 extension = strstr(jobname,"+orig");
222 if (!extension) extension = strstr(jobname,"+acpc");
223 if (!extension) extension = strstr(jobname,"+tlrc");
224 if (extension) n -= strlen(extension);
225 jobname[n] = '\0';
226 }
227 return jobname;
228 }
229
clusterlib_readnumber(const char word[])230 int clusterlib_readnumber(const char word[])
231 { char* error = 0;
232 long value = 0;
233 if (word[0] == '0' && word[1] != '\0') ++word;
234 value = strtol(word,&error,0);
235 if (*error=='\0') return (int)value;
236 else return -1;
237 }
238
clusterlib_getmetric(int i)239 char clusterlib_getmetric(int i)
240 { switch (i)
241 { case 1: return 'u';
242 case 2: return 'c';
243 case 3: return 'x';
244 case 4: return 'a';
245 case 5: return 's';
246 case 6: return 'k';
247 case 7: return 'e';
248 case 8: return 'b';
249 default: return '\0';
250 }
251 /* Never get here */
252 return '\0';
253 }
254
255 /* ========================================================================= */
256
257
258
example_distance_gene(int nrows,int ncols,float ** data)259 float** example_distance_gene(int nrows, int ncols, float** data)
260 /* Calculate the distance matrix between genes using the Euclidean distance. */
261 { int i, j, ii, nl, nc;
262 float** distMatrix;
263 float* weight = malloc(ncols*sizeof(float));
264
265
266
267 printf("============ Euclidean distance matrix between genes ============\n");
268 for (i = 0; i < ncols; i++) weight[i] = 1.0;
269 distMatrix = distancematrix(nrows, ncols, data, weight, 'e', 0);
270 /* ANDREJ: The compiler gave me a warning that:
271 thd_segtools.c: In function 'example_distance_gene':
272 thd_segtools.c:234: warning: assignment makes pointer from integer
273 without a cast
274 This led me to realize that the compiler did not have the prototype
275 of this function before reaching this point and assumed the returned
276 value was an int to be typecast to float **.
277 But int may not be enough to hold a pointer anymore
278
279 In any case, there just is not enough memory for this operation here
280 so we'll have to consider whether this is useful enough to pursue
281 in different ways. */
282
283 if (!distMatrix)
284 { printf ("Insufficient memory to store the distance matrix\n");
285 free(weight);
286 return NULL;
287 }
288 /*printf(" Gene:");
289 for(i=0; i<nrows-1; i++) printf("%6d", i);
290 printf("\n");
291 for(i=0; i<nrows; i++)
292 { printf("Gene %2d:",i);
293 for(j=0; j<i; j++) printf(" %5.2f",distMatrix[i][j]);
294 printf("\n");
295 }
296 printf("\n");
297 */
298
299
300
301 free(weight);
302 return distMatrix;
303 }
304
305
306 /* ========================================================================= */
307
example_hierarchical(int nrows,int ncols,float ** data,char * jobname,int k,float ** distmatrix,int * clusterid)308 void example_hierarchical( int nrows, int ncols,
309 float** data,
310 char* jobname,
311 int k, float** distmatrix,
312 int *clusterid)
313 /* Perform hierarchical clustering ... , float** distmatrix */
314 { int i, ii, nl, nc;
315 const int nnodes = nrows-1;
316 float* weight = malloc(ncols*sizeof(float));
317 Node* tree;
318
319 char* filename;
320 //char* filename2;
321 FILE *out1; int n;
322
323
324 for (i = 0; i < ncols; i++) weight[i] = 1.0;
325 printf("\n");
326
327
328
329 n = 1 + strlen(jobname) + strlen("_C") + strlen(".ext");
330
331 if (k)
332 { int dummy = k;
333 do n++; while (dummy/=10);
334 }
335
336
337 filename = malloc(n*sizeof(char));
338
339 sprintf (filename, "%s_C%d.hie", jobname, k);
340 out1 = fopen( filename, "w" );
341
342 /*FILE *out2;
343 filename2 = malloc(n*sizeof(char));
344
345 sprintf (filename2, "%s_C%d.hi1", jobname, k);
346 out2 = fopen( filename2, "w" );*/
347
348 //HERE SHOULD USE method instead of 'xxx' (s,m,a,c)
349
350
351 printf("================ Pairwise single linkage clustering ============\n");
352 /* Since we have the distance matrix here, we may as well use it. */
353 tree = treecluster(nrows, ncols, 0, 0, 0, 'e', 's', distmatrix);
354 /* The distance matrix was modified by treecluster, so we cannot use it any
355 * more. But we still need to deallocate it here.
356 * The first row of distmatrix is a single null pointer; no need to free it.
357 */
358 for (i = 1; i < nrows; i++) free(distmatrix[i]);
359 free(distmatrix);
360 if (!tree)
361 { /* Indication that the treecluster routine failed */
362 printf ("treecluster routine failed due to insufficient memory\n");
363 free(weight);
364 return;
365 }
366
367 #if 0
368 /* Andrej: This block looked like it was commented out
369 I took out some of the * / because they
370 were generating warning and blocked out the
371 entire section with #if 0 .
372 The compiler will not compile this section */
373
374 fprintf(out2,"Node Item 1 Item 2 Distance\n");
375 for(i=0; i<nnodes; i++)
376 fprintf(out2,"%3d:%9d%9d %g\n",
377 -i-1, tree[i].left, tree[i].right, tree[i].distance);
378 printf("\n");
379 fclose(out2);
380 //free(tree);
381
382
383 printf("================ Pairwise maximum linkage clustering ============\n");
384 tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'm', 0);
385 /* Here, we let treecluster calculate the distance matrix for us. In that
386 * case, the treecluster routine may fail due to insufficient memory to store
387 * the distance matrix. For the small data sets in this example, that is
388 * unlikely to occur though. Let's check for it anyway:
389 */
390 if (!tree)
391 { /* Indication that the treecluster routine failed */
392 printf ("treecluster routine failed due to insufficient memory\n");
393 free(weight);
394 return;
395 }
396 printf("Node Item 1 Item 2 Distance\n");
397 for(i=0; i<nnodes; i++)
398 printf("%3d:%9d%9d %g\n",
399 -i-1, tree[i].left, tree[i].right, tree[i].distance);
400 printf("\n");
401 free(tree);
402
403
404
405 printf("================ Pairwise average linkage clustering ============\n");
406 tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'a', 0);
407 if (!tree)
408 { /* Indication that the treecluster routine failed */
409 printf ("treecluster routine failed due to insufficient memory\n");
410 free(weight);
411 return;
412 }
413 printf("Node Item 1 Item 2 Distance\n");
414 for(i=0; i<nnodes; i++)
415 printf("%3d:%9d%9d %g\n",
416 -i-1, tree[i].left, tree[i].right, tree[i].distance);
417 printf("\n");
418 free(tree);
419
420
421
422 printf("================ Pairwise centroid linkage clustering ===========\n");
423 tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'c', 0);
424 if (!tree)
425 { /* Indication that the treecluster routine failed */
426 printf ("treecluster routine failed due to insufficient memory\n");
427 free(weight);
428 return;
429 }
430 printf("Node Item 1 Item 2 Distance\n");
431 for(i=0; i<nnodes; i++)
432 printf("%3d:%9d%9d %g\n",
433 -i-1, tree[i].left, tree[i].right, tree[i].distance);
434 printf("\n");
435
436 #endif
437
438
439
440 printf("=============== Cutting a hierarchical clustering tree ==========\n");
441 clusterid = malloc(nrows*sizeof(int));
442 printf(" number of clusters %d \n",k);
443 cuttree (nrows, tree, k, clusterid);
444 for(i=0; i<nrows; i++)
445 fprintf(out1, "%09d\t%2d\n", i, clusterid[i]);
446 fprintf(out1, "\n");
447 fclose(out1);
448
449
450 free(tree);
451 free(weight);
452 return;
453 }
454
455 /* ========================================================================= */
456
getvoxlclusterdist(int * count,float ** cdata,int * clusterid,float ** data,char * jobname,int nclusters,int nrows,int ncols,float ** vcdata,char dist)457 void getvoxlclusterdist(int* count, float** cdata,
458 int* clusterid, float** data, char* jobname,
459 int nclusters, int nrows, int ncols, float** vcdata,
460 char dist)
461 {
462 int i, j, n;
463 char* filename4;
464 FILE *out4=NULL;
465 FILE *out5=NULL;
466 float difference, difference1;
467 float* max_vcdata = NULL;
468 int dummy = 0;
469 char* filename5;
470 float distance = 0.0;
471 float (*metric)
472 (int, float**, float**, const float[], int, int, int) =
473 setmetric(dist);
474 float *weight=NULL;
475
476
477 ENTRY("getvoxlclusterdist");
478 /* allocate for answer arrays */
479 if (!(max_vcdata = (float *)calloc(sizeof(float), nclusters))) {
480 fprintf(stderr,"ERROR: Failed to allocate for max_vcdata\n");
481 EXRETURN;
482 }
483
484
485 n = 512 + strlen(jobname) + strlen("_K_G") + strlen(".ext");
486
487 dummy = nclusters;
488 do n++; while (dummy/=10);
489
490 /*avovk, june 09, changed output names*/
491
492 filename4 = malloc(n*sizeof(char));
493 sprintf (filename4, "%s_K%d_Gx.vcd.1D", jobname, nclusters);
494 if (writedists) { out4 = fopen( filename4, "w" ); }
495
496 filename5 = malloc((n+2)*sizeof(char));
497 sprintf (filename5, "%s_K%d_Gx.info2.1D", jobname, nclusters);
498 if (writedists) { out5 = fopen( filename5, "w" ); }
499
500 weight = (float *)calloc(ncols, sizeof(float));
501 for (i = 0; i < ncols; ++i) weight[i] = 1.0;
502
503 for (i = 0; i < nrows; i++){
504 #if 0 /* ZSS, distances should be same for clustering */
505 difference = 0;
506 difference1 = 0;
507 for (j = 0; j < ncols; j++) {
508 difference1 = cdata[clusterid[i]][j]-data[i][j];
509 difference = difference + difference1*difference1;
510 }
511 vcdata[i][0] = 100/(1+sqrt(difference)); /* if values are close to 0 ?! */
512 #else
513 distance = metric(ncols, cdata, data, weight, clusterid[i], i, 0);
514 vcdata[i][0] = distance; /* ZSS: Don't know what you want to do here */
515 #endif
516 }
517
518 /* avovk JULY29_2008, sept 05 */
519 for (i = 0; i < nclusters; i++){
520 max_vcdata[i] = 0;
521 }
522
523 for (i = 0; i < nrows; i++){
524 if (vcdata[i][0] > max_vcdata[clusterid[i]]) {
525 max_vcdata[clusterid[i]] = vcdata[i][0];
526 }
527 }
528
529 if (out5) {
530 if (verb)
531 printf ("------- writing max distances within clusters to file:\t\t"
532 " %s_K_G%d.info2.1D", jobname, nclusters);
533 fprintf(out5, "#max distance within cluster (job %s, %d clusters)\n",
534 jobname, nclusters);
535 }
536 for (i = 0; i < nclusters; i++){
537 if (verb) printf("%7.3f\n",max_vcdata[i]);
538 if (out5) fprintf(out5, "#cluster %d:\n"
539 "%d %7.3f\n",i, i, max_vcdata[i]);
540 }
541
542 for (i = 0; i < nrows; i++){
543 difference = vcdata[i][0];
544 vcdata[i][0] = 100*clusterid[i]+100*difference/(max_vcdata[clusterid[i]]);
545 }
546 /* avovk JULY29_2008, sept 05 */
547
548 if (out4) {
549 if (verb) printf ("------- writing voxels-centroids distances to file:\t\t"
550 " %s_K_G%d.vcd.1D\n",jobname, nclusters);
551 fprintf (out4, "#distance from voxel to its centroid (job %s, %d clusters)\n",
552 jobname, nclusters);
553 for (i = 0; i < nrows; i++)
554 fprintf (out4, "%09d\t%7.3f\n", i, vcdata[i][0]);
555 }
556 if (out4) fclose(out4); out4=NULL;
557 if (out5) fclose(out5); out5=NULL;
558
559 EXRETURN;
560 }
561
562 /* ========================================================================= */
563
564
getvoxlclustersdist(int * count,float ** cdata,int * clusterid,float ** data,char * jobname,int nclusters,int nrows,int ncols,float ** vcdata,char dist)565 void getvoxlclustersdist(int* count, float** cdata,
566 int* clusterid, float** data, char* jobname,
567 int nclusters, int nrows, int ncols, float **vcdata,
568 char dist)
569 {
570 int i, j, k;
571 float distance = 0.0, *weight=NULL;
572 float (*metric)
573 (int, float**, float**, const float[], int, int, int) =
574 setmetric(dist);
575
576
577 weight = (float *)calloc(ncols, sizeof(float));
578 for (i = 0; i < ncols; ++i) weight[i] = 1.0;
579
580 for (k = 0; k < nclusters; k++){
581 for (i = 0; i < nrows; i++){
582 #if 0 /* ZSS: distances should be same for clustering */
583 difference = 0;
584 difference1 = 0;
585 for (j = 0; j < ncols; j++) {
586 /*ZSS: Did you CHECK THIS LOOP ? ; j is going through fitcoef*/
587 difference1 = cdata[k][j]-data[i][j];
588 difference = difference + difference1*difference1;
589 }
590 vcdata[i][k+1] = 100/(1+sqrt(difference)); /* actually 1/distance */
591 #else
592 distance = metric(ncols, cdata, data, weight, k, i, 0);
593 vcdata[i][k+1] = distance;/* ZSS: Don't know what you want to do here */
594 #endif
595 } /* some kind of scaled probability */
596 }
597
598 return;
599
600 }
601
602
603 /* avovkJULY30: it would be nice to have color pallete for cluster probability */
604
color_palette(int nclusters,char * jobname)605 void color_palette(int nclusters, char* jobname)
606 {
607 int n = 0;
608 int i, j;
609 char* filename=NULL;
610 FILE *out=NULL;
611 float a, c;
612 int nsteps, step, colorv, hexp1, hexp2;
613 char* hexnumbers=NULL;
614 int verb = 0;
615
616 hexnumbers = (char *)malloc(32*sizeof(char));
617 sprintf (hexnumbers, "0123456789abcdef");
618
619 n = 512 + strlen(jobname) + strlen(".pal") + 2;
620 filename = (char *)malloc(n*sizeof(char));
621 sprintf (filename, "%s_K%d.pal", jobname,nclusters);
622 /* output file name not good ! */
623 if (!(out = fopen( filename, "w" ))) {
624 fprintf(stderr,"Failed to open %s for writing\n", filename);
625 }
626
627 c = nclusters;
628
629 nsteps = 256/nclusters;
630
631 /*printf("num of color steps per cluster: %d \n",nsteps); */
632
633 /* now we use those steps to create color fading to/from black*/
634
635 step = 256/nsteps - 2; /* number of clusters -1, that we don't go to black*/
636
637 if (nclusters < 3)
638 step = 256/nsteps - 1;
639
640
641 fprintf (out, "color_%d_clusters\n",nclusters);
642
643 /* for ( i = 0; i < nclusters; i++) {*/
644
645 colorv = 255;
646 for ( j = 0; j < nsteps; j++) {
647 colorv = colorv - step;
648 hexp1 = colorv/16;
649 hexp2 = colorv - hexp1*16;
650
651 fprintf (out, "#%c%c0000\n", hexnumbers[hexp1], hexnumbers[hexp2]);
652
653 }
654
655
656 if (nclusters > 1) {
657 colorv = 255;
658 for ( j = 0; j < nsteps; j++) {
659 colorv = colorv - step;
660 hexp1 = colorv/16;
661 hexp2 = colorv - hexp1*16;
662
663 fprintf (out, "#00%c%c00\n", hexnumbers[hexp1], hexnumbers[hexp2]);
664
665 }
666 }
667
668 if (nclusters > 2) {
669 colorv = 255;
670 for ( j = 0; j < nsteps; j++) {
671 colorv = colorv - step;
672 hexp1 = colorv/16;
673 hexp2 = colorv - hexp1*16;
674
675 fprintf (out, "#0000%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2]);
676
677 }
678 }
679
680
681 if (nclusters > 3) {
682 colorv = 255;
683 for ( j = 0; j < nsteps; j++) {
684 colorv = colorv - step;
685 hexp1 = colorv/16;
686 hexp2 = colorv - hexp1*16;
687
688 fprintf (out, "#%c%c%c%c00\n", hexnumbers[hexp1], hexnumbers[hexp2],
689 hexnumbers[hexp1], hexnumbers[hexp2]);
690
691 }
692 }
693
694 if (nclusters > 4) {
695 colorv = 255;
696 for ( j = 0; j < nsteps; j++) {
697 colorv = colorv - step;
698 hexp1 = colorv/16;
699 hexp2 = colorv - hexp1*16;
700
701 fprintf (out, "#00%c%c%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2],
702 hexnumbers[hexp1], hexnumbers[hexp2]);
703
704 }
705 }
706
707 if (nclusters > 5) {
708 colorv = 255;
709 for ( j = 0; j < nsteps; j++) {
710 colorv = colorv - step;
711 hexp1 = colorv/16;
712 hexp2 = colorv - hexp1*16;
713
714 fprintf (out, "#%c%c00%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2],
715 hexnumbers[hexp1], hexnumbers[hexp2]);
716
717 }
718 }
719
720
721 if (verb) printf ("------- Color palette written to file:\t\t"
722 "%s_K%d.pal\n",jobname,nclusters);
723
724 fclose(out); out=NULL;
725 free(hexnumbers); hexnumbers = NULL; /* clean up */
726 }
727
728 /* ========================================================================= */
729
example_kmeans(int nrows,int ncols,float ** data,int nclusters,int npass,char dist,char * jobname,int * clusterid,float ** vcdata,REMAPS remap)730 void example_kmeans( int nrows, int ncols,
731 float** data,
732 int nclusters, int npass,
733 char dist, char* jobname,
734 int *clusterid, float **vcdata,
735 REMAPS remap)
736 /* Perform k-means clustering on genes */
737 {
738 int i, j, ii, nl, nc;
739 //const int nclusters = 3;
740 const int transpose = 0;
741 //const char dist = 'e';
742 const char method = 'a';
743 /* For method=='a', the centroid is defined as the mean over all elements
744 belonging to a cluster for each dimension.
745 For method=='m', the centroid is defined as the median over all elements
746 belonging to a cluster for each dimension.
747 */
748
749 //int npass = 1;
750 int ifound = 0;
751 int test=0;
752 float error;
753 float distance;
754 int** index=NULL;
755 int* count=NULL;
756 float* weight = malloc(ncols*sizeof(float));
757 float** cdata = malloc(nclusters*sizeof(float*));
758
759 int n=0;
760 char* filename=NULL;
761 char* filename2=NULL;
762 char* filename3=NULL;
763 char* filename4=NULL;
764 FILE *out1=NULL;
765 FILE *out2=NULL;
766 FILE *out3=NULL;
767 FILE *out4=NULL;
768 float a, b;
769
770
771 for (i = 0; i < nclusters; i++)
772 { cdata[i] = malloc(ncols*sizeof(float));
773
774 }
775 for (i = 0; i < ncols; i++) weight[i] = 1.0;
776
777
778 /* ZSS: Andrej, that n was too small. I added 512 to get
779 avoid the problem. */
780 n = 512 + strlen(jobname) + strlen("_K_G") + strlen(".ext");
781
782 if (dist)
783 { int dummy = nclusters;
784 do n++; while (dummy/=10);
785 }
786
787 /* avovk */
788 if (verb > 1) printf("a je u omari :) \n");
789 filename = (char *)malloc(n*sizeof(char));
790 filename2 = (char *)malloc(n*sizeof(char));
791 filename3 = (char *)malloc(n*sizeof(char));
792 filename4 = (char *)malloc((n+2)*sizeof(char));
793
794 /* ZSS: Put a .1D in the name wherever it is appropriate */
795 sprintf (filename, "%s_K02%d_G%c.kgg.1D", jobname, nclusters,dist);
796 if (writedists) { out1 = fopen( filename, "w" ); }
797
798 sprintf (filename2, "%s_K02%d_G%c.dis.1D", jobname, nclusters,dist);
799 if (writedists) { out2 = fopen( filename2, "w" ); }
800
801 sprintf (filename3, "%s_K%02d_G%c.cen.1D", jobname, nclusters,dist);
802 if (writedists) { out3 = fopen( filename3, "w" ); }
803 sprintf (filename4, "%s_K%02d_G%c.info1.1D", jobname, nclusters,dist);
804 if (writedists) { out4 = fopen( filename4, "w" ); }
805
806 if (verb) {
807 printf("======================== k-means clustering"
808 " ========================\n");
809
810 printf ("\n");
811 printf ("----- doing %d pass-es... go stretch your legs...\n",npass);
812 }
813
814 kcluster( nclusters,
815 nrows,ncols,data,
816 weight,
817 transpose,npass,
818 method,dist,
819 clusterid, &error, &ifound);
820 switch (remap) {
821 case COUNT:
822 case iCOUNT:
823 { double count[nclusters];
824 int *isort=NULL, imap[nclusters];
825
826 /* count number of voxels */
827 for (i=0; i<nclusters; ++i) count[i]=0;
828 for (i=0; i<nrows; ++i) {
829 ++count[clusterid[i]];
830 }
831 /* sort result */
832 isort = z_idoubleqsort(count, nclusters);
833 for (i=0; i<nclusters; ++i) {
834 if (remap==COUNT) {
835 imap[isort[i]] = nclusters -i -1;
836 } else {
837 imap[isort[i]] = i;
838 }
839 fprintf(stderr,"Remapping cluster %d (count %ld) --> %d\n",
840 isort[i], (long)count[i], imap[isort[i]]);
841 }
842 for (i=0; i<nrows; ++i) {
843 clusterid[i] = imap[clusterid[i]];
844 }
845 free(isort); isort=NULL;
846 }
847 break;
848 case MAG:
849 case iMAG:
850 { double mg[nclusters];
851 int *isort=NULL, imap[nclusters];
852 if (!getclustercentroids( nclusters, nrows, ncols,
853 data, clusterid, cdata,
854 0, 'a')) {
855 fprintf(stderr,"Failed to get centroids");
856 }
857 for (i=0; i<nclusters; ++i) {
858 mg[i]=0.0;
859 for (j=0; j<ncols; ++j) {
860 mg[i] += cdata[i][j]*cdata[i][j];
861 }
862 mg[i] = sqrt(mg[i]);
863 }
864 isort = z_idoubleqsort(mg, nclusters);
865 if (verb) fprintf(stderr,"ncols = %d\n", ncols);
866 for (i=0; i<nclusters; ++i) {
867 if (remap==MAG) {
868 imap[isort[i]] = nclusters -i -1;
869 } else {
870 imap[isort[i]] = i;
871 }
872 if (verb) fprintf(stderr,"Remapping cluster %d (mag %f) --> %d\n",
873 isort[i], mg[i], imap[isort[i]]);
874 }
875
876 for (i=0; i<nrows; ++i) {
877 clusterid[i] = imap[clusterid[i]];
878 }
879
880 free(isort);
881 }
882 break;
883 case NONE:
884 break;
885 default:
886 fprintf(stderr,
887 "REMAPPING flag of %d unknown. No remapping done.\n", remap);
888 }
889
890 if (verb) printf ("Solution found %d times; ", ifound);
891 if (verb) printf ("within-cluster sum of distances is %f\n", error);
892
893
894 if (writedists) {
895 fprintf (out4,"#within-cluster sum of distances: %f\n",error);
896 fclose(out4); out4=NULL;
897 }
898
899 if (writedists) {
900 if (verb) printf ("------- writing Cluster assignments to file:\t\t"
901 " %s_K_G%d.kgg.1D\n",jobname, nclusters);
902 for (i = 0; i < nrows; i++)
903 fprintf (out1, "%09d\t %d\n", i, clusterid[i]);
904 fclose(out1); out1=NULL;
905 }
906
907 index = (int **)malloc(nclusters*sizeof(int*));
908 count = (int *)malloc(nclusters*sizeof(int));
909 for (i = 0; i < nclusters; i++) count[i] = 0;
910 for (i = 0; i < nrows; i++) count[clusterid[i]]++;
911 for (i = 0; i < nclusters; i++) index[i] = malloc(count[i]*sizeof(int));
912 for (i = 0; i < nclusters; i++) count[i] = 0;
913 for (i = 0; i < nrows; i++){
914 int id = clusterid[i];
915 index[id][count[id]] = i;
916 count[id]++;
917 }
918
919 if (writedists) {
920 if (verb) printf ("------- writing Distance between clusters to file:\t "
921 "%s_K_G%d.dis.1D \n", jobname, nclusters);
922 fprintf (out2,"#------- Distance between clusters:\n");
923
924 for (i = 0; i < nclusters-1; i++)
925 {
926 for (j = 1+i; j < nclusters; j++)
927 {
928 distance = clusterdistance(nrows, ncols, data,
929 weight, count[i], count[j], index[i],
930 index[j], dist , 'a', 0);
931
932 fprintf(out2,"#Distance between %d and %d: %7.3f\n",
933 i, j, distance);
934
935 }
936 }
937
938 fclose(out2); out2=NULL;
939 }
940
941
942 getclustercentroids(nclusters, nrows, ncols, data, clusterid,
943 cdata, 0, 'a');
944 if (writedists) {
945 if (verb) printf ("------- writing Cluster centroids to file:\t\t"
946 "%s_K_G%d.cen.1D\n",jobname, nclusters);
947 for (i = 0; i < nclusters; i++){
948 /* fprintf(out3,"Cluster %2d:", i);*/
949 for (j = 0; j < ncols; j++) fprintf(out3,"\t%7.3f", cdata[i][j]);
950 fprintf(out3,"\n");
951 }
952 fclose(out3); out3=NULL;
953
954 color_palette(nclusters, jobname);
955 }
956 if (verb) printf("Done...\n");
957
958 /* call function to calculate distance between each voxel and centroid */
959 /* we will need:
960 count - number of elements in cluster as we allready have it
961 cdata - cluster centroids
962 clusterid
963 data */
964
965 getvoxlclusterdist(count, cdata, clusterid, data, jobname,
966 nclusters, nrows, ncols, vcdata, dist);
967 getvoxlclustersdist(count, cdata, clusterid, data, jobname,
968 nclusters, nrows, ncols, vcdata, dist);
969
970
971 /*might want to make some calculations with vcdata*/
972
973 /*lets calculate distance to centroid/sum(distance 2 all centroids)*/
974
975 for (i = 0; i < nrows; i++)
976 {
977 a = vcdata[i][0];
978 b = a+1e10;
979 for (j = 0; j < nclusters; j++)
980 if (vcdata[i][j+1]>a && vcdata[i][j+1]< b) b = vcdata[i][j+1];
981 vcdata[i][nclusters+1] = (1.0-a/b)*100.0;
982 }
983
984 /* just a notice: first column is dist to centroid in THE cluster,
985 then there are columns that describe distance to centroids from
986 each cluster centroid
987 - and we are adding another column here*/
988
989
990 for (i = 0; i < nclusters; i++) free(index[i]);
991 free(index);
992 free(count);
993
994 for (i = 0; i < nclusters; i++){
995 free(cdata[i]);
996 }
997
998 free(cdata);
999 free(weight);
1000
1001 return;
1002 }
1003
1004 /**********************************************************************
1005 END: functions based on command.c code from The C clustering library.
1006 **********************************************************************/
1007 # define EPSILON 1E-10
thd_Adist(THD_3dim_dataset * in_set,byte * mask,float * sigs,int nsigs,THD_3dim_dataset ** dist_set,OPT_KMEANS oc)1008 int thd_Adist ( THD_3dim_dataset *in_set,
1009 byte *mask,
1010 float *sigs, int nsigs,
1011 THD_3dim_dataset **dist_set,
1012 OPT_KMEANS oc)
1013 {
1014 int ii, jj, kk;
1015 int ncol = -1, nvox = -1;
1016 float *distvec=NULL, **asigvec=NULL, **afsig=NULL;
1017 char lll[25]={"buffer"};
1018 float *weight = NULL, factor =0.0;
1019 float (*metric)
1020 (int, float**, float**, const float[], int, int, int) =
1021 setmetric(oc.distmetric);
1022
1023
1024 ENTRY("thd_Adist");
1025
1026 nvox = DSET_NVOX(in_set);
1027 ncol = DSET_NVALS(in_set);
1028 if (ncol < DSET_NUM_TIMES(in_set)) ncol = DSET_NUM_TIMES(in_set);
1029
1030 /* make sure in_set is loaded */
1031 DSET_load(in_set);
1032
1033
1034 /* equal weights for all elements of signature */
1035 weight = (float *)calloc(ncol, sizeof(float));
1036 for (ii = 0; ii < ncol; ii++) weight[ii] = 1.0;
1037
1038 /*array to hold one voxel's signature */
1039 asigvec = (float ** )calloc(1, sizeof(float*));
1040 asigvec[0] = (float *)calloc(ncol, sizeof(float)) ;
1041
1042 /* array to hold signatures in a way pleasing to metric */
1043 afsig = (float **)calloc(nsigs, sizeof(float*));
1044 ii=0;
1045 for (jj=0; jj<nsigs; ++jj) {
1046 afsig[jj] =(float *)calloc(ncol, sizeof(float));
1047 for (kk=0; kk<ncol; ++kk) {
1048 afsig[jj][kk] = sigs[ii]; ++ii;
1049 }
1050 }
1051 /* Create temporary distance vector */
1052 distvec = (float *)calloc(nvox, sizeof(float));
1053
1054 /* prepare output */
1055 *dist_set = EDIT_empty_copy(in_set) ;
1056 EDIT_dset_items( *dist_set ,
1057 ADN_nvals , nsigs ,
1058 ADN_ntt , nsigs ,
1059 ADN_datum_all , MRI_short ,
1060 ADN_brick_fac , NULL ,
1061 ADN_prefix , "adist" ,
1062 ADN_none ) ;
1063
1064 for (jj=0; jj<nsigs; ++jj) { /* for each signature type */
1065 for (ii=0; ii< nvox; ++ii) {
1066 distvec[ii]=0;
1067 if (!mask || mask[ii]) {
1068 THD_extract_array( ii , in_set , 0 , asigvec[0] ) ;
1069 /* check if it is all non zero */
1070 for (kk=0; kk<ncol; ++kk) {
1071 if (asigvec[0][kk]) { /* non zero, calculate the distance */
1072 distvec[ii] = metric(ncol, afsig, asigvec, weight,jj, 0, 0);
1073 break; /* get out of the loop */
1074 }
1075 }
1076 if (oc.voxdebug[3] == ii) {
1077 char fname[128]={""};
1078 FILE *fout=NULL;
1079 sprintf(fname, "distdbg.vox%d-%d-%d.sig%d.1D",
1080 oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2], jj);
1081 if ((fout = fopen(fname, "w"))) {
1082 fprintf(fout,"#Col. 0 signature for voxel %d %d %d\n"
1083 "#Col. 1 reference signature %d\n"
1084 "#Distance per metric %c = %f\n",
1085 oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2],
1086 jj, oc.distmetric, distvec[ii]);
1087 for (kk=0; kk<ncol; ++kk){
1088 fprintf(fout, "%f %f\n", asigvec[0][kk], afsig[jj][kk]);
1089 }
1090 ININFO_message("Voxel %d %d %d is in mask \n"
1091 "See file %s for debug info with signature %d/%d\n",
1092 oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2],
1093 fname, jj,nsigs);
1094 fclose(fout); fout = NULL;
1095 } else {
1096 ERROR_message("No permission for writing? No debug output.");
1097 }
1098 }
1099 } else if (oc.voxdebug[3] == ii) {
1100 ININFO_message("Voxel %d %d %d is not in mask \n ",
1101 oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2]);
1102 }
1103 }
1104 /* put distvec back into output, and watch the scaling */
1105 EDIT_substitute_brick( *dist_set , jj , MRI_short , NULL ) ;
1106 factor = EDIT_coerce_autoscale_new(nvox,
1107 MRI_float,distvec,
1108 MRI_short,
1109 (short*)DSET_BRICK_ARRAY(*dist_set,jj));
1110 if (factor < EPSILON) factor = 0.0;
1111 else factor = 1.0 / factor;
1112 if( DSET_BRICK_TYPE(*dist_set,jj) == MRI_short )
1113 EDIT_misfit_report( DSET_FILECODE(*dist_set) , jj ,
1114 nvox , factor ,
1115 (short*)DSET_BRICK_ARRAY(*dist_set,jj) , distvec) ;
1116 if (oc.verb) {
1117 ININFO_message("Subbrick factor for %d is %f\n ",
1118 jj, factor);
1119 }
1120 sprintf(lll,"Dist.%c.Sig.%02d", oc.distmetric, jj);
1121 EDIT_dset_items (*dist_set, ADN_brick_label_one + jj, lll, ADN_none); EDIT_BRICK_FACTOR (*dist_set, jj, factor);
1122
1123 }
1124
1125 if (oc.verb) {
1126 ININFO_message("Have %d/%d voxels to process "
1127 "with %d dimensions per voxel.\n",
1128 nvox, DSET_NVOX(in_set), ncol);
1129 }
1130
1131
1132 if (oc.verb) {
1133 ININFO_message("Freedom");
1134 }
1135
1136 free(weight); weight = NULL;
1137 free(distvec); distvec=NULL;
1138 free(asigvec[0]); asigvec[0]=NULL;
1139 free(asigvec); asigvec=NULL;
1140 for (jj=0; jj<nsigs; ++jj) free(afsig[jj]); free(afsig);
1141
1142 RETURN(1);
1143 }
1144
1145 /*! This function is a wrapper for thd_Acluster and deals with
1146 one input dset only */
thd_Acluster1(THD_3dim_dataset * in_set,byte * mask,int nmask,THD_3dim_dataset ** clust_set,THD_3dim_dataset ** dist_set,THD_3dim_dataset * clust_init,OPT_KMEANS oc)1147 int thd_Acluster1 ( THD_3dim_dataset *in_set,
1148 byte *mask, int nmask,
1149 THD_3dim_dataset **clust_set,
1150 THD_3dim_dataset **dist_set,
1151 THD_3dim_dataset *clust_init,
1152 OPT_KMEANS oc)
1153
1154 {
1155 static char FuncName[]={"thd_Acluster1"};
1156 float **D=NULL;
1157 int ncol=0, ii, nl, nc, ret=0;
1158 float *dvec=NULL;
1159
1160 ENTRY("thd_Acluster1");
1161
1162 ncol = DSET_NVALS(in_set);
1163 if (!nmask) {
1164 ERROR_message("No voxels in mask");
1165 RETURN(0);
1166 }
1167 /* allocate for D */
1168 D = (float **)calloc(sizeof(float*), nmask);
1169 for (ii=0;ii<(nmask);++ii) {
1170 if (!(D[ii] = (float *)calloc(sizeof(float), ncol))) {
1171 fprintf(stderr,"ERROR: Failed while allocating %dx%d float matrix\n",
1172 nmask, ncol);
1173 RETURN(0);
1174 }
1175 }
1176
1177 dvec = (float * )malloc(sizeof(float)*ncol) ;
1178 if (oc.verb) {
1179 ININFO_message("Filling %d cols of D(%dx%d) (mask=%p).\n",
1180 ncol, nmask, ncol, mask);
1181 }
1182 ii = 0;
1183 for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1184 if (!mask || mask[nl]) {
1185 THD_extract_array( nl , in_set , 0 , dvec ) ;
1186 for (nc=0; nc<ncol; ++nc) D[ii][nc] = dvec[nc];
1187 ++ii;
1188 }
1189 }
1190 free(dvec); dvec = NULL;
1191
1192
1193 /* Now call clustering function */
1194 if (!(ret=thd_Acluster ( in_set,
1195 mask, nmask,
1196 clust_set,
1197 dist_set ,
1198 clust_init,
1199 oc, D, ncol))) {
1200 ERROR_message("Failed in thd_Acluster");
1201 }
1202
1203 /* freedom */
1204 if (D) {
1205 for (ii=0; ii<nmask; ++ii) if (D[ii]) free(D[ii]);
1206 free(D); D = NULL;
1207 }
1208
1209 RETURN(ret);
1210 }
1211
1212 /*!
1213 Andrej: Put some help like for function thd_polyfit
1214 You can form the data array Dp outside of this function,
1215 so if Dp is not NULL, then only header structure of in_set
1216 is used. If Dp is NULL then it is formed inside the function
1217 from the contents of in_set
1218 D_ncol is used to pass the number of columns in Dp. Note that
1219 D_ncol may be different from the number of sub-bricks in in_set
1220 because when Dp is filled externally, in_set can be one of the volumes
1221 used to form Dp.
1222 */
thd_Acluster(THD_3dim_dataset * in_set,byte * mask,int nmask,THD_3dim_dataset ** clust_set,THD_3dim_dataset ** dist_set,THD_3dim_dataset * clust_init,OPT_KMEANS oc,float ** Dp,int D_ncol)1223 int thd_Acluster ( THD_3dim_dataset *in_set,
1224 byte *mask, int nmask,
1225 THD_3dim_dataset **clust_set,
1226 THD_3dim_dataset **dist_set,
1227 THD_3dim_dataset *clust_init,
1228 OPT_KMEANS oc,
1229 float **Dp, int D_ncol)
1230 {
1231 int ii, nl, nc, j;
1232 float **D=NULL, **distmatrix=NULL; /* this double business is a waste of
1233 memory, at least for D..*/
1234 int ncol = -1;
1235 float *dvec=NULL;
1236 float factor = 0.0;
1237 int* clusterid = NULL;
1238 short *sc = NULL;
1239 float *fc = NULL;
1240 float** vcdata = NULL;
1241 int nvc; /*this will be for number of columns in vcdata matrix*/
1242 int *vals = NULL, *vmap=NULL, N_vmap=0;
1243 char *label_table=NULL;
1244 char lll[25]={"buffer"};
1245
1246
1247 ENTRY("thd_Acluster");
1248
1249 if (!clust_set) {
1250 fprintf(stderr,
1251 "ERROR: Bad input\n");
1252 RETURN(0);
1253 }
1254
1255 nvc = oc.k+2;
1256
1257 if (*clust_set) { /* reuse verify match */
1258 if (DSET_NVOX(in_set) != DSET_NVOX(*clust_set) ||
1259 DSET_NX(in_set) != DSET_NX(*clust_set) ||
1260 DSET_NY(in_set) != DSET_NY(*clust_set) ||
1261 DSET_NZ(in_set) != DSET_NZ(*clust_set)) {
1262 ERROR_message("mismatch in *clust_set grid");
1263 RETURN(0);
1264 }
1265 } else { /* make new one */
1266 *clust_set = EDIT_empty_copy(in_set ) ;
1267 EDIT_dset_items( *clust_set ,
1268 ADN_nvals , 1 ,
1269 ADN_ntt , 1 ,
1270 ADN_datum_all , MRI_short ,
1271 ADN_brick_fac , NULL ,
1272 ADN_prefix , "OML!" ,
1273 ADN_none ) ;
1274 sc = (short *)calloc(sizeof(short),DSET_NVOX(in_set));
1275 EDIT_substitute_brick( *clust_set , 0 , MRI_short , sc ) ;
1276 }
1277
1278 if (dist_set) {
1279 if (*dist_set) { /* reuse verify match */
1280 if (DSET_NVOX(in_set) != DSET_NVOX(*dist_set) ||
1281 DSET_NX(in_set) != DSET_NX(*dist_set) ||
1282 DSET_NY(in_set) != DSET_NY(*dist_set) ||
1283 DSET_NZ(in_set) != DSET_NZ(*dist_set) ||
1284 DSET_NVALS(*dist_set) != nvc ) {
1285 ERROR_message("mismatch in *dist_set grid");
1286 RETURN(0);
1287 }
1288 } else { /* make new one */
1289 *dist_set = EDIT_empty_copy(in_set) ;
1290 EDIT_dset_items( *dist_set ,
1291 ADN_nvals , nvc ,
1292 ADN_ntt , nvc ,
1293 ADN_datum_all , MRI_short ,
1294 ADN_brick_fac , NULL ,
1295 ADN_prefix , "vcd" ,
1296 ADN_none ) ;
1297 for( j=0 ; j < nvc ; j++ ) /* create empty bricks to be filled below */
1298 EDIT_substitute_brick( *dist_set , j , MRI_short , NULL ) ;
1299 }
1300 }
1301
1302
1303 if (!mask) nmask = DSET_NVOX(in_set);
1304
1305 if (!Dp) {
1306 ncol = DSET_NVALS(in_set);
1307 if (ncol < DSET_NUM_TIMES(in_set)) ncol = DSET_NUM_TIMES(in_set);
1308
1309 /* make sure in_set is loaded */
1310 DSET_load(in_set);
1311
1312 /* Create data matrix */
1313 D = (float **)calloc(sizeof(float*), nmask);
1314 for (ii=0;ii<(nmask);++ii) {
1315 if (!(D[ii] = (float *)calloc(sizeof(float), ncol))) {
1316 fprintf(stderr,"ERROR: Failed while allocating %dx%d float matrix\n",
1317 nmask, ncol);
1318 RETURN(0);
1319 }
1320 }
1321
1322
1323 dvec = (float * )malloc(sizeof(float)*ncol) ; /* array to hold series */
1324 if (oc.verb) {
1325 ININFO_message("Filling D(%dx%d) (mask=%p).\n", nmask, ncol, mask);
1326 }
1327 ii = 0;
1328 for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1329 if (!mask || mask[nl]) {
1330 THD_extract_array( nl , in_set , 0 , dvec ) ;
1331 for (nc=0; nc<ncol; ++nc) D[ii][nc] = dvec[nc];
1332 ++ii;
1333 }
1334 }
1335 /* dump voxel values in in_set to free up memory,
1336 but keep header of in_set */
1337 DSET_unload(in_set);
1338 } else {
1339 ncol = D_ncol;
1340 D = Dp;
1341 }
1342
1343 clust_seed(oc.rand_seed);
1344 if (oc.verb) {
1345 ININFO_message("Have %d/%d (%f) voxels to process "
1346 "with %d dimensions per voxel.\n"
1347 "Seed now: %d\n",
1348 nmask, DSET_NVOX(in_set), (float)nmask/DSET_NVOX(in_set),
1349 ncol,
1350 clust_seed(0));
1351 }
1352
1353 /* allocate for answer arrays */
1354 if (!(clusterid = (int *)calloc(sizeof(int), nmask))) {
1355 fprintf(stderr,"ERROR: Failed to allocate for clusterid\n");
1356 RETURN(0);
1357 }
1358
1359 /* initialize clusterid */
1360 if (clust_init) {
1361 if (0 && oc.remap != NONE) { /* allow that. Just initializing does
1362 not mean cluster ids won't change in
1363 unpleasant manners */
1364 ERROR_message("Cannot use -clust_init, along with "
1365 "-remap other than NONE");
1366 RETURN (0);
1367 }
1368 if (oc.verb) {
1369 ININFO_message("Initializing cluster per %s\n",
1370 DSET_PREFIX(clust_init));
1371 }
1372 if (!(vals = THD_unique_rank(clust_init, 0, mask,
1373 "Lamour.1D", &vmap, &N_vmap))) {
1374 ERROR_message("Failed to rank\n");
1375 RETURN (0);
1376 }
1377 if (oc.verb) {
1378 for (ii=0; ii<N_vmap; ++ii) {
1379 fprintf(stderr," vmap[%d]=%d\n", ii, vmap[ii]);
1380 }
1381 }
1382 nl=0;
1383 for (ii=0; ii<DSET_NVOX(clust_init); ++ii) {
1384 if ((!mask || mask[ii])) {
1385 /* -1 is because clusterid starts at 0 */
1386 if (vals[ii]>0) { /* only allow non zero for initialization*/
1387 clusterid[nl] = vals[ii]-1;
1388 } else {
1389 clusterid[nl] = THD_SEG_IRAN(oc.k); /* random assign */
1390 }
1391 if (clusterid[nl]<0) {
1392 ERROR_message( "Negative values!");
1393 RETURN(0);
1394 }
1395 ++nl;
1396 }
1397 }
1398 if (N_vmap-1 != oc.k) {
1399 ERROR_message( "Initializing with a dset of %d clusters\n"
1400 " but asking for %d clusters in return", N_vmap-1, oc.k);
1401 RETURN(0);
1402 }
1403 if (!label_table && clust_init->Label_Dtable) {
1404 label_table = Dtable_to_nimlstring(clust_init->Label_Dtable,
1405 "VALUE_LABEL_DTABLE");
1406 }
1407 }
1408
1409 /* allocate for answer array distance voxel centroid */
1410 if (oc.verb) {
1411 ININFO_message("Allocating for D, %dx%d\n", nmask, nvc);
1412 }
1413 vcdata = (float **)calloc(sizeof(float*), nmask);
1414 for (ii=0;ii<(nmask);++ii) {
1415 if (!(vcdata[ii] = (float *)calloc(sizeof(float), nvc))) {
1416 fprintf(stderr,"ERROR: Failed to allocate for voxel cluster distance\n");
1417 RETURN(0);
1418 }
1419 }
1420
1421 /* now do the clustering */
1422 if (oc.k > 0) {
1423 if (oc.verb) {
1424 ININFO_message("Going to cluster: k=%d, r=%d\n"
1425 "distmetric %c, jobname %s, verb = %d\n",
1426 oc.k, oc.r, oc.distmetric, oc.jobname, oc.verb);
1427 }
1428 segtools_verb(oc.verb);
1429 segtools_writedists(oc.writedists);
1430 example_kmeans( nmask, ncol, D,
1431 oc.k, oc.r, oc.distmetric,
1432 oc.jobname, clusterid, vcdata,
1433 oc.remap);
1434 } else if (oc.kh > 0) {
1435 if (oc.verb) {
1436 ININFO_message("Going to h cluster: kh=%d\n"
1437 "jobname %s\n",
1438 oc.kh, oc.jobname);
1439 }
1440 if ((distmatrix = example_distance_gene(nmask, ncol, D))) {
1441 example_hierarchical( nmask, ncol, D,
1442 oc.jobname, oc.kh,
1443 distmatrix,
1444 clusterid);
1445 /* ZSS: Andrej, this looks like a memory leak,
1446 distmatrix should be freed here.
1447 This free code is not tested yet! */
1448 ININFO_message("The freeing below has not been tested");
1449 for (ii=0; ii<ncol; ++ii) free(distmatrix[ii]);
1450 free(distmatrix); distmatrix = NULL;
1451 } else {
1452 ERROR_message("Failed to create distmatrix");
1453 RETURN(0);
1454 }
1455 } else {
1456 ERROR_message("Bad option selection");
1457 RETURN(0);
1458 }
1459
1460 /* remap clusterid if needed */
1461 if (oc.remap == NONE && vmap) {
1462 if (oc.verb) {
1463 ININFO_message("Remapping output to cluster_init\n");
1464 }
1465
1466 for (ii=0; ii<nmask; ++ii) {
1467 clusterid[ii] = vmap[clusterid[ii]+1];
1468 }
1469 } else { /* just add 1 because cluster ids start at 0 */
1470 for (ii=0; ii<nmask; ++ii) clusterid[ii] += 1;
1471 }
1472
1473 /* create output datasets, if required*/
1474 if (oc.verb) {
1475 ININFO_message("loading results into %s\n",
1476 DSET_PREFIX(*clust_set));
1477 }
1478
1479 /* transfer ints in clusterid to shorts array */
1480 sc = (short *)DSET_ARRAY(*clust_set,0);
1481 ii = 0;
1482 for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1483 if (!mask || mask[nl]) {
1484 sc[nl] = (short)clusterid[ii];
1485 ++ii;
1486 } else {
1487 sc[nl] = 0;
1488 }
1489 }
1490 free(clusterid); clusterid = NULL;
1491 sc = NULL;
1492
1493 /* add the labeltable if it exists */
1494 if (label_table) {
1495 if (oc.verb) {
1496 ININFO_message("Adding labeltable from initializing dset ");
1497 }
1498 (*clust_set)->Label_Dtable = Dtable_from_nimlstring(label_table);
1499 if (!(*clust_set)->Label_Dtable) {
1500 ERROR_message("Failed to create Label_Dtable");
1501 RETURN(0);
1502 }
1503 /* and stick it in the header. Label_Dtable is not preserved
1504 by dset writing function */
1505 THD_set_string_atr( (*clust_set)->dblk ,
1506 "VALUE_LABEL_DTABLE" , label_table) ;
1507
1508 free(label_table); label_table=NULL;
1509 }
1510
1511 if (oc.user_labeltable) {
1512 Dtable *vl_dtable=NULL ;
1513
1514 if (oc.verb) {
1515 ININFO_message("Applying labeltable from %s", oc.user_labeltable) ;
1516 }
1517 if ((*clust_set)->Label_Dtable) {
1518 destroy_Dtable((*clust_set)->Label_Dtable);
1519 (*clust_set)->Label_Dtable=NULL;
1520 }
1521 /* read the table */
1522 if (!(label_table = AFNI_suck_file( oc.user_labeltable))) {
1523 ERROR_message("Failed to read %s", oc.user_labeltable);
1524 RETURN(0);
1525 }
1526 if (!(vl_dtable = Dtable_from_nimlstring(label_table))) {
1527 ERROR_message("Could not parse labeltable");
1528 RETURN(0);
1529 }
1530 destroy_Dtable(vl_dtable); vl_dtable = NULL;
1531 THD_set_string_atr( (*clust_set)->dblk ,
1532 "VALUE_LABEL_DTABLE" , label_table ) ;
1533 free(label_table); label_table = NULL;
1534 }
1535
1536 if (!THD_find_atr( (*clust_set)->dblk , "VALUE_LABEL_DTABLE")) {
1537 /* Still no blasted labeltable */
1538 Dtable *vl_dtable=new_Dtable(5);
1539 char slab[256], sval[64], skmet[64];
1540 int nclusters=0;
1541 if (oc.verb) ININFO_message("Creating new labeltable") ;
1542 if (oc.k > 0) {
1543 nclusters = oc.k;
1544 sprintf(skmet, "kclust");
1545 } else {
1546 nclusters = oc.kh;
1547 sprintf(skmet, "hclust");
1548 }
1549 for (ii=0; ii<nclusters; ++ii) {
1550 if (!oc.nclabels) {
1551 if (vmap) {
1552 sprintf(sval,"%d", vmap[ii+1]);
1553 sprintf(slab,"%s%d",skmet, vmap[ii+1]);
1554 } else {
1555 sprintf(sval,"%d", ii+1);
1556 sprintf(slab,"%s%d",skmet, ii+1);
1557 }
1558 addto_Dtable( sval , slab , vl_dtable ) ;
1559 } else {
1560 if (vmap) {
1561 sprintf(sval,"%d", vmap[ii+1]);
1562 } else {
1563 sprintf(sval,"%d", ii+1);
1564 }
1565 snprintf(slab,128,"%s",oc.clabels[ii]);
1566 if (findin_Dtable_b( slab , vl_dtable )) {
1567 ERROR_message("Label %s already used.\n"
1568 " No labeltable will be added\n", slab);
1569 RETURN(0);
1570 }
1571 addto_Dtable( sval , slab , vl_dtable ) ;
1572 }
1573 }
1574 label_table = Dtable_to_nimlstring(vl_dtable, "VALUE_LABEL_DTABLE");
1575 destroy_Dtable(vl_dtable); vl_dtable = NULL;
1576 THD_set_string_atr( (*clust_set)->dblk ,
1577 "VALUE_LABEL_DTABLE" , label_table ) ;
1578 free(label_table); label_table = NULL;
1579 }
1580
1581 /* prepare output */
1582 if (dist_set) {
1583 ININFO_message("loading results into %s\n",
1584 DSET_PREFIX(*dist_set));
1585
1586 for (j = 0; j < nvc; j++) {
1587 ININFO_message("...%d,", j);
1588
1589 /* transfer data in vcdata to full float array */
1590 fc = (float *)calloc(sizeof(float),DSET_NVOX(in_set));
1591 ii = 0;
1592 for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1593 if (!mask || mask[nl]) {
1594 fc[nl] = (float)vcdata[ii][j];
1595 ++ii;
1596 }
1597 }
1598 factor = EDIT_coerce_autoscale_new(DSET_NVOX(in_set),
1599 MRI_float, fc,
1600 MRI_short,
1601 (short*)DSET_BRICK_ARRAY(*dist_set,j));
1602 if (factor < EPSILON) factor = 0.0;
1603 else factor = 1.0 / factor;
1604 if( DSET_BRICK_TYPE(*dist_set,j) == MRI_short )
1605 EDIT_misfit_report( DSET_FILECODE(*dist_set) , j ,
1606 DSET_NVOX(in_set) , factor ,
1607 (short*)DSET_BRICK_ARRAY(*dist_set,j), fc) ;
1608 free(fc); fc = NULL;
1609 if (oc.verb) {
1610 ININFO_message("Subbrick factor for %d is %f\n ",
1611 j, factor);
1612 }
1613
1614 /* label bricks */
1615 if (j==0)
1616 EDIT_dset_items (*dist_set, ADN_brick_label_one + j, "Dc", ADN_none);
1617 else if (j < nvc -1) {
1618 sprintf(lll,"Dc%02d", j-1);
1619 EDIT_dset_items (*dist_set, ADN_brick_label_one + j, lll, ADN_none);
1620 } else {
1621 EDIT_dset_items (*dist_set, ADN_brick_label_one + j,
1622 "Dc_norm", ADN_none);
1623 }
1624 EDIT_BRICK_FACTOR (*dist_set, j, factor);
1625 }
1626 }
1627
1628 for (ii=0;ii<(nmask);++ii) {
1629 free(vcdata[ii]);
1630 }
1631 free(vcdata); vcdata = NULL;
1632
1633 if (oc.verb) {
1634 ININFO_message("Freedom");
1635 }
1636
1637 if (dvec) free(dvec); dvec=NULL;
1638
1639
1640 if (D != Dp) {
1641 // To free D
1642 for (ii=0;ii<nmask;++ii) {
1643 if (D[ii]) free(D[ii]);
1644 }
1645 free(D); D = NULL;
1646 }else D = NULL;
1647
1648
1649 RETURN(1);
1650 }
1651
1652