1 #include <stdio.h>
2 #include <gsl/gsl_multifit.h>
3 
4 #include "../mrilib.h"
5 #include "cluster_floatNOMASK.h"
6 #include "thd_segtools_fNM.h"
7 
8 static int verb=0;
9 static int writedists=0;
segtools_verb(int ii)10 void segtools_verb( int ii ){ verb = ii ; if (verb > 1) writedists = 1;}
segtools_writedists(int ii)11 void segtools_writedists (int ii) {
12    /* The files you get with writedists
13       .kgg.1D : Cluster assignments
14       .dis.1D : Distance between clusters
15       .cen.1D : Cluster centroids
16       .info1.1D: Within cluster sum of distances
17       .info2.1D: Maximum distance within each cluster
18       .vcd.1D: Voxel to centroids distnaces
19    */
20    writedists = ii;
21 }
22 
new_kmeans_oc(void)23 OPT_KMEANS new_kmeans_oc(void)
24 {
25    OPT_KMEANS oc;
26    int i;
27    memset(&(oc), 0, sizeof(OPT_KMEANS));
28 
29    oc.r = 1;
30    oc.k = 0;
31    oc.kh = 0;
32    oc.jobname = NULL;
33    oc.distmetric = 'u';
34    oc.verb = 0;
35    oc.writedists = 0;
36    oc.rand_seed = 1234567;
37    oc.remap = NONE;
38    oc.user_labeltable=NULL;
39    for (i=0; i<400; ++i) oc.clabels[i] = NULL;
40    oc.nclabels=0;
41    oc.voxdebug[0]=oc.voxdebug[1]=oc.voxdebug[2]=oc.voxdebug[3]=-1;
42    return(oc);
43 }
44 
45 /*!
46    \brief out_set = thd_polyfit( in_set,
47                                  mask, polorder,
48                                  prefix, verb);
49           fits a polynomial model of order polorder to
50           the time series of voxels in in_set
51    \param in_set (THD_3dim_dataset* ) An AFNI dset pointer to input data
52    \param mask (byte *)  if mask is not NULL then voxel i will be processed
53                          if mask[i] != 0.
54                          if mask is NULL then all voxels are processed.
55    \param polorder (int) polynomial order
56    \param prefix (char *) prefix of output dset
57    \param verb (int) verbosity flag
58    \return out_set (THD_3dim_dataset* ) Dset containing polynomial fits.
59 */
thd_polyfit(THD_3dim_dataset * in_set,byte * mask,int polorder,char * prefix,int verb)60 THD_3dim_dataset *thd_polyfit(THD_3dim_dataset *in_set,
61                               byte *mask, int polorder,
62                               char *prefix, int verb)
63 {
64    int   i=0, j=0, nl=0, k=0,
65          posi=0, posj=0, posk=0, nrow=0,
66          ncol = 0;
67    double xi=0.0, yi=0.0, yy=0.0, ei=0.0, sumsq=0.0,  med=0.0;
68    gsl_matrix *X=NULL, *cov=NULL;
69    gsl_vector *y=NULL, *w=NULL, *c=NULL;
70    MRI_IMAGE *im = NULL;
71    THD_3dim_dataset *out_set=NULL;
72    double *dar = NULL;
73    float *cbuf=NULL;
74    float *dvec = NULL;
75    gsl_multifit_linear_workspace *work=NULL;
76 
77    ENTRY("thd_polyfit");
78 
79    /* prepare output */
80    out_set = EDIT_empty_copy(in_set) ;
81    EDIT_dset_items(  out_set ,
82                      ADN_nvals     , polorder           ,
83                      ADN_ntt       , polorder          ,
84                      ADN_datum_all , MRI_float      ,
85                      ADN_brick_fac , NULL           ,
86                      ADN_prefix    , prefix ? prefix : "OMG!"   ,
87                      ADN_none ) ;
88 
89    for( j=0 ; j < polorder ; j++ ) /* create empty bricks to be filled below */
90       EDIT_substitute_brick( out_set , j , MRI_float , NULL ) ;
91 
92 
93    /* do the fitting */
94    if (verb) fprintf (stderr,"Now fitting...\n");
95 
96    ncol = DSET_NVALS(in_set);
97    nrow = DSET_NVOX(in_set);
98 
99    X = gsl_matrix_alloc (ncol, polorder);
100    y = gsl_vector_alloc (ncol);
101 
102    c = gsl_vector_alloc (polorder);
103    cov = gsl_matrix_alloc (polorder, polorder);
104 
105    for (i = 0; i < ncol; i++)  {
106       xi = i+1;
107       gsl_matrix_set (X, i, 0, 1.0);
108       gsl_matrix_set (X, i, 1, xi);
109       gsl_matrix_set (X, i, 2, xi*xi);
110 
111       gsl_matrix_set (X, i, 3, xi*xi*xi);
112       gsl_matrix_set (X, i, 4, xi*xi*xi*xi);
113       //    printf ("%lg ",xi);
114    }
115 
116 
117     /*make header
118       printf ("matrvola\n"); */
119     if (verb > 1)
120       fprintf(stdout, "#%s_0\t%s_1\t%s_2\t%s_3\t%s_4\n",
121                     DSET_PREFIX(in_set),DSET_PREFIX(in_set),
122                     DSET_PREFIX(in_set),DSET_PREFIX(in_set),
123                     DSET_PREFIX(in_set));
124 
125     /* go by lines - signatures
126        pre-allocate, I think this should be just fine,
127        there should be no need to reinitialize work
128        all the time */
129     work = gsl_multifit_linear_alloc (ncol, polorder);
130 
131     dvec = (float * )malloc(sizeof(float)*ncol) ;  /* array to hold signature */
132     cbuf = (float *)malloc(sizeof(float)*polorder) ;
133                               /* array to hold fit */
134     for (nl=0; nl<nrow; ++nl) {
135       if (!mask || mask[nl]) {
136          posi = -1;
137          posj = -1;
138          posk = -1;
139 
140          THD_extract_array( nl , in_set , 0 , dvec ) ;
141                                     /*get signature from voxel */
142 
143          for (k = 0; k < ncol; k++) {
144             gsl_vector_set (y, k, dvec[k]);
145          }
146 
147          gsl_multifit_linear (X, y, c, cov,
148                               &sumsq, work);
149 
150          /* printf ( "\n # best fit: Y = %g + %g X + %g X^2 +%g X^3 + %g X^4\n",
151                      C(0), C(1), C(2), C(3), C(4));
152             printf ("# sumsq = %g\n", sumsq); */
153 
154          for (i=0;i<polorder;++i) cbuf[i] = (float)C(i);
155          THD_insert_series( nl , out_set , polorder , MRI_float , cbuf , 1 ) ;
156                                        /* stick result in output */
157          if (verb > 1)
158             fprintf (stdout,  "%11g\t%11g\t%11g\t%11g\t%11g\n",
159                            C(0), C(1), C(2), C(3), C(4));
160 
161          /*
162          printf ("# covariance matrix:\n");
163          printf ("[ %+.5e, %+.5e, %+.5e  \n",
164                  COV(0,0), COV(0,1), COV(0,2));
165          printf ("  %+.5e, %+.5e, %+.5e  \n",
166                  COV(1,0), COV(1,1), COV(1,2));
167          printf ("  %+.5e, %+.5e, %+.5e ]\n",
168                  COV(2,0), COV(2,1), COV(2,2));
169           printf ("# chisq = %g\n", chisq);
170          */
171 
172       }
173    }
174    gsl_multifit_linear_free (work); work = NULL;
175    free(dvec); dvec = NULL;
176    free(cbuf); cbuf = NULL;
177    gsl_vector_free (y);
178    gsl_vector_free (c);
179    gsl_matrix_free (cov);
180    gsl_matrix_free (X);
181    //gsl_vector_free (w);
182    free(dvec); dvec = NULL;
183 
184    RETURN(out_set);
185 }
186 
187 
188 /**********************************************************************
189    BEGIN: functions based on command.c code from The C clustering library.
190    ANDREJ: If we end up using this, we must include copyright notice
191    in AFNI prominently
192 **********************************************************************/
193 
clusterlib_display_version(void)194 void clusterlib_display_version(void)
195 { printf ("\n"
196 "Clustering for segmentation, command line version ,\n"
197 "using the C Clustering Library version 1.38.\n"
198 "\n"
199 "Cluster was originally written by Michael Eisen (eisen 'AT' rana.lbl.gov)\n"
200 "Copyright 1998-99 Stanford University.\n");
201   printf ("\n"
202 "The command line version of Clustering version was created by Andrej Vovk\n"
203 "and will be surely corrected by Ziad Saad ;),\n"
204 "\n");
205   return;
206 }
207 
clusterlib_setjobname(const char * basename,int strip)208 char* clusterlib_setjobname(const char* basename, int strip)
209 { char* jobname=NULL; char* extension=NULL;
210   int n = strlen(basename);
211   if (strip)
212   { extension = strrchr(basename, '.');
213     if (extension) n -= strlen(extension);
214   }
215   jobname = malloc((n+1)*sizeof(char));
216   strncpy (jobname, basename, n);
217   jobname[n] = '\0';
218 
219   if (strip) { /* ZSS, also remove +orig? */
220    n = strlen(jobname);
221    extension = strstr(jobname,"+orig");
222    if (!extension) extension = strstr(jobname,"+acpc");
223    if (!extension) extension = strstr(jobname,"+tlrc");
224    if (extension) n -= strlen(extension);
225    jobname[n] = '\0';
226   }
227   return jobname;
228 }
229 
clusterlib_readnumber(const char word[])230 int clusterlib_readnumber(const char word[])
231 { char* error = 0;
232   long value = 0;
233   if (word[0] == '0' && word[1] != '\0') ++word;
234   value = strtol(word,&error,0);
235   if (*error=='\0') return (int)value;
236   else return -1;
237 }
238 
clusterlib_getmetric(int i)239 char clusterlib_getmetric(int i)
240 { switch (i)
241   { case 1: return 'u';
242     case 2: return 'c';
243     case 3: return 'x';
244     case 4: return 'a';
245     case 5: return 's';
246     case 6: return 'k';
247     case 7: return 'e';
248     case 8: return 'b';
249     default: return '\0';
250   }
251   /* Never get here */
252   return '\0';
253 }
254 
255 /* ========================================================================= */
256 
257 
258 
example_distance_gene(int nrows,int ncols,float ** data)259 float** example_distance_gene(int nrows, int ncols, float** data)
260 /* Calculate the distance matrix between genes using the Euclidean distance. */
261 { int i, j, ii, nl, nc;
262   float** distMatrix;
263   float* weight = malloc(ncols*sizeof(float));
264 
265 
266 
267   printf("============ Euclidean distance matrix between genes ============\n");
268   for (i = 0; i < ncols; i++) weight[i] = 1.0;
269   distMatrix = distancematrix(nrows, ncols, data, weight, 'e', 0);
270          /* ANDREJ: The compiler gave me a warning that:
271          thd_segtools.c: In function 'example_distance_gene':
272          thd_segtools.c:234: warning: assignment makes pointer from integer
273                                     without a cast
274          This led me to realize that the compiler did not have the prototype
275          of this function before reaching this point and assumed the returned
276          value was an int to be typecast to float **.
277          But int may not be enough to hold a pointer anymore
278 
279          In any case, there just is not enough memory for this operation here
280          so we'll have to consider whether this is useful enough to pursue
281          in different ways. */
282 
283   if (!distMatrix)
284   { printf ("Insufficient memory to store the distance matrix\n");
285     free(weight);
286     return NULL;
287   }
288   /*printf("   Gene:");
289   for(i=0; i<nrows-1; i++) printf("%6d", i);
290   printf("\n");
291   for(i=0; i<nrows; i++)
292      { printf("Gene %2d:",i);
293     for(j=0; j<i; j++) printf(" %5.2f",distMatrix[i][j]);
294     printf("\n");
295   }
296   printf("\n");
297     */
298 
299 
300 
301   free(weight);
302   return distMatrix;
303 }
304 
305 
306 /* ========================================================================= */
307 
example_hierarchical(int nrows,int ncols,float ** data,char * jobname,int k,float ** distmatrix,int * clusterid)308 void example_hierarchical( int nrows, int ncols,
309                            float** data,
310                            char* jobname,
311                            int k, float** distmatrix,
312                            int *clusterid)
313 /* Perform hierarchical clustering ... , float** distmatrix */
314 { int i, ii, nl, nc;
315   const int nnodes = nrows-1;
316   float* weight = malloc(ncols*sizeof(float));
317   Node* tree;
318 
319   char* filename;
320   //char* filename2;
321   FILE *out1; int n;
322 
323 
324   for (i = 0; i < ncols; i++) weight[i] = 1.0;
325   printf("\n");
326 
327 
328 
329   n = 1 + strlen(jobname) + strlen("_C") + strlen(".ext");
330 
331   if (k)
332     { int dummy = k;
333       do n++; while (dummy/=10);
334     }
335 
336 
337   filename = malloc(n*sizeof(char));
338 
339   sprintf (filename, "%s_C%d.hie", jobname, k);
340   out1 = fopen( filename, "w" );
341 
342   /*FILE *out2;
343   filename2 = malloc(n*sizeof(char));
344 
345   sprintf (filename2, "%s_C%d.hi1", jobname, k);
346   out2 = fopen( filename2, "w" );*/
347 
348   //HERE SHOULD USE method instead of 'xxx' (s,m,a,c)
349 
350 
351   printf("================ Pairwise single linkage clustering ============\n");
352   /* Since we have the distance matrix here, we may as well use it. */
353   tree = treecluster(nrows, ncols, 0, 0, 0, 'e', 's', distmatrix);
354   /* The distance matrix was modified by treecluster, so we cannot use it any
355    * more. But we still need to deallocate it here.
356    * The first row of distmatrix is a single null pointer; no need to free it.
357    */
358   for (i = 1; i < nrows; i++) free(distmatrix[i]);
359   free(distmatrix);
360   if (!tree)
361   { /* Indication that the treecluster routine failed */
362     printf ("treecluster routine failed due to insufficient memory\n");
363     free(weight);
364     return;
365   }
366 
367   #if 0
368    /* Andrej: This block looked like it was commented out
369     I took out some of the * / because they
370     were generating warning and blocked out the
371     entire section with #if 0 .
372     The compiler will not compile this section */
373 
374   fprintf(out2,"Node     Item 1   Item 2    Distance\n");
375   for(i=0; i<nnodes; i++)
376     fprintf(out2,"%3d:%9d%9d      %g\n",
377            -i-1, tree[i].left, tree[i].right, tree[i].distance);
378 	   printf("\n");
379 	   fclose(out2);
380   //free(tree);
381 
382 
383   printf("================ Pairwise maximum linkage clustering ============\n");
384   tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'm', 0);
385   /* Here, we let treecluster calculate the distance matrix for us. In that
386    * case, the treecluster routine may fail due to insufficient memory to store
387    * the distance matrix. For the small data sets in this example, that is
388    * unlikely to occur though. Let's check for it anyway:
389    */
390   if (!tree)
391   { /* Indication that the treecluster routine failed */
392     printf ("treecluster routine failed due to insufficient memory\n");
393     free(weight);
394     return;
395   }
396   printf("Node     Item 1   Item 2    Distance\n");
397   for(i=0; i<nnodes; i++)
398     printf("%3d:%9d%9d      %g\n",
399            -i-1, tree[i].left, tree[i].right, tree[i].distance);
400   printf("\n");
401   free(tree);
402 
403 
404 
405   printf("================ Pairwise average linkage clustering ============\n");
406   tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'a', 0);
407   if (!tree)
408   { /* Indication that the treecluster routine failed */
409     printf ("treecluster routine failed due to insufficient memory\n");
410     free(weight);
411     return;
412   }
413   printf("Node     Item 1   Item 2    Distance\n");
414   for(i=0; i<nnodes; i++)
415     printf("%3d:%9d%9d      %g\n",
416            -i-1, tree[i].left, tree[i].right, tree[i].distance);
417   printf("\n");
418   free(tree);
419 
420 
421 
422   printf("================ Pairwise centroid linkage clustering ===========\n");
423   tree = treecluster(nrows, ncols, data, weight, 0, 'e', 'c', 0);
424   if (!tree)
425   { /* Indication that the treecluster routine failed */
426     printf ("treecluster routine failed due to insufficient memory\n");
427     free(weight);
428     return;
429   }
430   printf("Node     Item 1   Item 2    Distance\n");
431   for(i=0; i<nnodes; i++)
432     printf("%3d:%9d%9d      %g\n",
433            -i-1, tree[i].left, tree[i].right, tree[i].distance);
434   printf("\n");
435 
436   #endif
437 
438 
439 
440   printf("=============== Cutting a hierarchical clustering tree ==========\n");
441   clusterid = malloc(nrows*sizeof(int));
442   printf(" number of clusters %d \n",k);
443   cuttree (nrows, tree, k, clusterid);
444   for(i=0; i<nrows; i++)
445   fprintf(out1, "%09d\t%2d\n", i, clusterid[i]);
446   fprintf(out1, "\n");
447   fclose(out1);
448 
449 
450    free(tree);
451   free(weight);
452   return;
453 }
454 
455 /* ========================================================================= */
456 
getvoxlclusterdist(int * count,float ** cdata,int * clusterid,float ** data,char * jobname,int nclusters,int nrows,int ncols,float ** vcdata,char dist)457 void getvoxlclusterdist(int* count, float** cdata,
458 			int* clusterid, float** data, char* jobname,
459 			int nclusters, int nrows, int ncols, float** vcdata,
460          char dist)
461 {
462    int i, j, n;
463    char* filename4;
464    FILE *out4=NULL;
465    FILE *out5=NULL;
466    float difference, difference1;
467    float* max_vcdata = NULL;
468    int dummy = 0;
469    char* filename5;
470    float distance = 0.0;
471    float (*metric)
472       (int, float**, float**, const float[], int, int, int) =
473          setmetric(dist);
474    float *weight=NULL;
475 
476 
477    ENTRY("getvoxlclusterdist");
478    /* allocate for answer arrays */
479    if (!(max_vcdata = (float *)calloc(sizeof(float), nclusters))) {
480       fprintf(stderr,"ERROR: Failed to allocate for max_vcdata\n");
481       EXRETURN;
482    }
483 
484 
485   n = 512 + strlen(jobname) + strlen("_K_G") + strlen(".ext");
486 
487   dummy = nclusters;
488   do n++; while (dummy/=10);
489 
490   /*avovk, june 09, changed output names*/
491 
492   filename4 = malloc(n*sizeof(char));
493   sprintf (filename4, "%s_K%d_Gx.vcd.1D", jobname, nclusters);
494   if (writedists) { out4 = fopen( filename4, "w" ); }
495 
496   filename5 = malloc((n+2)*sizeof(char));
497   sprintf (filename5, "%s_K%d_Gx.info2.1D", jobname, nclusters);
498   if (writedists) {  out5 = fopen( filename5, "w" ); }
499 
500   weight = (float *)calloc(ncols, sizeof(float));
501   for (i = 0; i < ncols; ++i) weight[i] = 1.0;
502 
503   for (i = 0; i < nrows; i++){
504     #if 0      /* ZSS, distances should be same for clustering */
505        difference = 0;
506        difference1 = 0;
507        for (j = 0; j < ncols; j++) {
508          difference1 = cdata[clusterid[i]][j]-data[i][j];
509          difference = difference + difference1*difference1;
510        }
511        vcdata[i][0] = 100/(1+sqrt(difference)); /* if values are close to 0 ?! */
512     #else
513       distance = metric(ncols, cdata, data, weight, clusterid[i], i, 0);
514       vcdata[i][0] = distance;   /* ZSS: Don't know what you want to do here */
515     #endif
516   }
517 
518   /* avovk JULY29_2008, sept 05 */
519   for (i = 0; i < nclusters; i++){
520     max_vcdata[i] = 0;
521   }
522 
523   for (i = 0; i < nrows; i++){
524     if (vcdata[i][0] > max_vcdata[clusterid[i]]) {
525        max_vcdata[clusterid[i]] = vcdata[i][0];
526     }
527   }
528 
529   if (out5) {
530    if (verb)
531       printf ("------- writing  max distances within clusters to file:\t\t"
532               " %s_K_G%d.info2.1D", jobname, nclusters);
533    fprintf(out5, "#max distance within cluster (job %s, %d clusters)\n",
534             jobname, nclusters);
535   }
536   for (i = 0; i < nclusters; i++){
537     if (verb) printf("%7.3f\n",max_vcdata[i]);
538     if (out5) fprintf(out5, "#cluster %d:\n"
539                             "%d   %7.3f\n",i, i, max_vcdata[i]);
540   }
541 
542   for (i = 0; i < nrows; i++){
543     difference = vcdata[i][0];
544     vcdata[i][0] = 100*clusterid[i]+100*difference/(max_vcdata[clusterid[i]]);
545   }
546   /* avovk JULY29_2008, sept 05 */
547 
548   if (out4) {
549      if (verb) printf ("------- writing voxels-centroids distances to file:\t\t"
550              " %s_K_G%d.vcd.1D\n",jobname, nclusters);
551    fprintf (out4, "#distance from voxel to its centroid (job %s, %d clusters)\n",
552             jobname, nclusters);
553    for (i = 0; i < nrows; i++)
554       fprintf (out4, "%09d\t%7.3f\n", i, vcdata[i][0]);
555   }
556   if (out4) fclose(out4); out4=NULL;
557   if (out5) fclose(out5); out5=NULL;
558 
559   EXRETURN;
560 }
561 
562 /* ========================================================================= */
563 
564 
getvoxlclustersdist(int * count,float ** cdata,int * clusterid,float ** data,char * jobname,int nclusters,int nrows,int ncols,float ** vcdata,char dist)565 void getvoxlclustersdist(int* count, float** cdata,
566 			int* clusterid, float** data, char* jobname,
567 			int nclusters, int nrows, int ncols, float **vcdata,
568          char dist)
569 {
570    int i, j, k;
571    float distance = 0.0, *weight=NULL;
572    float (*metric)
573       (int, float**, float**, const float[], int, int, int) =
574          setmetric(dist);
575 
576 
577    weight = (float *)calloc(ncols, sizeof(float));
578    for (i = 0; i < ncols; ++i) weight[i] = 1.0;
579 
580    for (k = 0; k < nclusters; k++){
581       for (i = 0; i < nrows; i++){
582       #if 0 /* ZSS: distances should be same for clustering */
583          difference = 0;
584          difference1 = 0;
585          for (j = 0; j < ncols; j++) {
586             /*ZSS: Did you CHECK THIS LOOP ? ; j is going through fitcoef*/
587       difference1 = cdata[k][j]-data[i][j];
588       difference = difference + difference1*difference1;
589          }
590          vcdata[i][k+1] = 100/(1+sqrt(difference)); /* actually 1/distance */
591       #else
592          distance = metric(ncols, cdata, data, weight, k, i, 0);
593          vcdata[i][k+1] = distance;/* ZSS: Don't know what you want to do here */
594       #endif
595       }   /* some kind of scaled probability */
596    }
597 
598   return;
599 
600 }
601 
602 
603 /* avovkJULY30: it would be nice to have color pallete for cluster probability */
604 
color_palette(int nclusters,char * jobname)605 void color_palette(int nclusters, char* jobname)
606 {
607   int n = 0;
608   int i, j;
609   char* filename=NULL;
610   FILE *out=NULL;
611   float a, c;
612   int nsteps, step, colorv, hexp1, hexp2;
613   char* hexnumbers=NULL;
614   int verb = 0;
615 
616   hexnumbers = (char *)malloc(32*sizeof(char));
617   sprintf (hexnumbers, "0123456789abcdef");
618 
619   n = 512 + strlen(jobname) + strlen(".pal") + 2;
620   filename = (char *)malloc(n*sizeof(char));
621   sprintf (filename, "%s_K%d.pal", jobname,nclusters);
622   /* output file name not good ! */
623   if (!(out = fopen( filename, "w" ))) {
624    fprintf(stderr,"Failed to open %s for writing\n", filename);
625   }
626 
627   c = nclusters;
628 
629   nsteps = 256/nclusters;
630 
631   /*printf("num of color steps per cluster: %d \n",nsteps); */
632 
633   /* now we use those steps to create color fading to/from black*/
634 
635   step = 256/nsteps - 2; /* number of clusters -1, that we don't go to black*/
636 
637   if (nclusters < 3)
638     step = 256/nsteps - 1;
639 
640 
641   fprintf (out, "color_%d_clusters\n",nclusters);
642 
643   /* for ( i = 0; i < nclusters; i++) {*/
644 
645   colorv = 255;
646   for ( j = 0; j < nsteps; j++) {
647     colorv = colorv - step;
648     hexp1 = colorv/16;
649     hexp2 = colorv - hexp1*16;
650 
651     fprintf (out, "#%c%c0000\n", hexnumbers[hexp1], hexnumbers[hexp2]);
652 
653   }
654 
655 
656   if (nclusters > 1) {
657     colorv = 255;
658     for ( j = 0; j < nsteps; j++) {
659       colorv = colorv - step;
660       hexp1 = colorv/16;
661       hexp2 = colorv - hexp1*16;
662 
663       fprintf (out, "#00%c%c00\n", hexnumbers[hexp1], hexnumbers[hexp2]);
664 
665     }
666   }
667 
668   if (nclusters > 2) {
669     colorv = 255;
670     for ( j = 0; j < nsteps; j++) {
671       colorv = colorv - step;
672       hexp1 = colorv/16;
673       hexp2 = colorv - hexp1*16;
674 
675       fprintf (out, "#0000%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2]);
676 
677     }
678   }
679 
680 
681   if (nclusters > 3) {
682     colorv = 255;
683     for ( j = 0; j < nsteps; j++) {
684       colorv = colorv - step;
685       hexp1 = colorv/16;
686       hexp2 = colorv - hexp1*16;
687 
688       fprintf (out, "#%c%c%c%c00\n", hexnumbers[hexp1], hexnumbers[hexp2],
689 	       hexnumbers[hexp1], hexnumbers[hexp2]);
690 
691     }
692   }
693 
694   if (nclusters > 4) {
695     colorv = 255;
696     for ( j = 0; j < nsteps; j++) {
697       colorv = colorv - step;
698       hexp1 = colorv/16;
699       hexp2 = colorv - hexp1*16;
700 
701       fprintf (out, "#00%c%c%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2],
702 	       hexnumbers[hexp1], hexnumbers[hexp2]);
703 
704     }
705   }
706 
707   if (nclusters > 5) {
708     colorv = 255;
709     for ( j = 0; j < nsteps; j++) {
710       colorv = colorv - step;
711       hexp1 = colorv/16;
712       hexp2 = colorv - hexp1*16;
713 
714       fprintf (out, "#%c%c00%c%c\n", hexnumbers[hexp1], hexnumbers[hexp2],
715 	       hexnumbers[hexp1], hexnumbers[hexp2]);
716 
717     }
718   }
719 
720 
721   if (verb) printf ("------- Color palette written to file:\t\t"
722 	  "%s_K%d.pal\n",jobname,nclusters);
723 
724   fclose(out); out=NULL;
725    free(hexnumbers); hexnumbers  = NULL; /* clean up */
726 }
727 
728 /* ========================================================================= */
729 
example_kmeans(int nrows,int ncols,float ** data,int nclusters,int npass,char dist,char * jobname,int * clusterid,float ** vcdata,REMAPS remap)730 void example_kmeans( int nrows, int ncols,
731                      float** data,
732                      int nclusters, int npass,
733                      char dist, char* jobname,
734                      int *clusterid, float **vcdata,
735                      REMAPS remap)
736 /* Perform k-means clustering on genes */
737 {
738    int i, j, ii, nl, nc;
739    //const int nclusters = 3;
740    const int transpose = 0;
741    //const char dist = 'e';
742    const char method = 'a';
743    /* For method=='a', the centroid is defined as the mean over all elements
744      belonging to a cluster for each dimension.
745      For method=='m', the centroid is defined as the median over all elements
746      belonging to a cluster for each dimension.
747    */
748 
749    //int npass = 1;
750    int ifound = 0;
751    int test=0;
752    float error;
753    float distance;
754    int** index=NULL;
755    int* count=NULL;
756    float* weight = malloc(ncols*sizeof(float));
757    float** cdata = malloc(nclusters*sizeof(float*));
758 
759    int n=0;
760    char* filename=NULL;
761    char* filename2=NULL;
762    char* filename3=NULL;
763    char* filename4=NULL;
764    FILE *out1=NULL;
765    FILE *out2=NULL;
766    FILE *out3=NULL;
767    FILE *out4=NULL;
768    float a, b;
769 
770 
771    for (i = 0; i < nclusters; i++)
772    { cdata[i] = malloc(ncols*sizeof(float));
773 
774    }
775    for (i = 0; i < ncols; i++) weight[i] = 1.0;
776 
777 
778    /* ZSS: Andrej, that n was too small. I added 512 to get
779            avoid the problem. */
780    n = 512 + strlen(jobname) + strlen("_K_G") + strlen(".ext");
781 
782    if (dist)
783    { int dummy = nclusters;
784     do n++; while (dummy/=10);
785    }
786 
787    /* avovk */
788    if (verb > 1) printf("a je u omari :) \n");
789    filename = (char *)malloc(n*sizeof(char));
790    filename2 = (char *)malloc(n*sizeof(char));
791    filename3 = (char *)malloc(n*sizeof(char));
792    filename4 = (char *)malloc((n+2)*sizeof(char));
793 
794    /* ZSS:  Put a .1D in the name wherever it is appropriate */
795    sprintf (filename, "%s_K02%d_G%c.kgg.1D", jobname, nclusters,dist);
796    if (writedists) { out1 = fopen( filename, "w" ); }
797 
798    sprintf (filename2, "%s_K02%d_G%c.dis.1D", jobname, nclusters,dist);
799    if (writedists) { out2 = fopen( filename2, "w" ); }
800 
801    sprintf (filename3, "%s_K%02d_G%c.cen.1D", jobname, nclusters,dist);
802    if (writedists) { out3 = fopen( filename3, "w" ); }
803    sprintf (filename4, "%s_K%02d_G%c.info1.1D", jobname, nclusters,dist);
804    if (writedists) { out4 = fopen( filename4, "w" ); }
805 
806    if (verb) {
807      printf("======================== k-means clustering"
808             " ========================\n");
809 
810       printf ("\n");
811       printf ("----- doing %d pass-es... go stretch your legs...\n",npass);
812    }
813 
814    kcluster(   nclusters,
815                nrows,ncols,data,
816                weight,
817                transpose,npass,
818                method,dist,
819                clusterid, &error, &ifound);
820    switch (remap) {
821       case COUNT:
822       case iCOUNT:
823        { double count[nclusters];
824          int *isort=NULL, imap[nclusters];
825 
826          /* count number of voxels */
827          for (i=0; i<nclusters; ++i) count[i]=0;
828          for (i=0; i<nrows; ++i) {
829             ++count[clusterid[i]];
830          }
831          /* sort result */
832          isort = z_idoubleqsort(count, nclusters);
833          for (i=0; i<nclusters; ++i) {
834          if (remap==COUNT) {
835             imap[isort[i]] = nclusters -i -1;
836          } else {
837             imap[isort[i]] = i;
838          }
839          fprintf(stderr,"Remapping cluster %d (count %ld) --> %d\n",
840                   isort[i], (long)count[i], imap[isort[i]]);
841        }
842          for (i=0; i<nrows; ++i) {
843             clusterid[i] = imap[clusterid[i]];
844          }
845          free(isort); isort=NULL;
846        }
847        break;
848       case MAG:
849       case iMAG:
850        { double mg[nclusters];
851          int *isort=NULL, imap[nclusters];
852        if (!getclustercentroids( nclusters, nrows, ncols,
853                                  data, clusterid, cdata,
854                                  0, 'a')) {
855          fprintf(stderr,"Failed to get centroids");
856        }
857        for (i=0; i<nclusters; ++i) {
858          mg[i]=0.0;
859          for (j=0; j<ncols; ++j) {
860             mg[i] += cdata[i][j]*cdata[i][j];
861          }
862          mg[i] = sqrt(mg[i]);
863        }
864        isort = z_idoubleqsort(mg, nclusters);
865        if (verb) fprintf(stderr,"ncols = %d\n", ncols);
866        for (i=0; i<nclusters; ++i) {
867          if (remap==MAG) {
868             imap[isort[i]] = nclusters -i -1;
869          } else {
870             imap[isort[i]] = i;
871          }
872          if (verb) fprintf(stderr,"Remapping cluster %d (mag %f) --> %d\n",
873                   isort[i], mg[i], imap[isort[i]]);
874        }
875 
876          for (i=0; i<nrows; ++i) {
877             clusterid[i] = imap[clusterid[i]];
878          }
879 
880        free(isort);
881        }
882          break;
883       case NONE:
884          break;
885       default:
886          fprintf(stderr,
887             "REMAPPING flag of %d unknown. No remapping done.\n", remap);
888    }
889 
890    if (verb) printf ("Solution found %d times; ", ifound);
891    if (verb) printf ("within-cluster sum of distances is %f\n", error);
892 
893 
894    if (writedists) {
895       fprintf (out4,"#within-cluster sum of distances: %f\n",error);
896       fclose(out4); out4=NULL;
897    }
898 
899    if (writedists) {
900       if (verb) printf ("------- writing Cluster assignments to file:\t\t"
901              " %s_K_G%d.kgg.1D\n",jobname, nclusters);
902       for (i = 0; i < nrows; i++)
903          fprintf (out1, "%09d\t %d\n", i, clusterid[i]);
904       fclose(out1); out1=NULL;
905    }
906 
907    index = (int **)malloc(nclusters*sizeof(int*));
908    count = (int *)malloc(nclusters*sizeof(int));
909    for (i = 0; i < nclusters; i++) count[i] = 0;
910    for (i = 0; i < nrows; i++) count[clusterid[i]]++;
911    for (i = 0; i < nclusters; i++) index[i] = malloc(count[i]*sizeof(int));
912    for (i = 0; i < nclusters; i++) count[i] = 0;
913    for (i = 0; i < nrows; i++){
914       int id = clusterid[i];
915       index[id][count[id]] = i;
916       count[id]++;
917    }
918 
919    if (writedists) {
920       if (verb) printf ("------- writing Distance between clusters to file:\t "
921              "%s_K_G%d.dis.1D \n", jobname, nclusters);
922       fprintf (out2,"#------- Distance between clusters:\n");
923 
924       for (i = 0; i < nclusters-1; i++)
925         {
926           for (j = 1+i; j < nclusters; j++)
927 	    {
928 	      distance = clusterdistance(nrows, ncols, data,
929 				         weight, count[i], count[j], index[i],
930 				         index[j], dist , 'a', 0);
931 
932 	      fprintf(out2,"#Distance between %d and %d: %7.3f\n",
933 		      i, j, distance);
934 
935 	    }
936         }
937 
938       fclose(out2); out2=NULL;
939    }
940 
941 
942    getclustercentroids(nclusters, nrows, ncols, data, clusterid,
943                       cdata, 0, 'a');
944    if (writedists) {
945       if (verb) printf ("------- writing Cluster centroids to file:\t\t"
946              "%s_K_G%d.cen.1D\n",jobname, nclusters);
947       for (i = 0; i < nclusters; i++){
948         /*      fprintf(out3,"Cluster %2d:", i);*/
949          for (j = 0; j < ncols; j++) fprintf(out3,"\t%7.3f", cdata[i][j]);
950          fprintf(out3,"\n");
951       }
952       fclose(out3); out3=NULL;
953 
954       color_palette(nclusters, jobname);
955    }
956    if (verb) printf("Done...\n");
957 
958    /* call function to calculate distance between each voxel and centroid */
959    /* we will need:
960       count - number of elements in cluster as we allready have it
961       cdata - cluster centroids
962       clusterid
963       data */
964 
965       getvoxlclusterdist(count, cdata, clusterid, data, jobname,
966 		      nclusters, nrows, ncols, vcdata, dist);
967       getvoxlclustersdist(count, cdata, clusterid, data, jobname,
968 		      nclusters, nrows, ncols, vcdata, dist);
969 
970 
971       /*might want to make some calculations with vcdata*/
972 
973       /*lets calculate distance to centroid/sum(distance 2 all centroids)*/
974 
975       for (i = 0; i < nrows; i++)
976 	{
977 	  a = vcdata[i][0];
978      b = a+1e10;
979 	  for (j = 0; j < nclusters; j++)
980 	    if (vcdata[i][j+1]>a && vcdata[i][j+1]< b) b = vcdata[i][j+1];
981 	  vcdata[i][nclusters+1] = (1.0-a/b)*100.0;
982 	}
983 
984       /* just a notice: first column is dist to centroid in THE cluster,
985 	 then there are columns that describe distance to centroids from
986          each cluster centroid
987          - and we are adding another column here*/
988 
989 
990    for (i = 0; i < nclusters; i++) free(index[i]);
991    free(index);
992    free(count);
993 
994    for (i = 0; i < nclusters; i++){
995       free(cdata[i]);
996    }
997 
998    free(cdata);
999    free(weight);
1000 
1001    return;
1002 }
1003 
1004 /**********************************************************************
1005    END: functions based on command.c code from The C clustering library.
1006 **********************************************************************/
1007 # define EPSILON             1E-10
thd_Adist(THD_3dim_dataset * in_set,byte * mask,float * sigs,int nsigs,THD_3dim_dataset ** dist_set,OPT_KMEANS oc)1008 int thd_Adist (  THD_3dim_dataset *in_set,
1009                  byte *mask,
1010                  float *sigs, int nsigs,
1011                  THD_3dim_dataset **dist_set,
1012                  OPT_KMEANS oc)
1013 {
1014    int ii, jj, kk;
1015    int ncol = -1, nvox = -1;
1016    float *distvec=NULL, **asigvec=NULL, **afsig=NULL;
1017    char lll[25]={"buffer"};
1018    float *weight = NULL, factor =0.0;
1019    float (*metric)
1020     (int, float**, float**, const float[], int, int, int) =
1021        setmetric(oc.distmetric);
1022 
1023 
1024    ENTRY("thd_Adist");
1025 
1026    nvox = DSET_NVOX(in_set);
1027    ncol = DSET_NVALS(in_set);
1028    if (ncol < DSET_NUM_TIMES(in_set)) ncol = DSET_NUM_TIMES(in_set);
1029 
1030    /* make sure in_set is loaded */
1031    DSET_load(in_set);
1032 
1033 
1034    /* equal weights for all elements of signature */
1035    weight = (float *)calloc(ncol, sizeof(float));
1036    for (ii = 0; ii < ncol; ii++) weight[ii] = 1.0;
1037 
1038    /*array to hold one voxel's signature */
1039    asigvec = (float ** )calloc(1, sizeof(float*));
1040    asigvec[0] = (float *)calloc(ncol, sizeof(float)) ;
1041 
1042    /* array to hold signatures in a way pleasing to metric */
1043    afsig = (float **)calloc(nsigs, sizeof(float*));
1044    ii=0;
1045    for (jj=0; jj<nsigs; ++jj) {
1046       afsig[jj] =(float *)calloc(ncol, sizeof(float));
1047       for (kk=0; kk<ncol; ++kk) {
1048          afsig[jj][kk] = sigs[ii]; ++ii;
1049       }
1050    }
1051    /* Create temporary distance vector */
1052    distvec = (float *)calloc(nvox, sizeof(float));
1053 
1054    /* prepare output */
1055    *dist_set = EDIT_empty_copy(in_set) ;
1056    EDIT_dset_items(  *dist_set ,
1057                      ADN_nvals     , nsigs           ,
1058                      ADN_ntt       , nsigs          ,
1059                      ADN_datum_all , MRI_short      ,
1060                      ADN_brick_fac , NULL           ,
1061                      ADN_prefix    , "adist"   ,
1062                      ADN_none ) ;
1063 
1064    for (jj=0; jj<nsigs; ++jj) {  /* for each signature type */
1065       for (ii=0; ii< nvox; ++ii) {
1066          distvec[ii]=0;
1067          if (!mask || mask[ii]) {
1068             THD_extract_array( ii , in_set , 0 , asigvec[0] ) ;
1069             /* check if it is all non zero */
1070             for (kk=0; kk<ncol; ++kk) {
1071                if (asigvec[0][kk]) { /* non zero, calculate the distance */
1072                   distvec[ii] = metric(ncol, afsig, asigvec, weight,jj, 0, 0);
1073                   break; /* get out of the loop */
1074                }
1075             }
1076             if (oc.voxdebug[3] == ii) {
1077                char fname[128]={""};
1078                FILE *fout=NULL;
1079                sprintf(fname, "distdbg.vox%d-%d-%d.sig%d.1D",
1080                   oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2], jj);
1081                if ((fout = fopen(fname, "w"))) {
1082                   fprintf(fout,"#Col. 0 signature for voxel %d %d %d\n"
1083                                "#Col. 1 reference signature %d\n"
1084                                "#Distance per metric %c = %f\n",
1085                                oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2],
1086                                jj, oc.distmetric, distvec[ii]);
1087                   for (kk=0; kk<ncol; ++kk){
1088                      fprintf(fout, "%f   %f\n", asigvec[0][kk], afsig[jj][kk]);
1089                   }
1090                   ININFO_message("Voxel %d %d %d is in mask \n"
1091                            "See file %s for debug info with signature %d/%d\n",
1092                                  oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2],
1093                                  fname, jj,nsigs);
1094                   fclose(fout); fout = NULL;
1095                } else {
1096                   ERROR_message("No permission for writing? No debug output.");
1097                }
1098             }
1099          } else if (oc.voxdebug[3] == ii) {
1100             ININFO_message("Voxel %d %d %d is not in mask \n ",
1101                            oc.voxdebug[0], oc.voxdebug[1], oc.voxdebug[2]);
1102          }
1103       }
1104       /* put distvec back into output, and watch the scaling */
1105       EDIT_substitute_brick( *dist_set , jj , MRI_short , NULL ) ;
1106       factor = EDIT_coerce_autoscale_new(nvox,
1107                                 MRI_float,distvec,
1108                                 MRI_short,
1109                                  (short*)DSET_BRICK_ARRAY(*dist_set,jj));
1110       if (factor < EPSILON)  factor = 0.0;
1111       else factor = 1.0 / factor;
1112       if( DSET_BRICK_TYPE(*dist_set,jj) == MRI_short )
1113       EDIT_misfit_report( DSET_FILECODE(*dist_set) , jj ,
1114                           nvox , factor ,
1115                           (short*)DSET_BRICK_ARRAY(*dist_set,jj) ,  distvec) ;
1116       if (oc.verb) {
1117          ININFO_message("Subbrick factor for %d is %f\n ",
1118                         jj, factor);
1119       }
1120       sprintf(lll,"Dist.%c.Sig.%02d", oc.distmetric, jj);
1121       EDIT_dset_items (*dist_set, ADN_brick_label_one + jj, lll, ADN_none);             EDIT_BRICK_FACTOR (*dist_set, jj, factor);
1122 
1123    }
1124 
1125    if (oc.verb) {
1126       ININFO_message("Have %d/%d voxels to process "
1127                      "with %d dimensions per voxel.\n",
1128                      nvox, DSET_NVOX(in_set), ncol);
1129    }
1130 
1131 
1132    if (oc.verb) {
1133       ININFO_message("Freedom");
1134    }
1135 
1136    free(weight); weight = NULL;
1137    free(distvec); distvec=NULL;
1138    free(asigvec[0]); asigvec[0]=NULL;
1139    free(asigvec); asigvec=NULL;
1140    for (jj=0; jj<nsigs; ++jj) free(afsig[jj]); free(afsig);
1141 
1142    RETURN(1);
1143 }
1144 
1145 /*! This function is a wrapper for thd_Acluster and deals with
1146     one input dset only */
thd_Acluster1(THD_3dim_dataset * in_set,byte * mask,int nmask,THD_3dim_dataset ** clust_set,THD_3dim_dataset ** dist_set,THD_3dim_dataset * clust_init,OPT_KMEANS oc)1147 int thd_Acluster1 (   THD_3dim_dataset *in_set,
1148                   byte *mask, int nmask,
1149                   THD_3dim_dataset **clust_set,
1150                   THD_3dim_dataset **dist_set,
1151                   THD_3dim_dataset *clust_init,
1152                   OPT_KMEANS oc)
1153 
1154 {
1155    static char FuncName[]={"thd_Acluster1"};
1156    float **D=NULL;
1157    int ncol=0, ii, nl, nc, ret=0;
1158    float *dvec=NULL;
1159 
1160    ENTRY("thd_Acluster1");
1161 
1162    ncol = DSET_NVALS(in_set);
1163    if (!nmask) {
1164       ERROR_message("No voxels in mask");
1165       RETURN(0);
1166    }
1167    /* allocate for D */
1168    D = (float **)calloc(sizeof(float*), nmask);
1169    for (ii=0;ii<(nmask);++ii) {
1170       if (!(D[ii] = (float *)calloc(sizeof(float), ncol))) {
1171          fprintf(stderr,"ERROR: Failed while allocating %dx%d float matrix\n",
1172                         nmask, ncol);
1173          RETURN(0);
1174       }
1175    }
1176 
1177    dvec = (float * )malloc(sizeof(float)*ncol) ;
1178    if (oc.verb) {
1179       ININFO_message("Filling %d cols of D(%dx%d) (mask=%p).\n",
1180                         ncol, nmask, ncol, mask);
1181    }
1182    ii = 0;
1183    for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1184       if (!mask || mask[nl]) {
1185          THD_extract_array( nl , in_set , 0 , dvec ) ;
1186          for (nc=0; nc<ncol; ++nc) D[ii][nc] = dvec[nc];
1187          ++ii;
1188       }
1189    }
1190    free(dvec); dvec = NULL;
1191 
1192 
1193    /* Now call clustering function */
1194    if (!(ret=thd_Acluster ( in_set,
1195                      mask, nmask,
1196                      clust_set,
1197                      dist_set ,
1198                      clust_init,
1199                      oc, D, ncol))) {
1200       ERROR_message("Failed in thd_Acluster");
1201    }
1202 
1203    /* freedom */
1204    if (D) {
1205       for (ii=0; ii<nmask; ++ii) if (D[ii]) free(D[ii]);
1206       free(D); D = NULL;
1207    }
1208 
1209    RETURN(ret);
1210 }
1211 
1212 /*!
1213    Andrej: Put some help like for function thd_polyfit
1214    You can form the data array Dp outside of this function,
1215    so if Dp is not NULL, then only header structure of in_set
1216    is used. If Dp is NULL then it is formed inside the function
1217    from the contents of in_set
1218    D_ncol is used to pass the number of columns in Dp. Note that
1219    D_ncol may be different from the number of sub-bricks in in_set
1220    because when Dp is filled externally, in_set can be one of the volumes
1221    used to form Dp.
1222 */
thd_Acluster(THD_3dim_dataset * in_set,byte * mask,int nmask,THD_3dim_dataset ** clust_set,THD_3dim_dataset ** dist_set,THD_3dim_dataset * clust_init,OPT_KMEANS oc,float ** Dp,int D_ncol)1223 int thd_Acluster (  THD_3dim_dataset *in_set,
1224                   byte *mask, int nmask,
1225                   THD_3dim_dataset **clust_set,
1226                   THD_3dim_dataset **dist_set,
1227                   THD_3dim_dataset *clust_init,
1228                   OPT_KMEANS oc,
1229                   float **Dp, int D_ncol)
1230 {
1231    int ii, nl, nc, j;
1232    float **D=NULL, **distmatrix=NULL;  /* this double business is a waste of
1233                                            memory, at least for D..*/
1234    int ncol = -1;
1235    float *dvec=NULL;
1236    float factor = 0.0;
1237    int* clusterid = NULL;
1238    short *sc = NULL;
1239    float *fc = NULL;
1240    float** vcdata = NULL;
1241    int nvc; /*this will be for number of columns in vcdata matrix*/
1242    int *vals = NULL, *vmap=NULL, N_vmap=0;
1243    char *label_table=NULL;
1244    char lll[25]={"buffer"};
1245 
1246 
1247    ENTRY("thd_Acluster");
1248 
1249    if (!clust_set) {
1250       fprintf(stderr,
1251                "ERROR: Bad input\n");
1252       RETURN(0);
1253    }
1254 
1255    nvc = oc.k+2;
1256 
1257    if (*clust_set) { /* reuse verify match */
1258       if (DSET_NVOX(in_set) != DSET_NVOX(*clust_set) ||
1259           DSET_NX(in_set) !=  DSET_NX(*clust_set) ||
1260           DSET_NY(in_set) !=  DSET_NY(*clust_set) ||
1261           DSET_NZ(in_set) !=  DSET_NZ(*clust_set)) {
1262          ERROR_message("mismatch in *clust_set grid");
1263          RETURN(0);
1264       }
1265    } else { /* make new one */
1266       *clust_set = EDIT_empty_copy(in_set ) ;
1267       EDIT_dset_items(  *clust_set ,
1268                         ADN_nvals     , 1           ,
1269                         ADN_ntt       , 1          ,
1270                         ADN_datum_all , MRI_short      ,
1271                         ADN_brick_fac , NULL           ,
1272                         ADN_prefix    , "OML!"   ,
1273                         ADN_none ) ;
1274       sc = (short *)calloc(sizeof(short),DSET_NVOX(in_set));
1275       EDIT_substitute_brick( *clust_set , 0 , MRI_short , sc ) ;
1276    }
1277 
1278    if (dist_set) {
1279       if (*dist_set) { /* reuse verify match */
1280          if (DSET_NVOX(in_set) != DSET_NVOX(*dist_set) ||
1281              DSET_NX(in_set) !=  DSET_NX(*dist_set) ||
1282              DSET_NY(in_set) !=  DSET_NY(*dist_set) ||
1283              DSET_NZ(in_set) !=  DSET_NZ(*dist_set) ||
1284              DSET_NVALS(*dist_set) !=  nvc ) {
1285             ERROR_message("mismatch in *dist_set grid");
1286             RETURN(0);
1287          }
1288       } else { /* make new one */
1289          *dist_set = EDIT_empty_copy(in_set) ;
1290          EDIT_dset_items(  *dist_set ,
1291                         ADN_nvals     , nvc           ,
1292                         ADN_ntt       , nvc          ,
1293                         ADN_datum_all , MRI_short      ,
1294                         ADN_brick_fac , NULL           ,
1295                         ADN_prefix    , "vcd"   ,
1296                         ADN_none ) ;
1297          for( j=0 ; j < nvc ; j++ ) /* create empty bricks to be filled below */
1298            EDIT_substitute_brick( *dist_set , j , MRI_short , NULL ) ;
1299       }
1300    }
1301 
1302 
1303    if (!mask) nmask = DSET_NVOX(in_set);
1304 
1305    if (!Dp) {
1306       ncol = DSET_NVALS(in_set);
1307       if (ncol < DSET_NUM_TIMES(in_set)) ncol = DSET_NUM_TIMES(in_set);
1308 
1309       /* make sure in_set is loaded */
1310       DSET_load(in_set);
1311 
1312       /* Create data matrix */
1313       D = (float **)calloc(sizeof(float*), nmask);
1314       for (ii=0;ii<(nmask);++ii) {
1315          if (!(D[ii] = (float *)calloc(sizeof(float), ncol))) {
1316             fprintf(stderr,"ERROR: Failed while allocating %dx%d float matrix\n",
1317                            nmask, ncol);
1318             RETURN(0);
1319          }
1320       }
1321 
1322 
1323       dvec = (float * )malloc(sizeof(float)*ncol) ;  /* array to hold series */
1324       if (oc.verb) {
1325          ININFO_message("Filling D(%dx%d) (mask=%p).\n", nmask, ncol, mask);
1326       }
1327       ii = 0;
1328       for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1329          if (!mask || mask[nl]) {
1330             THD_extract_array( nl , in_set , 0 , dvec ) ;
1331             for (nc=0; nc<ncol; ++nc) D[ii][nc] = dvec[nc];
1332             ++ii;
1333          }
1334       }
1335       /* dump voxel values in in_set to free up memory,
1336          but keep header of in_set */
1337       DSET_unload(in_set);
1338    } else {
1339       ncol = D_ncol;
1340       D = Dp;
1341    }
1342 
1343    clust_seed(oc.rand_seed);
1344    if (oc.verb) {
1345       ININFO_message("Have %d/%d (%f) voxels to process "
1346                      "with %d dimensions per voxel.\n"
1347                      "Seed now: %d\n",
1348                      nmask, DSET_NVOX(in_set), (float)nmask/DSET_NVOX(in_set),
1349                      ncol,
1350                      clust_seed(0));
1351    }
1352 
1353    /* allocate for answer arrays */
1354    if (!(clusterid = (int *)calloc(sizeof(int), nmask))) {
1355       fprintf(stderr,"ERROR: Failed to allocate for clusterid\n");
1356       RETURN(0);
1357    }
1358 
1359    /* initialize clusterid */
1360    if (clust_init) {
1361       if (0 && oc.remap != NONE) { /* allow that. Just initializing does
1362                                      not mean cluster ids won't change in
1363                                      unpleasant manners */
1364          ERROR_message("Cannot use -clust_init, along with "
1365                        "-remap other than NONE");
1366          RETURN (0);
1367       }
1368       if (oc.verb) {
1369          ININFO_message("Initializing cluster per %s\n",
1370                      DSET_PREFIX(clust_init));
1371       }
1372       if (!(vals = THD_unique_rank(clust_init, 0, mask,
1373                                     "Lamour.1D", &vmap, &N_vmap))) {
1374          ERROR_message("Failed to rank\n");
1375          RETURN (0);
1376       }
1377       if (oc.verb) {
1378          for (ii=0; ii<N_vmap; ++ii) {
1379             fprintf(stderr,"   vmap[%d]=%d\n", ii, vmap[ii]);
1380          }
1381       }
1382       nl=0;
1383       for (ii=0; ii<DSET_NVOX(clust_init); ++ii) {
1384          if ((!mask || mask[ii])) {
1385             /* -1 is because clusterid starts at 0 */
1386             if (vals[ii]>0) { /* only allow non zero for initialization*/
1387                clusterid[nl] = vals[ii]-1;
1388             } else {
1389                clusterid[nl] = THD_SEG_IRAN(oc.k); /* random assign */
1390             }
1391             if (clusterid[nl]<0) {
1392                ERROR_message( "Negative values!");
1393                RETURN(0);
1394             }
1395             ++nl;
1396          }
1397       }
1398       if (N_vmap-1 != oc.k) {
1399          ERROR_message( "Initializing with a dset of %d clusters\n"
1400                         " but asking for %d clusters in return", N_vmap-1, oc.k);
1401          RETURN(0);
1402       }
1403       if (!label_table && clust_init->Label_Dtable) {
1404          label_table = Dtable_to_nimlstring(clust_init->Label_Dtable,
1405                                             "VALUE_LABEL_DTABLE");
1406       }
1407    }
1408 
1409    /* allocate for answer array distance voxel centroid */
1410    if (oc.verb) {
1411       ININFO_message("Allocating for D, %dx%d\n", nmask, nvc);
1412    }
1413    vcdata = (float **)calloc(sizeof(float*), nmask);
1414    for (ii=0;ii<(nmask);++ii) {
1415      if (!(vcdata[ii] = (float *)calloc(sizeof(float), nvc))) {
1416      fprintf(stderr,"ERROR: Failed to allocate for voxel cluster distance\n");
1417      RETURN(0);
1418      }
1419    }
1420 
1421    /* now do the clustering */
1422    if (oc.k > 0) {
1423       if (oc.verb) {
1424          ININFO_message("Going to cluster: k=%d, r=%d\n"
1425                         "distmetric %c, jobname %s, verb = %d\n",
1426                         oc.k, oc.r, oc.distmetric, oc.jobname, oc.verb);
1427       }
1428       segtools_verb(oc.verb);
1429       segtools_writedists(oc.writedists);
1430       example_kmeans(   nmask, ncol, D,
1431                         oc.k, oc.r, oc.distmetric,
1432                         oc.jobname, clusterid, vcdata,
1433                         oc.remap);
1434    } else if (oc.kh > 0) {
1435       if (oc.verb) {
1436          ININFO_message("Going to h cluster: kh=%d\n"
1437                         "jobname %s\n",
1438                         oc.kh, oc.jobname);
1439       }
1440       if ((distmatrix = example_distance_gene(nmask, ncol, D))) {
1441          example_hierarchical(   nmask, ncol, D,
1442                                  oc.jobname, oc.kh,
1443                                  distmatrix,
1444                                  clusterid);
1445          /* ZSS: Andrej, this looks like a memory leak,
1446                   distmatrix should be freed here.
1447                   This free code is not tested yet! */
1448          ININFO_message("The freeing below has not been tested");
1449          for (ii=0; ii<ncol; ++ii) free(distmatrix[ii]);
1450          free(distmatrix); distmatrix = NULL;
1451       } else {
1452          ERROR_message("Failed to create distmatrix");
1453          RETURN(0);
1454       }
1455    } else {
1456       ERROR_message("Bad option selection");
1457       RETURN(0);
1458    }
1459 
1460    /* remap clusterid if needed */
1461    if (oc.remap == NONE && vmap) {
1462       if (oc.verb) {
1463          ININFO_message("Remapping output to cluster_init\n");
1464       }
1465 
1466       for (ii=0; ii<nmask; ++ii) {
1467          clusterid[ii] = vmap[clusterid[ii]+1];
1468       }
1469    } else { /* just add 1 because cluster ids start at 0 */
1470       for (ii=0; ii<nmask; ++ii) clusterid[ii] += 1;
1471    }
1472 
1473    /* create output datasets, if required*/
1474    if (oc.verb) {
1475       ININFO_message("loading results into %s\n",
1476                      DSET_PREFIX(*clust_set));
1477    }
1478 
1479    /* transfer ints in clusterid to shorts array */
1480    sc = (short *)DSET_ARRAY(*clust_set,0);
1481    ii = 0;
1482    for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1483       if (!mask || mask[nl]) {
1484          sc[nl] = (short)clusterid[ii];
1485          ++ii;
1486       } else {
1487          sc[nl] = 0;
1488       }
1489    }
1490    free(clusterid); clusterid = NULL;
1491    sc = NULL;
1492 
1493    /* add the labeltable if it exists */
1494    if (label_table) {
1495       if (oc.verb) {
1496          ININFO_message("Adding labeltable from initializing dset ");
1497       }
1498       (*clust_set)->Label_Dtable = Dtable_from_nimlstring(label_table);
1499       if (!(*clust_set)->Label_Dtable) {
1500          ERROR_message("Failed to create Label_Dtable");
1501          RETURN(0);
1502       }
1503      /* and stick it in the header. Label_Dtable is not preserved
1504          by dset writing function */
1505       THD_set_string_atr( (*clust_set)->dblk ,
1506                         "VALUE_LABEL_DTABLE" ,  label_table) ;
1507 
1508       free(label_table); label_table=NULL;
1509    }
1510 
1511    if (oc.user_labeltable) {
1512       Dtable *vl_dtable=NULL ;
1513 
1514       if (oc.verb) {
1515          ININFO_message("Applying labeltable from %s", oc.user_labeltable) ;
1516       }
1517       if ((*clust_set)->Label_Dtable) {
1518          destroy_Dtable((*clust_set)->Label_Dtable);
1519          (*clust_set)->Label_Dtable=NULL;
1520       }
1521       /* read the table */
1522       if (!(label_table = AFNI_suck_file( oc.user_labeltable))) {
1523          ERROR_message("Failed to read %s", oc.user_labeltable);
1524          RETURN(0);
1525       }
1526       if (!(vl_dtable = Dtable_from_nimlstring(label_table))) {
1527          ERROR_message("Could not parse labeltable");
1528          RETURN(0);
1529       }
1530       destroy_Dtable(vl_dtable); vl_dtable = NULL;
1531       THD_set_string_atr( (*clust_set)->dblk ,
1532                            "VALUE_LABEL_DTABLE" , label_table ) ;
1533       free(label_table); label_table = NULL;
1534    }
1535 
1536    if (!THD_find_atr( (*clust_set)->dblk , "VALUE_LABEL_DTABLE")) {
1537       /* Still no blasted labeltable */
1538       Dtable *vl_dtable=new_Dtable(5);
1539       char slab[256], sval[64], skmet[64];
1540       int nclusters=0;
1541       if (oc.verb) ININFO_message("Creating new labeltable") ;
1542       if (oc.k > 0) {
1543          nclusters = oc.k;
1544          sprintf(skmet, "kclust");
1545       } else {
1546          nclusters = oc.kh;
1547          sprintf(skmet, "hclust");
1548       }
1549       for (ii=0; ii<nclusters; ++ii) {
1550          if (!oc.nclabels) {
1551             if (vmap) {
1552                sprintf(sval,"%d", vmap[ii+1]);
1553                sprintf(slab,"%s%d",skmet, vmap[ii+1]);
1554             } else {
1555                sprintf(sval,"%d", ii+1);
1556                sprintf(slab,"%s%d",skmet, ii+1);
1557             }
1558             addto_Dtable( sval , slab , vl_dtable ) ;
1559          } else {
1560             if (vmap) {
1561                sprintf(sval,"%d", vmap[ii+1]);
1562             } else {
1563                sprintf(sval,"%d", ii+1);
1564             }
1565             snprintf(slab,128,"%s",oc.clabels[ii]);
1566             if (findin_Dtable_b( slab , vl_dtable )) {
1567                ERROR_message("Label %s already used.\n"
1568                              " No labeltable will be added\n", slab);
1569                RETURN(0);
1570             }
1571             addto_Dtable( sval , slab , vl_dtable ) ;
1572          }
1573       }
1574       label_table = Dtable_to_nimlstring(vl_dtable, "VALUE_LABEL_DTABLE");
1575       destroy_Dtable(vl_dtable); vl_dtable = NULL;
1576       THD_set_string_atr( (*clust_set)->dblk ,
1577                         "VALUE_LABEL_DTABLE" , label_table ) ;
1578       free(label_table); label_table = NULL;
1579    }
1580 
1581    /* prepare output */
1582    if (dist_set) {
1583       ININFO_message("loading results into %s\n",
1584                      DSET_PREFIX(*dist_set));
1585 
1586       for (j = 0; j < nvc; j++) {
1587          ININFO_message("...%d,", j);
1588 
1589          /* transfer data in vcdata to full float array */
1590          fc = (float *)calloc(sizeof(float),DSET_NVOX(in_set));
1591          ii = 0;
1592          for (nl=0; nl<DSET_NVOX(in_set); ++nl) {
1593 	         if (!mask || mask[nl]) {
1594 	            fc[nl] = (float)vcdata[ii][j];
1595 	            ++ii;
1596 	         }
1597          }
1598          factor = EDIT_coerce_autoscale_new(DSET_NVOX(in_set),
1599                                 MRI_float, fc,
1600                                 MRI_short,
1601                                  (short*)DSET_BRICK_ARRAY(*dist_set,j));
1602          if (factor < EPSILON)  factor = 0.0;
1603          else factor = 1.0 / factor;
1604          if( DSET_BRICK_TYPE(*dist_set,j) == MRI_short )
1605             EDIT_misfit_report( DSET_FILECODE(*dist_set) , j ,
1606                                 DSET_NVOX(in_set) , factor ,
1607                                 (short*)DSET_BRICK_ARRAY(*dist_set,j), fc) ;
1608          free(fc); fc = NULL;
1609          if (oc.verb) {
1610             ININFO_message("Subbrick factor for %d is %f\n ",
1611                            j, factor);
1612          }
1613 
1614          /* label bricks */
1615          if (j==0)
1616             EDIT_dset_items (*dist_set, ADN_brick_label_one + j, "Dc", ADN_none);
1617          else if (j < nvc -1) {
1618             sprintf(lll,"Dc%02d", j-1);
1619             EDIT_dset_items (*dist_set, ADN_brick_label_one + j, lll, ADN_none);
1620          } else {
1621             EDIT_dset_items (*dist_set, ADN_brick_label_one + j,
1622                               "Dc_norm", ADN_none);
1623          }
1624          EDIT_BRICK_FACTOR (*dist_set, j, factor);
1625       }
1626    }
1627 
1628    for (ii=0;ii<(nmask);++ii) {
1629       free(vcdata[ii]);
1630    }
1631    free(vcdata); vcdata = NULL;
1632 
1633    if (oc.verb) {
1634       ININFO_message("Freedom");
1635    }
1636 
1637    if (dvec) free(dvec); dvec=NULL;
1638 
1639 
1640    if (D != Dp) {
1641       // To free D
1642       for (ii=0;ii<nmask;++ii) {
1643        if (D[ii]) free(D[ii]);
1644       }
1645       free(D); D = NULL;
1646    }else D = NULL;
1647 
1648 
1649    RETURN(1);
1650 }
1651 
1652