/******************************************************************************/
/* The C Clustering Library.
 * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
 *
 * This library was written at the Laboratory of DNA Information Analysis,
 * Human Genome Center, Institute of Medical Science, University of Tokyo,
 * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
 * Contact: mdehoon 'AT' gsc.riken.jp
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation with or without modifications and for any purpose and
 * without fee is hereby granted, provided that any copyright notices
 * appear in all copies and that both those copyright notices and this
 * permission notice appear in supporting documentation, and that the
 * names of the contributors or copyright holders not be used in
 * advertising or publicity pertaining to distribution of the software
 * without specific prior permission.
 *
 * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
 * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

/*
 * CALL is placed between the return type and the name of several routines
 * declared in this header. Unless it has already been defined before this
 * point, it expands to nothing.
 * NOTE(review): presumably a hook for a calling-convention or export
 * specifier on some platforms -- confirm with the build configuration.
 */
#ifndef CALL
# define CALL
#endif

/*
 * Minimum / maximum helpers, defined only when no macro of the same name
 * exists yet. They are plain macros: the selected argument is evaluated
 * twice, so never pass an expression with side effects (e.g. min(i++, j)).
 */
#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef max
#define max(x, y) ((x) > (y) ? (x) : (y))
#endif

/* <windows.h> is pulled in only when the build defines WINDOWS. */
#ifdef WINDOWS
#  include <windows.h>
#endif

/*
 * clust_seed takes an integer seed and returns an unsigned int.
 * NOTE(review): presumably (re)seeds the random number generator used by
 * the clustering routines and reports the seed in effect -- confirm against
 * the definition, which is not part of this header.
 */
unsigned int clust_seed(int seed);

/* Chapter 2 */

/*
 * clusterdistance returns one float for two groups of items, each group
 * given as an index list (index1 holds n1 entries, index2 holds n2 entries
 * -- presumably; confirm against the definition).
 * NOTE(review): the descriptions below are inferred from the parameter
 * names only; verify them against the implementation or the manual.
 *   nrows, ncolumns  dimensions of data
 *   data             the data matrix
 *   weight           weights used by the distance measure
 *   dist             one-character code selecting the distance measure
 *   method           one-character code selecting how the two groups are
 *                    compared
 *   transpose        selects whether rows or columns are the items
 */
float CALL clusterdistance (int nrows, int ncolumns, float** data,
  float weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);

/*
 * distancematrix returns a float** built from ngenes x ndata input values.
 * NOTE(review): presumably the pairwise distance matrix, allocated by the
 * callee and owned by the caller afterwards; the exact shape and the
 * failure return value must be confirmed against the definition.
 */
float** CALL distancematrix (int ngenes, int ndata, float** data,
  float* weight, char dist, int transpose);

/* Chapter 3 */

/*
 * NOTE(review): the purposes given for the routines in this chapter are
 * inferred from their names and parameter lists; the definitions are not
 * part of this header, so confirm the details there.
 */

/*
 * getclustercentroids fills cdata from data and the cluster assignment in
 * clusterid (presumably one centroid per cluster, computed according to the
 * one-character code in method). The meaning of the int return value is not
 * visible here.
 */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  float** data, int clusterid[], float** cdata,
  int transpose, char method);

/*
 * getclustermedoids works on a precomputed distance matrix and writes its
 * results to centroids[] and errors[] (presumably one entry per cluster).
 */
void getclustermedoids(int nclusters, int nelements, float** distance,
  int clusterid[], int centroids[], float errors[]);

/*
 * kcluster partitions the data into nclusters clusters, trying npass times.
 * Results are returned through clusterid[], *error and *ifound.
 */
void CALL kcluster (int nclusters, int ngenes, int ndata, float** data,
  float weight[], int transpose, int npass, char method, char dist,
  int clusterid[], float* error, int* ifound);

/*
 * kmedoids is the counterpart of kcluster that takes a precomputed distance
 * matrix instead of the raw data. Results are returned through clusterid[],
 * *error and *ifound.
 */
void CALL kmedoids (int nclusters, int nelements, float** distance,
  int npass, int clusterid[], float* error, int* ifound);

/*
 * comp has the signature of a qsort/bsearch comparison callback.
 * NOTE(review): the element type it compares is not visible here.
 */
int  comp(const void *p, const void *q);

/* Chapter 4 */

/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance
 * between the two subnodes that were joined.
 */
typedef struct {
  int left;        /* first element (>= 0) or subnode (< 0) joined here */
  int right;       /* second element (>= 0) or subnode (< 0) joined here */
  float distance;  /* distance between the two joined subnodes */
} Node;

/*
 * treecluster performs hierarchical clustering and returns the resulting
 * tree as a pointer to Node.
 * NOTE(review): presumably the result is a callee-allocated array with one
 * Node per join that the caller must release, and distmatrix may supply a
 * precomputed distance matrix -- confirm both against the definition.
 */
Node* CALL treecluster (int nrows, int ncolumns, float** data,
  float weight[], int transpose, char dist, char method, float** distmatrix);

/*
 * cuttree takes a tree over nelements elements and writes a cluster number
 * for each element into clusterid[].
 * NOTE(review): presumably splits the tree into nclusters clusters; the
 * behaviour for invalid nclusters is not visible here.
 */
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);

/* Chapter 5 -- the somcluster declaration is currently commented out:
void CALL somcluster (int nrows, int ncolumns, float** data,
  const float weight[], int transpose, int nxnodes, int nynodes,
  float inittau, int niter, char dist, float*** celldata,
  int clusterid[][2]);
*/
/* Chapter 6 */

/*
 * svd operates on the matrices u and v and the vector w for dimensions m
 * and n, and reports a status code through *ierr.
 * NOTE(review): presumably a singular value decomposition with u as the
 * input matrix (overwritten), w the singular values and v the right
 * singular vectors; which *ierr value signals success must be confirmed
 * against the definition.
 */
void CALL svd(int m, int n, float** u, float w[], float** v, int* ierr);

/* Utility routines, currently undocumented */

/*
 * sort reads n values from data[] (never modified: it is const) and writes
 * its result into index[].
 * NOTE(review): presumably index[] receives the permutation that orders
 * data[] ascending -- confirm against the definition.
 */
void CALL sort(int n, const float data[], int index[]);

/*
 * mean returns a float computed from the n values in x[].
 */
float CALL mean(int n, float x[]);

/*
 * median returns a float computed from the n values in x[].
 * NOTE(review): x[] is not const, so the routine may reorder it -- confirm
 * before passing data whose order matters.
 */
float CALL median (int n, float x[]);

/*
 * calculate_weights returns a float* derived from the data matrix, the
 * given weights, a distance-measure code, a cutoff and an exponent.
 * NOTE(review): presumably the result is a callee-allocated array with one
 * weight per item that the caller must release; how cutoff and exponent
 * enter the computation is not visible in this header -- confirm against
 * the definition.
 */
float* calculate_weights(int nrows, int ncolumns, float** data,
  float weights[], int transpose, char dist, float cutoff, float exponent);

/*
 * setmetric maps the one-character code dist to a function pointer. The
 * returned function has the signature
 *   float f(int, float**, float**, const float[], int, int, int);
 * NOTE(review): presumably this is the distance function matching dist;
 * the meaning of its seven arguments and the result for an unknown code
 * are not visible in this header -- confirm against the definition.
 */
float(*setmetric(char dist))
  (int, float**, float**, const float[], int, int, int);
