/******************************************************************************/
/* The C Clustering Library.
 * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
 *
 * This library was written at the Laboratory of DNA Information Analysis,
 * Human Genome Center, Institute of Medical Science, University of Tokyo,
 * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
 * Contact: mdehoon 'AT' gsc.riken.jp
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation with or without modifications and for any purpose and
 * without fee is hereby granted, provided that any copyright notices
 * appear in all copies and that both those copyright notices and this
 * permission notice appear in supporting documentation, and that the
 * names of the contributors or copyright holders not be used in
 * advertising or publicity pertaining to distribution of the software
 * without specific prior permission.
 *
 * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
 * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

/*
 * Public declarations of the C Clustering Library: helper macros, the Node
 * type used to describe hierarchical-clustering trees, and the prototypes of
 * the library routines. Only declarations appear in this header.
 *
 * NOTE(review): the "Chapter N" labels below presumably refer to chapters of
 * the library's manual, which is where parameter semantics would be
 * described -- confirm against that manual.
 */

/*
 * Include guard. The Node typedef below names an anonymous struct, so
 * including this header twice in one translation unit would otherwise
 * redeclare Node with a different type and fail to compile.
 */
#ifndef C_CLUSTERING_LIBRARY_H
#define C_CLUSTERING_LIBRARY_H

/*
 * CALL decorates some of the prototypes below. It expands to nothing unless
 * the including code has already defined it.
 * NOTE(review): presumably meant for a calling-convention or export
 * specifier -- confirm with the build configuration.
 */
#ifndef CALL
# define CALL
#endif

/*
 * min/max are only defined when no macro of that name exists yet.
 * Both are plain macros: each argument can be evaluated twice, so do not
 * pass expressions with side effects (e.g. min(i++, j)).
 */
#ifndef min
#define min(x, y)	((x) < (y) ? (x) : (y))
#endif
#ifndef max
#define	max(x, y)	((x) > (y) ? (x) : (y))
#endif

/* The platform header is pulled in only when the build defines WINDOWS. */
#ifdef WINDOWS
#  include <windows.h>
#endif

unsigned int clust_seed(int seed);

/* Chapter 2 */
float CALL clusterdistance (int nrows, int ncolumns, float** data,
  float weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);
float** CALL distancematrix (int ngenes, int ndata, float** data,
  float* weight, char dist, int transpose);

/* Chapter 3 */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  float** data, int clusterid[], float** cdata,
  int transpose, char method);
void getclustermedoids(int nclusters, int nelements, float** distance,
  int clusterid[], int centroids[], float errors[]);
void CALL kcluster (int nclusters, int ngenes, int ndata, float** data,
  float weight[], int transpose, int npass, char method, char dist,
  int clusterid[], float* error, int* ifound);
void CALL kmedoids (int nclusters, int nelements, float** distance,
  int npass, int clusterid[], float* error, int* ifound);
int comp(const void *p, const void *q);

/* Chapter 4 */
typedef struct {int left; int right; float distance;} Node;
/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance
 * between the two subnodes that were joined.
 */

/*
 * NOTE(review): treecluster returns a Node pointer; who owns and releases
 * that array is not visible from this header -- confirm in the
 * implementation before relying on it.
 */
Node* CALL treecluster (int nrows, int ncolumns, float** data,
  float weight[], int transpose, char dist, char method, float** distmatrix);
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);

/* Chapter 5
void CALL somcluster (int nrows, int ncolumns, float** data,
  const float weight[], int transpose, int nxnodes, int nynodes,
  float inittau, int niter, char dist, float*** celldata,
  int clusterid[][2]);
*/
/* Chapter 6 */
void CALL svd(int m, int n, float** u, float w[], float** v, int* ierr);

/* Utility routines, currently undocumented */
void CALL sort(int n, const float data[], int index[]);
float CALL mean(int n, float x[]);
float CALL median (int n, float x[]);

float* calculate_weights(int nrows, int ncolumns, float** data,
  float weights[], int transpose, char dist, float cutoff, float exponent);

/*
 * setmetric takes a char and returns a pointer to a function with the
 * signature float (int, float**, float**, const float[], int, int, int).
 */
float(*setmetric(char dist))
  (int, float**, float**, const float[], int, int, int);

#endif /* C_CLUSTERING_LIBRARY_H */