1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4 
5 /* The Perl include files perl.h redefines malloc and free. Here, we need the
6  * usual malloc and free, defined in stdlib.h. So we undefine the ones in
7  * perl.h.
8  */
9 
10 #ifdef malloc
11 #undef malloc
12 #endif
13 #ifdef free
14 #undef free
15 #endif
16 
17 #include <stdlib.h>
18 
19 #include "../src/cluster.h"
20 
21 
22 typedef struct {Node* nodes; int n;} Tree;
23 
24 /* -------------------------------------------------
25  * Using the warnings registry, check to see if warnings
26  * are enabled for the Algorithm::Cluster module.
27  */
28 static int
warnings_enabled(pTHX)29 warnings_enabled(pTHX) {
30 
31     dSP;
32 
33     I32 count;
34     bool isEnabled;
35     SV * mysv;
36 
37     ENTER ;
38     SAVETMPS;
39     PUSHMARK(SP) ;
40     XPUSHs(sv_2mortal(newSVpv("Algorithm::Cluster",18)));
41     PUTBACK ;
42 
43     count = perl_call_pv("warnings::enabled", G_SCALAR) ;
44 
45     if (count != 1) croak("No arguments returned from call_pv()\n") ;
46 
47     mysv = POPs;
48     isEnabled = (bool) SvTRUE(mysv);
49 
50     PUTBACK ;
51     FREETMPS ;
52     LEAVE ;
53 
54     return isEnabled;
55 }
56 
57 /* -------------------------------------------------
58  * Create a row of doubles, initialized to a value
59  */
60 static double*
malloc_row_dbl(pTHX_ int ncols,double val)61 malloc_row_dbl(pTHX_ int ncols, double val) {
62 
63     int j;
64     double * row;
65 
66     row = malloc(ncols * sizeof(double) );
67     if (!row) {
68         return NULL;
69     }
70 
71     for (j = 0; j < ncols; j++) {
72         row[j] = val;
73     }
74     return row;
75 }
76 
77 /* -------------------------------------------------
78  * Only coerce to a double if we already know it's
79  * an integer or double, or a string which is actually numeric.
80  * Don't blindly run the macro SvNV, because that will coerce
81  * a non-numeric string to be a double of value 0.0,
82  * and we do not want that to happen, because if we test it again,
83  * it will then appear to be a valid double value.
84  */
85 static int
extract_double_from_scalar(pTHX_ SV * mysv,double * number)86 extract_double_from_scalar(pTHX_ SV * mysv, double * number) {
87 
88     if (SvPOKp(mysv) && SvLEN(mysv)) {
89 
90         /* This function is not in the public perl API */
91         if (Perl_looks_like_number(aTHX_ mysv)) {
92             *number = SvNV( mysv );
93             return 1;
94         } else {
95             return 0;
96         }
97     } else if (SvNIOK(mysv)) {
98         *number = SvNV( mysv );
99         return 1;
100     } else {
101         return 0;
102     }
103 }
104 
105 
106 
107 /* -------------------------------------------------
108  * Convert a Perl 2D matrix into a 2D matrix of C doubles.
109  * If no data are masked, mask can be passed as NULL.
110  * NOTE: on errors this function returns a value greater than zero.
111  */
112 static double**
parse_data(pTHX_ SV * matrix_ref,int ** mask)113 parse_data(pTHX_ SV * matrix_ref, int** mask) {
114 
115     AV * matrix_av;
116     SV * row_ref;
117     AV * row_av;
118     SV * cell;
119 
120     int type, i, j, nrows, ncols, n;
121 
122     double** matrix;
123 
124     /* NOTE -- we will just assume that matrix_ref points to an arrayref,
125      * and that the first item in the array is itself an arrayref.
126      * The calling perl functions must check this before we get this pointer.
127      * (It's easier to implement these checks in Perl rather than C.)
128      * The value of perl_rows is now fixed. But the value of
129      * rows will be decremented, if we skip any (invalid) Perl rows.
130      */
131     matrix_av  = (AV *) SvRV(matrix_ref);
132     nrows = (int) av_len(matrix_av) + 1;
133 
134     if(nrows <= 0) {
135         return NULL;
136     }
137     matrix   = malloc(nrows*sizeof(double*));
138     if (!matrix) {
139         return NULL;
140     }
141 
142     row_ref  = *(av_fetch(matrix_av, (I32) 0, 0));
143     row_av   = (AV *) SvRV(row_ref);
144     ncols    = (int) av_len(row_av) + 1;
145 
146 
147     /* ------------------------------------------------------------
148      * Loop once for each row in the Perl matrix, and convert it to
149      * C doubles.
150      */
151     for (i=0; i < nrows; i++) {
152 
153         row_ref = *(av_fetch(matrix_av, (I32) i, 0));
154 
155         if(! SvROK(row_ref) ) {
156 
157             if(warnings_enabled(aTHX))
158                 Perl_warn(aTHX_
159                     "Row %d: Wanted array reference, but "
160                     "got a scalar. No row to process?\n", i);
161             break;
162         }
163 
164         row_av = (AV *) SvRV(row_ref);
165         type = SvTYPE(row_av);
166 
167         /* Handle unexpected cases */
168         if(type != SVt_PVAV ) {
169 
170              /* Reference doesn't point to an array at all. */
171             if(warnings_enabled(aTHX))
172                 Perl_warn(aTHX_
173                     "Row %d: Wanted array reference, but got "
174                     "a reference to something else (%d)\n",
175                     i, type);
176             break;
177 
178         }
179 
180         n = (int) av_len(row_av) + 1;
181         if (n != ncols) {
182             /* All rows in the matrix should have the same
183              * number of columns. */
184             if(warnings_enabled(aTHX))
185                 Perl_warn(aTHX_
186                     "Row %d: Contains %d columns "
187                     "(expected %d)\n", i, n, ncols);
188             break;
189         }
190 
191         matrix[i] = malloc(ncols*sizeof(double));
192         if (!matrix[i])
193             break;
194 
195         /* Loop once for each cell in the row. */
196         for (j=0; j < ncols; j++) {
197 
198             double num;
199             if (!mask || mask[i][j]) {
200                 cell = *(av_fetch(row_av, (I32) j, 0));
201                 if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) {
202                     if(warnings_enabled(aTHX))
203                         Perl_warn(aTHX_
204                             "Row %d col %d: Value is not "
205                                                     "a number.\n", i, j);
206                     free(matrix[i]); /* not included below */
207                     break;
208                 }
209             }
210             else {
211                 /* Don't read the value if it is masked.
212                  * Set it to some arbitrary value. */
213                 num = 0.0;
214             }
215             matrix[i][j] = num;
216 
217         } /* End for (j=0; j < ncols; j++) */
218         if (j < ncols) break;
219 
220     } /* End for (i=0; i < nrows; i++) */
221 
222     if (i < nrows) { /* encountered a break */
223         nrows = i;
224         for (i = 0; i < nrows; i++) free(matrix[i]);
225         free(matrix);
226         matrix = NULL;
227     }
228 
229     return matrix;
230 }
231 
232 
233 /* -------------------------------------------------
234  * Convert a Perl 2D matrix into a 2D matrix of C ints.
235  * On errors this function returns a value greater than zero.
236  */
237 static int**
parse_mask(pTHX_ SV * matrix_ref)238 parse_mask(pTHX_ SV * matrix_ref) {
239 
240     AV * matrix_av;
241     SV * row_ref;
242     AV * row_av;
243     SV * cell;
244 
245     int type, i, j, nrows, ncols, n;
246 
247     int** matrix;
248 
249     /* NOTE -- we will just assume that matrix_ref points to an arrayref,
250      * and that the first item in the array is itself an arrayref.
251      * The calling perl functions must check this before we get this pointer.
252      * (It's easier to implement these checks in Perl rather than C.)
253      * The value of perl_rows is now fixed. But the value of
254      * rows will be decremented, if we skip any (invalid) Perl rows.
255      */
256     matrix_av = (AV *) SvRV(matrix_ref);
257     nrows = (int) av_len(matrix_av) + 1;
258 
259     if(nrows <= 0) {
260         return NULL;  /* Caller must handle this case!! */
261     }
262     matrix    = malloc(nrows * sizeof(int *) );
263     if (!matrix) {
264         return NULL;
265     }
266 
267     row_ref   = *(av_fetch(matrix_av, (I32) 0, 0));
268     row_av    = (AV *) SvRV(row_ref);
269     ncols     = (int) av_len(row_av) + 1;
270 
271 
272 
273     /* ------------------------------------------------------------
274      * Loop once for each row in the Perl matrix, and convert it to C ints.
275      */
276     for (i=0; i < nrows; i++) {
277 
278         row_ref = *(av_fetch(matrix_av, (I32) i, 0));
279 
280         if(! SvROK(row_ref) ) {
281 
282             if(warnings_enabled(aTHX))
283                 Perl_warn(aTHX_
284                     "Row %d: Wanted array reference, but "
285                     "got a scalar. No row to process?\n", i);
286             break;
287         }
288 
289         row_av = (AV *) SvRV(row_ref);
290         type = SvTYPE(row_av);
291 
292         /* Handle unexpected cases */
293         if(type != SVt_PVAV ) {
294 
295              /* Reference doesn't point to an array at all. */
296             if(warnings_enabled(aTHX))
297                 Perl_warn(aTHX_
298                     "Row %d: Wanted array reference, but got "
299                     "a reference to something else (%d)\n",
300                     i, type);
301             break;
302 
303         }
304 
305         n = (int) av_len(row_av) + 1;
306         if (n != ncols) {
307             /* All rows in the matrix should have the same
308              * number of columns. */
309             if(warnings_enabled(aTHX))
310                 Perl_warn(aTHX_
311                     "Row %d: Contains %d columns "
312                     "(expected %d)\n", i, n, ncols);
313             break;
314         }
315 
316         matrix[i] = malloc(ncols * sizeof(int) );
317         if (!matrix[i]) {
318             break;
319         }
320 
321         /* Loop once for each cell in the row. */
322         for (j=0; j < ncols; ++j) {
323             double num;
324             cell = *(av_fetch(row_av, (I32) j, 0));
325             if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) {
326                 if(warnings_enabled(aTHX))
327                     Perl_warn(aTHX_
328                         "Row %d col %d: Value is not "
329                         "a number.\n", i, j);
330                 free(matrix[i]); /* not included below */
331                 break;
332             }
333             matrix[i][j] = (int) num;
334 
335         } /* End for (j=0; j < ncols; j++) */
336         if (j < ncols) break;
337 
338     } /* End for (i=0; i < nrows; i++) */
339 
340     if (i < nrows) { /* break statement encountered */
341         nrows = i;
342         for (i = 0; i < nrows; i++) free(matrix[i]);
343         free(matrix);
344         matrix = NULL;
345     }
346 
347     return matrix;
348 }
349 
350 
351 /* -------------------------------------------------
352  *
353  */
354 static void
free_matrix_int(int ** matrix,int nrows)355 free_matrix_int(int ** matrix, int nrows) {
356 
357     int i;
358     for(i = 0; i < nrows; ++i ) {
359         free(matrix[i]);
360     }
361 
362     free(matrix);
363 }
364 
365 
366 /* -------------------------------------------------
367  *
368  */
369 static void
free_matrix_dbl(double ** matrix,int nrows)370 free_matrix_dbl(double ** matrix, int nrows) {
371 
372     int i;
373     for(i = 0; i < nrows; ++i ) {
374         free(matrix[i]);
375     }
376 
377     free(matrix);
378 }
379 
380 
381 /* -------------------------------------------------
382  *
383  */
384 static void
free_ragged_matrix_dbl(double ** matrix,int nrows)385 free_ragged_matrix_dbl(double ** matrix, int nrows) {
386 
387     int i;
388     for(i = 1; i < nrows; ++i ) {
389         free(matrix[i]);
390     }
391 
392     free(matrix);
393 }
394 
395 
396 /* -------------------------------------------------
397  * Convert a Perl array into an array of doubles
398  * On error, this function returns NULL.
399  */
400 static double*
malloc_row_perl2c_dbl(pTHX_ SV * input,int * np)401 malloc_row_perl2c_dbl (pTHX_ SV * input, int* np) {
402 
403     int i;
404     AV* array    = (AV *) SvRV(input);
405     const int n  = (int) av_len(array) + 1;
406     double* data = malloc(n * sizeof(double));
407     if (!data) {
408         return NULL;
409     }
410 
411     /* Loop once for each item in the Perl array, and convert
412          * it to a C double.
413      */
414     for (i=0; i < n; i++) {
415         double num;
416         SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
417         if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
418             data[i] = num;
419         } else {
420             /* Error reading data */
421             if (warnings_enabled(aTHX))
422                 Perl_warn(aTHX_
423                     "Error parsing array: item %d is not a number\n", i);
424             free(data);
425             return NULL;
426         }
427     }
428     if(np) *np = n;
429     return data;
430 }
431 
432 /* -------------------------------------------------
433  * Convert a Perl array into an array of ints
434  * On errors this function returns NULL.
435  */
436 static int*
malloc_row_perl2c_int(pTHX_ SV * input)437 malloc_row_perl2c_int (pTHX_ SV * input) {
438 
439     int i;
440 
441     AV* array = (AV *) SvRV(input);
442     const int n = (int) av_len(array) + 1;
443     int* data = malloc(n*sizeof(int));
444     if (!data) {
445         return NULL;
446     }
447 
448     /* Loop once for each item in the Perl array,
449      * and convert it to a C double.
450      */
451     for (i=0; i < n; i++) {
452         double num;
453         SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
454         if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
455             data[i] = (int) num;
456         } else {
457             /* Check if the item is numeric */
458             if (warnings_enabled(aTHX))
459                 Perl_warn(aTHX_ "Error when parsing array: item %d is"
460                     " not a number, skipping\n", i);
461             free(data);
462             return NULL;
463         }
464     }
465 
466     return data;
467 }
468 
469 /* -------------------------------------------------
470  * Copy a Perl array into an array of ints.
471  * If an error occurs, return 0; otherwise return 1.
472  */
473 static int
copy_row_perl2c_int(pTHX_ SV * input,int * output)474 copy_row_perl2c_int (pTHX_ SV * input, int* output) {
475 
476     int i;
477 
478     AV* array = (AV *) SvRV(input);
479     const int n = (int) av_len(array) + 1;
480 
481     /* Loop once for each item in the Perl array,
482      * and convert it to a C double.
483      */
484     for (i=0; i < n; i++) {
485         double num;
486         SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
487         if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
488             output[i] = (int) num;
489         } else {
490             /* Skip any items which are not numeric */
491             if (warnings_enabled(aTHX))
492                 Perl_warn(aTHX_
493                     "Error when parsing array: item %d is"
494                     " not a number\n", i);
495             return 0;
496         }
497     }
498     return 1;
499 }
500 /* -------------------------------------------------
501  *
502  */
503 static SV *
row_c2perl_dbl(pTHX_ double * row,int ncols)504 row_c2perl_dbl(pTHX_ double * row, int ncols) {
505 
506     int j;
507     AV * row_av = newAV();
508     for(j=0; j<ncols; ++j) {
509         av_push(row_av, newSVnv(row[j]));
510     }
511     return newRV_noinc((SV*) row_av);
512 }
513 
514 /* -------------------------------------------------
515  *
516  */
517 static SV*
row_c2perl_int(pTHX_ int * row,int ncols)518 row_c2perl_int(pTHX_ int * row, int ncols) {
519 
520     int j;
521     AV * row_av = newAV();
522     for(j=0; j<ncols; ++j) {
523         av_push(row_av, newSVnv(row[j]));
524     }
525     return ( newRV_noinc( (SV*) row_av ) );
526 }
527 
528 /* -------------------------------------------------
529  *
530  */
531 static SV*
matrix_c2perl_dbl(pTHX_ double ** matrix,int nrows,int ncols)532 matrix_c2perl_dbl(pTHX_ double ** matrix, int nrows, int ncols) {
533 
534     int i;
535     AV * matrix_av = newAV();
536     SV * row_ref;
537     for(i=0; i<nrows; ++i) {
538         row_ref = row_c2perl_dbl(aTHX_ matrix[i], ncols);
539         av_push(matrix_av, row_ref);
540     }
541     return ( newRV_noinc( (SV*) matrix_av ) );
542 }
543 
544 /* -------------------------------------------------
545  *
546  */
547 static SV*
matrix_c2perl_int(pTHX_ int ** matrix,int nrows,int ncols)548 matrix_c2perl_int(pTHX_ int ** matrix, int nrows, int ncols) {
549 
550     int i;
551     AV * matrix_av = newAV();
552     SV * row_ref;
553     for(i=0; i<nrows; ++i) {
554         row_ref = row_c2perl_int(aTHX_ matrix[i], ncols);
555         av_push(matrix_av, row_ref);
556     }
557     return ( newRV_noinc( (SV*) matrix_av ) );
558 }
559 
560 /* -------------------------------------------------
561  *
562  */
563 static SV*
ragged_matrix_c2perl_dbl(pTHX_ double ** matrix,int nobjects)564 ragged_matrix_c2perl_dbl(pTHX_ double ** matrix, int nobjects) {
565 
566     int i;
567     AV * matrix_av = newAV();
568     SV * row_ref;
569     for(i=0; i<nobjects; ++i) {
570         row_ref = row_c2perl_dbl(aTHX_ matrix[i], i);
571         av_push(matrix_av, row_ref);
572     }
573     return ( newRV_noinc( (SV*) matrix_av ) );
574 }
575 
576 /* -------------------------------------------------
577  * Check if the data matrix is a distance matrix, or
578  * a raw distance matrix.
579  */
580 static int
is_distance_matrix(pTHX_ SV * data_ref)581 is_distance_matrix(pTHX_ SV * data_ref)
582 {
583     /* We don't check data_ref because we expect the caller to check it
584      */
585     AV * matrix_av  = (AV *) SvRV(data_ref);
586     SV * row_ref    = *(av_fetch(matrix_av, (I32) 0, 0));
587     AV * row_av     = (AV *) SvRV(row_ref);
588     const int ncols = (int) av_len(row_av) + 1;
589     if (ncols==0) return 1;
590 
591     return 0;
592 }
593 
594 
595 /* -------------------------------------------------
596  * Convert the 'data' and 'mask' matrices and the 'weight' array
597  * from C to Perl.  Also check for errors, and bail out if there are any.
598  */
599 static int
malloc_matrices(pTHX_ SV * weight_ref,double ** weight,int nweights,SV * data_ref,double *** matrix,SV * mask_ref,int *** mask,int nrows,int ncols)600 malloc_matrices(pTHX_
601     SV *  weight_ref, double  ** weight, int nweights,
602     SV *  data_ref,   double *** matrix,
603     SV *  mask_ref,   int    *** mask,
604     int   nrows,      int        ncols
605 ) {
606 
607     if(SvROK(mask_ref) && SvTYPE(SvRV(mask_ref)) == SVt_PVAV) {
608         *mask = parse_mask(aTHX_ mask_ref);
609         if(*mask==NULL) return 0;
610     } else {
611         int i,j;
612         int** p = malloc(nrows*sizeof(int*));
613         if(!p) return 0;
614         for (i = 0; i < nrows; ++i) {
615             p[i] = malloc(ncols*sizeof(int));
616             if(!p[i]) {
617                 while(--i >= 0) free(p[i]);
618                 free(p);
619                 return 0;
620             }
621             for (j = 0; j < ncols; j++) p[i][j] = 1;
622         }
623         *mask = p;
624     }
625 
626     /* We don't check data_ref because we expect the caller to check it
627      */
628     *matrix = parse_data(aTHX_ data_ref, *mask);
629     if(*matrix==NULL) {
630         free_matrix_int(*mask,     nrows);
631         return 0;
632     }
633 
634     if(weight_ref==NULL) return 1; /* Weights not needed */
635     if(SvROK(weight_ref) && SvTYPE(SvRV(weight_ref)) == SVt_PVAV) {
636         *weight = malloc_row_perl2c_dbl(aTHX_ weight_ref, NULL);
637     } else {
638         *weight = malloc_row_dbl(aTHX_ nweights,1.0);
639     }
640 
641     if(!(*weight)) {
642         free_matrix_int(*mask,     nrows);
643         free_matrix_dbl(*matrix,   nrows);
644         return 0;
645     }
646 
647     return 1;
648 }
649 
650 static double**
parse_distance(pTHX_ SV * matrix_ref,int nobjects)651 parse_distance(pTHX_ SV* matrix_ref, int nobjects)
652 {
653     int i,j;
654 
655     AV* matrix_av  = (AV *) SvRV(matrix_ref);
656     double** matrix = malloc(nobjects*sizeof(double*));
657     if (!matrix) {
658         return NULL;
659     }
660 
661     matrix[0] = NULL;
662     for (i=1; i < nobjects; i++) {
663         SV* row_ref = *(av_fetch(matrix_av, (I32) i, 0));
664         AV* row_av  = (AV *) SvRV(row_ref);
665         matrix[i] = malloc(i * sizeof(double));
666         if (!matrix[i]) {
667             break;
668         }
669         /* Loop once for each cell in the row. */
670         for (j=0; j < i; j++) {
671             double num;
672             SV* cell = *(av_fetch(row_av, (I32) j, 0));
673             if(extract_double_from_scalar(aTHX_ cell,&num) > 0) {
674                 matrix[i][j] = num;
675             } else {
676                 if(warnings_enabled(aTHX))
677                     Perl_warn(aTHX_
678                         "Row %d col %d: Value is not "
679                                                 "a number.\n", i, j);
680                 break;
681             }
682         }
683     }
684 
685     if (i < nobjects) {
686         nobjects = i+1;
687         for (i = 1; i < nobjects; i++) free(matrix[i]);
688         free(matrix);
689         matrix = NULL;
690     }
691 
692     return matrix;
693 }
694 
695 /******************************************************************************/
696 /**                                                                          **/
697 /** XS code begins here                                                      **/
698 /**                                                                          **/
699 /******************************************************************************/
700 /******************************************************************************/
701 
702 MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Node
703 PROTOTYPES: ENABLE
704 
705 SV*
706 new (class, left, right, distance)
707     char* class
708     int left
709     int right
710     double distance
711     PREINIT:
712     Node* node;
713     SV* obj;
714     CODE:
715     node = malloc(sizeof(Node));
716     RETVAL = newSViv(0);
717     obj = newSVrv(RETVAL, class);
718     node->left = left;
719     node->right = right;
720     node->distance = distance;
721 
722     sv_setiv(obj, PTR2IV(node));
723     SvREADONLY_on(obj);
724     OUTPUT:
725     RETVAL
726 
727 
728 int
729 left (obj)
730     SV* obj
731     CODE:
732     RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->left;
733     OUTPUT:
734     RETVAL
735 
736 int
737 right (obj)
738     SV* obj
739     CODE:
740     RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->right;
741     OUTPUT:
742     RETVAL
743 
744 double
745 distance (obj)
746     SV* obj
747     CODE:
748     RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->distance;
749     OUTPUT:
750     RETVAL
751 
752 void
set_left(obj,left)753 set_left (obj, left)
754     SV* obj
755     int left
756     PREINIT:
757     Node* node;
758     CODE:
759     if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
760         croak("set_left should be applied to an Algorithm::Cluster::Node object");
761     }
762     node = INT2PTR(Node*,SvIV(SvRV(obj)));
763     node->left = left;
764 
765 void
set_right(obj,right)766 set_right (obj, right)
767     SV* obj
768     int right
769     PREINIT:
770     Node* node;
771     CODE:
772     if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
773         croak("set_right should be applied to an Algorithm::Cluster::Node object");
774     }
775     node = INT2PTR(Node*,SvIV(SvRV(obj)));
776     node->right = right;
777 
778 void
set_distance(obj,distance)779 set_distance (obj, distance)
780     SV* obj
781     double distance
782     PREINIT:
783     Node* node;
784     CODE:
785     if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
786         croak("set_distance should be applied to an Algorithm::Cluster::Node object");
787     }
788     node = INT2PTR(Node*,SvIV(SvRV(obj)));
789     node->distance = distance;
790 
791 void DESTROY (obj)
792     SV* obj
793     PREINIT:
794     I32* temp;
795     Node* node;
796     PPCODE:
797     temp = PL_markstack_ptr++;
798     node = INT2PTR(Node*, SvIV(SvRV(obj)));
799     free(node);
800     if (PL_markstack_ptr != temp) {
801         /* truly void, because dXSARGS not invoked */
802         PL_markstack_ptr = temp;
803         XSRETURN_EMPTY;
804         /* return empty stack */
805     }  /* must have used dXSARGS; list context implied */
806     return;  /* assume stack size is correct */
807 
808 
809 MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Tree
810 PROTOTYPES: ENABLE
811 
812 SV*
813 new (class, nodes)
814     char* class
815     SV* nodes
816 
817     PREINIT:
818     Tree* tree;
819     SV* obj;
820     int i;
821     int n;
822     AV* array;
823     int* flag;
824 
825     CODE:
826     if(!SvROK(nodes) || SvTYPE(SvRV(nodes)) != SVt_PVAV) {
827         croak("Algorithm::Cluster::Tree::new expects an array of nodes\n");
828     }
829     array = (AV *) SvRV(nodes);
830     n = (int) av_len(array) + 1;
831     tree = malloc(sizeof(Tree));
832     if (tree) {
833         tree->n = n;
834         tree->nodes = malloc(n*sizeof(Node));
835     }
836     if (! tree || !tree->nodes) {
837         if (tree) free(tree);
838         croak("Algorithm::Cluster::Tree::new memory error\n");
839     }
840 
841     for (i = 0; i < n; i++) {
842         Node* node;
843         SV* node_ref = *(av_fetch(array, (I32) i, 0));
844         if (!sv_isa(node_ref, "Algorithm::Cluster::Node")) break;
845         node = INT2PTR(Node*,SvIV(SvRV(node_ref)));
846         tree->nodes[i].left = node->left;
847         tree->nodes[i].right = node->right;
848         tree->nodes[i].distance = node->distance;
849     }
850 
851     if (i < n) {
852         /* break encountered */
853         free(tree->nodes);
854         free(tree);
855         croak("Algorithm::Cluster::Tree::new expects an array of nodes\n");
856     }
857 
858     flag = malloc((2*n+1)*sizeof(int));
859     if(flag) {
860          int j;
861         for (i = 0; i < 2*n+1; i++) flag[i] = 0;
862         for (i = 0; i < n; i++) {
863             j = tree->nodes[i].left;
864             if (j < 0) {
865                 j = -j-1;
866                 if (j>=i) break;
867             }
868             else j+=n;
869             if (flag[j]) break;
870             flag[j] = 1;
871             j = tree->nodes[i].right;
872             if (j < 0) {
873                 j = -j-1;
874                 if (j>=i) break;
875             }
876             else j+=n;
877             if (flag[j]) break;
878             flag[j] = 1;
879         }
880         free(flag);
881     }
882 
883     if (!flag || i < n) {
884         /* break encountered */
885         free(tree->nodes);
886         free(tree);
887         croak("the array of nodes passed to Algorithm::Cluster::Tree::new does not represent a valid tree\n");
888     }
889 
890     RETVAL = newSViv(0);
891     obj = newSVrv(RETVAL, class);
892     sv_setiv(obj, PTR2IV(tree));
893     SvREADONLY_on(obj);
894 
895     OUTPUT:
896     RETVAL
897 
898 int
899 length (obj)
900     SV* obj
901     CODE:
902     RETVAL = (INT2PTR(Tree*,SvIV(SvRV(obj))))->n;
903     OUTPUT:
904     RETVAL
905 
906 
907 SV *
908 get (obj, index)
909     SV* obj
910     int index
911     PREINIT:
912     Tree* tree;
913     Node* node;
914     SV* scalar;
915     CODE:
916     tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
917     if (index < 0 || index >= tree->n) {
918         croak("Index out of bounds in Algorithm::Cluster::Tree::get\n");
919     }
920     RETVAL = newSViv(0);
921     scalar = newSVrv(RETVAL, "Algorithm::Cluster::Node");
922     node = malloc(sizeof(Node));
923     if (!node) {
924         croak("Memory allocation failure in Algorithm::Cluster::Tree::get\n");
925     }
926     node->left = tree->nodes[index].left;
927     node->right = tree->nodes[index].right;
928     node->distance = tree->nodes[index].distance;
929     sv_setiv(scalar, PTR2IV(node));
930     SvREADONLY_on(scalar);
931     OUTPUT:
932     RETVAL
933 
934 void
935 scale(obj)
936     SV* obj
937     PREINIT:
938     int i;
939     int n;
940     Tree* tree;
941     Node* nodes;
942     double maximum;
943     CODE:
944     if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
945         croak("scale can only be applied to an Algorithm::Cluster::Tree object");
946     }
947     tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
948     n = tree->n;
949     nodes = tree->nodes;
950     maximum = DBL_MIN;
951     for (i = 0; i < n; i++) {
952         double distance = nodes[i].distance;
953         if (distance > maximum) maximum = distance;
954     }
955     if (maximum!=0.0) {
956         for (i = 0; i < n; i++) nodes[i].distance /= maximum;
957     }
958 
959 
960 void
961 sort(obj, order = NULL)
962     SV* obj
963     SV* order
964 
965     PREINIT:
966     int i;
967     int n;
968     Tree* tree;
969     int* indices;
970     double* values = NULL;
971     int ok;
972 
973     PPCODE:
974     if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
975         croak("sort can only be applied to an Algorithm::Cluster::Tree object");
976     }
977     tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
978     if (order) {
979         if(!SvROK(order) || SvTYPE(SvRV(order)) != SVt_PVAV) {
980             croak("Algorithm::Cluster::Tree::sort expects an order array\n");
981         }
982         values = malloc_row_perl2c_dbl(aTHX_ order, &n);
983         if (!values) {
984             croak("Algorithm::Cluster::Tree::sort memory error\n");
985         }
986         if (n != tree->n + 1) {
987             free(values);
988             croak("sort: size of order array is inconsistent with tree size\n");
989         }
990     }
991     else {
992         n = tree->n + 1;
993     }
994     indices = malloc(n*sizeof(int));
995     if (!indices) {
996         if(values) free(values);
997         croak("sort: insufficient memory");
998     }
999     /* --------------------------------------------------------------- */
1000     ok = sorttree(tree->n, tree->nodes, values, indices);
1001     if(values) free(values);
1002     /* -- Check for errors flagged by the C routine ------------------ */
1003     if (!ok) {
1004         free(indices);
1005         croak("sort: Error in the sorttree routine");
1006     }
1007     for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(indices[i])));
1008     free(indices);
1009 
1010 void
1011 cut(obj, nclusters=0)
1012     SV* obj
1013     int nclusters
1014     PREINIT:
1015     int ok;
1016     int i;
1017     int n;
1018     Tree* tree;
1019     int* clusterid;
1020     PPCODE:
1021     if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
1022         croak("cut can only be applied to an Algorithm::Cluster::Tree object\n");
1023     }
1024     tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
1025     n = tree->n + 1;
1026     if (nclusters < 0) {
1027         croak("cut: Requested number of clusters should be positive\n");
1028     }
1029     if (nclusters > n) {
1030         croak("cut: More clusters requested than items available\n");
1031     }
1032     if (nclusters == 0) {
1033         nclusters = n;
1034     }
1035     clusterid = malloc(n*sizeof(int));
1036     if (!clusterid) {
1037         croak("cut: Insufficient memory\n");
1038     }
1039     ok = cuttree(n, tree->nodes, nclusters, clusterid);
1040     if (!ok) {
1041         free(clusterid);
1042         croak("cut: Error in the cuttree routine\n");
1043     }
1044     for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(clusterid[i])));
1045     free(clusterid);
1046 
1047 
1048 void DESTROY (obj)
1049     SV* obj
1050     PREINIT:
1051     I32* temp;
1052     Tree* tree;
1053     PPCODE:
1054     temp = PL_markstack_ptr++;
1055     tree = INT2PTR(Tree*, SvIV(SvRV(obj)));
1056     free(tree->nodes);
1057     free(tree);
1058     if (PL_markstack_ptr != temp) {
1059         /* truly void, because dXSARGS not invoked */
1060         PL_markstack_ptr = temp;
1061         XSRETURN_EMPTY;
1062         /* return empty stack */
1063     }  /* must have used dXSARGS; list context implied */
1064     return;  /* assume stack size is correct */
1065 
1066 
1067 MODULE = Algorithm::Cluster    PACKAGE = Algorithm::Cluster
1068 PROTOTYPES: ENABLE
1069 
1070 
1071 SV *
1072 _version()
1073     CODE:
1074     RETVAL = newSVpv( CLUSTERVERSION , 0);
1075 
1076     OUTPUT:
1077     RETVAL
1078 
1079 
1080 
1081 SV *
1082 _mean(input)
1083     SV * input;
1084 
1085     PREINIT:
1086     int array_length;
1087     double * data;  /* one-dimensional array of doubles */
1088 
1089     CODE:
1090     if(SvTYPE(SvRV(input)) != SVt_PVAV) {
1091         XSRETURN_UNDEF;
1092     }
1093 
1094     data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
1095     if (data) {
1096         RETVAL = newSVnv( mean(array_length, data) );
1097         free(data);
1098     } else {
1099         croak("memory allocation failure in _mean\n");
1100     }
1101 
1102     OUTPUT:
1103     RETVAL
1104 
1105 
1106 SV *
1107 _median(input)
1108     SV * input;
1109 
1110     PREINIT:
1111     int array_length;
1112     double * data;  /* one-dimensional array of doubles */
1113 
1114     CODE:
1115     if(SvTYPE(SvRV(input)) != SVt_PVAV) {
1116         XSRETURN_UNDEF;
1117     }
1118 
1119     data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
1120     if (data) {
1121         RETVAL = newSVnv( median(array_length, data) );
1122         free(data);
1123     } else {
1124         croak("memory allocation failure in _median\n");
1125     }
1126 
1127     OUTPUT:
1128     RETVAL
1129 
1130 
1131 SV *
1132 _treecluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist,method)
1133     int      nrows;
1134     int      ncols;
1135     SV *     data_ref;
1136     SV *     mask_ref;
1137     SV *     weight_ref;
1138     int      transpose;
1139     char *   dist;
1140     char *   method;
1141 
1142     PREINIT:
1143     Node*    nodes;
1144 
1145     double  * weight = NULL;
1146     double ** matrix = NULL;
1147     int    ** mask   = NULL;
1148     double ** distancematrix = NULL;
1149     const int ndata = transpose ? nrows : ncols;
1150     const int nelements = transpose ? ncols : nrows;
1151 
1152     CODE:
1153     /* ------------------------
1154      * Don't check the parameters, because we rely on the Perl
1155      * caller to check most paramters.
1156      */
1157     /* ------------------------
1158      * Convert data and mask matrices and the weight array
1159      * from C to Perl.  Also check for errors, and ignore the
1160      * mask or the weight array if there are any errors.
1161      */
1162     if (is_distance_matrix(aTHX_ data_ref)) {
1163         distancematrix = parse_distance(aTHX_ data_ref, nelements);
1164         if (!distancematrix) {
1165                 croak("memory allocation failure in _treecluster\n");
1166         }
1167     } else {
1168         int ok;
1169         ok = malloc_matrices(aTHX_ weight_ref, &weight, ndata,
1170                     data_ref,   &matrix,
1171                     mask_ref,   &mask,
1172                     nrows,      ncols);
1173         if (!ok) {
1174             croak("failed to read input data for _treecluster\n");
1175         }
1176     }
1177 
1178     /* ------------------------
1179      * Run the library function
1180      */
1181     nodes = treecluster(nrows, ncols, matrix, mask, weight, transpose,
1182                 dist[0], method[0], distancematrix);
1183 
1184     /* ------------------------
1185      * Check result to make sure we didn't run into memory problems
1186      */
1187     if(!nodes) {
1188         /* treecluster failed due to insufficient memory */
1189         if (matrix) {
1190             free_matrix_int(mask,     nrows);
1191             free_matrix_dbl(matrix,   nrows);
1192             free(weight);
1193         } else {
1194             free_ragged_matrix_dbl(distancematrix, nelements);
1195         }
1196         croak("memory allocation failure in treecluster\n");
1197     }
1198     else {
1199 
1200         /* ------------------------
1201           * Convert generated C matrices to Perl matrices
1202           */
1203         const int n = nelements-1;
1204         int i;
1205         SV* obj;
1206         Tree* tree;
1207         RETVAL = newSViv(0);
1208         obj = newSVrv(RETVAL, "Algorithm::Cluster::Tree");
1209         tree = malloc(sizeof(Tree));
1210         if (!tree)
1211             croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
1212         tree->n = n;
1213         tree->nodes = malloc(n*sizeof(Node));
1214         if (!tree->nodes) {
1215             free(tree);
1216             croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
1217         }
1218         sv_setiv(obj, PTR2IV(tree));
1219         SvREADONLY_on(obj);
1220         for(i=0; i<n; i++) {
1221             tree->nodes[i].left = nodes[i].left;
1222             tree->nodes[i].right = nodes[i].right;
1223             tree->nodes[i].distance = nodes[i].distance;
1224         }
1225         free(nodes);
1226     }
1227 
1228     /* ------------------------
1229      * Free what we've malloc'ed
1230      */
1231     if (matrix) {
1232         free_matrix_int(mask,     nrows);
1233         free_matrix_dbl(matrix,   nrows);
1234         free(weight);
1235     } else {
1236         free_ragged_matrix_dbl(distancematrix, nelements);
1237     }
1238 
1239     /* Finished _treecluster() */
1240     OUTPUT:
1241     RETVAL
1242 
1243 
1244 void
1245 _kcluster(nclusters,nrows,ncols,data_ref,mask_ref,weight_ref,transpose,npass,method,dist,initialid_ref)
1246     int      nclusters;
1247     int      nrows;
1248     int      ncols;
1249     SV *     data_ref;
1250     SV *     mask_ref;
1251     SV *     weight_ref;
1252     int      transpose;
1253     int      npass;
1254     char *   method;
1255     char *   dist;
1256     SV *     initialid_ref;
1257 
1258     PREINIT:
1259     SV  *    clusterid_ref;
1260     int *    clusterid;
1261     int      nobjects;
1262     int      ndata;
1263     double   error;
1264     int      ifound;
1265     int      ok;
1266 
1267     double  * weight;
1268     double ** matrix;
1269     int    ** mask;
1270 
1271 
1272     PPCODE:
1273     /* ------------------------
1274      * Don't check the parameters, because we rely on the Perl
1275      * caller to check most parameters.
1276      */
1277 
1278     /* ------------------------
1279      * Malloc space for the return values from the library function
1280      */
1281     if (transpose==0) {
1282         nobjects = nrows;
1283         ndata = ncols;
1284     } else {
1285         nobjects = ncols;
1286         ndata = nrows;
1287     }
1288     clusterid = malloc(nobjects * sizeof(int) );
1289     if (!clusterid) {
1290         croak("memory allocation failure in _kcluster\n");
1291     }
1292 
1293     /* ------------------------
1294      * Convert data and mask matrices and the weight array
1295      * from C to Perl.  Also check for errors, and ignore the
1296      * mask or the weight array if there are any errors.
1297      */
1298     ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata,
1299                 data_ref,   &matrix,
1300                 mask_ref,   &mask,
1301                 nrows,      ncols);
1302     if (!ok) {
1303         free(clusterid);
1304         croak("failed to read input data for _kcluster\n");
1305     }
1306 
1307     /* ------------------------
1308      * Copy initialid to clusterid, if needed
1309      */
1310 
1311     if (npass==0) {
1312         copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
1313     }
1314 
1315     /* ------------------------
1316      * Run the library function
1317      */
1318     kcluster(
1319         nclusters, nrows, ncols,
1320         matrix, mask, weight, transpose,
1321         npass, method[0], dist[0], clusterid,  &error, &ifound
1322 
1323     );
1324 
1325     /* ------------------------
1326      * Convert generated C matrices to Perl matrices
1327      */
1328     clusterid_ref =    row_c2perl_int(aTHX_ clusterid, nobjects);
1329 
1330     /* ------------------------
1331      * Push the new Perl matrices onto the return stack
1332      */
1333     XPUSHs(sv_2mortal( clusterid_ref   ));
1334     XPUSHs(sv_2mortal( newSVnv(error) ));
1335     XPUSHs(sv_2mortal( newSViv(ifound) ));
1336 
1337     /* ------------------------
1338      * Free what we've malloc'ed
1339      */
1340     free(clusterid);
1341     free_matrix_int(mask,     nrows);
1342     free_matrix_dbl(matrix,   nrows);
1343     free(weight);
1344 
1345     /* Finished _kcluster() */
1346 
1347 
1348 
1349 void
1350 _kmedoids(nclusters,nobjects,distancematrix_ref,npass,initialid_ref)
1351     int      nclusters;
1352     int      nobjects;
1353     SV *     distancematrix_ref;
1354     int      npass;
1355     SV *     initialid_ref;
1356 
1357 
1358     PREINIT:
1359     double** distancematrix;
1360     SV  *    clusterid_ref;
1361     int *    clusterid;
1362     double   error;
1363     int      ifound;
1364 
1365 
1366 
1367     PPCODE:
1368     /* ------------------------
1369      * Don't check the parameters, because we rely on the Perl
1370      * caller to check most parameters.
1371      */
1372 
1373     /* ------------------------
1374      * Malloc space for the return values from the library function
1375      */
1376     clusterid = malloc(nobjects * sizeof(int));
1377     if (!clusterid) {
1378         croak("memory allocation failure in _kmedoids\n");
1379     }
1380 
1381     /* ------------------------
1382      * Convert data and mask matrices and the weight array
1383      * from C to Perl.  Also check for errors, and ignore the
1384      * mask or the weight array if there are any errors.
1385      */
1386     distancematrix = parse_distance(aTHX_ distancematrix_ref, nobjects);
1387     if (!distancematrix) {
1388         free(clusterid);
1389             croak("failed to allocate memory for distance matrix in _kmedoids\n");
1390     }
1391 
1392     /* ------------------------
1393      * Copy initialid to clusterid, if needed
1394      */
1395 
1396     if (npass==0) {
1397         copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
1398     }
1399 
1400     /* ------------------------
1401      * Run the library function
1402      */
1403     kmedoids(
1404         nclusters, nobjects,
1405         distancematrix, npass, clusterid,
1406         &error, &ifound
1407     );
1408 
1409     if(ifound==-1) {
1410         free(clusterid);
1411         free_ragged_matrix_dbl(distancematrix, nobjects);
1412             croak("memory allocation failure in _kmedoids\n");
1413     }
1414     else if(ifound==0) {
1415         free(clusterid);
1416         free_ragged_matrix_dbl(distancematrix, nobjects);
1417             croak("error in input arguments in kmedoids\n");
1418     }
1419     else {
1420 
1421         /* ------------------------
1422          * Convert generated C matrices to Perl matrices
1423          */
1424         clusterid_ref =    row_c2perl_int(aTHX_ clusterid, nobjects);
1425 
1426         /* ------------------------
1427          * Push the new Perl matrices onto the return stack
1428          */
1429         XPUSHs(sv_2mortal( clusterid_ref   ));
1430         XPUSHs(sv_2mortal( newSVnv(error) ));
1431         XPUSHs(sv_2mortal( newSViv(ifound) ));
1432 
1433     }
1434     /* ------------------------
1435      * Free what we've malloc'ed
1436      */
1437     free(clusterid);
1438     free_ragged_matrix_dbl(distancematrix, nobjects);
1439 
1440     /* Finished _kmedoids() */
1441 
1442 
1443 
1444 double
1445 _clusterdistance(nrows,ncols,data_ref,mask_ref,weight_ref,cluster1_len,cluster2_len,cluster1_ref,cluster2_ref,dist,method,transpose)
1446     int      nrows;
1447     int      ncols;
1448     SV *     data_ref;
1449     SV *     mask_ref;
1450     SV *     weight_ref;
1451     int      cluster1_len;
1452     int      cluster2_len;
1453     SV *     cluster1_ref;
1454     SV *     cluster2_ref;
1455     char *   dist;
1456     char *   method;
1457     int      transpose;
1458 
1459     PREINIT:
1460     int   nweights;
1461 
1462     int     * cluster1;
1463     int     * cluster2;
1464 
1465     double  * weight;
1466     double ** matrix;
1467     int    ** mask;
1468 
1469     double distance;
1470 
1471     int ok;
1472 
1473     CODE:
1474 
1475     /* ------------------------
1476      * Don't check the parameters, because we rely on the Perl
1477      * caller to check most paramters.
1478      */
1479 
1480     /* ------------------------
1481      * Convert cluster index Perl arrays to C arrays
1482      */
1483     cluster1 = malloc_row_perl2c_int(aTHX_ cluster1_ref);
1484     cluster2 = malloc_row_perl2c_int(aTHX_ cluster2_ref);
1485     if (!cluster1 || !cluster2) {
1486         if (cluster1) free(cluster1);
1487         if (cluster2) free(cluster2);
1488         croak("memory allocation failure in _clusterdistance\n");
1489     }
1490 
1491     /* ------------------------
1492      * Convert data and mask matrices and the weight array
1493      * from C to Perl.  Also check for errors, and ignore the
1494      * mask or the weight array if there are any errors.
1495      * Set nweights to the correct number of weights.
1496      */
1497     nweights = (transpose==0) ? ncols : nrows;
1498     ok = malloc_matrices( aTHX_ weight_ref, &weight, nweights,
1499                 data_ref,   &matrix,
1500                 mask_ref,   &mask,
1501                 nrows,      ncols);
1502     if (!ok) {
1503         free(cluster1);
1504         free(cluster2);
1505             croak("failed to read input data for _clusterdistance\n");
1506     }
1507 
1508     /* ------------------------
1509      * Run the library function
1510      */
1511     distance = clusterdistance(
1512         nrows, ncols,
1513         matrix, mask, weight,
1514         cluster1_len, cluster2_len, cluster1, cluster2,
1515         dist[0], method[0], transpose
1516     );
1517 
1518     RETVAL = distance;
1519 
1520     /* ------------------------
1521      * Free what we've malloc'ed
1522      */
1523     free_matrix_int(mask,     nrows);
1524     free_matrix_dbl(matrix,   nrows);
1525     free(weight);
1526     free(cluster1);
1527     free(cluster2);
1528 
1529     /* Finished _clusterdistance() */
1530 
1531     OUTPUT:
1532     RETVAL
1533 
1534 
1535 
1536 void
1537 _clustercentroids(nclusters,nrows,ncols,data_ref,mask_ref,clusterid_ref,transpose,method)
1538     int      nclusters;
1539     int      nrows;
1540     int      ncols;
1541     SV *     data_ref;
1542     SV *     mask_ref;
1543     SV *     clusterid_ref;
1544     int      transpose;
1545     char *   method;
1546 
1547     PREINIT:
1548     SV  *    cdata_ref;
1549     SV  *    cmask_ref;
1550     int     * clusterid;
1551 
1552     double ** matrix;
1553     int    ** mask;
1554 
1555     double ** cdata;
1556     int    ** cmask;
1557     int       cnrows = 0; /* Initialize to make the compiler shut up */
1558     int       cncols = 0; /* Initialize to make the compiler shut up */
1559 
1560     int i;
1561     int ok;
1562 
1563     PPCODE:
1564 
1565     /* ------------------------
1566      * Don't check the parameters, because we rely on the Perl
1567      * caller to check most paramters.
1568      */
1569         if (transpose==0)
1570         {    cnrows = nclusters;
1571              cncols = ncols;
1572         }
1573         else if (transpose==1)
1574         {    cnrows = nrows;
1575              cncols = nclusters;
1576         }
1577 
1578     /* ------------------------
1579      * Convert cluster index Perl arrays to C arrays
1580      */
1581     clusterid = malloc_row_perl2c_int(aTHX_ clusterid_ref);
1582     if (!clusterid) {
1583         croak("memory allocation failure in _clustercentroids\n");
1584     }
1585 
1586     /* ------------------------
1587      * Convert data and mask matrices and the weight array
1588      * from C to Perl.  Also check for errors, and ignore the
1589      * mask or the weight array if there are any errors.
1590      * Set nweights to the correct number of weights.
1591      */
1592     ok = malloc_matrices( aTHX_ NULL, NULL, 0,
1593                 data_ref,   &matrix,
1594                 mask_ref,   &mask,
1595                 nrows,      ncols);
1596     if (!ok) {
1597         free(clusterid);
1598             croak("failed to read input data for _clustercentroids\n");
1599     }
1600 
1601 
1602     /* ------------------------
1603      * Create the output variables cdata and cmask.
1604      */
1605     i = 0;
1606     cdata = malloc(cnrows * sizeof(double*));
1607     cmask = malloc(cnrows * sizeof(int*));
1608     if (cdata && cmask) {
1609         for ( ; i < cnrows; i++) {
1610             cdata[i] = malloc(cncols*sizeof(double));
1611             cmask[i] = malloc(cncols*sizeof(int));
1612             if (!cdata[i] || !cmask[i]) break;
1613         }
1614     }
1615     if (i < cnrows)
1616     {
1617         if (cdata[i]) free(cdata[i]);
1618         if (cmask[i]) free(cmask[i]);
1619         while (--i >= 0) {
1620             free(cdata[i]);
1621             free(cmask[i]);
1622         }
1623         if (cdata) free(cdata);
1624         if (cmask) free(cmask);
1625         free(clusterid);
1626         free_matrix_int(mask,     nrows);
1627         free_matrix_dbl(matrix,   nrows);
1628         croak("memory allocation failure in _clustercentroids\n");
1629     }
1630 
1631     /* ------------------------
1632      * Run the library function
1633      */
1634     ok = getclustercentroids(
1635                nclusters, nrows, ncols,
1636                matrix, mask, clusterid,
1637                cdata, cmask, transpose, method[0]);
1638 
1639     if (ok) {
1640             /* ------------------------
1641              * Convert generated C matrices to Perl matrices
1642              */
1643             cdata_ref = matrix_c2perl_dbl(aTHX_ cdata, cnrows, cncols);
1644             cmask_ref = matrix_c2perl_int(aTHX_ cmask, cnrows, cncols);
1645 
1646             /* ------------------------
1647              * Push the new Perl matrices onto the return stack
1648              */
1649             XPUSHs(sv_2mortal( cdata_ref   ));
1650             XPUSHs(sv_2mortal( cmask_ref   ));
1651     }
1652 
1653     /* ------------------------
1654      * Free what we've malloc'ed
1655      */
1656     free_matrix_int(mask,     nrows);
1657     free_matrix_dbl(matrix,   nrows);
1658     free_matrix_int(cmask,    cnrows);
1659     free_matrix_dbl(cdata,    cnrows);
1660     free(clusterid);
1661 
1662     if (!ok) {
1663         croak("memory allocation failure in _clustercentroids\n");
1664     }
1665     /* Finished _clustercentroids() */
1666 
1667 void
1668 _distancematrix(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist)
1669     int      nrows;
1670     int      ncols;
1671     SV *     data_ref;
1672     SV *     mask_ref;
1673     SV *     weight_ref;
1674     int      transpose;
1675     char *   dist;
1676 
1677     PREINIT:
1678     SV  *    matrix_ref;
1679     int      nobjects;
1680     int      ndata;
1681 
1682     double ** data;
1683     int    ** mask;
1684     double  * weight;
1685     double ** matrix;
1686 
1687     int       i;
1688     int       ok;
1689 
1690 
1691     PPCODE:
1692     /* ------------------------
1693      * Don't check the parameters, because we rely on the Perl
1694      * caller to check most parameters.
1695      */
1696 
1697     /* ------------------------
1698      * Malloc space for the return values from the library function
1699      */
1700         if (transpose==0) {
1701         nobjects = nrows;
1702         ndata = ncols;
1703     } else {
1704         nobjects = ncols;
1705         ndata = nrows;
1706     }
1707 
1708     /* ------------------------
1709      * Convert data and mask matrices and the weight array
1710      * from C to Perl.  Also check for errors, and ignore the
1711      * mask or the weight array if there are any errors.
1712      */
1713     ok = malloc_matrices( aTHX_
1714         weight_ref, &weight, ndata,
1715         data_ref,   &data,
1716         mask_ref,   &mask,
1717         nrows,      ncols
1718     );
1719     if (!ok) {
1720         croak("failed to read input data for _distancematrix");
1721     }
1722 
1723     /* Set up the ragged array */
1724     matrix = malloc(nobjects*sizeof(double*));
1725     if (matrix) {
1726         matrix[0] = NULL;
1727         for (i = 1; i < nobjects; i++) {
1728             matrix[i] = malloc(i*sizeof(double));
1729             if (matrix[i]==NULL) {
1730                 while (--i >= 0) free(matrix[i]);
1731                 free(matrix);
1732                 matrix = NULL;
1733                 break;
1734             }
1735         }
1736     }
1737     if (!matrix) {
1738         free_matrix_int(mask, nrows);
1739         free_matrix_dbl(data, nrows);
1740         free(weight);
1741         croak("failed to allocate memory for distance matrix");
1742     }
1743 
1744 
1745 
1746     /* ------------------------
1747      * Run the library function
1748      */
1749     distancematrix(nrows,
1750                    ncols,
1751                    data,
1752                    mask,
1753                    weight,
1754                    dist[0],
1755                    transpose,
1756                    matrix);
1757 
1758     /* ------------------------
1759      * Convert generated C matrices to Perl matrices
1760      */
1761     matrix_ref  = ragged_matrix_c2perl_dbl(aTHX_ matrix,  nobjects);
1762 
1763     /* ------------------------
1764      * Push the new Perl matrices onto the return stack
1765      */
1766     XPUSHs(sv_2mortal(matrix_ref));
1767 
1768     /* ------------------------
1769      * Free what we've malloc'ed
1770      */
1771     free_ragged_matrix_dbl(matrix, nobjects);
1772     free_matrix_int(mask, nrows);
1773     free_matrix_dbl(data, nrows);
1774     free(weight);
1775 
1776     /* Finished _distancematrix() */
1777 
1778 
1779 void
1780 _somcluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,nxgrid,nygrid,inittau,niter,dist)
1781     int      nrows;
1782     int      ncols;
1783     SV *     data_ref;
1784     SV *     mask_ref;
1785     SV *     weight_ref;
1786     int      transpose;
1787     int      nxgrid;
1788     int      nygrid;
1789     double   inittau;
1790     int      niter;
1791     char *   dist;
1792 
1793     PREINIT:
1794     int      (*clusterid)[2];
1795     SV *  clusterid_ref;
1796     double*** celldata;
1797 
1798     double  * weight;
1799     double ** matrix;
1800     int    ** mask;
1801 
1802     int ok;
1803 
1804     int i;
1805     AV * matrix_av;
1806     const int ndata = transpose ? nrows : ncols;
1807     const int nelements = transpose ? ncols : nrows;
1808 
1809     PPCODE:
1810     /* ------------------------
1811      * Don't check the parameters, because we rely on the Perl
1812      * caller to check most paramters.
1813      */
1814 
1815     /* ------------------------
1816      * Allocate space for clusterid[][2].
1817      */
1818     clusterid = malloc(nelements*sizeof(int[2]));
1819     if (!clusterid) {
1820         croak("memory allocation failure in _somcluster\n");
1821     }
1822     celldata  =  0;
1823     /* Don't return celldata, for now at least */
1824 
1825 
1826     /* ------------------------
1827      * Convert data and mask matrices and the weight array
1828      * from C to Perl.  Also check for errors, and ignore the
1829      * mask or the weight array if there are any errors.
1830      * Set nweights to the correct number of weights.
1831      */
1832     ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata,
1833                 data_ref,   &matrix,
1834                 mask_ref,   &mask,
1835                 nrows,      ncols);
1836     if (!ok) {
1837             croak("failed to read input data for _somcluster\n");
1838     }
1839 
1840     /* ------------------------
1841      * Run the library function
1842      */
1843     somcluster(
1844         nrows, ncols,
1845         matrix, mask, weight,
1846         transpose, nxgrid, nygrid, inittau, niter,
1847         dist[0], celldata, clusterid
1848     );
1849 
1850     /* ------------------------
1851      * Convert generated C matrices to Perl matrices
1852      */
1853     matrix_av = newAV();
1854     for(i=0; i<nelements; ++i) {
1855         SV* row_ref;
1856         AV* row_av = newAV();
1857         av_push(row_av, newSViv(clusterid[i][0]));
1858         av_push(row_av, newSViv(clusterid[i][1]));
1859         row_ref = newRV((SV*)row_av);
1860         av_push(matrix_av, row_ref);
1861     }
1862     clusterid_ref = newRV_noinc((SV*)matrix_av);
1863 
1864     /* ------------------------
1865      * Push the new Perl matrices onto the return stack
1866      */
1867     XPUSHs(sv_2mortal( clusterid_ref   ));
1868 
1869     /* ------------------------
1870      * Free what we've malloc'ed
1871      */
1872     free_matrix_int(mask,     nrows);
1873     free_matrix_dbl(matrix,   nrows);
1874     free(weight);
1875     free(clusterid);
1876 
1877     /* Finished _somcluster() */
1878 
1879 
1880 void
1881 _pca(nrows, ncols, data_ref)
1882     int      nrows;
1883     int      ncols;
1884     SV * data_ref;
1885 
1886     PREINIT:
1887     double** u;
1888     double** v;
1889     double* w;
1890     double* m;
1891     int i;
1892     int j;
1893     int nmin;
1894     int error;
1895     SV * mean_ref;
1896     SV * coordinates_ref;
1897     SV * pc_ref;
1898     SV * eigenvalues_ref;
1899 
1900     PPCODE:
1901     if(SvTYPE(SvRV(data_ref)) != SVt_PVAV) {
1902         croak("argument to _pca is not an array reference\n");
1903     }
1904     nmin = nrows < ncols ? nrows : ncols;
1905     /* -- Create the output variables -------------------------------------- */
1906     u = parse_data(aTHX_ data_ref, NULL);
1907     w = malloc(nmin*sizeof(double));
1908     v = malloc(nmin*sizeof(double*));
1909     m = malloc(ncols*sizeof(double));
1910     if (v) {
1911         for (i = 0; i < nmin; i++) {
1912             v[i] = malloc(nmin*sizeof(double));
1913             if (!v[i]) break;
1914         }
1915         if (i < nmin) { /* then we encountered the break */
1916             while (i-- > 0) free(v[i]);
1917             free(v);
1918             v = NULL;
1919         }
1920     }
1921     if (!u || !v || !w || !m) {
1922         if (u) free(u);
1923         if (v) free(v);
1924         if (w) free(w);
1925         if (m) free(m);
1926         croak("memory allocation failure in _pca\n");
1927     }
1928     /* -- Calculate the mean of each column ------------------------------ */
1929     for (j = 0; j < ncols; j++) {
1930         m[j] = 0.0;
1931         for (i = 0; i < nrows; i++) m[j] += u[i][j];
1932         m[j] /= nrows;
1933     }
1934     /* -- Subtract the mean of each column ------------------------------- */
1935     for (i = 0; i < nrows; i++)
1936         for (j = 0; j < ncols; j++)
1937             u[i][j] -= m[j];
1938     error = pca(nrows, ncols, u, v, w);
1939     if (error==0) {
1940         /* Convert the C variables to Perl variables */
1941         mean_ref = row_c2perl_dbl(aTHX_ m, ncols);
1942         if (nrows >= ncols) {
1943             coordinates_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
1944             pc_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
1945         }
1946         else /* nrows < ncols */ {
1947             pc_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
1948             coordinates_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
1949         }
1950         eigenvalues_ref = row_c2perl_dbl(aTHX_ w, nmin);
1951     }
1952     for (i = 0; i < nrows; i++) free(u[i]);
1953     for (i = 0; i < nmin; i++) free(v[i]);
1954     free(u);
1955     free(v);
1956     free(w);
1957     free(m);
1958     if (error==-1)
1959         croak("Insufficient memory for principal components analysis");
1960     if (error > 0)
1961         croak("Singular value decomposition failed to converge");
1962     /* ------------------------
1963      * Push the new Perl matrices onto the return stack
1964      */
1965     XPUSHs(sv_2mortal(mean_ref));
1966     XPUSHs(sv_2mortal(coordinates_ref));
1967     XPUSHs(sv_2mortal(pc_ref));
1968     XPUSHs(sv_2mortal(eigenvalues_ref));
1969