1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 /* The Perl include files perl.h redefines malloc and free. Here, we need the
6 * usual malloc and free, defined in stdlib.h. So we undefine the ones in
7 * perl.h.
8 */
9
10 #ifdef malloc
11 #undef malloc
12 #endif
13 #ifdef free
14 #undef free
15 #endif
16
17 #include <stdlib.h>
18
19 #include "../src/cluster.h"
20
21
22 typedef struct {Node* nodes; int n;} Tree;
23
24 /* -------------------------------------------------
25 * Using the warnings registry, check to see if warnings
26 * are enabled for the Algorithm::Cluster module.
27 */
28 static int
warnings_enabled(pTHX)29 warnings_enabled(pTHX) {
30
31 dSP;
32
33 I32 count;
34 bool isEnabled;
35 SV * mysv;
36
37 ENTER ;
38 SAVETMPS;
39 PUSHMARK(SP) ;
40 XPUSHs(sv_2mortal(newSVpv("Algorithm::Cluster",18)));
41 PUTBACK ;
42
43 count = perl_call_pv("warnings::enabled", G_SCALAR) ;
44
45 if (count != 1) croak("No arguments returned from call_pv()\n") ;
46
47 mysv = POPs;
48 isEnabled = (bool) SvTRUE(mysv);
49
50 PUTBACK ;
51 FREETMPS ;
52 LEAVE ;
53
54 return isEnabled;
55 }
56
57 /* -------------------------------------------------
58 * Create a row of doubles, initialized to a value
59 */
60 static double*
malloc_row_dbl(pTHX_ int ncols,double val)61 malloc_row_dbl(pTHX_ int ncols, double val) {
62
63 int j;
64 double * row;
65
66 row = malloc(ncols * sizeof(double) );
67 if (!row) {
68 return NULL;
69 }
70
71 for (j = 0; j < ncols; j++) {
72 row[j] = val;
73 }
74 return row;
75 }
76
77 /* -------------------------------------------------
78 * Only coerce to a double if we already know it's
79 * an integer or double, or a string which is actually numeric.
80 * Don't blindly run the macro SvNV, because that will coerce
81 * a non-numeric string to be a double of value 0.0,
82 * and we do not want that to happen, because if we test it again,
83 * it will then appear to be a valid double value.
84 */
85 static int
extract_double_from_scalar(pTHX_ SV * mysv,double * number)86 extract_double_from_scalar(pTHX_ SV * mysv, double * number) {
87
88 if (SvPOKp(mysv) && SvLEN(mysv)) {
89
90 /* This function is not in the public perl API */
91 if (Perl_looks_like_number(aTHX_ mysv)) {
92 *number = SvNV( mysv );
93 return 1;
94 } else {
95 return 0;
96 }
97 } else if (SvNIOK(mysv)) {
98 *number = SvNV( mysv );
99 return 1;
100 } else {
101 return 0;
102 }
103 }
104
105
106
107 /* -------------------------------------------------
108 * Convert a Perl 2D matrix into a 2D matrix of C doubles.
109 * If no data are masked, mask can be passed as NULL.
110 * NOTE: on errors this function returns a value greater than zero.
111 */
112 static double**
parse_data(pTHX_ SV * matrix_ref,int ** mask)113 parse_data(pTHX_ SV * matrix_ref, int** mask) {
114
115 AV * matrix_av;
116 SV * row_ref;
117 AV * row_av;
118 SV * cell;
119
120 int type, i, j, nrows, ncols, n;
121
122 double** matrix;
123
124 /* NOTE -- we will just assume that matrix_ref points to an arrayref,
125 * and that the first item in the array is itself an arrayref.
126 * The calling perl functions must check this before we get this pointer.
127 * (It's easier to implement these checks in Perl rather than C.)
128 * The value of perl_rows is now fixed. But the value of
129 * rows will be decremented, if we skip any (invalid) Perl rows.
130 */
131 matrix_av = (AV *) SvRV(matrix_ref);
132 nrows = (int) av_len(matrix_av) + 1;
133
134 if(nrows <= 0) {
135 return NULL;
136 }
137 matrix = malloc(nrows*sizeof(double*));
138 if (!matrix) {
139 return NULL;
140 }
141
142 row_ref = *(av_fetch(matrix_av, (I32) 0, 0));
143 row_av = (AV *) SvRV(row_ref);
144 ncols = (int) av_len(row_av) + 1;
145
146
147 /* ------------------------------------------------------------
148 * Loop once for each row in the Perl matrix, and convert it to
149 * C doubles.
150 */
151 for (i=0; i < nrows; i++) {
152
153 row_ref = *(av_fetch(matrix_av, (I32) i, 0));
154
155 if(! SvROK(row_ref) ) {
156
157 if(warnings_enabled(aTHX))
158 Perl_warn(aTHX_
159 "Row %d: Wanted array reference, but "
160 "got a scalar. No row to process?\n", i);
161 break;
162 }
163
164 row_av = (AV *) SvRV(row_ref);
165 type = SvTYPE(row_av);
166
167 /* Handle unexpected cases */
168 if(type != SVt_PVAV ) {
169
170 /* Reference doesn't point to an array at all. */
171 if(warnings_enabled(aTHX))
172 Perl_warn(aTHX_
173 "Row %d: Wanted array reference, but got "
174 "a reference to something else (%d)\n",
175 i, type);
176 break;
177
178 }
179
180 n = (int) av_len(row_av) + 1;
181 if (n != ncols) {
182 /* All rows in the matrix should have the same
183 * number of columns. */
184 if(warnings_enabled(aTHX))
185 Perl_warn(aTHX_
186 "Row %d: Contains %d columns "
187 "(expected %d)\n", i, n, ncols);
188 break;
189 }
190
191 matrix[i] = malloc(ncols*sizeof(double));
192 if (!matrix[i])
193 break;
194
195 /* Loop once for each cell in the row. */
196 for (j=0; j < ncols; j++) {
197
198 double num;
199 if (!mask || mask[i][j]) {
200 cell = *(av_fetch(row_av, (I32) j, 0));
201 if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) {
202 if(warnings_enabled(aTHX))
203 Perl_warn(aTHX_
204 "Row %d col %d: Value is not "
205 "a number.\n", i, j);
206 free(matrix[i]); /* not included below */
207 break;
208 }
209 }
210 else {
211 /* Don't read the value if it is masked.
212 * Set it to some arbitrary value. */
213 num = 0.0;
214 }
215 matrix[i][j] = num;
216
217 } /* End for (j=0; j < ncols; j++) */
218 if (j < ncols) break;
219
220 } /* End for (i=0; i < nrows; i++) */
221
222 if (i < nrows) { /* encountered a break */
223 nrows = i;
224 for (i = 0; i < nrows; i++) free(matrix[i]);
225 free(matrix);
226 matrix = NULL;
227 }
228
229 return matrix;
230 }
231
232
233 /* -------------------------------------------------
234 * Convert a Perl 2D matrix into a 2D matrix of C ints.
235 * On errors this function returns a value greater than zero.
236 */
237 static int**
parse_mask(pTHX_ SV * matrix_ref)238 parse_mask(pTHX_ SV * matrix_ref) {
239
240 AV * matrix_av;
241 SV * row_ref;
242 AV * row_av;
243 SV * cell;
244
245 int type, i, j, nrows, ncols, n;
246
247 int** matrix;
248
249 /* NOTE -- we will just assume that matrix_ref points to an arrayref,
250 * and that the first item in the array is itself an arrayref.
251 * The calling perl functions must check this before we get this pointer.
252 * (It's easier to implement these checks in Perl rather than C.)
253 * The value of perl_rows is now fixed. But the value of
254 * rows will be decremented, if we skip any (invalid) Perl rows.
255 */
256 matrix_av = (AV *) SvRV(matrix_ref);
257 nrows = (int) av_len(matrix_av) + 1;
258
259 if(nrows <= 0) {
260 return NULL; /* Caller must handle this case!! */
261 }
262 matrix = malloc(nrows * sizeof(int *) );
263 if (!matrix) {
264 return NULL;
265 }
266
267 row_ref = *(av_fetch(matrix_av, (I32) 0, 0));
268 row_av = (AV *) SvRV(row_ref);
269 ncols = (int) av_len(row_av) + 1;
270
271
272
273 /* ------------------------------------------------------------
274 * Loop once for each row in the Perl matrix, and convert it to C ints.
275 */
276 for (i=0; i < nrows; i++) {
277
278 row_ref = *(av_fetch(matrix_av, (I32) i, 0));
279
280 if(! SvROK(row_ref) ) {
281
282 if(warnings_enabled(aTHX))
283 Perl_warn(aTHX_
284 "Row %d: Wanted array reference, but "
285 "got a scalar. No row to process?\n", i);
286 break;
287 }
288
289 row_av = (AV *) SvRV(row_ref);
290 type = SvTYPE(row_av);
291
292 /* Handle unexpected cases */
293 if(type != SVt_PVAV ) {
294
295 /* Reference doesn't point to an array at all. */
296 if(warnings_enabled(aTHX))
297 Perl_warn(aTHX_
298 "Row %d: Wanted array reference, but got "
299 "a reference to something else (%d)\n",
300 i, type);
301 break;
302
303 }
304
305 n = (int) av_len(row_av) + 1;
306 if (n != ncols) {
307 /* All rows in the matrix should have the same
308 * number of columns. */
309 if(warnings_enabled(aTHX))
310 Perl_warn(aTHX_
311 "Row %d: Contains %d columns "
312 "(expected %d)\n", i, n, ncols);
313 break;
314 }
315
316 matrix[i] = malloc(ncols * sizeof(int) );
317 if (!matrix[i]) {
318 break;
319 }
320
321 /* Loop once for each cell in the row. */
322 for (j=0; j < ncols; ++j) {
323 double num;
324 cell = *(av_fetch(row_av, (I32) j, 0));
325 if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) {
326 if(warnings_enabled(aTHX))
327 Perl_warn(aTHX_
328 "Row %d col %d: Value is not "
329 "a number.\n", i, j);
330 free(matrix[i]); /* not included below */
331 break;
332 }
333 matrix[i][j] = (int) num;
334
335 } /* End for (j=0; j < ncols; j++) */
336 if (j < ncols) break;
337
338 } /* End for (i=0; i < nrows; i++) */
339
340 if (i < nrows) { /* break statement encountered */
341 nrows = i;
342 for (i = 0; i < nrows; i++) free(matrix[i]);
343 free(matrix);
344 matrix = NULL;
345 }
346
347 return matrix;
348 }
349
350
351 /* -------------------------------------------------
352 *
353 */
354 static void
free_matrix_int(int ** matrix,int nrows)355 free_matrix_int(int ** matrix, int nrows) {
356
357 int i;
358 for(i = 0; i < nrows; ++i ) {
359 free(matrix[i]);
360 }
361
362 free(matrix);
363 }
364
365
366 /* -------------------------------------------------
367 *
368 */
369 static void
free_matrix_dbl(double ** matrix,int nrows)370 free_matrix_dbl(double ** matrix, int nrows) {
371
372 int i;
373 for(i = 0; i < nrows; ++i ) {
374 free(matrix[i]);
375 }
376
377 free(matrix);
378 }
379
380
381 /* -------------------------------------------------
382 *
383 */
384 static void
free_ragged_matrix_dbl(double ** matrix,int nrows)385 free_ragged_matrix_dbl(double ** matrix, int nrows) {
386
387 int i;
388 for(i = 1; i < nrows; ++i ) {
389 free(matrix[i]);
390 }
391
392 free(matrix);
393 }
394
395
396 /* -------------------------------------------------
397 * Convert a Perl array into an array of doubles
398 * On error, this function returns NULL.
399 */
400 static double*
malloc_row_perl2c_dbl(pTHX_ SV * input,int * np)401 malloc_row_perl2c_dbl (pTHX_ SV * input, int* np) {
402
403 int i;
404 AV* array = (AV *) SvRV(input);
405 const int n = (int) av_len(array) + 1;
406 double* data = malloc(n * sizeof(double));
407 if (!data) {
408 return NULL;
409 }
410
411 /* Loop once for each item in the Perl array, and convert
412 * it to a C double.
413 */
414 for (i=0; i < n; i++) {
415 double num;
416 SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
417 if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
418 data[i] = num;
419 } else {
420 /* Error reading data */
421 if (warnings_enabled(aTHX))
422 Perl_warn(aTHX_
423 "Error parsing array: item %d is not a number\n", i);
424 free(data);
425 return NULL;
426 }
427 }
428 if(np) *np = n;
429 return data;
430 }
431
432 /* -------------------------------------------------
433 * Convert a Perl array into an array of ints
434 * On errors this function returns NULL.
435 */
436 static int*
malloc_row_perl2c_int(pTHX_ SV * input)437 malloc_row_perl2c_int (pTHX_ SV * input) {
438
439 int i;
440
441 AV* array = (AV *) SvRV(input);
442 const int n = (int) av_len(array) + 1;
443 int* data = malloc(n*sizeof(int));
444 if (!data) {
445 return NULL;
446 }
447
448 /* Loop once for each item in the Perl array,
449 * and convert it to a C double.
450 */
451 for (i=0; i < n; i++) {
452 double num;
453 SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
454 if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
455 data[i] = (int) num;
456 } else {
457 /* Check if the item is numeric */
458 if (warnings_enabled(aTHX))
459 Perl_warn(aTHX_ "Error when parsing array: item %d is"
460 " not a number, skipping\n", i);
461 free(data);
462 return NULL;
463 }
464 }
465
466 return data;
467 }
468
469 /* -------------------------------------------------
470 * Copy a Perl array into an array of ints.
471 * If an error occurs, return 0; otherwise return 1.
472 */
473 static int
copy_row_perl2c_int(pTHX_ SV * input,int * output)474 copy_row_perl2c_int (pTHX_ SV * input, int* output) {
475
476 int i;
477
478 AV* array = (AV *) SvRV(input);
479 const int n = (int) av_len(array) + 1;
480
481 /* Loop once for each item in the Perl array,
482 * and convert it to a C double.
483 */
484 for (i=0; i < n; i++) {
485 double num;
486 SV * mysv = *(av_fetch(array, (I32) i, (I32) 0));
487 if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) {
488 output[i] = (int) num;
489 } else {
490 /* Skip any items which are not numeric */
491 if (warnings_enabled(aTHX))
492 Perl_warn(aTHX_
493 "Error when parsing array: item %d is"
494 " not a number\n", i);
495 return 0;
496 }
497 }
498 return 1;
499 }
500 /* -------------------------------------------------
501 *
502 */
503 static SV *
row_c2perl_dbl(pTHX_ double * row,int ncols)504 row_c2perl_dbl(pTHX_ double * row, int ncols) {
505
506 int j;
507 AV * row_av = newAV();
508 for(j=0; j<ncols; ++j) {
509 av_push(row_av, newSVnv(row[j]));
510 }
511 return newRV_noinc((SV*) row_av);
512 }
513
514 /* -------------------------------------------------
515 *
516 */
517 static SV*
row_c2perl_int(pTHX_ int * row,int ncols)518 row_c2perl_int(pTHX_ int * row, int ncols) {
519
520 int j;
521 AV * row_av = newAV();
522 for(j=0; j<ncols; ++j) {
523 av_push(row_av, newSVnv(row[j]));
524 }
525 return ( newRV_noinc( (SV*) row_av ) );
526 }
527
528 /* -------------------------------------------------
529 *
530 */
531 static SV*
matrix_c2perl_dbl(pTHX_ double ** matrix,int nrows,int ncols)532 matrix_c2perl_dbl(pTHX_ double ** matrix, int nrows, int ncols) {
533
534 int i;
535 AV * matrix_av = newAV();
536 SV * row_ref;
537 for(i=0; i<nrows; ++i) {
538 row_ref = row_c2perl_dbl(aTHX_ matrix[i], ncols);
539 av_push(matrix_av, row_ref);
540 }
541 return ( newRV_noinc( (SV*) matrix_av ) );
542 }
543
544 /* -------------------------------------------------
545 *
546 */
547 static SV*
matrix_c2perl_int(pTHX_ int ** matrix,int nrows,int ncols)548 matrix_c2perl_int(pTHX_ int ** matrix, int nrows, int ncols) {
549
550 int i;
551 AV * matrix_av = newAV();
552 SV * row_ref;
553 for(i=0; i<nrows; ++i) {
554 row_ref = row_c2perl_int(aTHX_ matrix[i], ncols);
555 av_push(matrix_av, row_ref);
556 }
557 return ( newRV_noinc( (SV*) matrix_av ) );
558 }
559
560 /* -------------------------------------------------
561 *
562 */
563 static SV*
ragged_matrix_c2perl_dbl(pTHX_ double ** matrix,int nobjects)564 ragged_matrix_c2perl_dbl(pTHX_ double ** matrix, int nobjects) {
565
566 int i;
567 AV * matrix_av = newAV();
568 SV * row_ref;
569 for(i=0; i<nobjects; ++i) {
570 row_ref = row_c2perl_dbl(aTHX_ matrix[i], i);
571 av_push(matrix_av, row_ref);
572 }
573 return ( newRV_noinc( (SV*) matrix_av ) );
574 }
575
576 /* -------------------------------------------------
577 * Check if the data matrix is a distance matrix, or
578 * a raw distance matrix.
579 */
580 static int
is_distance_matrix(pTHX_ SV * data_ref)581 is_distance_matrix(pTHX_ SV * data_ref)
582 {
583 /* We don't check data_ref because we expect the caller to check it
584 */
585 AV * matrix_av = (AV *) SvRV(data_ref);
586 SV * row_ref = *(av_fetch(matrix_av, (I32) 0, 0));
587 AV * row_av = (AV *) SvRV(row_ref);
588 const int ncols = (int) av_len(row_av) + 1;
589 if (ncols==0) return 1;
590
591 return 0;
592 }
593
594
595 /* -------------------------------------------------
596 * Convert the 'data' and 'mask' matrices and the 'weight' array
597 * from C to Perl. Also check for errors, and bail out if there are any.
598 */
599 static int
malloc_matrices(pTHX_ SV * weight_ref,double ** weight,int nweights,SV * data_ref,double *** matrix,SV * mask_ref,int *** mask,int nrows,int ncols)600 malloc_matrices(pTHX_
601 SV * weight_ref, double ** weight, int nweights,
602 SV * data_ref, double *** matrix,
603 SV * mask_ref, int *** mask,
604 int nrows, int ncols
605 ) {
606
607 if(SvROK(mask_ref) && SvTYPE(SvRV(mask_ref)) == SVt_PVAV) {
608 *mask = parse_mask(aTHX_ mask_ref);
609 if(*mask==NULL) return 0;
610 } else {
611 int i,j;
612 int** p = malloc(nrows*sizeof(int*));
613 if(!p) return 0;
614 for (i = 0; i < nrows; ++i) {
615 p[i] = malloc(ncols*sizeof(int));
616 if(!p[i]) {
617 while(--i >= 0) free(p[i]);
618 free(p);
619 return 0;
620 }
621 for (j = 0; j < ncols; j++) p[i][j] = 1;
622 }
623 *mask = p;
624 }
625
626 /* We don't check data_ref because we expect the caller to check it
627 */
628 *matrix = parse_data(aTHX_ data_ref, *mask);
629 if(*matrix==NULL) {
630 free_matrix_int(*mask, nrows);
631 return 0;
632 }
633
634 if(weight_ref==NULL) return 1; /* Weights not needed */
635 if(SvROK(weight_ref) && SvTYPE(SvRV(weight_ref)) == SVt_PVAV) {
636 *weight = malloc_row_perl2c_dbl(aTHX_ weight_ref, NULL);
637 } else {
638 *weight = malloc_row_dbl(aTHX_ nweights,1.0);
639 }
640
641 if(!(*weight)) {
642 free_matrix_int(*mask, nrows);
643 free_matrix_dbl(*matrix, nrows);
644 return 0;
645 }
646
647 return 1;
648 }
649
650 static double**
parse_distance(pTHX_ SV * matrix_ref,int nobjects)651 parse_distance(pTHX_ SV* matrix_ref, int nobjects)
652 {
653 int i,j;
654
655 AV* matrix_av = (AV *) SvRV(matrix_ref);
656 double** matrix = malloc(nobjects*sizeof(double*));
657 if (!matrix) {
658 return NULL;
659 }
660
661 matrix[0] = NULL;
662 for (i=1; i < nobjects; i++) {
663 SV* row_ref = *(av_fetch(matrix_av, (I32) i, 0));
664 AV* row_av = (AV *) SvRV(row_ref);
665 matrix[i] = malloc(i * sizeof(double));
666 if (!matrix[i]) {
667 break;
668 }
669 /* Loop once for each cell in the row. */
670 for (j=0; j < i; j++) {
671 double num;
672 SV* cell = *(av_fetch(row_av, (I32) j, 0));
673 if(extract_double_from_scalar(aTHX_ cell,&num) > 0) {
674 matrix[i][j] = num;
675 } else {
676 if(warnings_enabled(aTHX))
677 Perl_warn(aTHX_
678 "Row %d col %d: Value is not "
679 "a number.\n", i, j);
680 break;
681 }
682 }
683 }
684
685 if (i < nobjects) {
686 nobjects = i+1;
687 for (i = 1; i < nobjects; i++) free(matrix[i]);
688 free(matrix);
689 matrix = NULL;
690 }
691
692 return matrix;
693 }
694
695 /******************************************************************************/
696 /** **/
697 /** XS code begins here **/
698 /** **/
699 /******************************************************************************/
700 /******************************************************************************/
701
702 MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Node
703 PROTOTYPES: ENABLE
704
705 SV*
706 new (class, left, right, distance)
707 char* class
708 int left
709 int right
710 double distance
711 PREINIT:
712 Node* node;
713 SV* obj;
714 CODE:
715 node = malloc(sizeof(Node));
716 RETVAL = newSViv(0);
717 obj = newSVrv(RETVAL, class);
718 node->left = left;
719 node->right = right;
720 node->distance = distance;
721
722 sv_setiv(obj, PTR2IV(node));
723 SvREADONLY_on(obj);
724 OUTPUT:
725 RETVAL
726
727
728 int
729 left (obj)
730 SV* obj
731 CODE:
732 RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->left;
733 OUTPUT:
734 RETVAL
735
736 int
737 right (obj)
738 SV* obj
739 CODE:
740 RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->right;
741 OUTPUT:
742 RETVAL
743
744 double
745 distance (obj)
746 SV* obj
747 CODE:
748 RETVAL = (INT2PTR(Node*,SvIV(SvRV(obj))))->distance;
749 OUTPUT:
750 RETVAL
751
752 void
set_left(obj,left)753 set_left (obj, left)
754 SV* obj
755 int left
756 PREINIT:
757 Node* node;
758 CODE:
759 if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
760 croak("set_left should be applied to an Algorithm::Cluster::Node object");
761 }
762 node = INT2PTR(Node*,SvIV(SvRV(obj)));
763 node->left = left;
764
765 void
set_right(obj,right)766 set_right (obj, right)
767 SV* obj
768 int right
769 PREINIT:
770 Node* node;
771 CODE:
772 if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
773 croak("set_right should be applied to an Algorithm::Cluster::Node object");
774 }
775 node = INT2PTR(Node*,SvIV(SvRV(obj)));
776 node->right = right;
777
778 void
set_distance(obj,distance)779 set_distance (obj, distance)
780 SV* obj
781 double distance
782 PREINIT:
783 Node* node;
784 CODE:
785 if (!sv_isa(obj, "Algorithm::Cluster::Node")) {
786 croak("set_distance should be applied to an Algorithm::Cluster::Node object");
787 }
788 node = INT2PTR(Node*,SvIV(SvRV(obj)));
789 node->distance = distance;
790
791 void DESTROY (obj)
792 SV* obj
793 PREINIT:
794 I32* temp;
795 Node* node;
796 PPCODE:
797 temp = PL_markstack_ptr++;
798 node = INT2PTR(Node*, SvIV(SvRV(obj)));
799 free(node);
800 if (PL_markstack_ptr != temp) {
801 /* truly void, because dXSARGS not invoked */
802 PL_markstack_ptr = temp;
803 XSRETURN_EMPTY;
804 /* return empty stack */
805 } /* must have used dXSARGS; list context implied */
806 return; /* assume stack size is correct */
807
808
809 MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster::Tree
810 PROTOTYPES: ENABLE
811
812 SV*
813 new (class, nodes)
814 char* class
815 SV* nodes
816
817 PREINIT:
818 Tree* tree;
819 SV* obj;
820 int i;
821 int n;
822 AV* array;
823 int* flag;
824
825 CODE:
826 if(!SvROK(nodes) || SvTYPE(SvRV(nodes)) != SVt_PVAV) {
827 croak("Algorithm::Cluster::Tree::new expects an array of nodes\n");
828 }
829 array = (AV *) SvRV(nodes);
830 n = (int) av_len(array) + 1;
831 tree = malloc(sizeof(Tree));
832 if (tree) {
833 tree->n = n;
834 tree->nodes = malloc(n*sizeof(Node));
835 }
836 if (! tree || !tree->nodes) {
837 if (tree) free(tree);
838 croak("Algorithm::Cluster::Tree::new memory error\n");
839 }
840
841 for (i = 0; i < n; i++) {
842 Node* node;
843 SV* node_ref = *(av_fetch(array, (I32) i, 0));
844 if (!sv_isa(node_ref, "Algorithm::Cluster::Node")) break;
845 node = INT2PTR(Node*,SvIV(SvRV(node_ref)));
846 tree->nodes[i].left = node->left;
847 tree->nodes[i].right = node->right;
848 tree->nodes[i].distance = node->distance;
849 }
850
851 if (i < n) {
852 /* break encountered */
853 free(tree->nodes);
854 free(tree);
855 croak("Algorithm::Cluster::Tree::new expects an array of nodes\n");
856 }
857
858 flag = malloc((2*n+1)*sizeof(int));
859 if(flag) {
860 int j;
861 for (i = 0; i < 2*n+1; i++) flag[i] = 0;
862 for (i = 0; i < n; i++) {
863 j = tree->nodes[i].left;
864 if (j < 0) {
865 j = -j-1;
866 if (j>=i) break;
867 }
868 else j+=n;
869 if (flag[j]) break;
870 flag[j] = 1;
871 j = tree->nodes[i].right;
872 if (j < 0) {
873 j = -j-1;
874 if (j>=i) break;
875 }
876 else j+=n;
877 if (flag[j]) break;
878 flag[j] = 1;
879 }
880 free(flag);
881 }
882
883 if (!flag || i < n) {
884 /* break encountered */
885 free(tree->nodes);
886 free(tree);
887 croak("the array of nodes passed to Algorithm::Cluster::Tree::new does not represent a valid tree\n");
888 }
889
890 RETVAL = newSViv(0);
891 obj = newSVrv(RETVAL, class);
892 sv_setiv(obj, PTR2IV(tree));
893 SvREADONLY_on(obj);
894
895 OUTPUT:
896 RETVAL
897
898 int
899 length (obj)
900 SV* obj
901 CODE:
902 RETVAL = (INT2PTR(Tree*,SvIV(SvRV(obj))))->n;
903 OUTPUT:
904 RETVAL
905
906
907 SV *
908 get (obj, index)
909 SV* obj
910 int index
911 PREINIT:
912 Tree* tree;
913 Node* node;
914 SV* scalar;
915 CODE:
916 tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
917 if (index < 0 || index >= tree->n) {
918 croak("Index out of bounds in Algorithm::Cluster::Tree::get\n");
919 }
920 RETVAL = newSViv(0);
921 scalar = newSVrv(RETVAL, "Algorithm::Cluster::Node");
922 node = malloc(sizeof(Node));
923 if (!node) {
924 croak("Memory allocation failure in Algorithm::Cluster::Tree::get\n");
925 }
926 node->left = tree->nodes[index].left;
927 node->right = tree->nodes[index].right;
928 node->distance = tree->nodes[index].distance;
929 sv_setiv(scalar, PTR2IV(node));
930 SvREADONLY_on(scalar);
931 OUTPUT:
932 RETVAL
933
934 void
935 scale(obj)
936 SV* obj
937 PREINIT:
938 int i;
939 int n;
940 Tree* tree;
941 Node* nodes;
942 double maximum;
943 CODE:
944 if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
945 croak("scale can only be applied to an Algorithm::Cluster::Tree object");
946 }
947 tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
948 n = tree->n;
949 nodes = tree->nodes;
950 maximum = DBL_MIN;
951 for (i = 0; i < n; i++) {
952 double distance = nodes[i].distance;
953 if (distance > maximum) maximum = distance;
954 }
955 if (maximum!=0.0) {
956 for (i = 0; i < n; i++) nodes[i].distance /= maximum;
957 }
958
959
960 void
961 sort(obj, order = NULL)
962 SV* obj
963 SV* order
964
965 PREINIT:
966 int i;
967 int n;
968 Tree* tree;
969 int* indices;
970 double* values = NULL;
971 int ok;
972
973 PPCODE:
974 if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
975 croak("sort can only be applied to an Algorithm::Cluster::Tree object");
976 }
977 tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
978 if (order) {
979 if(!SvROK(order) || SvTYPE(SvRV(order)) != SVt_PVAV) {
980 croak("Algorithm::Cluster::Tree::sort expects an order array\n");
981 }
982 values = malloc_row_perl2c_dbl(aTHX_ order, &n);
983 if (!values) {
984 croak("Algorithm::Cluster::Tree::sort memory error\n");
985 }
986 if (n != tree->n + 1) {
987 free(values);
988 croak("sort: size of order array is inconsistent with tree size\n");
989 }
990 }
991 else {
992 n = tree->n + 1;
993 }
994 indices = malloc(n*sizeof(int));
995 if (!indices) {
996 if(values) free(values);
997 croak("sort: insufficient memory");
998 }
999 /* --------------------------------------------------------------- */
1000 ok = sorttree(tree->n, tree->nodes, values, indices);
1001 if(values) free(values);
1002 /* -- Check for errors flagged by the C routine ------------------ */
1003 if (!ok) {
1004 free(indices);
1005 croak("sort: Error in the sorttree routine");
1006 }
1007 for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(indices[i])));
1008 free(indices);
1009
1010 void
1011 cut(obj, nclusters=0)
1012 SV* obj
1013 int nclusters
1014 PREINIT:
1015 int ok;
1016 int i;
1017 int n;
1018 Tree* tree;
1019 int* clusterid;
1020 PPCODE:
1021 if (!sv_isa(obj, "Algorithm::Cluster::Tree")) {
1022 croak("cut can only be applied to an Algorithm::Cluster::Tree object\n");
1023 }
1024 tree = INT2PTR(Tree*,SvIV(SvRV(obj)));
1025 n = tree->n + 1;
1026 if (nclusters < 0) {
1027 croak("cut: Requested number of clusters should be positive\n");
1028 }
1029 if (nclusters > n) {
1030 croak("cut: More clusters requested than items available\n");
1031 }
1032 if (nclusters == 0) {
1033 nclusters = n;
1034 }
1035 clusterid = malloc(n*sizeof(int));
1036 if (!clusterid) {
1037 croak("cut: Insufficient memory\n");
1038 }
1039 ok = cuttree(n, tree->nodes, nclusters, clusterid);
1040 if (!ok) {
1041 free(clusterid);
1042 croak("cut: Error in the cuttree routine\n");
1043 }
1044 for(i=0; i<n; i++) XPUSHs(sv_2mortal(newSVnv(clusterid[i])));
1045 free(clusterid);
1046
1047
1048 void DESTROY (obj)
1049 SV* obj
1050 PREINIT:
1051 I32* temp;
1052 Tree* tree;
1053 PPCODE:
1054 temp = PL_markstack_ptr++;
1055 tree = INT2PTR(Tree*, SvIV(SvRV(obj)));
1056 free(tree->nodes);
1057 free(tree);
1058 if (PL_markstack_ptr != temp) {
1059 /* truly void, because dXSARGS not invoked */
1060 PL_markstack_ptr = temp;
1061 XSRETURN_EMPTY;
1062 /* return empty stack */
1063 } /* must have used dXSARGS; list context implied */
1064 return; /* assume stack size is correct */
1065
1066
1067 MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster
1068 PROTOTYPES: ENABLE
1069
1070
1071 SV *
1072 _version()
1073 CODE:
1074 RETVAL = newSVpv( CLUSTERVERSION , 0);
1075
1076 OUTPUT:
1077 RETVAL
1078
1079
1080
1081 SV *
1082 _mean(input)
1083 SV * input;
1084
1085 PREINIT:
1086 int array_length;
1087 double * data; /* one-dimensional array of doubles */
1088
1089 CODE:
1090 if(SvTYPE(SvRV(input)) != SVt_PVAV) {
1091 XSRETURN_UNDEF;
1092 }
1093
1094 data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
1095 if (data) {
1096 RETVAL = newSVnv( mean(array_length, data) );
1097 free(data);
1098 } else {
1099 croak("memory allocation failure in _mean\n");
1100 }
1101
1102 OUTPUT:
1103 RETVAL
1104
1105
1106 SV *
1107 _median(input)
1108 SV * input;
1109
1110 PREINIT:
1111 int array_length;
1112 double * data; /* one-dimensional array of doubles */
1113
1114 CODE:
1115 if(SvTYPE(SvRV(input)) != SVt_PVAV) {
1116 XSRETURN_UNDEF;
1117 }
1118
1119 data = malloc_row_perl2c_dbl (aTHX_ input, &array_length);
1120 if (data) {
1121 RETVAL = newSVnv( median(array_length, data) );
1122 free(data);
1123 } else {
1124 croak("memory allocation failure in _median\n");
1125 }
1126
1127 OUTPUT:
1128 RETVAL
1129
1130
1131 SV *
1132 _treecluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist,method)
1133 int nrows;
1134 int ncols;
1135 SV * data_ref;
1136 SV * mask_ref;
1137 SV * weight_ref;
1138 int transpose;
1139 char * dist;
1140 char * method;
1141
1142 PREINIT:
1143 Node* nodes;
1144
1145 double * weight = NULL;
1146 double ** matrix = NULL;
1147 int ** mask = NULL;
1148 double ** distancematrix = NULL;
1149 const int ndata = transpose ? nrows : ncols;
1150 const int nelements = transpose ? ncols : nrows;
1151
1152 CODE:
1153 /* ------------------------
1154 * Don't check the parameters, because we rely on the Perl
1155 * caller to check most paramters.
1156 */
1157 /* ------------------------
1158 * Convert data and mask matrices and the weight array
1159 * from C to Perl. Also check for errors, and ignore the
1160 * mask or the weight array if there are any errors.
1161 */
1162 if (is_distance_matrix(aTHX_ data_ref)) {
1163 distancematrix = parse_distance(aTHX_ data_ref, nelements);
1164 if (!distancematrix) {
1165 croak("memory allocation failure in _treecluster\n");
1166 }
1167 } else {
1168 int ok;
1169 ok = malloc_matrices(aTHX_ weight_ref, &weight, ndata,
1170 data_ref, &matrix,
1171 mask_ref, &mask,
1172 nrows, ncols);
1173 if (!ok) {
1174 croak("failed to read input data for _treecluster\n");
1175 }
1176 }
1177
1178 /* ------------------------
1179 * Run the library function
1180 */
1181 nodes = treecluster(nrows, ncols, matrix, mask, weight, transpose,
1182 dist[0], method[0], distancematrix);
1183
1184 /* ------------------------
1185 * Check result to make sure we didn't run into memory problems
1186 */
1187 if(!nodes) {
1188 /* treecluster failed due to insufficient memory */
1189 if (matrix) {
1190 free_matrix_int(mask, nrows);
1191 free_matrix_dbl(matrix, nrows);
1192 free(weight);
1193 } else {
1194 free_ragged_matrix_dbl(distancematrix, nelements);
1195 }
1196 croak("memory allocation failure in treecluster\n");
1197 }
1198 else {
1199
1200 /* ------------------------
1201 * Convert generated C matrices to Perl matrices
1202 */
1203 const int n = nelements-1;
1204 int i;
1205 SV* obj;
1206 Tree* tree;
1207 RETVAL = newSViv(0);
1208 obj = newSVrv(RETVAL, "Algorithm::Cluster::Tree");
1209 tree = malloc(sizeof(Tree));
1210 if (!tree)
1211 croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
1212 tree->n = n;
1213 tree->nodes = malloc(n*sizeof(Node));
1214 if (!tree->nodes) {
1215 free(tree);
1216 croak("Memory allocation failure in Algorithm::Cluster::Tree\n");
1217 }
1218 sv_setiv(obj, PTR2IV(tree));
1219 SvREADONLY_on(obj);
1220 for(i=0; i<n; i++) {
1221 tree->nodes[i].left = nodes[i].left;
1222 tree->nodes[i].right = nodes[i].right;
1223 tree->nodes[i].distance = nodes[i].distance;
1224 }
1225 free(nodes);
1226 }
1227
1228 /* ------------------------
1229 * Free what we've malloc'ed
1230 */
1231 if (matrix) {
1232 free_matrix_int(mask, nrows);
1233 free_matrix_dbl(matrix, nrows);
1234 free(weight);
1235 } else {
1236 free_ragged_matrix_dbl(distancematrix, nelements);
1237 }
1238
1239 /* Finished _treecluster() */
1240 OUTPUT:
1241 RETVAL
1242
1243
1244 void
1245 _kcluster(nclusters,nrows,ncols,data_ref,mask_ref,weight_ref,transpose,npass,method,dist,initialid_ref)
1246 int nclusters;
1247 int nrows;
1248 int ncols;
1249 SV * data_ref;
1250 SV * mask_ref;
1251 SV * weight_ref;
1252 int transpose;
1253 int npass;
1254 char * method;
1255 char * dist;
1256 SV * initialid_ref;
1257
1258 PREINIT:
1259 SV * clusterid_ref;
1260 int * clusterid;
1261 int nobjects;
1262 int ndata;
1263 double error;
1264 int ifound;
1265 int ok;
1266
1267 double * weight;
1268 double ** matrix;
1269 int ** mask;
1270
1271
1272 PPCODE:
1273 /* ------------------------
1274 * Don't check the parameters, because we rely on the Perl
1275 * caller to check most parameters.
1276 */
1277
1278 /* ------------------------
1279 * Malloc space for the return values from the library function
1280 */
1281 if (transpose==0) {
1282 nobjects = nrows;
1283 ndata = ncols;
1284 } else {
1285 nobjects = ncols;
1286 ndata = nrows;
1287 }
1288 clusterid = malloc(nobjects * sizeof(int) );
1289 if (!clusterid) {
1290 croak("memory allocation failure in _kcluster\n");
1291 }
1292
1293 /* ------------------------
1294 * Convert data and mask matrices and the weight array
1295 * from C to Perl. Also check for errors, and ignore the
1296 * mask or the weight array if there are any errors.
1297 */
1298 ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata,
1299 data_ref, &matrix,
1300 mask_ref, &mask,
1301 nrows, ncols);
1302 if (!ok) {
1303 free(clusterid);
1304 croak("failed to read input data for _kcluster\n");
1305 }
1306
1307 /* ------------------------
1308 * Copy initialid to clusterid, if needed
1309 */
1310
1311 if (npass==0) {
1312 copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
1313 }
1314
1315 /* ------------------------
1316 * Run the library function
1317 */
1318 kcluster(
1319 nclusters, nrows, ncols,
1320 matrix, mask, weight, transpose,
1321 npass, method[0], dist[0], clusterid, &error, &ifound
1322
1323 );
1324
1325 /* ------------------------
1326 * Convert generated C matrices to Perl matrices
1327 */
1328 clusterid_ref = row_c2perl_int(aTHX_ clusterid, nobjects);
1329
1330 /* ------------------------
1331 * Push the new Perl matrices onto the return stack
1332 */
1333 XPUSHs(sv_2mortal( clusterid_ref ));
1334 XPUSHs(sv_2mortal( newSVnv(error) ));
1335 XPUSHs(sv_2mortal( newSViv(ifound) ));
1336
1337 /* ------------------------
1338 * Free what we've malloc'ed
1339 */
1340 free(clusterid);
1341 free_matrix_int(mask, nrows);
1342 free_matrix_dbl(matrix, nrows);
1343 free(weight);
1344
1345 /* Finished _kcluster() */
1346
1347
1348
1349 void
1350 _kmedoids(nclusters,nobjects,distancematrix_ref,npass,initialid_ref)
1351 int nclusters;
1352 int nobjects;
1353 SV * distancematrix_ref;
1354 int npass;
1355 SV * initialid_ref;
1356
1357
1358 PREINIT:
1359 double** distancematrix;
1360 SV * clusterid_ref;
1361 int * clusterid;
1362 double error;
1363 int ifound;
1364
1365
1366
1367 PPCODE:
1368 /* ------------------------
1369 * Don't check the parameters, because we rely on the Perl
1370 * caller to check most parameters.
1371 */
1372
1373 /* ------------------------
1374 * Malloc space for the return values from the library function
1375 */
1376 clusterid = malloc(nobjects * sizeof(int));
1377 if (!clusterid) {
1378 croak("memory allocation failure in _kmedoids\n");
1379 }
1380
1381 /* ------------------------
1382 * Convert data and mask matrices and the weight array
1383 * from C to Perl. Also check for errors, and ignore the
1384 * mask or the weight array if there are any errors.
1385 */
1386 distancematrix = parse_distance(aTHX_ distancematrix_ref, nobjects);
1387 if (!distancematrix) {
1388 free(clusterid);
1389 croak("failed to allocate memory for distance matrix in _kmedoids\n");
1390 }
1391
1392 /* ------------------------
1393 * Copy initialid to clusterid, if needed
1394 */
1395
1396 if (npass==0) {
1397 copy_row_perl2c_int(aTHX_ initialid_ref, clusterid);
1398 }
1399
1400 /* ------------------------
1401 * Run the library function
1402 */
1403 kmedoids(
1404 nclusters, nobjects,
1405 distancematrix, npass, clusterid,
1406 &error, &ifound
1407 );
1408
1409 if(ifound==-1) {
1410 free(clusterid);
1411 free_ragged_matrix_dbl(distancematrix, nobjects);
1412 croak("memory allocation failure in _kmedoids\n");
1413 }
1414 else if(ifound==0) {
1415 free(clusterid);
1416 free_ragged_matrix_dbl(distancematrix, nobjects);
1417 croak("error in input arguments in kmedoids\n");
1418 }
1419 else {
1420
1421 /* ------------------------
1422 * Convert generated C matrices to Perl matrices
1423 */
1424 clusterid_ref = row_c2perl_int(aTHX_ clusterid, nobjects);
1425
1426 /* ------------------------
1427 * Push the new Perl matrices onto the return stack
1428 */
1429 XPUSHs(sv_2mortal( clusterid_ref ));
1430 XPUSHs(sv_2mortal( newSVnv(error) ));
1431 XPUSHs(sv_2mortal( newSViv(ifound) ));
1432
1433 }
1434 /* ------------------------
1435 * Free what we've malloc'ed
1436 */
1437 free(clusterid);
1438 free_ragged_matrix_dbl(distancematrix, nobjects);
1439
1440 /* Finished _kmedoids() */
1441
1442
1443
1444 double
1445 _clusterdistance(nrows,ncols,data_ref,mask_ref,weight_ref,cluster1_len,cluster2_len,cluster1_ref,cluster2_ref,dist,method,transpose)
1446 int nrows;
1447 int ncols;
1448 SV * data_ref;
1449 SV * mask_ref;
1450 SV * weight_ref;
1451 int cluster1_len;
1452 int cluster2_len;
1453 SV * cluster1_ref;
1454 SV * cluster2_ref;
1455 char * dist;
1456 char * method;
1457 int transpose;
1458
1459 PREINIT:
1460 int nweights;
1461
1462 int * cluster1;
1463 int * cluster2;
1464
1465 double * weight;
1466 double ** matrix;
1467 int ** mask;
1468
1469 double distance;
1470
1471 int ok;
1472
1473 CODE:
1474
1475 /* ------------------------
1476 * Don't check the parameters, because we rely on the Perl
1477 * caller to check most paramters.
1478 */
1479
1480 /* ------------------------
1481 * Convert cluster index Perl arrays to C arrays
1482 */
1483 cluster1 = malloc_row_perl2c_int(aTHX_ cluster1_ref);
1484 cluster2 = malloc_row_perl2c_int(aTHX_ cluster2_ref);
1485 if (!cluster1 || !cluster2) {
1486 if (cluster1) free(cluster1);
1487 if (cluster2) free(cluster2);
1488 croak("memory allocation failure in _clusterdistance\n");
1489 }
1490
1491 /* ------------------------
1492 * Convert data and mask matrices and the weight array
1493 * from C to Perl. Also check for errors, and ignore the
1494 * mask or the weight array if there are any errors.
1495 * Set nweights to the correct number of weights.
1496 */
1497 nweights = (transpose==0) ? ncols : nrows;
1498 ok = malloc_matrices( aTHX_ weight_ref, &weight, nweights,
1499 data_ref, &matrix,
1500 mask_ref, &mask,
1501 nrows, ncols);
1502 if (!ok) {
1503 free(cluster1);
1504 free(cluster2);
1505 croak("failed to read input data for _clusterdistance\n");
1506 }
1507
1508 /* ------------------------
1509 * Run the library function
1510 */
1511 distance = clusterdistance(
1512 nrows, ncols,
1513 matrix, mask, weight,
1514 cluster1_len, cluster2_len, cluster1, cluster2,
1515 dist[0], method[0], transpose
1516 );
1517
1518 RETVAL = distance;
1519
1520 /* ------------------------
1521 * Free what we've malloc'ed
1522 */
1523 free_matrix_int(mask, nrows);
1524 free_matrix_dbl(matrix, nrows);
1525 free(weight);
1526 free(cluster1);
1527 free(cluster2);
1528
1529 /* Finished _clusterdistance() */
1530
1531 OUTPUT:
1532 RETVAL
1533
1534
1535
1536 void
1537 _clustercentroids(nclusters,nrows,ncols,data_ref,mask_ref,clusterid_ref,transpose,method)
1538 int nclusters;
1539 int nrows;
1540 int ncols;
1541 SV * data_ref;
1542 SV * mask_ref;
1543 SV * clusterid_ref;
1544 int transpose;
1545 char * method;
1546
1547 PREINIT:
1548 SV * cdata_ref;
1549 SV * cmask_ref;
1550 int * clusterid;
1551
1552 double ** matrix;
1553 int ** mask;
1554
1555 double ** cdata;
1556 int ** cmask;
1557 int cnrows = 0; /* Initialize to make the compiler shut up */
1558 int cncols = 0; /* Initialize to make the compiler shut up */
1559
1560 int i;
1561 int ok;
1562
1563 PPCODE:
1564
1565 /* ------------------------
1566 * Don't check the parameters, because we rely on the Perl
1567 * caller to check most paramters.
1568 */
1569 if (transpose==0)
1570 { cnrows = nclusters;
1571 cncols = ncols;
1572 }
1573 else if (transpose==1)
1574 { cnrows = nrows;
1575 cncols = nclusters;
1576 }
1577
1578 /* ------------------------
1579 * Convert cluster index Perl arrays to C arrays
1580 */
1581 clusterid = malloc_row_perl2c_int(aTHX_ clusterid_ref);
1582 if (!clusterid) {
1583 croak("memory allocation failure in _clustercentroids\n");
1584 }
1585
1586 /* ------------------------
1587 * Convert data and mask matrices and the weight array
1588 * from C to Perl. Also check for errors, and ignore the
1589 * mask or the weight array if there are any errors.
1590 * Set nweights to the correct number of weights.
1591 */
1592 ok = malloc_matrices( aTHX_ NULL, NULL, 0,
1593 data_ref, &matrix,
1594 mask_ref, &mask,
1595 nrows, ncols);
1596 if (!ok) {
1597 free(clusterid);
1598 croak("failed to read input data for _clustercentroids\n");
1599 }
1600
1601
1602 /* ------------------------
1603 * Create the output variables cdata and cmask.
1604 */
1605 i = 0;
1606 cdata = malloc(cnrows * sizeof(double*));
1607 cmask = malloc(cnrows * sizeof(int*));
1608 if (cdata && cmask) {
1609 for ( ; i < cnrows; i++) {
1610 cdata[i] = malloc(cncols*sizeof(double));
1611 cmask[i] = malloc(cncols*sizeof(int));
1612 if (!cdata[i] || !cmask[i]) break;
1613 }
1614 }
1615 if (i < cnrows)
1616 {
1617 if (cdata[i]) free(cdata[i]);
1618 if (cmask[i]) free(cmask[i]);
1619 while (--i >= 0) {
1620 free(cdata[i]);
1621 free(cmask[i]);
1622 }
1623 if (cdata) free(cdata);
1624 if (cmask) free(cmask);
1625 free(clusterid);
1626 free_matrix_int(mask, nrows);
1627 free_matrix_dbl(matrix, nrows);
1628 croak("memory allocation failure in _clustercentroids\n");
1629 }
1630
1631 /* ------------------------
1632 * Run the library function
1633 */
1634 ok = getclustercentroids(
1635 nclusters, nrows, ncols,
1636 matrix, mask, clusterid,
1637 cdata, cmask, transpose, method[0]);
1638
1639 if (ok) {
1640 /* ------------------------
1641 * Convert generated C matrices to Perl matrices
1642 */
1643 cdata_ref = matrix_c2perl_dbl(aTHX_ cdata, cnrows, cncols);
1644 cmask_ref = matrix_c2perl_int(aTHX_ cmask, cnrows, cncols);
1645
1646 /* ------------------------
1647 * Push the new Perl matrices onto the return stack
1648 */
1649 XPUSHs(sv_2mortal( cdata_ref ));
1650 XPUSHs(sv_2mortal( cmask_ref ));
1651 }
1652
1653 /* ------------------------
1654 * Free what we've malloc'ed
1655 */
1656 free_matrix_int(mask, nrows);
1657 free_matrix_dbl(matrix, nrows);
1658 free_matrix_int(cmask, cnrows);
1659 free_matrix_dbl(cdata, cnrows);
1660 free(clusterid);
1661
1662 if (!ok) {
1663 croak("memory allocation failure in _clustercentroids\n");
1664 }
1665 /* Finished _clustercentroids() */
1666
1667 void
1668 _distancematrix(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist)
1669 int nrows;
1670 int ncols;
1671 SV * data_ref;
1672 SV * mask_ref;
1673 SV * weight_ref;
1674 int transpose;
1675 char * dist;
1676
1677 PREINIT:
1678 SV * matrix_ref;
1679 int nobjects;
1680 int ndata;
1681
1682 double ** data;
1683 int ** mask;
1684 double * weight;
1685 double ** matrix;
1686
1687 int i;
1688 int ok;
1689
1690
1691 PPCODE:
1692 /* ------------------------
1693 * Don't check the parameters, because we rely on the Perl
1694 * caller to check most parameters.
1695 */
1696
1697 /* ------------------------
1698 * Malloc space for the return values from the library function
1699 */
1700 if (transpose==0) {
1701 nobjects = nrows;
1702 ndata = ncols;
1703 } else {
1704 nobjects = ncols;
1705 ndata = nrows;
1706 }
1707
1708 /* ------------------------
1709 * Convert data and mask matrices and the weight array
1710 * from C to Perl. Also check for errors, and ignore the
1711 * mask or the weight array if there are any errors.
1712 */
1713 ok = malloc_matrices( aTHX_
1714 weight_ref, &weight, ndata,
1715 data_ref, &data,
1716 mask_ref, &mask,
1717 nrows, ncols
1718 );
1719 if (!ok) {
1720 croak("failed to read input data for _distancematrix");
1721 }
1722
1723 /* Set up the ragged array */
1724 matrix = malloc(nobjects*sizeof(double*));
1725 if (matrix) {
1726 matrix[0] = NULL;
1727 for (i = 1; i < nobjects; i++) {
1728 matrix[i] = malloc(i*sizeof(double));
1729 if (matrix[i]==NULL) {
1730 while (--i >= 0) free(matrix[i]);
1731 free(matrix);
1732 matrix = NULL;
1733 break;
1734 }
1735 }
1736 }
1737 if (!matrix) {
1738 free_matrix_int(mask, nrows);
1739 free_matrix_dbl(data, nrows);
1740 free(weight);
1741 croak("failed to allocate memory for distance matrix");
1742 }
1743
1744
1745
1746 /* ------------------------
1747 * Run the library function
1748 */
1749 distancematrix(nrows,
1750 ncols,
1751 data,
1752 mask,
1753 weight,
1754 dist[0],
1755 transpose,
1756 matrix);
1757
1758 /* ------------------------
1759 * Convert generated C matrices to Perl matrices
1760 */
1761 matrix_ref = ragged_matrix_c2perl_dbl(aTHX_ matrix, nobjects);
1762
1763 /* ------------------------
1764 * Push the new Perl matrices onto the return stack
1765 */
1766 XPUSHs(sv_2mortal(matrix_ref));
1767
1768 /* ------------------------
1769 * Free what we've malloc'ed
1770 */
1771 free_ragged_matrix_dbl(matrix, nobjects);
1772 free_matrix_int(mask, nrows);
1773 free_matrix_dbl(data, nrows);
1774 free(weight);
1775
1776 /* Finished _distancematrix() */
1777
1778
1779 void
1780 _somcluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,nxgrid,nygrid,inittau,niter,dist)
1781 int nrows;
1782 int ncols;
1783 SV * data_ref;
1784 SV * mask_ref;
1785 SV * weight_ref;
1786 int transpose;
1787 int nxgrid;
1788 int nygrid;
1789 double inittau;
1790 int niter;
1791 char * dist;
1792
1793 PREINIT:
1794 int (*clusterid)[2];
1795 SV * clusterid_ref;
1796 double*** celldata;
1797
1798 double * weight;
1799 double ** matrix;
1800 int ** mask;
1801
1802 int ok;
1803
1804 int i;
1805 AV * matrix_av;
1806 const int ndata = transpose ? nrows : ncols;
1807 const int nelements = transpose ? ncols : nrows;
1808
1809 PPCODE:
1810 /* ------------------------
1811 * Don't check the parameters, because we rely on the Perl
1812 * caller to check most paramters.
1813 */
1814
1815 /* ------------------------
1816 * Allocate space for clusterid[][2].
1817 */
1818 clusterid = malloc(nelements*sizeof(int[2]));
1819 if (!clusterid) {
1820 croak("memory allocation failure in _somcluster\n");
1821 }
1822 celldata = 0;
1823 /* Don't return celldata, for now at least */
1824
1825
1826 /* ------------------------
1827 * Convert data and mask matrices and the weight array
1828 * from C to Perl. Also check for errors, and ignore the
1829 * mask or the weight array if there are any errors.
1830 * Set nweights to the correct number of weights.
1831 */
1832 ok = malloc_matrices( aTHX_ weight_ref, &weight, ndata,
1833 data_ref, &matrix,
1834 mask_ref, &mask,
1835 nrows, ncols);
1836 if (!ok) {
1837 croak("failed to read input data for _somcluster\n");
1838 }
1839
1840 /* ------------------------
1841 * Run the library function
1842 */
1843 somcluster(
1844 nrows, ncols,
1845 matrix, mask, weight,
1846 transpose, nxgrid, nygrid, inittau, niter,
1847 dist[0], celldata, clusterid
1848 );
1849
1850 /* ------------------------
1851 * Convert generated C matrices to Perl matrices
1852 */
1853 matrix_av = newAV();
1854 for(i=0; i<nelements; ++i) {
1855 SV* row_ref;
1856 AV* row_av = newAV();
1857 av_push(row_av, newSViv(clusterid[i][0]));
1858 av_push(row_av, newSViv(clusterid[i][1]));
1859 row_ref = newRV((SV*)row_av);
1860 av_push(matrix_av, row_ref);
1861 }
1862 clusterid_ref = newRV_noinc((SV*)matrix_av);
1863
1864 /* ------------------------
1865 * Push the new Perl matrices onto the return stack
1866 */
1867 XPUSHs(sv_2mortal( clusterid_ref ));
1868
1869 /* ------------------------
1870 * Free what we've malloc'ed
1871 */
1872 free_matrix_int(mask, nrows);
1873 free_matrix_dbl(matrix, nrows);
1874 free(weight);
1875 free(clusterid);
1876
1877 /* Finished _somcluster() */
1878
1879
1880 void
1881 _pca(nrows, ncols, data_ref)
1882 int nrows;
1883 int ncols;
1884 SV * data_ref;
1885
1886 PREINIT:
1887 double** u;
1888 double** v;
1889 double* w;
1890 double* m;
1891 int i;
1892 int j;
1893 int nmin;
1894 int error;
1895 SV * mean_ref;
1896 SV * coordinates_ref;
1897 SV * pc_ref;
1898 SV * eigenvalues_ref;
1899
1900 PPCODE:
1901 if(SvTYPE(SvRV(data_ref)) != SVt_PVAV) {
1902 croak("argument to _pca is not an array reference\n");
1903 }
1904 nmin = nrows < ncols ? nrows : ncols;
1905 /* -- Create the output variables -------------------------------------- */
1906 u = parse_data(aTHX_ data_ref, NULL);
1907 w = malloc(nmin*sizeof(double));
1908 v = malloc(nmin*sizeof(double*));
1909 m = malloc(ncols*sizeof(double));
1910 if (v) {
1911 for (i = 0; i < nmin; i++) {
1912 v[i] = malloc(nmin*sizeof(double));
1913 if (!v[i]) break;
1914 }
1915 if (i < nmin) { /* then we encountered the break */
1916 while (i-- > 0) free(v[i]);
1917 free(v);
1918 v = NULL;
1919 }
1920 }
1921 if (!u || !v || !w || !m) {
1922 if (u) free(u);
1923 if (v) free(v);
1924 if (w) free(w);
1925 if (m) free(m);
1926 croak("memory allocation failure in _pca\n");
1927 }
1928 /* -- Calculate the mean of each column ------------------------------ */
1929 for (j = 0; j < ncols; j++) {
1930 m[j] = 0.0;
1931 for (i = 0; i < nrows; i++) m[j] += u[i][j];
1932 m[j] /= nrows;
1933 }
1934 /* -- Subtract the mean of each column ------------------------------- */
1935 for (i = 0; i < nrows; i++)
1936 for (j = 0; j < ncols; j++)
1937 u[i][j] -= m[j];
1938 error = pca(nrows, ncols, u, v, w);
1939 if (error==0) {
1940 /* Convert the C variables to Perl variables */
1941 mean_ref = row_c2perl_dbl(aTHX_ m, ncols);
1942 if (nrows >= ncols) {
1943 coordinates_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
1944 pc_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
1945 }
1946 else /* nrows < ncols */ {
1947 pc_ref = matrix_c2perl_dbl(aTHX_ u, nrows, ncols);
1948 coordinates_ref = matrix_c2perl_dbl(aTHX_ v, nmin, nmin);
1949 }
1950 eigenvalues_ref = row_c2perl_dbl(aTHX_ w, nmin);
1951 }
1952 for (i = 0; i < nrows; i++) free(u[i]);
1953 for (i = 0; i < nmin; i++) free(v[i]);
1954 free(u);
1955 free(v);
1956 free(w);
1957 free(m);
1958 if (error==-1)
1959 croak("Insufficient memory for principal components analysis");
1960 if (error > 0)
1961 croak("Singular value decomposition failed to converge");
1962 /* ------------------------
1963 * Push the new Perl matrices onto the return stack
1964 */
1965 XPUSHs(sv_2mortal(mean_ref));
1966 XPUSHs(sv_2mortal(coordinates_ref));
1967 XPUSHs(sv_2mortal(pc_ref));
1968 XPUSHs(sv_2mortal(eigenvalues_ref));
1969