1#' @include dimRedResult-class.R 2#' @include dimRedData-class.R 3 4#' @export 5setGeneric("quality", 6 function (.data, ...) standardGeneric("quality"), 7 valueClass = "numeric") 8 9#' Quality Criteria for dimensionality reduction. 10#' 11#' A collection of functions to compute quality measures on 12#' \code{\link{dimRedResult}} objects. 13#' 14#' @section Implemented methods: 15#' 16#' Method must be one of \code{"\link{Q_local}", "\link{Q_global}", 17#' "\link{mean_R_NX}", "\link{total_correlation}", 18#' "\link{cophenetic_correlation}", "\link{distance_correlation}", 19#' "\link{reconstruction_rmse}"} 20#' 21#' @section Rank based criteria: 22#' 23#' \code{Q_local}, \code{Q_global}, and \code{mean_R_NX} are 24#' quality criteria based on the Co-ranking matrix. \code{Q_local} 25#' and \code{Q_global} determine the local/global quality of the 26#' embedding, while \code{mean_R_NX} determines the quality of the 27#' overall embedding. They are parameter free and return a single 28#' number. The object must include the original data. The number 29#' returns is in the range [0, 1], higher values mean a better 30#' local/global embedding. 31#' 32#' @section Correlation based criteria: 33#' 34#' \code{total_correlation} calculates the sum of the mean squared 35#' correlations of the original axes with the axes in reduced 36#' dimensions, because some methods do not care about correlations 37#' with axes, there is an option to rotate data in reduced space to 38#' maximize this criterium. The number may be greater than one if more 39#' dimensions are summed up. 40#' 41#' \code{cophenetic_correlation} calculate the correlation between the 42#' lower triangles of distance matrices, the correlation and distance 43#' methods may be specified. The result is in range [-1, 1]. 44#' 45#' \code{distance_correlation} measures the independes of samples by 46#' calculating the correlation of distances. For details see 47#' \code{\link[energy]{dcor}}. 48#' 49#' @section Reconstruction error: 50#' 51#' \code{reconstruction_rmse} calculates the root mean squared error 52#' of the reconstrucion. \code{object} requires an inverse function. 53#' 54#' 55#' @references 56#' 57#' Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How 58#' to Evaluate Dimensionality Reduction? - Improving the 59#' Co-ranking Matrix. arXiv:1110.3917 [cs]. 60#' 61#' Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and 62#' testing dependence by correlation of distances. Ann. Statist. 35, 63#' 2769-2794. doi:10.1214/009053607000000505 64#' 65#' Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale 66#' similarities in stochastic neighbour embedding: Reducing 67#' dimensionality while preserving both local and global 68#' structure. Neurocomputing, 169, 69#' 246-261. doi:10.1016/j.neucom.2014.12.095 70#' 71#' 72#' 73#' @param .data object of class \code{dimRedResult} 74#' @param .method character vector naming one of the methods 75#' @param .mute what output from the embedding method should be muted. 76#' @param ... the pameters, internally passed as a list to the 77#' quality method as \code{pars = list(...)} 78#' @return a number 79#' 80#' @examples 81#' \dontrun{ 82#' embed_methods <- dimRedMethodList() 83#' quality_methods <- dimRedQualityList() 84#' scurve <- loadDataSet("Iris") 85#' 86#' quality_results <- matrix(NA, length(embed_methods), length(quality_methods), 87#' dimnames = list(embed_methods, quality_methods)) 88#' embedded_data <- list() 89#' 90#' for (e in embed_methods) { 91#' message("embedding: ", e) 92#' embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output")) 93#' for (q in quality_methods) { 94#' message(" quality: ", q) 95#' quality_results[e, q] <- tryCatch( 96#' quality(embedded_data[[e]], q), 97#' error = function (e) NA 98#' ) 99#' } 100#' } 101#' 102#' print(quality_results) 103#' } 104#' @author Guido Kraemer 105#' @aliases quality quality.dimRedResult 106#' @family Quality scores for dimensionality reduction 107#' @describeIn quality Calculate a quality index from a dimRedResult object. 108#' @export 109setMethod( 110 "quality", 111 "dimRedResult", 112 function (.data, .method = dimRedQualityList(), 113 .mute = character(0), # c("output", "message"), 114 ...) { 115 method <- match.arg(.method) 116 117 methodFunction <- getQualityFunction(method) 118 119 args <- c(list(object = .data), list(...)) 120 121 devnull <- if (Sys.info()["sysname"] != "Windows") 122 "/dev/null" 123 else 124 "NUL" 125 if ("message" %in% .mute){ 126 devnull1 <- file(devnull, "wt") 127 sink(devnull1, type = "message") 128 on.exit({ 129 sink(file = NULL, type = "message") 130 close(devnull1) 131 }, add = TRUE) 132 } 133 if ("output" %in% .mute) { 134 devnull2 <- file(devnull, "wt") 135 sink(devnull2, type = "output") 136 on.exit({ 137 sink() 138 close(devnull2) 139 }, add = TRUE) 140 } 141 142 do.call(methodFunction, args) 143 } 144) 145 146getQualityFunction <- function (method) { 147 switch( 148 method, 149 Q_local = Q_local, 150 Q_global = Q_global, 151 mean_R_NX = mean_R_NX, 152 AUC_lnK_R_NX = AUC_lnK_R_NX, 153 total_correlation = total_correlation, 154 cophenetic_correlation = cophenetic_correlation, 155 distance_correlation = distance_correlation, 156 reconstruction_rmse = reconstruction_rmse 157 ) 158} 159 160 161#' @export 162setGeneric( 163 "Q_local", 164 function(object, ...) standardGeneric("Q_local"), 165 valueClass = "numeric" 166) 167 168 169#' Method Q_local 170#' 171#' Calculate the Q_local score to assess the quality of a dimensionality reduction. 172#' 173#' @param object of class dimRedResult. 174#' @param ndim use the first ndim columns of the embedded data for calculation. 175#' @family Quality scores for dimensionality reduction 176#' @aliases Q_local 177#' @export 178setMethod( 179 "Q_local", 180 "dimRedResult", 181 function (object, ndim = getNDim(object)) { 182 if (!object@has.org.data) stop("object requires original data") 183 chckpkg("coRanking") 184 185 Q <- coRanking::coranking(object@org.data, 186 object@data@data[, seq_len(ndim), drop = FALSE]) 187 nQ <- nrow(Q) 188 N <- nQ + 1 189 190 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N 191 lcmc <- Qnx - seq_len(nQ) / nQ 192 193 Kmax <- which.max(lcmc) 194 195 Qlocal <- sum(Qnx[1:Kmax]) / Kmax 196 return(as.vector(Qlocal)) 197 } 198) 199 200#' @export 201setGeneric( 202 "Q_global", 203 function(object, ...) standardGeneric("Q_global"), 204 valueClass = "numeric" 205) 206 207#' Method Q_global 208#' 209#' Calculate the Q_global score to assess the quality of a dimensionality reduction. 210#' 211#' @param object of class dimRedResult 212#' @family Quality scores for dimensionality reduction 213#' @aliases Q_global 214#' @export 215setMethod( 216 "Q_global", 217 "dimRedResult", 218 function(object){ 219 if (!object@has.org.data) stop("object requires original data") 220 chckpkg("coRanking") 221 222 Q <- coRanking::coranking(object@org.data, object@data@data) 223 nQ <- nrow(Q) 224 N <- nQ + 1 225 226 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N 227 lcmc <- Qnx - seq_len(nQ) / nQ 228 229 Kmax <- which.max(lcmc) 230 231 Qglobal <- sum(Qnx[(Kmax + 1):nQ]) / (N - Kmax) 232 return(Qglobal) 233 } 234) 235 236#' @export 237setGeneric( 238 "mean_R_NX", 239 function(object, ...) standardGeneric("mean_R_NX"), 240 valueClass = "numeric" 241) 242 243#' Method mean_R_NX 244#' 245#' Calculate the mean_R_NX score to assess the quality of a dimensionality reduction. 246#' 247#' @param object of class dimRedResult 248#' @family Quality scores for dimensionality reduction 249#' @aliases mean_R_NX 250#' @export 251setMethod( 252 "mean_R_NX", 253 "dimRedResult", 254 function(object) mean(R_NX(object)) 255) 256 257#' @export 258setGeneric( 259 "AUC_lnK_R_NX", 260 function(object, ...) standardGeneric("AUC_lnK_R_NX"), 261 valueClass = "numeric" 262) 263 264#' Method AUC_lnK_R_NX 265#' 266#' Calculate the Area under the R_NX(ln K), used in Lee et. al. (2015). Note 267#' that despite the name, this does not weight the mean by the logarithm, but by 268#' 1/K. If explicit weighting by the logarithm is desired use \code{weight = 269#' "log"} or \code{weight = "log10"} 270#' 271#' The naming confusion originated from equation 17 in Lee et al (2015) and the 272#' name of this method may change in the future to avoid confusion. 273#' 274#' @references Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. 275#' Multi-scale similarities in stochastic neighbour embedding: Reducing 276#' dimensionality while preserving both local and global structure. 277#' Neurocomputing 169, 246-261. https://doi.org/10.1016/j.neucom.2014.12.095 278#' 279#' @param object of class dimRedResult 280#' @param weight the weight function used, one of \code{c("inv", "log", "log10")} 281#' @family Quality scores for dimensionality reduction 282#' @aliases AUC_lnK_R_NX 283#' @export 284setMethod( 285 "AUC_lnK_R_NX", 286 "dimRedResult", 287 function(object, weight = "inv") { 288 rnx <- R_NX(object) 289 290 weight <- match.arg(weight, c("inv", "ln", "log", "log10")) 291 switch( 292 weight, 293 inv = auc_ln_k_inv(rnx), 294 log = auc_log_k(rnx), 295 ln = auc_log_k(rnx), 296 log10 = auc_log10_k(rnx), 297 stop("wrong parameter for weight") 298 ) 299 } 300) 301 302auc_ln_k_inv <- function(rnx) { 303 Ks <- seq_along(rnx) 304 return (sum(rnx / Ks) / sum(1 / Ks)) 305} 306 307auc_log_k <- function(rnx) { 308 Ks <- seq_along(rnx) 309 return (sum(rnx * log(Ks)) / sum(log(Ks))) 310} 311 312auc_log10_k <- function(rnx) { 313 Ks <- seq_along(rnx) 314 return (sum(rnx * log10(Ks)) / sum(log10(Ks))) 315} 316 317 318#' @export 319setGeneric( 320 "total_correlation", 321 function(object, ...) standardGeneric("total_correlation"), 322 valueClass = "numeric" 323) 324 325#' Method total_correlation 326#' 327#' Calculate the total correlation of the variables with the axes to 328#' assess the quality of a dimensionality reduction. 329#' 330#' @param object of class dimRedResult 331#' @param naxes the number of axes to use for optimization. 332#' @param cor_method the correlation method to use. 333#' @param is.rotated if FALSE the object is rotated. 334#' 335#' @family Quality scores for dimensionality reduction 336#' @aliases total_correlation 337#' @export 338setMethod( 339 "total_correlation", 340 "dimRedResult", 341 function(object, 342 naxes = ndims(object), 343 cor_method = "pearson", 344 is.rotated = FALSE){ 345 346 if (!object@has.org.data) stop("object requires original data") 347 if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data)) 348 stop("naxes must specify the numbers of axes to optimize for, ", 349 "i.e. a single integer between 1 and ncol(object@data@data)") 350 ## try to partially match cor_method: 351 cor_methods <- c("pearson", "kendall", "spearman") 352 cor_method <- cor_methods[pmatch(cor_method, cor_methods)] 353 if (is.na(cor_method)) 354 stop("cor_method must match one of ", 355 "'pearson', 'kendall', or 'spearman', ", 356 "at least partially.") 357 358 if (!is.rotated) { 359 rotated_result <- maximize_correlation( 360 object, naxes, cor_method 361 ) 362 } else { 363 rotated_result <- object 364 } 365 366 res <- 0 367 for (i in 1:naxes) 368 res <- res + mean(correlate( 369 rotated_result@data@data, 370 rotated_result@org.data, 371 cor_method 372 )[i, ] ^ 2) 373 374 return(res) 375 } 376) 377 378setGeneric("cophenetic_correlation", 379 function(object, ...) standardGeneric("cophenetic_correlation"), 380 valueClass = "numeric") 381 382#' Method cophenetic_correlation 383#' 384#' Calculate the correlation between the distance matrices in high and 385#' low dimensioal space. 386#' 387#' @param object of class dimRedResult 388#' @param d the distance function to use. 389#' @param cor_method The correlation method. 390#' @aliases cophenetic_correlation 391#' @family Quality scores for dimensionality reduction 392#' @export 393setMethod( 394 "cophenetic_correlation", 395 "dimRedResult", 396 function(object, d = stats::dist, cor_method = "pearson"){ 397 ## if (missing(d)) d <- stats::dist 398 ## if (missing(cor_method)) cor_method <- "pearson" 399 if (!object@has.org.data) stop("object requires original data") 400 cor_methods <- c("pearson", "kendall", "spearman") 401 cor_method <- cor_methods[pmatch(cor_method, cor_methods)] 402 if (is.na(cor_method)) 403 stop("cor_method must match one of ", 404 "'pearson', 'kendall', or 'spearman', ", 405 "at least partially.") 406 407 d.org <- d(object@org.data) 408 d.emb <- d(object@data@data) 409 410 if (!inherits(d.org, "dist") || !inherits(d.emb, "dist")) 411 stop("d must return a dist object") 412 413 res <- correlate( 414 d(object@org.data), 415 d(object@data@data), 416 cor_method 417 ) 418 return(res) 419 } 420) 421 422#' @export 423setGeneric( 424 "distance_correlation", 425 function(object) standardGeneric("distance_correlation"), 426 valueClass = "numeric" 427) 428 429#' Method distance_correlation 430#' 431#' Calculate the distance correlation between the distance matrices in 432#' high and low dimensioal space. 433#' 434#' @param object of class dimRedResult 435#' @aliases distance_correlation 436#' @family Quality scores for dimensionality reduction 437#' @export 438setMethod( 439 "distance_correlation", 440 "dimRedResult", 441 function(object){ 442 if (!object@has.org.data) stop("object requires original data") 443 chckpkg("energy") 444 445 energy::dcor(object@org.data, object@data@data) 446 } 447) 448 449 450 451#' @export 452setGeneric( 453 "reconstruction_rmse", 454 function(object) standardGeneric("reconstruction_rmse"), 455 valueClass = "numeric" 456) 457 458#' Method reconstruction_rmse 459#' 460#' Calculate the reconstruction root mean squared error a dimensionality reduction, the method must have an inverse mapping. 461#' 462#' @param object of class dimRedResult 463#' @aliases reconstruction_rmse 464#' @family Quality scores for dimensionality reduction 465#' @export 466setMethod( 467 "reconstruction_rmse", 468 "dimRedResult", 469 function(object){ 470 if (!object@has.org.data) stop("object requires original data") 471 if (!object@has.inverse) stop("object requires an inverse function") 472 473 recon <- object@inverse(object@data) 474 475 rmse(recon@data, object@org.data) 476 } 477) 478 479#' @rdname quality 480#' 481#' @export 482dimRedQualityList <- function () { 483 return(c("Q_local", 484 "Q_global", 485 "mean_R_NX", 486 "AUC_lnK_R_NX", 487 "total_correlation", 488 "cophenetic_correlation", 489 "distance_correlation", 490 "reconstruction_rmse")) 491} 492 493#' @export 494setGeneric( 495 "R_NX", 496 function(object, ...) standardGeneric("R_NX"), 497 valueClass = "numeric" 498) 499 500#' Method R_NX 501#' 502#' Calculate the R_NX score from Lee et. al. (2013) which shows the neighborhood 503#' preservation for the Kth nearest neighbors, corrected for random point 504#' distributions and scaled to range [0, 1]. 505#' @param object of class dimRedResult 506#' @param ndim the number of dimensions to take from the embedded data. 507#' @family Quality scores for dimensionality reduction 508#' @aliases R_NX 509#' @export 510setMethod( 511 "R_NX", 512 "dimRedResult", 513 function(object, ndim = getNDim(object)) { 514 chckpkg("coRanking") 515 if (!object@has.org.data) stop("object requires original data") 516 517 Q <- coRanking::coranking(object@org.data, 518 object@data@data[, seq_len(ndim), 519 drop = FALSE]) 520 nQ <- nrow(Q) 521 N <- nQ + 1 522 523 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / 524 seq_len(nQ) / N 525 526 Rnx <- ((N - 1) * Qnx - seq_len(nQ)) / 527 (N - 1 - seq_len(nQ)) 528 Rnx[-nQ] 529 } 530) 531 532#' @export 533setGeneric( 534 "Q_NX", 535 function(object, ...) standardGeneric("Q_NX"), 536 valueClass = "numeric" 537) 538 539#' Method Q_NX 540#' 541#' Calculate the Q_NX score (Chen & Buja 2006, the notation in the 542#' publication is M_k). Which is the fraction of points that remain inside 543#' the same K-ary neighborhood in high and low dimensional space. 544#' 545#' @param object of class dimRedResult 546#' @family Quality scores for dimensionality reduction 547#' @aliases Q_NX 548#' @export 549setMethod( 550 "Q_NX", 551 "dimRedResult", 552 function(object) { 553 chckpkg("coRanking") 554 555 Q <- coRanking::coranking(object@org.data, object@data@data) 556 nQ <- nrow(Q) 557 N <- nQ + 1 558 559 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N 560 Qnx 561 } 562) 563 564#'@export 565setGeneric( 566 "LCMC", 567 function(object, ...) standardGeneric("LCMC"), 568 valueClass = "numeric" 569) 570 571#' Method LCMC 572#' 573#' Calculates the Local Continuity Meta Criterion, which is 574#' \code{\link{Q_NX}} adjusted for random overlap inside the K-ary 575#' neighborhood. 576#' 577#' @param object of class dimRedResult 578#' @family Quality scores for dimensionality reduction 579#' @aliases LCMC 580#' @export 581setMethod( 582 "LCMC", 583 "dimRedResult", 584 function(object) { 585 chckpkg("coRanking") 586 587 Q <- coRanking::coranking(object@org.data, object@data@data) 588 nQ <- nrow(Q) 589 N <- nQ + 1 590 591 lcmc <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / 592 seq_len(nQ) / N - 593 seq_len(nQ) / nQ 594 lcmc 595 } 596) 597 598rnx2qnx <- function(rnx, K = seq_along(rnx), N = length(rnx) + 1) { 599 (rnx * (N - 1 - K) + K) / (N - 1) 600} 601qnx2rnx <- function(qnx, K = seq_along(qnx), N = length(qnx) + 1) { 602 ((N - 1) * qnx - K) / (N - 1 - K) 603} 604 605#' @export 606setGeneric( 607 "reconstruction_error", 608 function(object, ...) standardGeneric("reconstruction_error"), 609 valueClass = "numeric" 610) 611 612#' Method reconstruction_error 613#' 614#' Calculate the error using only the first \code{n} dimensions of the embedded 615#' data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to 616#' calculate the root mean square error or the mean absolute error respectively, 617#' or a function that takes to equally sized vectors as input and returns a 618#' single number as output. 619#' 620#' @param object of class dimRedResult 621#' @param n a positive integer or vector of integers \code{<= ndims(object)} 622#' @param error_fun a function or string indicating an error function, if 623#' indication a function it must take to matrices of the same size and return 624#' a scalar. 625#' @return a vector of number with the same length as \code{n} with the 626#' 627#' @examples 628#' \dontrun{ 629#' ir <- loadDataSet("Iris") 630#' ir.drr <- embed(ir, "DRR", ndim = ndims(ir)) 631#' ir.pca <- embed(ir, "PCA", ndim = ndims(ir)) 632#' 633#' rmse <- data.frame( 634#' rmse_drr = reconstruction_error(ir.drr), 635#' rmse_pca = reconstruction_error(ir.pca) 636#' ) 637#' 638#' matplot(rmse, type = "l") 639#' plot(ir) 640#' plot(ir.drr) 641#' plot(ir.pca) 642#' } 643#' @author Guido Kraemer 644#' @family Quality scores for dimensionality reduction 645#' @aliases reconstruction_error 646#' @export 647setMethod( 648 "reconstruction_error", 649 c("dimRedResult"), 650 function (object, n = seq_len(ndims(object)), error_fun = "rmse") { 651 if (any(n > ndims(object))) stop("n > ndims(object)") 652 if (any(n < 1)) stop("n < 1") 653 654 ef <- if (inherits(error_fun, "character")) { 655 switch( 656 error_fun, 657 rmse = rmse, 658 mae = mae 659 ) 660 } else if (inherits(error_fun, "function")) { 661 error_fun 662 } else { 663 stop("error_fun must be a string or function, see documentation for details") 664 } 665 666 res <- numeric(length(n)) 667 org <- getData(getOrgData(object)) 668 for (i in seq_along(n)) { 669 rec <- getData(inverse( 670 object, getData(getDimRedData(object))[, seq_len(n[i]), drop = FALSE] 671 )) 672 res[i] <- ef(org, rec) 673 } 674 res 675 } 676) 677 678rmse <- function (x1, x2) sqrt(mean((x1 - x2) ^ 2)) 679mae <- function (x1, x2) mean(abs(x1 - x2)) 680