1#' @include dimRedResult-class.R
2#' @include dimRedData-class.R
3
4#' @export
5setGeneric("quality",
6           function (.data, ...) standardGeneric("quality"),
7           valueClass = "numeric")
8
9#' Quality Criteria for dimensionality reduction.
10#'
11#' A collection of functions to compute quality measures on
12#' \code{\link{dimRedResult}} objects.
13#'
14#' @section Implemented methods:
15#'
16#' Method must be one of \code{"\link{Q_local}", "\link{Q_global}",
17#' "\link{mean_R_NX}", "\link{total_correlation}",
18#' "\link{cophenetic_correlation}", "\link{distance_correlation}",
19#' "\link{reconstruction_rmse}"}
20#'
21#' @section Rank based criteria:
22#'
23#' \code{Q_local}, \code{Q_global}, and \code{mean_R_NX} are
24#' quality criteria based on the Co-ranking matrix.  \code{Q_local}
25#' and \code{Q_global} determine the local/global quality of the
26#' embedding, while \code{mean_R_NX} determines the quality of the
27#' overall embedding. They are parameter free and return a single
28#' number. The object must include the original data.  The number
29#' returns is in the range [0, 1], higher values mean a better
30#' local/global embedding.
31#'
32#' @section Correlation based criteria:
33#'
34#' \code{total_correlation} calculates the sum of the mean squared
35#' correlations of the original axes with the axes in reduced
36#' dimensions, because some methods do not care about correlations
37#' with axes, there is an option to rotate data in reduced space to
38#' maximize this criterium. The number may be greater than one if more
39#' dimensions are summed up.
40#'
41#' \code{cophenetic_correlation} calculate the correlation between the
42#' lower triangles of distance matrices, the correlation and distance
43#' methods may be specified. The result is in range [-1, 1].
44#'
45#' \code{distance_correlation} measures the independes of samples by
46#' calculating the correlation of distances. For details see
47#' \code{\link[energy]{dcor}}.
48#'
49#' @section Reconstruction error:
50#'
51#' \code{reconstruction_rmse} calculates the root mean squared error
52#' of the reconstrucion. \code{object} requires an inverse function.
53#'
54#'
55#' @references
56#'
57#' Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How
58#'     to Evaluate Dimensionality Reduction? - Improving the
59#'     Co-ranking Matrix. arXiv:1110.3917 [cs].
60#'
61#' Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and
62#'     testing dependence by correlation of distances. Ann. Statist. 35,
63#'     2769-2794. doi:10.1214/009053607000000505
64#'
65#' Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale
66#'     similarities in stochastic neighbour embedding: Reducing
67#'     dimensionality while preserving both local and global
68#'     structure. Neurocomputing, 169,
69#'     246-261. doi:10.1016/j.neucom.2014.12.095
70#'
71#'
72#'
73#' @param .data object of class \code{dimRedResult}
74#' @param .method character vector naming one of the methods
75#' @param .mute what output from the embedding method should be muted.
76#' @param ... the pameters, internally passed as a list to the
77#'     quality method as \code{pars = list(...)}
78#' @return a number
79#'
80#' @examples
81#' \dontrun{
82#' embed_methods <- dimRedMethodList()
83#' quality_methods <- dimRedQualityList()
84#' scurve <- loadDataSet("Iris")
85#'
86#' quality_results <- matrix(NA, length(embed_methods), length(quality_methods),
87#'                               dimnames = list(embed_methods, quality_methods))
88#' embedded_data <- list()
89#'
90#' for (e in embed_methods) {
91#'   message("embedding: ", e)
92#'   embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output"))
93#'   for (q in quality_methods) {
94#'     message("  quality: ", q)
95#'     quality_results[e, q] <- tryCatch(
96#'       quality(embedded_data[[e]], q),
97#'       error = function (e) NA
98#'     )
99#'   }
100#' }
101#'
102#' print(quality_results)
103#' }
104#' @author Guido Kraemer
105#' @aliases quality quality.dimRedResult
106#' @family Quality scores for dimensionality reduction
107#' @describeIn quality Calculate a quality index from a dimRedResult object.
108#' @export
109setMethod(
110    "quality",
111    "dimRedResult",
112    function (.data, .method = dimRedQualityList(),
113              .mute = character(0), # c("output", "message"),
114              ...) {
115        method <-  match.arg(.method)
116
117        methodFunction <- getQualityFunction(method)
118
119        args <- c(list(object = .data), list(...))
120
121        devnull <- if (Sys.info()["sysname"] != "Windows")
122                       "/dev/null"
123                   else
124                       "NUL"
125        if ("message" %in% .mute){
126            devnull1 <- file(devnull,  "wt")
127            sink(devnull1, type = "message")
128            on.exit({
129                sink(file = NULL, type = "message")
130                close(devnull1)
131            }, add = TRUE)
132        }
133        if ("output" %in% .mute) {
134            devnull2 <- file(devnull,  "wt")
135            sink(devnull2, type = "output")
136            on.exit({
137                sink()
138                close(devnull2)
139            }, add = TRUE)
140        }
141
142        do.call(methodFunction, args)
143    }
144)
145
146getQualityFunction <- function (method) {
147    switch(
148        method,
149        Q_local                = Q_local,
150        Q_global               = Q_global,
151        mean_R_NX              = mean_R_NX,
152        AUC_lnK_R_NX           = AUC_lnK_R_NX,
153        total_correlation      = total_correlation,
154        cophenetic_correlation = cophenetic_correlation,
155        distance_correlation   = distance_correlation,
156        reconstruction_rmse    = reconstruction_rmse
157    )
158}
159
160
161#' @export
162setGeneric(
163    "Q_local",
164    function(object, ...) standardGeneric("Q_local"),
165    valueClass = "numeric"
166)
167
168
169#' Method Q_local
170#'
171#' Calculate the Q_local score to assess the quality of a dimensionality reduction.
172#'
173#' @param object of class dimRedResult.
174#' @param ndim use the first ndim columns of the embedded data for calculation.
175#' @family Quality scores for dimensionality reduction
176#' @aliases Q_local
177#' @export
178setMethod(
179    "Q_local",
180    "dimRedResult",
181    function (object, ndim = getNDim(object)) {
182        if (!object@has.org.data) stop("object requires original data")
183        chckpkg("coRanking")
184
185        Q <- coRanking::coranking(object@org.data,
186                                  object@data@data[, seq_len(ndim), drop = FALSE])
187        nQ <- nrow(Q)
188        N <- nQ + 1
189
190        Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
191        lcmc <- Qnx - seq_len(nQ) / nQ
192
193        Kmax <- which.max(lcmc)
194
195        Qlocal <- sum(Qnx[1:Kmax]) / Kmax
196        return(as.vector(Qlocal))
197    }
198)
199
200#' @export
201setGeneric(
202    "Q_global",
203    function(object, ...) standardGeneric("Q_global"),
204    valueClass = "numeric"
205)
206
207#' Method Q_global
208#'
209#' Calculate the Q_global score to assess the quality of a dimensionality reduction.
210#'
211#' @param object of class dimRedResult
212#' @family Quality scores for dimensionality reduction
213#' @aliases Q_global
214#' @export
215setMethod(
216    "Q_global",
217    "dimRedResult",
218    function(object){
219        if (!object@has.org.data) stop("object requires original data")
220        chckpkg("coRanking")
221
222        Q <- coRanking::coranking(object@org.data, object@data@data)
223        nQ <- nrow(Q)
224        N <- nQ + 1
225
226        Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
227        lcmc <- Qnx - seq_len(nQ) / nQ
228
229        Kmax <- which.max(lcmc)
230
231        Qglobal <- sum(Qnx[(Kmax + 1):nQ]) / (N - Kmax)
232        return(Qglobal)
233    }
234)
235
236#' @export
237setGeneric(
238    "mean_R_NX",
239    function(object, ...) standardGeneric("mean_R_NX"),
240    valueClass = "numeric"
241)
242
243#' Method mean_R_NX
244#'
245#' Calculate the mean_R_NX score to assess the quality of a dimensionality reduction.
246#'
247#' @param object of class dimRedResult
248#' @family Quality scores for dimensionality reduction
249#' @aliases mean_R_NX
250#' @export
251setMethod(
252    "mean_R_NX",
253    "dimRedResult",
254    function(object) mean(R_NX(object))
255)
256
257#' @export
258setGeneric(
259    "AUC_lnK_R_NX",
260    function(object, ...) standardGeneric("AUC_lnK_R_NX"),
261    valueClass = "numeric"
262)
263
264#' Method AUC_lnK_R_NX
265#'
266#' Calculate the Area under the R_NX(ln K), used in Lee et. al. (2015). Note
267#' that despite the name, this does not weight the mean by the logarithm, but by
268#' 1/K. If explicit weighting by the logarithm is desired use \code{weight =
269#' "log"} or \code{weight = "log10"}
270#'
271#' The naming confusion originated from equation 17 in Lee et al (2015) and the
272#' name of this method may change in the future to avoid confusion.
273#'
274#' @references Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015.
275#'   Multi-scale similarities in stochastic neighbour embedding: Reducing
276#'   dimensionality while preserving both local and global structure.
277#'   Neurocomputing 169, 246-261. https://doi.org/10.1016/j.neucom.2014.12.095
278#'
279#' @param object of class dimRedResult
280#' @param weight the weight function used, one of \code{c("inv", "log", "log10")}
281#' @family Quality scores for dimensionality reduction
282#' @aliases AUC_lnK_R_NX
283#' @export
284setMethod(
285    "AUC_lnK_R_NX",
286    "dimRedResult",
287    function(object, weight = "inv") {
288        rnx <- R_NX(object)
289
290        weight <- match.arg(weight, c("inv", "ln", "log", "log10"))
291        switch(
292          weight,
293          inv   = auc_ln_k_inv(rnx),
294          log   = auc_log_k(rnx),
295          ln    = auc_log_k(rnx),
296          log10 = auc_log10_k(rnx),
297          stop("wrong parameter for weight")
298        )
299    }
300)
301
302auc_ln_k_inv <- function(rnx) {
303    Ks <- seq_along(rnx)
304    return (sum(rnx / Ks) / sum(1 / Ks))
305}
306
307auc_log_k <- function(rnx) {
308    Ks <- seq_along(rnx)
309    return (sum(rnx * log(Ks)) / sum(log(Ks)))
310}
311
312auc_log10_k <- function(rnx) {
313  Ks <- seq_along(rnx)
314  return (sum(rnx * log10(Ks)) / sum(log10(Ks)))
315}
316
317
318#' @export
319setGeneric(
320    "total_correlation",
321    function(object, ...) standardGeneric("total_correlation"),
322    valueClass = "numeric"
323)
324
325#' Method total_correlation
326#'
327#' Calculate the total correlation of the variables with the axes to
328#' assess the quality of a dimensionality reduction.
329#'
330#' @param object of class dimRedResult
331#' @param naxes the number of axes to use for optimization.
332#' @param cor_method the correlation method to use.
333#' @param is.rotated if FALSE the object is rotated.
334#'
335#' @family Quality scores for dimensionality reduction
336#' @aliases total_correlation
337#' @export
338setMethod(
339    "total_correlation",
340    "dimRedResult",
341    function(object,
342             naxes = ndims(object),
343             cor_method = "pearson",
344             is.rotated = FALSE){
345
346        if (!object@has.org.data) stop("object requires original data")
347        if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data))
348            stop("naxes must specify the numbers of axes to optimize for, ",
349                 "i.e. a single integer between 1 and ncol(object@data@data)")
350        ## try to partially match cor_method:
351        cor_methods <- c("pearson", "kendall", "spearman")
352        cor_method <- cor_methods[pmatch(cor_method, cor_methods)]
353        if (is.na(cor_method))
354            stop("cor_method must match one of ",
355                 "'pearson', 'kendall', or 'spearman', ",
356                 "at least partially.")
357
358        if (!is.rotated) {
359            rotated_result <- maximize_correlation(
360                object, naxes, cor_method
361            )
362        } else {
363            rotated_result <- object
364        }
365
366        res <- 0
367        for (i in 1:naxes)
368            res <- res + mean(correlate(
369                             rotated_result@data@data,
370                             rotated_result@org.data,
371                             cor_method
372                         )[i, ] ^ 2)
373
374        return(res)
375    }
376)
377
378setGeneric("cophenetic_correlation",
379           function(object, ...) standardGeneric("cophenetic_correlation"),
380           valueClass = "numeric")
381
382#' Method cophenetic_correlation
383#'
384#' Calculate the correlation between the distance matrices in high and
385#' low dimensioal space.
386#'
387#' @param object of class dimRedResult
388#' @param d the distance function to use.
389#' @param cor_method The correlation method.
390#' @aliases cophenetic_correlation
391#' @family Quality scores for dimensionality reduction
392#' @export
393setMethod(
394    "cophenetic_correlation",
395    "dimRedResult",
396    function(object, d = stats::dist, cor_method = "pearson"){
397        ## if (missing(d)) d <- stats::dist
398        ## if (missing(cor_method)) cor_method <- "pearson"
399        if (!object@has.org.data) stop("object requires original data")
400        cor_methods <- c("pearson", "kendall", "spearman")
401        cor_method <- cor_methods[pmatch(cor_method, cor_methods)]
402        if (is.na(cor_method))
403            stop("cor_method must match one of ",
404                 "'pearson', 'kendall', or 'spearman', ",
405                 "at least partially.")
406
407        d.org <- d(object@org.data)
408        d.emb <- d(object@data@data)
409
410        if (!inherits(d.org, "dist") || !inherits(d.emb, "dist"))
411            stop("d must return a dist object")
412
413        res <- correlate(
414            d(object@org.data),
415            d(object@data@data),
416            cor_method
417        )
418        return(res)
419    }
420)
421
422#' @export
423setGeneric(
424    "distance_correlation",
425    function(object) standardGeneric("distance_correlation"),
426    valueClass = "numeric"
427)
428
429#' Method distance_correlation
430#'
431#' Calculate the distance correlation between the distance matrices in
432#' high and low dimensioal space.
433#'
434#' @param object of class dimRedResult
435#' @aliases distance_correlation
436#' @family Quality scores for dimensionality reduction
437#' @export
438setMethod(
439    "distance_correlation",
440    "dimRedResult",
441    function(object){
442        if (!object@has.org.data) stop("object requires original data")
443        chckpkg("energy")
444
445        energy::dcor(object@org.data, object@data@data)
446    }
447)
448
449
450
451#' @export
452setGeneric(
453    "reconstruction_rmse",
454    function(object) standardGeneric("reconstruction_rmse"),
455    valueClass = "numeric"
456)
457
458#' Method reconstruction_rmse
459#'
460#' Calculate the reconstruction root mean squared error a dimensionality reduction, the method must have an inverse mapping.
461#'
462#' @param object of class dimRedResult
463#' @aliases reconstruction_rmse
464#' @family Quality scores for dimensionality reduction
465#' @export
466setMethod(
467    "reconstruction_rmse",
468    "dimRedResult",
469    function(object){
470        if (!object@has.org.data) stop("object requires original data")
471        if (!object@has.inverse) stop("object requires an inverse function")
472
473        recon <- object@inverse(object@data)
474
475        rmse(recon@data, object@org.data)
476    }
477)
478
479#' @rdname quality
480#'
481#' @export
482dimRedQualityList <- function () {
483    return(c("Q_local",
484             "Q_global",
485             "mean_R_NX",
486             "AUC_lnK_R_NX",
487             "total_correlation",
488             "cophenetic_correlation",
489             "distance_correlation",
490             "reconstruction_rmse"))
491}
492
493#' @export
494setGeneric(
495    "R_NX",
496    function(object, ...) standardGeneric("R_NX"),
497    valueClass = "numeric"
498)
499
500#' Method R_NX
501#'
502#' Calculate the R_NX score from Lee et. al. (2013) which shows the neighborhood
503#' preservation for the Kth nearest neighbors, corrected for random point
504#' distributions and scaled to range [0, 1].
505#' @param object of class dimRedResult
506#' @param ndim the number of dimensions to take from the embedded data.
507#' @family Quality scores for dimensionality reduction
508#' @aliases R_NX
509#' @export
510setMethod(
511    "R_NX",
512    "dimRedResult",
513    function(object, ndim = getNDim(object)) {
514        chckpkg("coRanking")
515        if (!object@has.org.data) stop("object requires original data")
516
517        Q <- coRanking::coranking(object@org.data,
518                                  object@data@data[, seq_len(ndim),
519                                                   drop = FALSE])
520        nQ <- nrow(Q)
521        N <- nQ + 1
522
523        Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) /
524            seq_len(nQ) / N
525
526        Rnx <- ((N - 1) * Qnx - seq_len(nQ)) /
527            (N - 1 - seq_len(nQ))
528        Rnx[-nQ]
529    }
530)
531
532#' @export
533setGeneric(
534    "Q_NX",
535    function(object, ...) standardGeneric("Q_NX"),
536    valueClass = "numeric"
537)
538
539#' Method Q_NX
540#'
541#' Calculate the Q_NX score (Chen & Buja 2006, the notation in the
542#' publication is M_k). Which is the fraction of points that remain inside
543#' the same K-ary neighborhood in high and low dimensional space.
544#'
545#' @param object of class dimRedResult
546#' @family Quality scores for dimensionality reduction
547#' @aliases Q_NX
548#' @export
549setMethod(
550    "Q_NX",
551    "dimRedResult",
552    function(object) {
553        chckpkg("coRanking")
554
555        Q <- coRanking::coranking(object@org.data, object@data@data)
556        nQ <- nrow(Q)
557        N <- nQ + 1
558
559        Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
560        Qnx
561    }
562)
563
564#'@export
565setGeneric(
566    "LCMC",
567    function(object, ...) standardGeneric("LCMC"),
568    valueClass = "numeric"
569)
570
571#' Method LCMC
572#'
573#' Calculates the Local Continuity Meta Criterion, which is
574#' \code{\link{Q_NX}} adjusted for random overlap inside the K-ary
575#' neighborhood.
576#'
577#' @param object of class dimRedResult
578#' @family Quality scores for dimensionality reduction
579#' @aliases LCMC
580#' @export
581setMethod(
582    "LCMC",
583    "dimRedResult",
584        function(object) {
585        chckpkg("coRanking")
586
587        Q <- coRanking::coranking(object@org.data, object@data@data)
588        nQ <- nrow(Q)
589        N <- nQ + 1
590
591        lcmc <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) /
592            seq_len(nQ) / N -
593            seq_len(nQ) / nQ
594        lcmc
595    }
596)
597
598rnx2qnx <- function(rnx, K = seq_along(rnx), N = length(rnx) + 1) {
599    (rnx * (N - 1 - K) + K) / (N - 1)
600}
601qnx2rnx <- function(qnx, K = seq_along(qnx), N = length(qnx) + 1) {
602    ((N - 1) * qnx - K) / (N - 1 - K)
603}
604
605#' @export
606setGeneric(
607  "reconstruction_error",
608  function(object, ...) standardGeneric("reconstruction_error"),
609  valueClass = "numeric"
610)
611
612#' Method reconstruction_error
613#'
614#' Calculate the error using only the first \code{n} dimensions of the embedded
615#' data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to
616#' calculate the root mean square error or the mean absolute error respectively,
617#' or a function that takes to equally sized vectors as input and returns a
618#' single number as output.
619#'
620#' @param object of class dimRedResult
621#' @param n a positive integer or vector of integers \code{<= ndims(object)}
622#' @param error_fun a function or string indicating an error function, if
623#'   indication a function it must take to matrices of the same size and return
624#'   a scalar.
625#' @return a vector of number with the same length as \code{n} with the
626#'
627#' @examples
628#' \dontrun{
629#' ir <- loadDataSet("Iris")
630#' ir.drr <- embed(ir, "DRR", ndim = ndims(ir))
631#' ir.pca <- embed(ir, "PCA", ndim = ndims(ir))
632#'
633#' rmse <- data.frame(
634#'   rmse_drr = reconstruction_error(ir.drr),
635#'   rmse_pca = reconstruction_error(ir.pca)
636#' )
637#'
638#' matplot(rmse, type = "l")
639#' plot(ir)
640#' plot(ir.drr)
641#' plot(ir.pca)
642#' }
643#' @author Guido Kraemer
644#' @family Quality scores for dimensionality reduction
645#' @aliases reconstruction_error
646#' @export
647setMethod(
648  "reconstruction_error",
649  c("dimRedResult"),
650  function (object, n = seq_len(ndims(object)), error_fun = "rmse") {
651    if (any(n > ndims(object))) stop("n > ndims(object)")
652    if (any(n < 1))             stop("n < 1")
653
654    ef <- if (inherits(error_fun, "character")) {
655      switch(
656        error_fun,
657        rmse = rmse,
658        mae  = mae
659      )
660    } else if (inherits(error_fun, "function")) {
661      error_fun
662    } else {
663      stop("error_fun must be a string or function, see documentation for details")
664    }
665
666    res <- numeric(length(n))
667    org <- getData(getOrgData(object))
668    for (i in seq_along(n)) {
669      rec <- getData(inverse(
670        object, getData(getDimRedData(object))[, seq_len(n[i]), drop = FALSE]
671      ))
672      res[i] <- ef(org, rec)
673    }
674    res
675  }
676)
677
678rmse <- function (x1, x2) sqrt(mean((x1 - x2) ^ 2))
679mae  <- function (x1, x2) mean(abs(x1 - x2))
680