1 2## ----------------------------------------------------------------------- 3## 4## IGraph R package 5## Copyright (C) 2015 Gabor Csardi <csardi.gabor@gmail.com> 6## 334 Harvard street, Cambridge, MA 02139 USA 7## 8## This program is free software; you can redistribute it and/or modify 9## it under the terms of the GNU General Public License as published by 10## the Free Software Foundation; either version 2 of the License, or 11## (at your option) any later version. 12## 13## This program is distributed in the hope that it will be useful, 14## but WITHOUT ANY WARRANTY; without even the implied warranty of 15## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16## GNU General Public License for more details. 17## 18## You should have received a copy of the GNU General Public License 19## along with this program; if not, write to the Free Software 20## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 21## 02110-1301 USA 22## 23## ----------------------------------------------------------------------- 24 25#' Spectral Embedding of Adjacency Matrices 26#' 27#' Spectral decomposition of the adjacency matrices of graphs. 28#' 29#' This function computes a \code{no}-dimensional Euclidean representation of 30#' the graph based on its adjacency matrix, \eqn{A}. This representation is 31#' computed via the singular value decomposition of the adjacency matrix, 32#' \eqn{A=UDV^T}.In the case, where the graph is a random dot product graph 33#' generated using latent position vectors in \eqn{R^{no}} for each vertex, the 34#' embedding will provide an estimate of these latent vectors. 35#' 36#' For undirected graphs the latent positions are calculated as 37#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])}, where \eqn{U^{no}}{U[no]} equals 38#' to the first \code{no} columns of \eqn{U}, and \eqn{D^{1/2}}{sqrt(D[no])} is 39#' a diagonal matrix containing the top \code{no} singular values on the 40#' diagonal. 41#' 42#' For directed graphs the embedding is defined as the pair 43#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])} and \eqn{Y=V^{no}D^{1/2}}{V[no] 44#' sqrt(D[no])}. (For undirected graphs \eqn{U=V}, so it is enough to keep one 45#' of them.) 46#' 47#' @param graph The input graph, directed or undirected. 48#' @param no An integer scalar. This value is the embedding dimension of the 49#' spectral embedding. Should be smaller than the number of vertices. The 50#' largest \code{no}-dimensional non-zero singular values are used for the 51#' spectral embedding. 52#' @param weights Optional positive weight vector for calculating a weighted 53#' embedding. If the graph has a \code{weight} edge attribute, then this is 54#' used by default. In a weighted embedding, the edge weights are used instead 55#' of the binary adjacencny matrix. 56#' @param which Which eigenvalues (or singular values, for directed graphs) to 57#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is 58#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest 59#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs 60#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values 61#' are used for the ordering. 62#' @param scaled Logical scalar, if \code{FALSE}, then \eqn{U} and \eqn{V} are 63#' returned instead of \eqn{X} and \eqn{Y}. 64#' @param cvec A numeric vector, its length is the number vertices in the 65#' graph. This vector is added to the diagonal of the adjacency matrix. 66#' @param options A named list containing the parameters for the SVD 67#' computation algorithm in ARPACK. By default, the list of values is assigned 68#' the values given by \code{\link{igraph.arpack.default}}. 69#' @return A list containing with entries: \item{X}{Estimated latent positions, 70#' an \code{n} times \code{no} matrix, \code{n} is the number of vertices.} 71#' \item{Y}{\code{NULL} for undirected graphs, the second half of the latent 72#' positions for directed graphs, an \code{n} times \code{no} matrix, \code{n} 73#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs) 74#' or the singular values (for directed graphs) calculated by the algorithm.} 75#' \item{options}{A named list, information about the underlying ARPACK 76#' computation. See \code{\link{arpack}} for the details.} 77#' @seealso \code{\link{sample_dot_product}} 78#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. A 79#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs, 80#' \emph{Journal of the American Statistical Association}, Vol. 107(499), 2012 81#' @keywords graphs 82#' @examples 83#' 84#' ## A small graph 85#' lpvs <- matrix(rnorm(200), 20, 10) 86#' lpvs <- apply(lpvs, 2, function(x) { return (abs(x)/sqrt(sum(x^2))) }) 87#' RDP <- sample_dot_product(lpvs) 88#' embed <- embed_adjacency_matrix(RDP, 5) 89#' @export 90#' @include auto.R 91 92embed_adjacency_matrix <- embed_adjacency_matrix 93 94 95#' Dimensionality selection for singular values using profile likelihood. 96#' 97#' Select the number of significant singular values, by finding the 98#' \sQuote{elbow} of the scree plot, in a principled way. 99#' 100#' The input of the function is a numeric vector which contains the measure of 101#' \sQuote{importance} for each dimension. 102#' 103#' For spectral embedding, these are the singular values of the adjacency 104#' matrix. The singular values are assumed to be generated from a Gaussian 105#' mixture distribution with two components that have different means and same 106#' variance. The dimensionality \eqn{d} is chosen to maximize the likelihood 107#' when the \eqn{d} largest singular values are assigned to one component of 108#' the mixture and the rest of the singular values assigned to the other 109#' component. 110#' 111#' This function can also be used for the general separation problem, where we 112#' assume that the left and the right of the vector are coming from two Normal 113#' distributions, with different means, and we want to know their border. See 114#' examples below. 115#' 116#' @param sv A numeric vector, the ordered singular values. 117#' @return A numeric scalar, the estimate of \eqn{d}. 118#' @author Gabor Csardi \email{csardi.gabor@@gmail.com} 119#' @seealso \code{\link{embed_adjacency_matrix}} 120#' @references M. Zhu, and A. Ghodsi (2006). Automatic dimensionality selection 121#' from the scree plot via the use of profile likelihood. \emph{Computational 122#' Statistics and Data Analysis}, Vol. 51, 918--930. 123#' @keywords graphs 124#' @examples 125#' 126#' # Generate the two groups of singular values with 127#' # Gaussian mixture of two components that have different means 128#' sing.vals <- c( rnorm (10, mean=1, sd=1), rnorm(10, mean=3, sd=1) ) 129#' dim.chosen <- dim_select(sing.vals) 130#' dim.chosen 131#' 132#' # Sample random vectors with multivariate normal distribution 133#' # and normalize to unit length 134#' lpvs <- matrix(rnorm(200), 10, 20) 135#' lpvs <- apply(lpvs, 2, function(x) { (abs(x) / sqrt(sum(x^2))) }) 136#' RDP.graph <- sample_dot_product(lpvs) 137#' dim_select( embed_adjacency_matrix(RDP.graph, 10)$D ) 138#' 139#' # Sample random vectors with the Dirichlet distribution 140#' lpvs.dir <- sample_dirichlet(n=20, rep(1, 10)) 141#' RDP.graph.2 <- sample_dot_product(lpvs.dir) 142#' dim_select( embed_adjacency_matrix(RDP.graph.2, 10)$D ) 143#' 144#' # Sample random vectors from hypersphere with radius 1. 145#' lpvs.sph <- sample_sphere_surface(dim=10, n=20, radius=1) 146#' RDP.graph.3 <- sample_dot_product(lpvs.sph) 147#' dim_select( embed_adjacency_matrix(RDP.graph.3, 10)$D ) 148#' 149#' @export 150 151dim_select <- dim_select 152 153 154#' Spectral Embedding of the Laplacian of a Graph 155#' 156#' Spectral decomposition of Laplacian matrices of graphs. 157#' 158#' This function computes a \code{no}-dimensional Euclidean representation of 159#' the graph based on its Laplacian matrix, \eqn{L}. This representation is 160#' computed via the singular value decomposition of the Laplacian matrix. 161#' 162#' They are essentially doing the same as \code{\link{embed_adjacency_matrix}}, 163#' but work on the Laplacian matrix, instead of the adjacency matrix. 164#' 165#' @param graph The input graph, directed or undirected. 166#' @param no An integer scalar. This value is the embedding dimension of the 167#' spectral embedding. Should be smaller than the number of vertices. The 168#' largest \code{no}-dimensional non-zero singular values are used for the 169#' spectral embedding. 170#' @param weights Optional positive weight vector for calculating a weighted 171#' embedding. If the graph has a \code{weight} edge attribute, then this is 172#' used by default. For weighted embedding, edge weights are used instead 173#' of the binary adjacency matrix, and vertex strength (see 174#' \code{\link{strength}}) is used instead of the degrees. 175#' @param which Which eigenvalues (or singular values, for directed graphs) to 176#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is 177#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest 178#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs 179#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values 180#' are used for the ordering. 181#' @param degmode TODO 182#' @param type The type of the Laplacian to use. Various definitions exist for 183#' the Laplacian of a graph, and one can choose between them with this 184#' argument. 185#' 186#' Possible values: \code{D-A} means \eqn{D-A} where \eqn{D} is the degree 187#' matrix and \eqn{A} is the adjacency matrix; \code{DAD} means 188#' \eqn{D^{1/2}}{D^1/2} times \eqn{A} times \eqn{D^{1/2}{D^1/2}}, 189#' \eqn{D^{1/2}}{D^1/2} is the inverse of the square root of the degree matrix; 190#' \code{I-DAD} means \eqn{I-D^{1/2}}{I-D^1/2}, where \eqn{I} is the identity 191#' matrix. \code{OAP} is \eqn{O^{1/2}AP^{1/2}}{O^1/2 A P^1/2}, where 192#' \eqn{O^{1/2}}{O^1/2} is the inverse of the square root of the out-degree 193#' matrix and \eqn{P^{1/2}}{P^1/2} is the same for the in-degree matrix. 194#' 195#' \code{OAP} is not defined for undirected graphs, and is the only defined type 196#' for directed graphs. 197#' 198#' The default (i.e. type \code{default}) is to use \code{D-A} for undirected 199#' graphs and \code{OAP} for directed graphs. 200#' @param scaled Logical scalar, if \code{FALSE}, then \eqn{U} and \eqn{V} are 201#' returned instead of \eqn{X} and \eqn{Y}. 202#' @param options A named list containing the parameters for the SVD 203#' computation algorithm in ARPACK. By default, the list of values is assigned 204#' the values given by \code{\link{igraph.arpack.default}}. 205#' @return A list containing with entries: \item{X}{Estimated latent positions, 206#' an \code{n} times \code{no} matrix, \code{n} is the number of vertices.} 207#' \item{Y}{\code{NULL} for undirected graphs, the second half of the latent 208#' positions for directed graphs, an \code{n} times \code{no} matrix, \code{n} 209#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs) 210#' or the singular values (for directed graphs) calculated by the algorithm.} 211#' \item{options}{A named list, information about the underlying ARPACK 212#' computation. See \code{\link{arpack}} for the details.} 213#' @author Gabor Csardi \email{csardi.gabor@@gmail.com} 214#' @seealso \code{\link{embed_adjacency_matrix}}, 215#' \code{\link{sample_dot_product}} 216#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. A 217#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs, 218#' \emph{Journal of the American Statistical Association}, Vol. 107(499), 2012 219#' @keywords graphs 220#' @examples 221#' 222#' ## A small graph 223#' lpvs <- matrix(rnorm(200), 20, 10) 224#' lpvs <- apply(lpvs, 2, function(x) { return (abs(x)/sqrt(sum(x^2))) }) 225#' RDP <- sample_dot_product(lpvs) 226#' embed <- embed_laplacian_matrix(RDP, 5) 227 228embed_laplacian_matrix <- embed_laplacian_matrix 229 230 231#' Sample vectors uniformly from the surface of a sphere 232#' 233#' Sample finite-dimensional vectors to use as latent position vectors in 234#' random dot product graphs 235#' 236#' \code{sample_sphere_surface} generates uniform samples from \eqn{S^{dim-1}} 237#' (the \code{(dim-1)}-sphere) with radius \code{radius}, i.e. the Euclidean 238#' norm of the samples equal \code{radius}. 239#' 240#' @param dim Integer scalar, the dimension of the random vectors. 241#' @param n Integer scalar, the sample size. 242#' @param radius Numeric scalar, the radius of the sphere to sample. 243#' @param positive Logical scalar, whether to sample from the positive orthant 244#' of the sphere. 245#' @return A \code{dim} (length of the \code{alpha} vector for 246#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample 247#' vectors. 248#' 249#' @family latent position vector samplers 250#' 251#' @export 252#' @examples 253#' lpvs.sph <- sample_sphere_surface(dim=10, n=20, radius=1) 254#' RDP.graph.3 <- sample_dot_product(lpvs.sph) 255#' vec.norm <- apply(lpvs.sph, 2, function(x) { sum(x^2) }) 256#' vec.norm 257 258sample_sphere_surface <- function(dim, n=1, radius=1, positive=TRUE) { 259 # Argument checks 260 dim <- as.integer(dim) 261 n <- as.integer(n) 262 radius <- as.numeric(radius) 263 positive <- as.logical(positive) 264 265 on.exit( .Call(C_R_igraph_finalizer) ) 266 # Function call 267 res <- .Call(C_R_igraph_sample_sphere_surface, dim, n, radius, positive) 268 269 res 270} 271 272#' Sample vectors uniformly from the volume of a sphere 273#' 274#' Sample finite-dimensional vectors to use as latent position vectors in 275#' random dot product graphs 276#' 277#' \code{sample_sphere_volume} generates uniform samples from \eqn{S^{dim-1}} 278#' (the \code{(dim-1)}-sphere) i.e. the Euclidean norm of the samples is 279#' smaller or equal to \code{radius}. 280#' 281#' @param dim Integer scalar, the dimension of the random vectors. 282#' @param n Integer scalar, the sample size. 283#' @param radius Numeric scalar, the radius of the sphere to sample. 284#' @param positive Logical scalar, whether to sample from the positive orthant 285#' of the sphere. 286#' @return A \code{dim} (length of the \code{alpha} vector for 287#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample 288#' vectors. 289#' 290#' @family latent position vector samplers 291#' 292#' @export 293#' @examples 294#' lpvs.sph.vol <- sample_sphere_volume(dim=10, n=20, radius=1) 295#' RDP.graph.4 <- sample_dot_product(lpvs.sph.vol) 296#' vec.norm <- apply(lpvs.sph.vol, 2, function(x) { sum(x^2) }) 297#' vec.norm 298 299sample_sphere_volume <- function(dim, n=1, radius=1, positive=TRUE) { 300 # Argument checks 301 dim <- as.integer(dim) 302 n <- as.integer(n) 303 radius <- as.numeric(radius) 304 positive <- as.logical(positive) 305 306 on.exit( .Call(C_R_igraph_finalizer) ) 307 # Function call 308 res <- .Call(C_R_igraph_sample_sphere_volume, dim, n, radius, positive) 309 310 res 311} 312 313#' Sample from a Dirichlet distribution 314#' 315#' Sample finite-dimensional vectors to use as latent position vectors in 316#' random dot product graphs 317#' 318#' \code{sample_dirichlet} generates samples from the Dirichlet distribution 319#' with given \eqn{\alpha}{alpha} parameter. The sample is drawn from 320#' \code{length(alpha)-1}-simplex. 321#' 322#' @param n Integer scalar, the sample size. 323#' @param alpha Numeric vector, the vector of \eqn{\alpha}{alpha} parameter for 324#' the Dirichlet distribution. 325#' @return A \code{dim} (length of the \code{alpha} vector for 326#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample 327#' vectors. 328#' 329#' @family latent position vector samplers 330#' 331#' @export 332#' @examples 333#' lpvs.dir <- sample_dirichlet(n=20, alpha=rep(1, 10)) 334#' RDP.graph.2 <- sample_dot_product(lpvs.dir) 335#' colSums(lpvs.dir) 336 337sample_dirichlet <- function(n, alpha) { 338 # Argument checks 339 n <- as.integer(n) 340 alpha <- as.numeric(alpha) 341 342 on.exit( .Call(C_R_igraph_finalizer) ) 343 # Function call 344 res <- .Call(C_R_igraph_sample_dirichlet, n, alpha) 345 346 res 347} 348