1
2## -----------------------------------------------------------------------
3##
4##   IGraph R package
5##   Copyright (C) 2015  Gabor Csardi <csardi.gabor@gmail.com>
6##   334 Harvard street, Cambridge, MA 02139 USA
7##
8##   This program is free software; you can redistribute it and/or modify
9##   it under the terms of the GNU General Public License as published by
10##   the Free Software Foundation; either version 2 of the License, or
11##   (at your option) any later version.
12##
13##   This program is distributed in the hope that it will be useful,
14##   but WITHOUT ANY WARRANTY; without even the implied warranty of
15##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16##   GNU General Public License for more details.
17##
18##   You should have received a copy of the GNU General Public License
19##   along with this program; if not, write to the Free Software
20##   Foundation, Inc.,  51 Franklin Street, Fifth Floor, Boston, MA
21##   02110-1301 USA
22##
23## -----------------------------------------------------------------------
24
25#' Spectral Embedding of Adjacency Matrices
26#'
27#' Spectral decomposition of the adjacency matrices of graphs.
28#'
29#' This function computes a \code{no}-dimensional Euclidean representation of
30#' the graph based on its adjacency matrix, \eqn{A}. This representation is
31#' computed via the singular value decomposition of the adjacency matrix,
32#' \eqn{A=UDV^T}.In the case, where the graph is a random dot product graph
33#' generated using latent position vectors in \eqn{R^{no}} for each vertex, the
34#' embedding will provide an estimate of these latent vectors.
35#'
36#' For undirected graphs the latent positions are calculated as
37#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])}, where \eqn{U^{no}}{U[no]} equals
38#' to the first \code{no} columns of \eqn{U}, and \eqn{D^{1/2}}{sqrt(D[no])} is
39#' a diagonal matrix containing the top \code{no} singular values on the
40#' diagonal.
41#'
42#' For directed graphs the embedding is defined as the pair
43#' \eqn{X=U^{no}D^{1/2}}{U[no] sqrt(D[no])} and \eqn{Y=V^{no}D^{1/2}}{V[no]
44#' sqrt(D[no])}. (For undirected graphs \eqn{U=V}, so it is enough to keep one
45#' of them.)
46#'
47#' @param graph The input graph, directed or undirected.
48#' @param no An integer scalar. This value is the embedding dimension of the
49#' spectral embedding. Should be smaller than the number of vertices. The
50#' largest \code{no}-dimensional non-zero singular values are used for the
51#' spectral embedding.
52#' @param weights Optional positive weight vector for calculating a weighted
53#' embedding. If the graph has a \code{weight} edge attribute, then this is
54#' used by default. In a weighted embedding, the edge weights are used instead
55#' of the binary adjacencny matrix.
56#' @param which Which eigenvalues (or singular values, for directed graphs) to
57#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is
58#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest
59#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs
60#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values
61#' are used for the ordering.
62#' @param scaled Logical scalar, if \code{FALSE}, then \eqn{U} and \eqn{V} are
63#' returned instead of \eqn{X} and \eqn{Y}.
64#' @param cvec A numeric vector, its length is the number vertices in the
65#' graph. This vector is added to the diagonal of the adjacency matrix.
66#' @param options A named list containing the parameters for the SVD
67#' computation algorithm in ARPACK. By default, the list of values is assigned
68#' the values given by \code{\link{igraph.arpack.default}}.
69#' @return A list containing with entries: \item{X}{Estimated latent positions,
70#' an \code{n} times \code{no} matrix, \code{n} is the number of vertices.}
71#' \item{Y}{\code{NULL} for undirected graphs, the second half of the latent
72#' positions for directed graphs, an \code{n} times \code{no} matrix, \code{n}
73#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs)
74#' or the singular values (for directed graphs) calculated by the algorithm.}
75#' \item{options}{A named list, information about the underlying ARPACK
76#' computation. See \code{\link{arpack}} for the details.}
77#' @seealso \code{\link{sample_dot_product}}
78#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  A
79#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,
80#' \emph{Journal of the American Statistical Association}, Vol. 107(499), 2012
81#' @keywords graphs
82#' @examples
83#'
84#' ## A small graph
85#' lpvs <- matrix(rnorm(200), 20, 10)
86#' lpvs <- apply(lpvs, 2, function(x) { return (abs(x)/sqrt(sum(x^2))) })
87#' RDP <- sample_dot_product(lpvs)
88#' embed <- embed_adjacency_matrix(RDP, 5)
89#' @export
90#' @include auto.R
91
92embed_adjacency_matrix <- embed_adjacency_matrix
93
94
95#' Dimensionality selection for singular values using profile likelihood.
96#'
97#' Select the number of significant singular values, by finding the
98#' \sQuote{elbow} of the scree plot, in a principled way.
99#'
100#' The input of the function is a numeric vector which contains the measure of
101#' \sQuote{importance} for each dimension.
102#'
103#' For spectral embedding, these are the singular values of the adjacency
104#' matrix. The singular values are assumed to be generated from a Gaussian
105#' mixture distribution with two components that have different means and same
106#' variance. The dimensionality \eqn{d} is chosen to maximize the likelihood
107#' when the \eqn{d} largest singular values are assigned to one component of
108#' the mixture and the rest of the singular values assigned to the other
109#' component.
110#'
111#' This function can also be used for the general separation problem, where we
112#' assume that the left and the right of the vector are coming from two Normal
113#' distributions, with different means, and we want to know their border. See
114#' examples below.
115#'
116#' @param sv A numeric vector, the ordered singular values.
117#' @return A numeric scalar, the estimate of \eqn{d}.
118#' @author Gabor Csardi \email{csardi.gabor@@gmail.com}
119#' @seealso \code{\link{embed_adjacency_matrix}}
120#' @references M. Zhu, and A. Ghodsi (2006). Automatic dimensionality selection
121#' from the scree plot via the use of profile likelihood. \emph{Computational
122#' Statistics and Data Analysis}, Vol. 51, 918--930.
123#' @keywords graphs
124#' @examples
125#'
126#' # Generate the two groups of singular values with
127#' # Gaussian mixture of two components that have different means
128#' sing.vals  <- c( rnorm (10, mean=1, sd=1), rnorm(10, mean=3, sd=1) )
129#' dim.chosen <- dim_select(sing.vals)
130#' dim.chosen
131#'
132#' # Sample random vectors with multivariate normal distribution
133#' # and normalize to unit length
134#' lpvs <- matrix(rnorm(200), 10, 20)
135#' lpvs <- apply(lpvs, 2, function(x) { (abs(x) / sqrt(sum(x^2))) })
136#' RDP.graph  <- sample_dot_product(lpvs)
137#' dim_select( embed_adjacency_matrix(RDP.graph, 10)$D )
138#'
139#' # Sample random vectors with the Dirichlet distribution
140#' lpvs.dir    <- sample_dirichlet(n=20, rep(1, 10))
141#' RDP.graph.2 <- sample_dot_product(lpvs.dir)
142#' dim_select( embed_adjacency_matrix(RDP.graph.2, 10)$D )
143#'
144#' # Sample random vectors from hypersphere with radius 1.
145#' lpvs.sph    <- sample_sphere_surface(dim=10, n=20, radius=1)
146#' RDP.graph.3 <- sample_dot_product(lpvs.sph)
147#' dim_select( embed_adjacency_matrix(RDP.graph.3, 10)$D )
148#'
149#' @export
150
151dim_select <- dim_select
152
153
154#' Spectral Embedding of the Laplacian of a Graph
155#'
156#' Spectral decomposition of Laplacian matrices of graphs.
157#'
158#' This function computes a \code{no}-dimensional Euclidean representation of
159#' the graph based on its Laplacian matrix, \eqn{L}. This representation is
160#' computed via the singular value decomposition of the Laplacian matrix.
161#'
162#' They are essentially doing the same as \code{\link{embed_adjacency_matrix}},
163#' but work on the Laplacian matrix, instead of the adjacency matrix.
164#'
165#' @param graph The input graph, directed or undirected.
166#' @param no An integer scalar. This value is the embedding dimension of the
167#' spectral embedding. Should be smaller than the number of vertices. The
168#' largest \code{no}-dimensional non-zero singular values are used for the
169#' spectral embedding.
170#' @param weights Optional positive weight vector for calculating a weighted
171#' embedding. If the graph has a \code{weight} edge attribute, then this is
172#' used by default. For weighted embedding, edge weights are used instead
173#' of the binary adjacency matrix, and vertex strength (see
174#' \code{\link{strength}}) is used instead of the degrees.
175#' @param which Which eigenvalues (or singular values, for directed graphs) to
176#' use. \sQuote{lm} means the ones with the largest magnitude, \sQuote{la} is
177#' the ones (algebraic) largest, and \sQuote{sa} is the (algebraic) smallest
178#' eigenvalues. The default is \sQuote{lm}. Note that for directed graphs
179#' \sQuote{la} and \sQuote{lm} are the equivalent, because the singular values
180#' are used for the ordering.
181#' @param degmode TODO
182#' @param type The type of the Laplacian to use. Various definitions exist for
183#' the Laplacian of a graph, and one can choose between them with this
184#' argument.
185#'
186#' Possible values: \code{D-A} means \eqn{D-A} where \eqn{D} is the degree
187#' matrix and \eqn{A} is the adjacency matrix; \code{DAD} means
188#' \eqn{D^{1/2}}{D^1/2} times \eqn{A} times \eqn{D^{1/2}{D^1/2}},
189#' \eqn{D^{1/2}}{D^1/2} is the inverse of the square root of the degree matrix;
190#' \code{I-DAD} means \eqn{I-D^{1/2}}{I-D^1/2}, where \eqn{I} is the identity
191#' matrix.  \code{OAP} is \eqn{O^{1/2}AP^{1/2}}{O^1/2 A P^1/2}, where
192#' \eqn{O^{1/2}}{O^1/2} is the inverse of the square root of the out-degree
193#' matrix and \eqn{P^{1/2}}{P^1/2} is the same for the in-degree matrix.
194#'
195#' \code{OAP} is not defined for undirected graphs, and is the only defined type
196#' for directed graphs.
197#'
198#' The default (i.e. type \code{default}) is to use \code{D-A} for undirected
199#' graphs and \code{OAP} for directed graphs.
200#' @param scaled Logical scalar, if \code{FALSE}, then \eqn{U} and \eqn{V} are
201#' returned instead of \eqn{X} and \eqn{Y}.
202#' @param options A named list containing the parameters for the SVD
203#' computation algorithm in ARPACK. By default, the list of values is assigned
204#' the values given by \code{\link{igraph.arpack.default}}.
205#' @return A list containing with entries: \item{X}{Estimated latent positions,
206#' an \code{n} times \code{no} matrix, \code{n} is the number of vertices.}
207#' \item{Y}{\code{NULL} for undirected graphs, the second half of the latent
208#' positions for directed graphs, an \code{n} times \code{no} matrix, \code{n}
209#' is the number of vertices.} \item{D}{The eigenvalues (for undirected graphs)
210#' or the singular values (for directed graphs) calculated by the algorithm.}
211#' \item{options}{A named list, information about the underlying ARPACK
212#' computation. See \code{\link{arpack}} for the details.}
213#' @author Gabor Csardi \email{csardi.gabor@@gmail.com}
214#' @seealso \code{\link{embed_adjacency_matrix}},
215#' \code{\link{sample_dot_product}}
216#' @references Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  A
217#' Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,
218#' \emph{Journal of the American Statistical Association}, Vol. 107(499), 2012
219#' @keywords graphs
220#' @examples
221#'
222#' ## A small graph
223#' lpvs <- matrix(rnorm(200), 20, 10)
224#' lpvs <- apply(lpvs, 2, function(x) { return (abs(x)/sqrt(sum(x^2))) })
225#' RDP <- sample_dot_product(lpvs)
226#' embed <- embed_laplacian_matrix(RDP, 5)
227
228embed_laplacian_matrix <- embed_laplacian_matrix
229
230
231#' Sample vectors uniformly from the surface of a sphere
232#'
233#' Sample finite-dimensional vectors to use as latent position vectors in
234#' random dot product graphs
235#'
236#' \code{sample_sphere_surface} generates uniform samples from \eqn{S^{dim-1}}
237#' (the \code{(dim-1)}-sphere) with radius \code{radius}, i.e. the Euclidean
238#' norm of the samples equal \code{radius}.
239#'
240#' @param dim Integer scalar, the dimension of the random vectors.
241#' @param n Integer scalar, the sample size.
242#' @param radius Numeric scalar, the radius of the sphere to sample.
243#' @param positive Logical scalar, whether to sample from the positive orthant
244#'   of the sphere.
245#' @return A \code{dim} (length of the \code{alpha} vector for
246#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample
247#' vectors.
248#'
249#' @family latent position vector samplers
250#'
251#' @export
252#' @examples
253#' lpvs.sph    <- sample_sphere_surface(dim=10, n=20, radius=1)
254#' RDP.graph.3 <- sample_dot_product(lpvs.sph)
255#' vec.norm    <- apply(lpvs.sph, 2, function(x) { sum(x^2) })
256#' vec.norm
257
258sample_sphere_surface <- function(dim, n=1, radius=1, positive=TRUE) {
259  # Argument checks
260  dim <- as.integer(dim)
261  n <- as.integer(n)
262  radius <- as.numeric(radius)
263  positive <- as.logical(positive)
264
265  on.exit( .Call(C_R_igraph_finalizer) )
266  # Function call
267  res <- .Call(C_R_igraph_sample_sphere_surface, dim, n, radius, positive)
268
269  res
270}
271
272#' Sample vectors uniformly from the volume of a sphere
273#'
274#' Sample finite-dimensional vectors to use as latent position vectors in
275#' random dot product graphs
276#'
277#' \code{sample_sphere_volume} generates uniform samples from \eqn{S^{dim-1}}
278#' (the \code{(dim-1)}-sphere) i.e. the Euclidean norm of the samples is
279#' smaller or equal to \code{radius}.
280#'
281#' @param dim Integer scalar, the dimension of the random vectors.
282#' @param n Integer scalar, the sample size.
283#' @param radius Numeric scalar, the radius of the sphere to sample.
284#' @param positive Logical scalar, whether to sample from the positive orthant
285#'   of the sphere.
286#' @return A \code{dim} (length of the \code{alpha} vector for
287#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample
288#' vectors.
289#'
290#' @family latent position vector samplers
291#'
292#' @export
293#' @examples
294#' lpvs.sph.vol <- sample_sphere_volume(dim=10, n=20, radius=1)
295#' RDP.graph.4  <- sample_dot_product(lpvs.sph.vol)
296#' vec.norm     <- apply(lpvs.sph.vol, 2, function(x) { sum(x^2) })
297#' vec.norm
298
299sample_sphere_volume <- function(dim, n=1, radius=1, positive=TRUE) {
300  # Argument checks
301  dim <- as.integer(dim)
302  n <- as.integer(n)
303  radius <- as.numeric(radius)
304  positive <- as.logical(positive)
305
306  on.exit( .Call(C_R_igraph_finalizer) )
307  # Function call
308  res <- .Call(C_R_igraph_sample_sphere_volume, dim, n, radius, positive)
309
310  res
311}
312
313#' Sample from a Dirichlet distribution
314#'
315#' Sample finite-dimensional vectors to use as latent position vectors in
316#' random dot product graphs
317#'
318#' \code{sample_dirichlet} generates samples from the Dirichlet distribution
319#' with given \eqn{\alpha}{alpha} parameter. The sample is drawn from
320#' \code{length(alpha)-1}-simplex.
321#'
322#' @param n Integer scalar, the sample size.
323#' @param alpha Numeric vector, the vector of \eqn{\alpha}{alpha} parameter for
324#' the Dirichlet distribution.
325#' @return A \code{dim} (length of the \code{alpha} vector for
326#' \code{sample_dirichlet}) times \code{n} matrix, whose columns are the sample
327#' vectors.
328#'
329#' @family latent position vector samplers
330#'
331#' @export
332#' @examples
333#' lpvs.dir    <- sample_dirichlet(n=20, alpha=rep(1, 10))
334#' RDP.graph.2 <- sample_dot_product(lpvs.dir)
335#' colSums(lpvs.dir)
336
337sample_dirichlet <- function(n, alpha) {
338  # Argument checks
339  n <- as.integer(n)
340  alpha <- as.numeric(alpha)
341
342  on.exit( .Call(C_R_igraph_finalizer) )
343  # Function call
344  res <- .Call(C_R_igraph_sample_dirichlet, n, alpha)
345
346  res
347}
348