1
2#   IGraph R package
3#   Copyright (C) 2011-2012  Gabor Csardi <csardi.gabor@gmail.com>
4#   334 Harvard street, Cambridge, MA 02139 USA
5#
6#   This program is free software; you can redistribute it and/or modify
7#   it under the terms of the GNU General Public License as published by
8#   the Free Software Foundation; either version 2 of the License, or
9#   (at your option) any later version.
10#
11#   This program is distributed in the hope that it will be useful,
12#   but WITHOUT ANY WARRANTY; without even the implied warranty of
13#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14#   GNU General Public License for more details.
15#
16#   You should have received a copy of the GNU General Public License
17#   along with this program; if not, write to the Free Software
18#   Foundation, Inc.,  51 Franklin Street, Fifth Floor, Boston, MA
19#   02110-1301 USA
20#
21###################################################################
22
23makeNexusDatasetInfo <- function(entries) {
24  dsi <- lapply(entries, "[", 2)
25  nam <- sapply(entries, "[", 1)
26
27  attr <- nam=="attribute"
28  myattr <- unlist(dsi[attr])
29  dsi <- dsi[!attr]
30  nam <- nam[!attr]
31  names(dsi) <- nam
32  class(dsi) <- "nexusDatasetInfo"
33
34  if (length(myattr) != 0) {
35    myattr <- strsplit(myattr, "\n", fixed=TRUE)
36    attrdat <- lapply(myattr, function(x) strsplit(x[1], " ")[[1]])
37    myattr <- sapply(myattr, "[", 2)
38    dsi$attributes <- mapply(attrdat, myattr, SIMPLIFY=FALSE,
39                             FUN=function(dat, desc) {
40                               list(type=dat[1], datatype=dat[2], name=dat[3],
41                                    description=desc)
42                             })
43  }
44
45  dsi$id <- as.numeric(dsi$id)
46  dsi$tags <- strsplit(dsi$tags, ";", fixed=TRUE)[[1]]
47
48  dsi
49}
50
51#' @method print nexusDatasetInfo
52#' @rdname nexus
53
54print.nexusDatasetInfo <- function(x, ...) {
55  ve <- strsplit(parseVE(x$`vertices/edges`), "/")[[1]]
56  nc <- c("U", "-", "-", "-")
57  if ("directed" %in% x$tags && "undirected" %in% x$tags) {
58    nc[1] <- "B"
59  } else if ("directed" %in% x$tags) {
60    nc[1] <- "D"
61  }
62  if (is.null(x$attributes)) {
63    nc[2] <- "?"
64  } else if (any(sapply(x$attributes,
65                        function(X) X$name=="name" && X$type=="vertex"))) {
66    nc[2] <- "N"
67  }
68  if ("weighted" %in% x$tags) {
69    nc[3] <- "W"
70  }
71  if ("bipartite" %in% x$tags) {
72    nc[4] <- "B"
73  }
74  nc <- paste(nc, collapse="")
75  head <- paste(sep="", "NEXUS ", nc, " ", ve[1], " ", ve[2], " #",
76                x$id, " ", x$sid, " -- ", x$name)
77  if (nchar(head) > getOption("width")) {
78    head <- paste(sep="", substr(head, 1, getOption("width")-1), "+")
79  }
80  cat(head, sep="", "\n")
81  if (length(x$tags) != 0) {
82    tt <- strwrap(paste(sep="", "+ tags: ", paste(x$tags, collapse="; ")),
83                  initial="", prefix="  ")
84    cat(tt, sep="\n")
85  }
86  if ("networks" %in% names(x)) {
87    nets <- strsplit(x$networks, " ")[[1]]
88    nn <- strwrap(paste(sep="", "+ nets: ", paste(nets, collapse="; ")),
89                  initial="", prefix="  ")
90    cat(nn, sep="\n")
91  }
92  attr <- x[["attributes"]]
93  printed <- c("id", "sid", "vertices/edges", "name", "tags", "networks",
94               "attributes")
95  x <- x[ setdiff(names(x), printed) ]
96  if (length(attr)>0) {
97    dcode <- function(d) {
98      if (d=="numeric") return("n")
99      if (d=="string") return("c")
100      "x"
101    }
102    cat("+ attr: ")
103    astr <- sapply(attr, function(a) {
104      paste(sep="", a$name, " (", substr(a$type, 1, 1), "/",
105            dcode(a$datatype), ")")
106    })
107    cat(strwrap(paste(astr, collapse=", "), exdent=2), "\n")
108  }
109  for (i in names(x)) {
110    xx <- strsplit(x[[i]], "\n")[[1]]
111    ff <- strwrap(paste(sep="", "+ ", i, ": ", xx[1]), initial="",
112                  prefix="  ")
113    xx <- unlist(sapply(xx[-1], strwrap, prefix="  "))
114    cat(ff, sep="\n")
115    if (length(xx)>0) {
116      cat(xx, sep="\n")
117    }
118  }
119  invisible(x)
120}
121
122#' @method summary nexusDatasetInfoList
123#' @rdname nexus
124
125summary.nexusDatasetInfoList <- function(object, ...) {
126  o <- as.numeric(attr(object, "offset"))
127  s <- as.numeric(attr(object, "size"))
128  t <- as.numeric(attr(object, "totalsize"))
129  n <- attr(object, "name")
130  cat(sep="", "NEXUS ", o+1, "-", o+s, "/", t, " -- ", n, "\n")
131  invisible(object)
132}
133
134parseVE <- function(ve) {
135  if (length(ve)==0) { return(character(0)) }
136  ve <- strsplit(unname(ve), " ")
137  ve <- lapply(ve, strsplit, "/")
138  v <- lapply(ve, function(x) sapply(x, "[", 1))
139  e <- lapply(ve, function(x) sapply(x, "[", 2))
140  int <- function(x) {
141    if (length(unique(x))==1) {
142      as.character(x[1])
143    } else {
144      paste(sep="", min(x), "-", max(x))
145    }
146  }
147  v <- sapply(v, int)
148  e <- sapply(e, int)
149  paste(v, sep="/", e)
150}
151
152#' @method print nexusDatasetInfoList
153#' @rdname nexus
154
155print.nexusDatasetInfoList <- function(x, ...) {
156  summary(x)
157
158  if (length(x)==0) { return(invisible(x)) }
159
160  ve <- parseVE(unname(sapply(x, "[[", "vertices/edges")))
161  nets <- sapply(x, function(y) length(strsplit(y$networks, " ")[[1]]))
162  sid <- sapply(x, "[[", "sid")
163  if (any(nets>1)) {
164    sid[nets > 1] <- paste(sep="", sid[nets>1], ".", nets[nets>1])
165  }
166  df <- data.frame(no=paste(sep="", "[", format(seq_along(x)), "] "),
167                   sid=format(sid),
168                   size=paste(sep="", " ", format(ve)),
169                   id=paste(sep="", " #", format(sapply(x, "[[", "id")), " "),
170                   name=sapply(x, "[[", "name"))
171  out <- do.call(paste, c(as.list(df), sep=""))
172  long <- nchar(out) > getOption("width")
173  out <- paste(sep="", substr(out, 1, getOption("width")-1),
174               ifelse(long, "+", ""))
175  cat(out, sep="\n")
176  invisible(x)
177}
178
179nexus.format.result <- function(l, name="") {
180
181  if (length(l)==0) {
182    res <- list()
183    class(res) <- "nexusDatasetInfoList"
184    return(res)
185  }
186
187  l <- lapply(l, function(x) c(sub("[ ]*:[^:]*$", "", x),
188                               sub("^[^:]*:[ ]*", "", x)))
189  spos <- which(sapply(l, function(x) x[1]=="id"))
190  epos <- c((spos-1), length(l))
191  ehead <- epos[1]
192  epos <- epos[-1]
193
194  res <- mapply(spos, epos, SIMPLIFY=FALSE, FUN=function(s, e)
195                makeNexusDatasetInfo(l[s:e]))
196  class(res) <- "nexusDatasetInfoList"
197
198  for (h in 1:ehead) {
199    attr(res, l[[h]][1]) <- l[[h]][2]
200    attr(res, "name") <- name
201  }
202
203  res
204}
205
206#' Query and download from the Nexus network repository
207#'
208#' The Nexus network repository is an online collection of network data sets.
209#' These functions can be used to query it and download data from it, directly
210#' as an igraph graph.
211#'
212#' Nexus is an online repository of networks, with an API that allow
213#' programmatic queries against it, and programmatic data download as well.
214#'
215#' The \code{nexus_list} and \code{nexus_info} functions query the online
216#' database. They both return \code{nexusDatasetInfo} objects.
217#' \code{nexus_info} returns more information than \code{nexus_list}.
218#'
219#' \code{nexus_search} searches Nexus, and returns a list of data sets, as
220#' \code{nexusDatasetInfo} objects. See below for some search examples.
221#'
222#' \code{nexus_get} downloads a data set from Nexus, based on its numeric id,
223#' or based on a Nexus search string. For search strings, only the first search
224#' hit is downloaded, but see also the \code{offset} argument. (If there are
225#' not data sets found, then the function returns an error.)
226#'
227#' The \code{nexusDatasetInfo} objects returned by \code{nexus_list} have the
228#' following fields: \describe{
229#'   \item{id}{The numeric id of the dataset.}
230#'   \item{sid}{The character id of the dataset.}
231#'   \item{name}{Character scalar, the name of the dataset.}
232#'   \item{vertices/edges}{Character, the number of vertices and edges in
233#'     the graph(s). Vertices and edges are separated by a  slash, and if
234#'     the data set consists of multiple networks, then they are separated
235#'     by spaces.}
236#'   \item{tags}{Character vector, the tags of the dataset. Directed graph
237#'     have the tags \sQuote{directed}. Undirected graphs are tagged
238#'     as \sQuote{undirected}. Other common tags are: \sQuote{weighted},
239#'     \sQuote{bipartite}, \sQuote{social network}, etc.}
240#'   \item{networks}{The ids and names of the networks in the data set. The
241#'     numeric and character id are separated by a slash, and multiple networks
242#'     are separated by spaces.}
243#' }
244#'
245#' \code{nexusDatasetInfo} objects returned by \code{nexus_info} have the
246#' following additional fields: \describe{
247#'   \item{date}{Character scalar, e.g. \sQuote{2011-01-09}, the date when
248#'     the dataset was added to the database.}
249#'   \item{formats}{Character vector, the data formats in which the data set is
250#'     available. The various formats are separated by semicolons.}
251#'   \item{licence}{Character scalar, the licence of the dataset.}
252#'   \item{licence url}{Character scalar, the URL of the licence of the
253#'     dataset. Please make sure you consult this before using a dataset.}
254#'   \item{summary}{Character scalar, the short description of the dataset,
255#'     this is usually a single sentence.}
256#'   \item{description}{Character scalar, the full description of the
257#'     dataset.}
258#'   \item{citation}{Character scalar, the paper(s) describing the
259#'     dataset. Please cite these papers if you are using the dataset in your
260#'     research, the licence of most datasets requires this.}
261#'   \item{attributes}{A list of lists, each list entry is a graph, vertex
262#'     or edge attribute and has the following entries: \describe{
263#'       \item{type}{Type of the attribute, either \sQuote{graph},
264#'         \sQuote{vertex} or \sQuote{edge}.}
265#'       \item{datatype}{Data type of the attribute, currently it can be
266#'         \sQuote{numeric} and \sQuote{string}.}
267#'       \item{name}{Character scalar, the name of the attribute.}
268#'       \item{description}{Character scalar, the description of the
269#'         attribute.}
270#'     }
271#'   }
272#' }
273#'
274#' The results of the Nexus queries are printed to the screen in a consise
275#' format, similar to the format of igraph graphs. A data set list (typically
276#' the result of \code{nexus_list} and \code{nexus_search}) looks like this:
277#' \preformatted{NEXUS 1-5/18 -- data set list
278#' [1] kaptail.4         39/109-223   #18 Kapferer tailor shop
279#' [2] condmatcollab2003 31163/120029 #17 Condensed matter collaborations+
280#' [3] condmatcollab     16726/47594  #16 Condensed matter collaborations+
281#' [4] powergrid         4941/6594    #15 Western US power grid
282#' [5] celegansneural    297/2359     #14 C. Elegans neural network }
283#' Each line here represents a data set, and the following information is
284#' given about them: the character id of the data set (e.g. \code{kaptail}
285#' or \code{powergrid}), the number of vertices and number of edges in the
286#' graph of the data sets.  For data sets with multiple graphs, intervals
287#' are given here. Then the numeric id of the data set and the remaining
288#' space is filled with the name of the data set.
289#'
290#' Summary information about an individual Nexus data set is printed as
291#' \preformatted{NEXUS B--- 39 109-223 #18 kaptail -- Kapferer tailor shop
292#' + tags: directed; social network; undirected
293#' + nets: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1}
294#' This is very similar to the header that is used for printing igraph
295#' graphs, but there are some differences as well. The four characters
296#' after the \code{NEXUS} word give the most important properties of the
297#' graph(s): the first is \sQuote{\code{U}} for undirected and
298#' \sQuote{\code{D}} for directed graphs, and \sQuote{\code{B}} if the data
299#' set contains both directed and undirected graphs. The second is
300#' \sQuote{\code{N}} named graphs. The third character is \sQuote{\code{W}}
301#' for weighted graphs, the fourth is \sQuote{\code{B}} if the data set
302#' contains bipartite graphs. Then the number of vertices and number of
303#' edges are printed, for data sets with multiple graphs, the smallest and
304#' the largest values are given. Then comes the numeric id, and the string
305#' id of the data set. The end of the first line contains the name of the
306#' data set. The second row lists the data set tags, and the third row the
307#' networks that are included in the data set.
308#'
309#' Detailed data set information is printed similarly, but it contains more
310#' fields.
311#'
312#' @rdname nexus
313#' @aliases nexus nexus.list nexus.info nexus.get nexus.search nexus_list
314#' nexus_info nexus_get nexus_search nexusDatasetInfo print.nexusDatasetInfo
315#' print.nexusDatasetInfoList summary.nexusDatasetInfoList
316#' @param tags A character vector, the tags that are searched. If not given (or
317#' \code{NULL}), then all datasets are listed.
318#' @param offset An offset to select part of the results. Results are listed
319#' from \code{offset}+1.
320#' @param limit The maximum number of results to return.
321#' @param operator A character scalar. If \sQuote{or} (the default), then all
322#' datasets that have at least one of the given tags, are returned. If it if
323#' \sQuote{and}, then only datasets that have all the given tags, are returned.
324#' @param order The ordering of the results, possible values are:
325#' \sQuote{date}, \sQuote{name}, \sQuote{popularity}.
326#' @param id The numeric or character id of the data set to query or download.
327#' Instead of the data set ids, it is possible to supply a
328#' \code{nexusDatasetInfo} or \code{nexusDatasetInfoList} object here directly
329#' and then the query is done on the corresponding data set(s).
330#' @param q Nexus search string. See examples below.
331#' @param nexus.url The URL of the Nexus server. Don't change this from the
332#' default, unless you set up your own Nexus server.
333#' @param x,object The \code{nexusDatasetInfo} object to print.
334#' @param \dots Currently ignored.
335#' @return \code{nexus_list} and \code{nexus_search} return a list of
336#' \code{nexusDatasetInfo} objects. The list also has these attributes:
337#' \describe{ \item{size}{The number of data sets returned by the query.}
338#' \item{totalsize}{The total number of data sets found for the query.}
339#' \item{offset}{The offset parameter of the query.} \item{limit}{The limit
340#' parameter of the query.} }
341#'
342#' \code{nexus_info} returns a single \code{nexusDatasetInfo} object.
343#'
344#' \code{nexus_get} returns an igraph graph object, or a list of graph objects,
345#' if the data set consists of multiple networks.
346#' @section Examples:
347#' \preformatted{
348#' nexus_list(tag="weighted")
349#' nexus_list(limit=3, order="name")
350#' nexus_list(limit=3, order="name")[[1]]
351#' nexus_info(2)
352#' g <- nexus_get(2)
353#' summary(g)
354#'
355#' ## Data sets related to 'US':
356#' nexus_search("US")
357#'
358#' ## Search for data sets that have 'network' in their name:
359#' nexus_search("name:network")
360#'
361#' ## Any word can match
362#' nexus_search("blog or US or karate")
363#' }
364#' @export
365#' @importFrom utils URLencode
366
367nexus_list <- function(tags=NULL, offset=0, limit=10,
368                       operator=c("or", "and"),
369                       order=c("date", "name", "popularity"),
370                       nexus.url=igraph_opt("nexus.url")) {
371
372  operator=igraph.match.arg(operator)
373  order=igraph.match.arg(order)
374
375  if (is.null(tags)) {
376    u <- paste(sep="", nexus.url, "/api/dataset_info?format=text",
377               "&offset=", offset, "&limit=", limit, "&order=", order)
378    name <- "data set list"
379  } else {
380    tags <- paste(tags, collapse="|")
381    u <- paste(sep="", nexus.url, "/api/dataset_info?tag=", tags,
382               "&operator=", operator, "&format=text",
383               "&offset=", offset, "&limit=", limit, "&order=", order)
384    name <- paste("tags:", gsub("|", "; ", tags, fixed=TRUE))
385  }
386  f <- url(URLencode(u))
387  l <- readLines(f)
388  close(f)
389
390  nexus.format.result(l, name)
391}
392
393#' @export
394#' @rdname nexus
395#' @importFrom utils URLencode
396
397nexus_info <- function(id, nexus.url=igraph_opt("nexus.url")) {
398
399  if (inherits(id, "nexusDatasetInfo")) {
400    id <- id$id
401  } else if (inherits(id, "nexusDatasetInfoList")) {
402    rid <- sapply(id, "[[", "id")
403    res <- lapply(rid, nexus_info, nexus.url=nexus.url)
404    class(res) <- class(id)
405    attributes(res) <- attributes(id)
406    return(res)
407  }
408
409  u <- paste(sep="", nexus.url, "/api/dataset_info?format=text&id=", id)
410  f <- url(URLencode(u))
411  l <- readLines(f)
412  close(f)
413  l2 <- character()
414  for (i in seq_along(l)) {
415    if (!grepl("^  ", l[i])) {
416      l2 <- c(l2, l[i])
417    } else {
418      l2[length(l2)] <- paste(sep="\n", l2[length(l2)],
419                              sub("  ", "", l[i], fixed=TRUE))
420    }
421  }
422  l2 <- lapply(l2, function(x)
423               c(sub("[ ]*:.*$", "", x), sub("^[^:]*:[ ]*", "", x)))
424  res <- makeNexusDatasetInfo(l2)
425  if (! "attributes" %in% names(res)) { res$attributes <- list() }
426  return(res)
427}
428
429#' @export
430#' @rdname nexus
431#' @importFrom utils URLencode
432
433nexus_get <- function(id, offset=0,
434                      order=c("date", "name", "popularity"),
435                      nexus.url=igraph_opt("nexus.url")) {
436
437  order=igraph.match.arg(order)
438
439  if (inherits(id, "nexusDatasetInfo")) {
440    id <- id$id
441  } else if (inherits(id, "nexusDatasetInfoList")) {
442    id <- sapply(id, "[[", "id")
443    return(lapply(id, nexus_get, nexus.url=nexus.url))
444  }
445
446  u <- paste(sep="", nexus.url, "/api/dataset?id=", id, "&format=R-igraph")
447  env <- new.env()
448  rdata <- url(URLencode(u))
449  load(rdata, envir=env)
450  close(rdata)
451  res <- get(ls(env)[1], env)
452
453  upgrade_if_igraph <- function(x) if (is_igraph(x)) upgrade_graph(x) else x
454
455  if (is_igraph(res)) {
456    upgrade_if_igraph(res)
457  } else if (is.list(res)) {
458    res2 <- lapply(res, upgrade_if_igraph)
459    attributes(res2) <- attributes(res)
460    res2
461  }
462}
463
464#' @export
465#' @rdname nexus
466#' @importFrom utils URLencode
467
468nexus_search <- function(q, offset=0, limit=10,
469                         order=c("date", "name", "popularity"),
470                         nexus.url=igraph_opt("nexus.url")) {
471
472  order=igraph.match.arg(order)
473
474  u <- paste(sep="", nexus.url, "/api/search?q=", q,
475             "&format=text","&offset=", offset, "&limit=", limit,
476             "&order=", order)
477  f <- url(URLencode(u))
478  l <- readLines(f)
479  close(f)
480
481  if (length(l)==0) {
482    res <- list()
483    class(res) <- "nexusDatasetInfoList"
484    return(res)
485  }
486
487  nexus.format.result(l, name=paste("q:", q))
488}
489
490#' @param i Index.
491#' @method [ nexusDatasetInfoList
492#' @rdname nexus
493
494`[.nexusDatasetInfoList` <- function(x, i) {
495  res <- unclass(x)[i]
496  class(res) <- class(x)
497  attributes(res) <- attributes(x)
498  res
499}
500
501'
502DATA SET LIST:
503--------------
504
505NEXUS 1-10/18 -- data set list
506[ 1] kaptail.4         #18 39/109-223   Kapferer tailor shop
507[ 2] condmatcollab2003 #17 31163/120029 Condensed matter collaborations, 2003
508[ 3] condmatcollab     #16 16726/47594  Condensed matter collaborations, 1999
509[ 4] powergrid         #15 4941/6594    Western US power grid
510[ 5] celegansneural    #14 297/2359     C. Elegans neural network
511[ 6] polblogs          #13 1490/19090   US political blog network
512[ 7] dolphins          #12 62/159       Dolphin social network
513[ 8] football          #11 115/616      Network of American college ...
514[ 9] adjnoun           #10 112/425      Word adjacencies from David ...
515[10] huckleberry       # 9 74/301       Coappearance network from ...
516
517
518TAG SEARCH:
519-----------
520
521NEXUS 1-4/4 -- tags: directed
522[1] kaptail.4 #18 39/109-223 Kapferer tailor shop
523[2] polblogs  #13 1490/19090 US political blog network
524[3] macaque   # 4 45/463     Macaque visuotactile brain areas
525[4] UKfaculty # 2 81/817     UK faculty social network
526
527
528FULL TEXT SEARCH:
529-----------------
530
531NEXUS 1-2/2 -- q: US
532[1] powergrid #15 4941/6594  Western US power grid
533[2] polblogs  #13 1490/19090 US political blog network
534
535
536DATA SET SUMMARY:
537-----------------
538
539NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop
540+ tags: directed; social network; undirected
541+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1
542
543NEXUS U--- 4941 6594 -- #15 Western US power grid
544+ tags: technology
545
546DATA SET INFO:
547--------------
548
549NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop
550+ tags: directed; social network; undirected
551+ attr: name (v/c) [Actor names]
552+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1
553+ nets: #1 KAPFTI2; #2 KAPFTS2; #3 KAPFTI1; #4 KAPFTS1
554+ date: 2011-01-23
555+ licence: Creative Commons by-sa 3.0
556+ licence url: http://creativecommons.org/licenses/by-sa/3.0/
557+ summary: Interactions in a tailor shop in Zambia (then
558  Northern Rhodesia) over a period of ten months.
559+ details: Bruce Kapferer (1972) observed interactions in a tailor
560  shop in Zambia (then Northern Rhodesia) over a period of ten months.
561  His focus was the changing patterns of alliance among workers during
562  extended negotiations for higher wages. . The matrices represent two
563  different types of interaction, recorded at two different times
564  (seven months apart) over a period of one month. TI1 and TI2 record
565  the "instrumental" (work- and assistance-related) interactions at the
566  two times; TS1 and TS2 the "sociational" (friendship, socioemotional)
567  interactions. . The data are particularly interesting since an
568  abortive strike occurred after the first set of observations, and a
569  successful strike took place after the second.
570+ formats: Pajek; R-igraph
571+ citation: Kapferer B. (1972). Strategy and transaction in an African
572  factory. Manchester: Manchester University Press.
573'
574