1 2# IGraph R package 3# Copyright (C) 2011-2012 Gabor Csardi <csardi.gabor@gmail.com> 4# 334 Harvard street, Cambridge, MA 02139 USA 5# 6# This program is free software; you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation; either version 2 of the License, or 9# (at your option) any later version. 10# 11# This program is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# You should have received a copy of the GNU General Public License 17# along with this program; if not, write to the Free Software 18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19# 02110-1301 USA 20# 21################################################################### 22 23makeNexusDatasetInfo <- function(entries) { 24 dsi <- lapply(entries, "[", 2) 25 nam <- sapply(entries, "[", 1) 26 27 attr <- nam=="attribute" 28 myattr <- unlist(dsi[attr]) 29 dsi <- dsi[!attr] 30 nam <- nam[!attr] 31 names(dsi) <- nam 32 class(dsi) <- "nexusDatasetInfo" 33 34 if (length(myattr) != 0) { 35 myattr <- strsplit(myattr, "\n", fixed=TRUE) 36 attrdat <- lapply(myattr, function(x) strsplit(x[1], " ")[[1]]) 37 myattr <- sapply(myattr, "[", 2) 38 dsi$attributes <- mapply(attrdat, myattr, SIMPLIFY=FALSE, 39 FUN=function(dat, desc) { 40 list(type=dat[1], datatype=dat[2], name=dat[3], 41 description=desc) 42 }) 43 } 44 45 dsi$id <- as.numeric(dsi$id) 46 dsi$tags <- strsplit(dsi$tags, ";", fixed=TRUE)[[1]] 47 48 dsi 49} 50 51#' @method print nexusDatasetInfo 52#' @rdname nexus 53 54print.nexusDatasetInfo <- function(x, ...) { 55 ve <- strsplit(parseVE(x$`vertices/edges`), "/")[[1]] 56 nc <- c("U", "-", "-", "-") 57 if ("directed" %in% x$tags && "undirected" %in% x$tags) { 58 nc[1] <- "B" 59 } else if ("directed" %in% x$tags) { 60 nc[1] <- "D" 61 } 62 if (is.null(x$attributes)) { 63 nc[2] <- "?" 64 } else if (any(sapply(x$attributes, 65 function(X) X$name=="name" && X$type=="vertex"))) { 66 nc[2] <- "N" 67 } 68 if ("weighted" %in% x$tags) { 69 nc[3] <- "W" 70 } 71 if ("bipartite" %in% x$tags) { 72 nc[4] <- "B" 73 } 74 nc <- paste(nc, collapse="") 75 head <- paste(sep="", "NEXUS ", nc, " ", ve[1], " ", ve[2], " #", 76 x$id, " ", x$sid, " -- ", x$name) 77 if (nchar(head) > getOption("width")) { 78 head <- paste(sep="", substr(head, 1, getOption("width")-1), "+") 79 } 80 cat(head, sep="", "\n") 81 if (length(x$tags) != 0) { 82 tt <- strwrap(paste(sep="", "+ tags: ", paste(x$tags, collapse="; ")), 83 initial="", prefix=" ") 84 cat(tt, sep="\n") 85 } 86 if ("networks" %in% names(x)) { 87 nets <- strsplit(x$networks, " ")[[1]] 88 nn <- strwrap(paste(sep="", "+ nets: ", paste(nets, collapse="; ")), 89 initial="", prefix=" ") 90 cat(nn, sep="\n") 91 } 92 attr <- x[["attributes"]] 93 printed <- c("id", "sid", "vertices/edges", "name", "tags", "networks", 94 "attributes") 95 x <- x[ setdiff(names(x), printed) ] 96 if (length(attr)>0) { 97 dcode <- function(d) { 98 if (d=="numeric") return("n") 99 if (d=="string") return("c") 100 "x" 101 } 102 cat("+ attr: ") 103 astr <- sapply(attr, function(a) { 104 paste(sep="", a$name, " (", substr(a$type, 1, 1), "/", 105 dcode(a$datatype), ")") 106 }) 107 cat(strwrap(paste(astr, collapse=", "), exdent=2), "\n") 108 } 109 for (i in names(x)) { 110 xx <- strsplit(x[[i]], "\n")[[1]] 111 ff <- strwrap(paste(sep="", "+ ", i, ": ", xx[1]), initial="", 112 prefix=" ") 113 xx <- unlist(sapply(xx[-1], strwrap, prefix=" ")) 114 cat(ff, sep="\n") 115 if (length(xx)>0) { 116 cat(xx, sep="\n") 117 } 118 } 119 invisible(x) 120} 121 122#' @method summary nexusDatasetInfoList 123#' @rdname nexus 124 125summary.nexusDatasetInfoList <- function(object, ...) { 126 o <- as.numeric(attr(object, "offset")) 127 s <- as.numeric(attr(object, "size")) 128 t <- as.numeric(attr(object, "totalsize")) 129 n <- attr(object, "name") 130 cat(sep="", "NEXUS ", o+1, "-", o+s, "/", t, " -- ", n, "\n") 131 invisible(object) 132} 133 134parseVE <- function(ve) { 135 if (length(ve)==0) { return(character(0)) } 136 ve <- strsplit(unname(ve), " ") 137 ve <- lapply(ve, strsplit, "/") 138 v <- lapply(ve, function(x) sapply(x, "[", 1)) 139 e <- lapply(ve, function(x) sapply(x, "[", 2)) 140 int <- function(x) { 141 if (length(unique(x))==1) { 142 as.character(x[1]) 143 } else { 144 paste(sep="", min(x), "-", max(x)) 145 } 146 } 147 v <- sapply(v, int) 148 e <- sapply(e, int) 149 paste(v, sep="/", e) 150} 151 152#' @method print nexusDatasetInfoList 153#' @rdname nexus 154 155print.nexusDatasetInfoList <- function(x, ...) { 156 summary(x) 157 158 if (length(x)==0) { return(invisible(x)) } 159 160 ve <- parseVE(unname(sapply(x, "[[", "vertices/edges"))) 161 nets <- sapply(x, function(y) length(strsplit(y$networks, " ")[[1]])) 162 sid <- sapply(x, "[[", "sid") 163 if (any(nets>1)) { 164 sid[nets > 1] <- paste(sep="", sid[nets>1], ".", nets[nets>1]) 165 } 166 df <- data.frame(no=paste(sep="", "[", format(seq_along(x)), "] "), 167 sid=format(sid), 168 size=paste(sep="", " ", format(ve)), 169 id=paste(sep="", " #", format(sapply(x, "[[", "id")), " "), 170 name=sapply(x, "[[", "name")) 171 out <- do.call(paste, c(as.list(df), sep="")) 172 long <- nchar(out) > getOption("width") 173 out <- paste(sep="", substr(out, 1, getOption("width")-1), 174 ifelse(long, "+", "")) 175 cat(out, sep="\n") 176 invisible(x) 177} 178 179nexus.format.result <- function(l, name="") { 180 181 if (length(l)==0) { 182 res <- list() 183 class(res) <- "nexusDatasetInfoList" 184 return(res) 185 } 186 187 l <- lapply(l, function(x) c(sub("[ ]*:[^:]*$", "", x), 188 sub("^[^:]*:[ ]*", "", x))) 189 spos <- which(sapply(l, function(x) x[1]=="id")) 190 epos <- c((spos-1), length(l)) 191 ehead <- epos[1] 192 epos <- epos[-1] 193 194 res <- mapply(spos, epos, SIMPLIFY=FALSE, FUN=function(s, e) 195 makeNexusDatasetInfo(l[s:e])) 196 class(res) <- "nexusDatasetInfoList" 197 198 for (h in 1:ehead) { 199 attr(res, l[[h]][1]) <- l[[h]][2] 200 attr(res, "name") <- name 201 } 202 203 res 204} 205 206#' Query and download from the Nexus network repository 207#' 208#' The Nexus network repository is an online collection of network data sets. 209#' These functions can be used to query it and download data from it, directly 210#' as an igraph graph. 211#' 212#' Nexus is an online repository of networks, with an API that allow 213#' programmatic queries against it, and programmatic data download as well. 214#' 215#' The \code{nexus_list} and \code{nexus_info} functions query the online 216#' database. They both return \code{nexusDatasetInfo} objects. 217#' \code{nexus_info} returns more information than \code{nexus_list}. 218#' 219#' \code{nexus_search} searches Nexus, and returns a list of data sets, as 220#' \code{nexusDatasetInfo} objects. See below for some search examples. 221#' 222#' \code{nexus_get} downloads a data set from Nexus, based on its numeric id, 223#' or based on a Nexus search string. For search strings, only the first search 224#' hit is downloaded, but see also the \code{offset} argument. (If there are 225#' not data sets found, then the function returns an error.) 226#' 227#' The \code{nexusDatasetInfo} objects returned by \code{nexus_list} have the 228#' following fields: \describe{ 229#' \item{id}{The numeric id of the dataset.} 230#' \item{sid}{The character id of the dataset.} 231#' \item{name}{Character scalar, the name of the dataset.} 232#' \item{vertices/edges}{Character, the number of vertices and edges in 233#' the graph(s). Vertices and edges are separated by a slash, and if 234#' the data set consists of multiple networks, then they are separated 235#' by spaces.} 236#' \item{tags}{Character vector, the tags of the dataset. Directed graph 237#' have the tags \sQuote{directed}. Undirected graphs are tagged 238#' as \sQuote{undirected}. Other common tags are: \sQuote{weighted}, 239#' \sQuote{bipartite}, \sQuote{social network}, etc.} 240#' \item{networks}{The ids and names of the networks in the data set. The 241#' numeric and character id are separated by a slash, and multiple networks 242#' are separated by spaces.} 243#' } 244#' 245#' \code{nexusDatasetInfo} objects returned by \code{nexus_info} have the 246#' following additional fields: \describe{ 247#' \item{date}{Character scalar, e.g. \sQuote{2011-01-09}, the date when 248#' the dataset was added to the database.} 249#' \item{formats}{Character vector, the data formats in which the data set is 250#' available. The various formats are separated by semicolons.} 251#' \item{licence}{Character scalar, the licence of the dataset.} 252#' \item{licence url}{Character scalar, the URL of the licence of the 253#' dataset. Please make sure you consult this before using a dataset.} 254#' \item{summary}{Character scalar, the short description of the dataset, 255#' this is usually a single sentence.} 256#' \item{description}{Character scalar, the full description of the 257#' dataset.} 258#' \item{citation}{Character scalar, the paper(s) describing the 259#' dataset. Please cite these papers if you are using the dataset in your 260#' research, the licence of most datasets requires this.} 261#' \item{attributes}{A list of lists, each list entry is a graph, vertex 262#' or edge attribute and has the following entries: \describe{ 263#' \item{type}{Type of the attribute, either \sQuote{graph}, 264#' \sQuote{vertex} or \sQuote{edge}.} 265#' \item{datatype}{Data type of the attribute, currently it can be 266#' \sQuote{numeric} and \sQuote{string}.} 267#' \item{name}{Character scalar, the name of the attribute.} 268#' \item{description}{Character scalar, the description of the 269#' attribute.} 270#' } 271#' } 272#' } 273#' 274#' The results of the Nexus queries are printed to the screen in a consise 275#' format, similar to the format of igraph graphs. A data set list (typically 276#' the result of \code{nexus_list} and \code{nexus_search}) looks like this: 277#' \preformatted{NEXUS 1-5/18 -- data set list 278#' [1] kaptail.4 39/109-223 #18 Kapferer tailor shop 279#' [2] condmatcollab2003 31163/120029 #17 Condensed matter collaborations+ 280#' [3] condmatcollab 16726/47594 #16 Condensed matter collaborations+ 281#' [4] powergrid 4941/6594 #15 Western US power grid 282#' [5] celegansneural 297/2359 #14 C. Elegans neural network } 283#' Each line here represents a data set, and the following information is 284#' given about them: the character id of the data set (e.g. \code{kaptail} 285#' or \code{powergrid}), the number of vertices and number of edges in the 286#' graph of the data sets. For data sets with multiple graphs, intervals 287#' are given here. Then the numeric id of the data set and the remaining 288#' space is filled with the name of the data set. 289#' 290#' Summary information about an individual Nexus data set is printed as 291#' \preformatted{NEXUS B--- 39 109-223 #18 kaptail -- Kapferer tailor shop 292#' + tags: directed; social network; undirected 293#' + nets: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1} 294#' This is very similar to the header that is used for printing igraph 295#' graphs, but there are some differences as well. The four characters 296#' after the \code{NEXUS} word give the most important properties of the 297#' graph(s): the first is \sQuote{\code{U}} for undirected and 298#' \sQuote{\code{D}} for directed graphs, and \sQuote{\code{B}} if the data 299#' set contains both directed and undirected graphs. The second is 300#' \sQuote{\code{N}} named graphs. The third character is \sQuote{\code{W}} 301#' for weighted graphs, the fourth is \sQuote{\code{B}} if the data set 302#' contains bipartite graphs. Then the number of vertices and number of 303#' edges are printed, for data sets with multiple graphs, the smallest and 304#' the largest values are given. Then comes the numeric id, and the string 305#' id of the data set. The end of the first line contains the name of the 306#' data set. The second row lists the data set tags, and the third row the 307#' networks that are included in the data set. 308#' 309#' Detailed data set information is printed similarly, but it contains more 310#' fields. 311#' 312#' @rdname nexus 313#' @aliases nexus nexus.list nexus.info nexus.get nexus.search nexus_list 314#' nexus_info nexus_get nexus_search nexusDatasetInfo print.nexusDatasetInfo 315#' print.nexusDatasetInfoList summary.nexusDatasetInfoList 316#' @param tags A character vector, the tags that are searched. If not given (or 317#' \code{NULL}), then all datasets are listed. 318#' @param offset An offset to select part of the results. Results are listed 319#' from \code{offset}+1. 320#' @param limit The maximum number of results to return. 321#' @param operator A character scalar. If \sQuote{or} (the default), then all 322#' datasets that have at least one of the given tags, are returned. If it if 323#' \sQuote{and}, then only datasets that have all the given tags, are returned. 324#' @param order The ordering of the results, possible values are: 325#' \sQuote{date}, \sQuote{name}, \sQuote{popularity}. 326#' @param id The numeric or character id of the data set to query or download. 327#' Instead of the data set ids, it is possible to supply a 328#' \code{nexusDatasetInfo} or \code{nexusDatasetInfoList} object here directly 329#' and then the query is done on the corresponding data set(s). 330#' @param q Nexus search string. See examples below. 331#' @param nexus.url The URL of the Nexus server. Don't change this from the 332#' default, unless you set up your own Nexus server. 333#' @param x,object The \code{nexusDatasetInfo} object to print. 334#' @param \dots Currently ignored. 335#' @return \code{nexus_list} and \code{nexus_search} return a list of 336#' \code{nexusDatasetInfo} objects. The list also has these attributes: 337#' \describe{ \item{size}{The number of data sets returned by the query.} 338#' \item{totalsize}{The total number of data sets found for the query.} 339#' \item{offset}{The offset parameter of the query.} \item{limit}{The limit 340#' parameter of the query.} } 341#' 342#' \code{nexus_info} returns a single \code{nexusDatasetInfo} object. 343#' 344#' \code{nexus_get} returns an igraph graph object, or a list of graph objects, 345#' if the data set consists of multiple networks. 346#' @section Examples: 347#' \preformatted{ 348#' nexus_list(tag="weighted") 349#' nexus_list(limit=3, order="name") 350#' nexus_list(limit=3, order="name")[[1]] 351#' nexus_info(2) 352#' g <- nexus_get(2) 353#' summary(g) 354#' 355#' ## Data sets related to 'US': 356#' nexus_search("US") 357#' 358#' ## Search for data sets that have 'network' in their name: 359#' nexus_search("name:network") 360#' 361#' ## Any word can match 362#' nexus_search("blog or US or karate") 363#' } 364#' @export 365#' @importFrom utils URLencode 366 367nexus_list <- function(tags=NULL, offset=0, limit=10, 368 operator=c("or", "and"), 369 order=c("date", "name", "popularity"), 370 nexus.url=igraph_opt("nexus.url")) { 371 372 operator=igraph.match.arg(operator) 373 order=igraph.match.arg(order) 374 375 if (is.null(tags)) { 376 u <- paste(sep="", nexus.url, "/api/dataset_info?format=text", 377 "&offset=", offset, "&limit=", limit, "&order=", order) 378 name <- "data set list" 379 } else { 380 tags <- paste(tags, collapse="|") 381 u <- paste(sep="", nexus.url, "/api/dataset_info?tag=", tags, 382 "&operator=", operator, "&format=text", 383 "&offset=", offset, "&limit=", limit, "&order=", order) 384 name <- paste("tags:", gsub("|", "; ", tags, fixed=TRUE)) 385 } 386 f <- url(URLencode(u)) 387 l <- readLines(f) 388 close(f) 389 390 nexus.format.result(l, name) 391} 392 393#' @export 394#' @rdname nexus 395#' @importFrom utils URLencode 396 397nexus_info <- function(id, nexus.url=igraph_opt("nexus.url")) { 398 399 if (inherits(id, "nexusDatasetInfo")) { 400 id <- id$id 401 } else if (inherits(id, "nexusDatasetInfoList")) { 402 rid <- sapply(id, "[[", "id") 403 res <- lapply(rid, nexus_info, nexus.url=nexus.url) 404 class(res) <- class(id) 405 attributes(res) <- attributes(id) 406 return(res) 407 } 408 409 u <- paste(sep="", nexus.url, "/api/dataset_info?format=text&id=", id) 410 f <- url(URLencode(u)) 411 l <- readLines(f) 412 close(f) 413 l2 <- character() 414 for (i in seq_along(l)) { 415 if (!grepl("^ ", l[i])) { 416 l2 <- c(l2, l[i]) 417 } else { 418 l2[length(l2)] <- paste(sep="\n", l2[length(l2)], 419 sub(" ", "", l[i], fixed=TRUE)) 420 } 421 } 422 l2 <- lapply(l2, function(x) 423 c(sub("[ ]*:.*$", "", x), sub("^[^:]*:[ ]*", "", x))) 424 res <- makeNexusDatasetInfo(l2) 425 if (! "attributes" %in% names(res)) { res$attributes <- list() } 426 return(res) 427} 428 429#' @export 430#' @rdname nexus 431#' @importFrom utils URLencode 432 433nexus_get <- function(id, offset=0, 434 order=c("date", "name", "popularity"), 435 nexus.url=igraph_opt("nexus.url")) { 436 437 order=igraph.match.arg(order) 438 439 if (inherits(id, "nexusDatasetInfo")) { 440 id <- id$id 441 } else if (inherits(id, "nexusDatasetInfoList")) { 442 id <- sapply(id, "[[", "id") 443 return(lapply(id, nexus_get, nexus.url=nexus.url)) 444 } 445 446 u <- paste(sep="", nexus.url, "/api/dataset?id=", id, "&format=R-igraph") 447 env <- new.env() 448 rdata <- url(URLencode(u)) 449 load(rdata, envir=env) 450 close(rdata) 451 res <- get(ls(env)[1], env) 452 453 upgrade_if_igraph <- function(x) if (is_igraph(x)) upgrade_graph(x) else x 454 455 if (is_igraph(res)) { 456 upgrade_if_igraph(res) 457 } else if (is.list(res)) { 458 res2 <- lapply(res, upgrade_if_igraph) 459 attributes(res2) <- attributes(res) 460 res2 461 } 462} 463 464#' @export 465#' @rdname nexus 466#' @importFrom utils URLencode 467 468nexus_search <- function(q, offset=0, limit=10, 469 order=c("date", "name", "popularity"), 470 nexus.url=igraph_opt("nexus.url")) { 471 472 order=igraph.match.arg(order) 473 474 u <- paste(sep="", nexus.url, "/api/search?q=", q, 475 "&format=text","&offset=", offset, "&limit=", limit, 476 "&order=", order) 477 f <- url(URLencode(u)) 478 l <- readLines(f) 479 close(f) 480 481 if (length(l)==0) { 482 res <- list() 483 class(res) <- "nexusDatasetInfoList" 484 return(res) 485 } 486 487 nexus.format.result(l, name=paste("q:", q)) 488} 489 490#' @param i Index. 491#' @method [ nexusDatasetInfoList 492#' @rdname nexus 493 494`[.nexusDatasetInfoList` <- function(x, i) { 495 res <- unclass(x)[i] 496 class(res) <- class(x) 497 attributes(res) <- attributes(x) 498 res 499} 500 501' 502DATA SET LIST: 503-------------- 504 505NEXUS 1-10/18 -- data set list 506[ 1] kaptail.4 #18 39/109-223 Kapferer tailor shop 507[ 2] condmatcollab2003 #17 31163/120029 Condensed matter collaborations, 2003 508[ 3] condmatcollab #16 16726/47594 Condensed matter collaborations, 1999 509[ 4] powergrid #15 4941/6594 Western US power grid 510[ 5] celegansneural #14 297/2359 C. Elegans neural network 511[ 6] polblogs #13 1490/19090 US political blog network 512[ 7] dolphins #12 62/159 Dolphin social network 513[ 8] football #11 115/616 Network of American college ... 514[ 9] adjnoun #10 112/425 Word adjacencies from David ... 515[10] huckleberry # 9 74/301 Coappearance network from ... 516 517 518TAG SEARCH: 519----------- 520 521NEXUS 1-4/4 -- tags: directed 522[1] kaptail.4 #18 39/109-223 Kapferer tailor shop 523[2] polblogs #13 1490/19090 US political blog network 524[3] macaque # 4 45/463 Macaque visuotactile brain areas 525[4] UKfaculty # 2 81/817 UK faculty social network 526 527 528FULL TEXT SEARCH: 529----------------- 530 531NEXUS 1-2/2 -- q: US 532[1] powergrid #15 4941/6594 Western US power grid 533[2] polblogs #13 1490/19090 US political blog network 534 535 536DATA SET SUMMARY: 537----------------- 538 539NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop 540+ tags: directed; social network; undirected 541+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1 542 543NEXUS U--- 4941 6594 -- #15 Western US power grid 544+ tags: technology 545 546DATA SET INFO: 547-------------- 548 549NEXUS B--- 39 109-223 -- #18 Kapferer tailor shop 550+ tags: directed; social network; undirected 551+ attr: name (v/c) [Actor names] 552+ networks: 1/KAPFTI2; 2/KAPFTS2; 3/KAPFTI1; 4/KAPFTS1 553+ nets: #1 KAPFTI2; #2 KAPFTS2; #3 KAPFTI1; #4 KAPFTS1 554+ date: 2011-01-23 555+ licence: Creative Commons by-sa 3.0 556+ licence url: http://creativecommons.org/licenses/by-sa/3.0/ 557+ summary: Interactions in a tailor shop in Zambia (then 558 Northern Rhodesia) over a period of ten months. 559+ details: Bruce Kapferer (1972) observed interactions in a tailor 560 shop in Zambia (then Northern Rhodesia) over a period of ten months. 561 His focus was the changing patterns of alliance among workers during 562 extended negotiations for higher wages. . The matrices represent two 563 different types of interaction, recorded at two different times 564 (seven months apart) over a period of one month. TI1 and TI2 record 565 the "instrumental" (work- and assistance-related) interactions at the 566 two times; TS1 and TS2 the "sociational" (friendship, socioemotional) 567 interactions. . The data are particularly interesting since an 568 abortive strike occurred after the first set of observations, and a 569 successful strike took place after the second. 570+ formats: Pajek; R-igraph 571+ citation: Kapferer B. (1972). Strategy and transaction in an African 572 factory. Manchester: Manchester University Press. 573' 574