1context("read_xml") 2 3test_that("read_xml errors with an empty document", { 4 expect_error(read_xml(character()), "Document is empty") 5 6 tf <- tempfile() 7 file.create(tf) 8 on.exit(unlink(tf)) 9 10 expect_error(read_xml(tf), "Document is empty") 11}) 12 13test_that("read_html correctly parses malformed document", { 14 lego <- read_html("lego.html.bz2") 15 expect_equal(length(xml_find_all(lego, ".//p")), 39) 16}) 17 18test_that("parse_options errors when given an invalid option", { 19 expect_error(parse_options("INVALID", xml_parse_options()), 20 "`options` 'INVALID' is not a valid option") 21 22 expect_error(read_html("lego.html.bz2", options = "INVALID"), 23 "`options` 'INVALID' is not a valid option") 24 25 # Empty inputs returned as 0 26 expect_identical(0L, parse_options("", xml_parse_options())) 27 expect_identical(0L, parse_options(NULL, xml_parse_options())) 28 29 # Numerics returned as integers 30 expect_identical(12L, parse_options(12L, xml_parse_options())) 31 expect_identical(12L, parse_options(12, xml_parse_options())) 32 33 # Multiple inputs summed 34 expect_identical(3L, parse_options(c("RECOVER", "NOENT"), xml_parse_options())) 35}) 36 37test_that("read_html properly passes parser arguments", { 38 39 skip_if_not(libxml2_version() >= "2.9.2") 40 41 blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR")) 42 expect_equal(as_list(blanks)$html$body$catalog$cd[[1]], 43 "\r\n ") 44 45 no_blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR", "NOBLANKS")) 46 47 expect_equal(as_list(no_blanks)$html$body$catalog$cd[[1]], 48 list("Empire Burlesque")) 49}) 50 51test_that("read_xml works with httr response objects", { 52 skip_on_cran() 53 skip_if_offline() 54 skip_if_not_installed("httr") 55 56 x <- read_xml(httr::GET("http://httpbin.org/xml")) 57 expect_is(x, "xml_document") 58 59 expect_equal(length(xml_find_all(x, "//slide")), 2) 60}) 61 62test_that("read_html works with httr response objects", { 63 skip_on_cran() 64 skip_if_offline() 65 66 x <- read_html(httr::GET("http://httpbin.org/xml")) 67 expect_is(x, "xml_document") 68 69 expect_equal(length(xml_find_all(x, "//slide")), 2) 70}) 71 72test_that("read_xml works with raw inputs", { 73 x <- read_xml("<foo/>") 74 expect_equal(xml_url(x), NA_character_) 75}) 76 77test_that("read_xml and read_html fail for bad status codes", { 78 79 skip_on_cran() 80 skip_if_not_installed("httr") 81 skip_if_offline() 82 83 expect_error( 84 read_xml(httr::GET("http://httpbin.org/status/404")), 85 class = "http_404" 86 ) 87 88 expect_error( 89 read_html(httr::GET("http://httpbin.org/status/404")), 90 class = "http_404" 91 ) 92}) 93 94test_that("read_html works with non-ASCII encodings", { 95 tmp <- tempfile() 96 on.exit(unlink(tmp)) 97 98 writeLines("<html><body>\U2019</body></html>", tmp, useBytes = TRUE) 99 res <- read_html(tmp, encoding = "UTF-8") 100 101 expect_equal(as.character(res, options = ""), 102 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body>\U2019</body></html>\n") 103}) 104 105test_that("read_xml and read_html fail with > 1 input", { 106 expect_error(read_xml(c("foo", "bar")), "`x` must be a string of length 1") 107 expect_error(read_html(c("foo", "bar")), "`x` must be a string of length 1") 108}) 109