1# read_spss --------------------------------------------------------------- 2 3test_that("variable label stored as attributes", { 4 df <- read_spss(test_path("spss/variable-label.sav")) 5 expect_equal(attr(df$sex, "label"), "Gender") 6}) 7 8test_that("value labels stored as labelled class", { 9 num <- zap_formats(read_spss(test_path("spss/labelled-num.sav"))) 10 str <- zap_formats(read_spss(test_path("spss/labelled-str.sav"))) 11 12 expect_equal(num[[1]], labelled(1, c("This is one" = 1))) 13 expect_equal(str[[1]], labelled(c("M", "F"), c(Female = "F", Male = "M"))) 14}) 15 16test_that("value labels read in as same type as vector", { 17 df <- read_spss(test_path("spss/variable-label.sav")) 18 num <- read_spss(test_path("spss/labelled-num.sav")) 19 str <- read_spss(test_path("spss/labelled-str.sav")) 20 21 expect_equal(typeof(df$sex), typeof(attr(df$sex, "labels"))) 22 expect_equal(typeof(num[[1]]), typeof(attr(num[[1]], "labels"))) 23 expect_equal(typeof(str[[1]]), typeof(attr(str[[1]], "labels"))) 24}) 25 26test_that("non-ASCII labels converted to utf-8", { 27 x <- read_spss(test_path("spss/umlauts.sav"))[[1]] 28 29 expect_equal(attr(x, "label"), "This is an \u00e4-umlaut") 30 expect_equal(names(attr(x, "labels"))[1], "the \u00e4 umlaut") 31}) 32 33test_that("datetime variables converted to the correct class", { 34 df <- read_spss(test_path("spss/datetime.sav")) 35 expect_true(inherits(df$date, "Date")) 36 expect_true(inherits(df$date.posix, "POSIXct")) 37 expect_true(inherits(df$time, "hms")) 38}) 39 40test_that("datetime values correctly imported (offset)", { 41 df <- read_spss(test_path("spss/datetime.sav")) 42 expect_equal(df$date[1], as.Date("2014-09-22d")) 43 expect_equal(df$date.posix[2], as.POSIXct("2014-09-23 15:59:20", tz = "UTC")) 44 expect_equal(as.integer(df$time[1]), 43870) 45}) 46 47test_that("formats roundtrip", { 48 df <- tibble::tibble( 49 a = structure(c(1, 1, 2), format.spss = "F1.0"), 50 b = structure(4:6, format.spss = "F2.1"), 51 c = structure(7:9, format.spss = "N2"), 52 d = structure(c("Text", "Text", ""), format.spss = "A100") 53 ) 54 55 tmp <- tempfile() 56 on.exit(unlink(tmp)) 57 58 write_sav(df, tmp) 59 df2 <- read_sav(tmp) 60 61 expect_equal(df$a, df2$a) 62 expect_equal(df$b, df2$b) 63 expect_equal(df$c, df2$c) 64 expect_equal(df$d, df2$d) 65}) 66 67test_that("widths roundtrip", { 68 df <- tibble::tibble( 69 a = structure(c(1, 1, 2), display_width = 10), 70 b = structure(4:6, display_width = 11), 71 c = structure(7:9, display_width = 12), 72 d = structure(c("Text", "Text", ""), display_width = 10) 73 ) 74 75 tmp <- tempfile() 76 on.exit(unlink(tmp)) 77 78 write_sav(df, tmp) 79 df2 <- read_sav(tmp) 80 81 expect_equal(df$a, zap_formats(df2$a)) 82 expect_equal(df$b, zap_formats(df2$b)) 83 expect_equal(df$c, zap_formats(df2$c)) 84 expect_equal(df$d, zap_formats(df2$d)) 85}) 86 87test_that("only selected columns are read", { 88 out <- read_spss(test_path("spss/datetime.sav"), col_select = "date") 89 expect_named(out, "date") 90}) 91 92# Row skipping/limiting -------------------------------------------------------- 93 94test_that("using skip returns correct number of rows", { 95 rows_after_skipping <- function(n) { 96 nrow(read_spss(test_path("spss/datetime.sav"), skip = n)) 97 } 98 99 n <- rows_after_skipping(0) 100 101 expect_equal(rows_after_skipping(1), n - 1) 102 expect_equal(rows_after_skipping(n - 1), 1) 103 expect_equal(rows_after_skipping(n + 0), 0) 104 expect_equal(rows_after_skipping(n + 1), 0) 105}) 106 107test_that("can limit the number of rows to read", { 108 rows_with_limit <- function(n) { 109 nrow(read_spss(test_path("spss/datetime.sav"), n_max = n)) 110 } 111 112 n <- rows_with_limit(Inf) 113 expect_equal(rows_with_limit(0), 0) 114 expect_equal(rows_with_limit(1), 1) 115 expect_equal(rows_with_limit(n), n) 116 expect_equal(rows_with_limit(n + 1), n) 117 118 # alternatives for unlimited rows 119 expect_equal(rows_with_limit(NA), n) 120 expect_equal(rows_with_limit(-1), n) 121}) 122 123# User-defined missings --------------------------------------------------- 124 125test_that("user-defined missing values read as missing by default", { 126 num <- read_spss(test_path("spss/labelled-num-na.sav"))[[1]] 127 expect_equal(vec_data(num)[[2]], NA_real_) 128}) 129 130test_that("user-defined missing values can be preserved", { 131 num <- read_spss(test_path("spss/labelled-num-na.sav"), user_na = TRUE)[[1]] 132 133 expect_s3_class(num, "haven_labelled_spss") 134 expect_equal(vec_data(num)[[2]], 9) 135 136 expect_equal(attr(num, "na_values"), 9) 137 expect_equal(attr(num, "na_range"), NULL) 138 139 num 140}) 141 142test_that("system missings read as NA", { 143 df <- tibble::tibble(x = c(1, NA)) 144 out <- roundtrip_sav(df) 145 146 expect_identical(df$x, c(1, NA)) 147}) 148 149 150# write_sav --------------------------------------------------------------- 151 152test_that("can roundtrip basic types", { 153 x <- runif(10) 154 expect_equal(roundtrip_var(x, "sav"), x) 155 expect_equal(roundtrip_var(1:10, "sav"), 1:10) 156 expect_equal(roundtrip_var(c(TRUE, FALSE), "sav"), c(1, 0)) 157 expect_equal(roundtrip_var(letters, "sav"), letters) 158}) 159 160test_that("can roundtrip missing values (as much as possible)", { 161 expect_equal(roundtrip_var(NA, "sav"), NA_integer_) 162 expect_equal(roundtrip_var(NA_real_, "sav"), NA_real_) 163 expect_equal(roundtrip_var(NA_integer_, "sav"), NA_integer_) 164 expect_equal(roundtrip_var(NA_character_, "sav"), "") 165}) 166 167test_that("can roundtrip date times", { 168 x1 <- c(as.Date("2010-01-01"), NA) 169 x2 <- as.POSIXct(x1) 170 attr(x2, "tzone") <- "UTC" 171 172 expect_equal(roundtrip_var(x1, "sav"), x1) 173 expect_equal(roundtrip_var(x2, "sav"), x2) 174}) 175 176test_that("can roundtrip times", { 177 x <- hms::hms(c(1, NA, 86400)) 178 expect_equal(roundtrip_var(x, "sav"), x) 179}) 180 181test_that("infinity gets converted to NA", { 182 expect_equal(roundtrip_var(c(Inf, 0, -Inf), "sav"), c(NA, 0, NA)) 183}) 184 185test_that("factors become labelleds", { 186 f <- factor(c("a", "b"), levels = letters[1:3]) 187 rt <- roundtrip_var(f, "sav") 188 189 expect_s3_class(rt, "haven_labelled") 190 expect_equal(as.vector(rt), 1:2) 191 expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3)) 192}) 193 194test_that("labels are preserved", { 195 x <- 1:10 196 attr(x, "label") <- "abc" 197 198 expect_equal(attr(roundtrip_var(x, "sav"), "label"), "abc") 199}) 200 201test_that("labelleds are round tripped", { 202 int <- labelled(c(1L, 2L), c(a = 1L, b = 3L)) 203 num <- labelled(c(1, 2), c(a = 1, b = 3)) 204 chr <- labelled(c("a", "b"), c(a = "b", b = "a")) 205 206 expect_equal(roundtrip_var(num, "sav"), num) 207 expect_equal(roundtrip_var(chr, "sav"), chr) 208}) 209 210test_that("spss labelleds are round tripped", { 211 df <- tibble( 212 x = labelled_spss( 213 c(1, 2, 1, 9), 214 labels = c(no = 1, yes = 2, unknown = 9), 215 na_values = 9, 216 na_range = c(80, 90) 217 ) 218 ) 219 220 path <- tempfile() 221 write_sav(df, path) 222 223 df2 <- read_sav(path) 224 expect_s3_class(df2$x, "haven_labelled") 225 expect_equal(as.double(df2$x), c(1, 2, 1, NA)) 226 227 df3 <- read_sav(path, user_na = TRUE) 228 expect_s3_class(df3$x, "haven_labelled_spss") 229 expect_equal(attr(df3$x, "na_values"), attr(df$x, "na_values")) 230 expect_equal(attr(df3$x, "na_range"), attr(df$x, "na_range")) 231}) 232 233test_that("spss string labelleds are round tripped", { 234 df <- tibble( 235 x = labelled_spss( 236 c("1", "2", "3", "99"), 237 labels = c(one = "1"), 238 na_values = "99", 239 na_range = c("2", "3") 240 ) 241 ) 242 243 path <- tempfile() 244 write_sav(df, path) 245 246 df2 <- read_sav(path) 247 expect_s3_class(df2$x, "haven_labelled") 248 expect_equal(as.character(df2$x), c("1", NA, NA, NA)) 249 250 df3 <- read_sav(path, user_na = TRUE) 251 expect_s3_class(df3$x, "haven_labelled_spss") 252 expect_equal(attr(df3$x, "na_values"), attr(df$x, "na_values")) 253 expect_equal(attr(df3$x, "na_range"), attr(df$x, "na_range")) 254}) 255 256 257test_that("factors become labelleds", { 258 f <- factor(c("a", "b"), levels = letters[1:3]) 259 rt <- roundtrip_var(f, "sav") 260 261 expect_s3_class(rt, "haven_labelled") 262 expect_equal(as.vector(rt), 1:2) 263 expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3)) 264}) 265 266test_that("labels are converted to utf-8", { 267 labels_utf8 <- c("\u00e9\u00e8", "\u00e0", "\u00ef") 268 labels_latin1 <- iconv(labels_utf8, "utf-8", "latin1") 269 270 v_utf8 <- labelled(3:1, setNames(1:3, labels_utf8)) 271 v_latin1 <- labelled(3:1, setNames(1:3, labels_latin1)) 272 273 expect_equal(names(attr(roundtrip_var(v_utf8, "sav"), "labels")), labels_utf8) 274 expect_equal(names(attr(roundtrip_var(v_latin1, "sav"), "labels")), labels_utf8) 275}) 276 277 278test_that("complain about long factor labels", { 279 expect_snapshot(error = TRUE, { 280 x <- paste(rep("a", 200), collapse = "") 281 df <- data.frame(x = factor(x)) 282 write_sav(df, tempfile()) 283 }) 284}) 285 286# max_level_lengths ------------------------------------------------------- 287 288test_that("works with NA levels", { 289 x <- factor(c("a", "abc", NA), exclude = NULL) 290 expect_equal(max_level_length(x), 3) 291}) 292 293test_that("works with empty factors", { 294 x <- factor(character(), levels = character()) 295 expect_equal(max_level_length(x), 0) 296 297 x <- factor(character(), levels = c(NA_character_)) 298 expect_equal(max_level_length(x), 0) 299}) 300 301