1# read_spss ---------------------------------------------------------------
2
3test_that("variable label stored as attributes", {
4  df <- read_spss(test_path("spss/variable-label.sav"))
5  expect_equal(attr(df$sex, "label"), "Gender")
6})
7
8test_that("value labels stored as labelled class", {
9  num <- zap_formats(read_spss(test_path("spss/labelled-num.sav")))
10  str <- zap_formats(read_spss(test_path("spss/labelled-str.sav")))
11
12  expect_equal(num[[1]], labelled(1, c("This is one" = 1)))
13  expect_equal(str[[1]], labelled(c("M", "F"), c(Female = "F", Male = "M")))
14})
15
16test_that("value labels read in as same type as vector", {
17  df <- read_spss(test_path("spss/variable-label.sav"))
18  num <- read_spss(test_path("spss/labelled-num.sav"))
19  str <- read_spss(test_path("spss/labelled-str.sav"))
20
21  expect_equal(typeof(df$sex), typeof(attr(df$sex, "labels")))
22  expect_equal(typeof(num[[1]]), typeof(attr(num[[1]], "labels")))
23  expect_equal(typeof(str[[1]]), typeof(attr(str[[1]], "labels")))
24})
25
26test_that("non-ASCII labels converted to utf-8", {
27  x <- read_spss(test_path("spss/umlauts.sav"))[[1]]
28
29  expect_equal(attr(x, "label"), "This is an \u00e4-umlaut")
30  expect_equal(names(attr(x, "labels"))[1], "the \u00e4 umlaut")
31})
32
33test_that("datetime variables converted to the correct class", {
34  df <- read_spss(test_path("spss/datetime.sav"))
35  expect_true(inherits(df$date, "Date"))
36  expect_true(inherits(df$date.posix, "POSIXct"))
37  expect_true(inherits(df$time, "hms"))
38})
39
40test_that("datetime values correctly imported (offset)", {
41  df <- read_spss(test_path("spss/datetime.sav"))
42  expect_equal(df$date[1], as.Date("2014-09-22d"))
43  expect_equal(df$date.posix[2], as.POSIXct("2014-09-23 15:59:20", tz = "UTC"))
44  expect_equal(as.integer(df$time[1]), 43870)
45})
46
47test_that("formats roundtrip", {
48  df <- tibble::tibble(
49    a = structure(c(1, 1, 2), format.spss = "F1.0"),
50    b = structure(4:6, format.spss = "F2.1"),
51    c = structure(7:9, format.spss = "N2"),
52    d = structure(c("Text", "Text", ""), format.spss = "A100")
53  )
54
55  tmp <- tempfile()
56  on.exit(unlink(tmp))
57
58  write_sav(df, tmp)
59  df2 <- read_sav(tmp)
60
61  expect_equal(df$a, df2$a)
62  expect_equal(df$b, df2$b)
63  expect_equal(df$c, df2$c)
64  expect_equal(df$d, df2$d)
65})
66
67test_that("widths roundtrip", {
68  df <- tibble::tibble(
69    a = structure(c(1, 1, 2), display_width = 10),
70    b = structure(4:6, display_width = 11),
71    c = structure(7:9, display_width = 12),
72    d = structure(c("Text", "Text", ""), display_width = 10)
73  )
74
75  tmp <- tempfile()
76  on.exit(unlink(tmp))
77
78  write_sav(df, tmp)
79  df2 <- read_sav(tmp)
80
81  expect_equal(df$a, zap_formats(df2$a))
82  expect_equal(df$b, zap_formats(df2$b))
83  expect_equal(df$c, zap_formats(df2$c))
84  expect_equal(df$d, zap_formats(df2$d))
85})
86
87test_that("only selected columns are read", {
88  out <- read_spss(test_path("spss/datetime.sav"), col_select = "date")
89  expect_named(out, "date")
90})
91
92# Row skipping/limiting --------------------------------------------------------
93
94test_that("using skip returns correct number of rows", {
95  rows_after_skipping <- function(n) {
96    nrow(read_spss(test_path("spss/datetime.sav"), skip = n))
97  }
98
99  n <- rows_after_skipping(0)
100
101  expect_equal(rows_after_skipping(1), n - 1)
102  expect_equal(rows_after_skipping(n - 1), 1)
103  expect_equal(rows_after_skipping(n + 0), 0)
104  expect_equal(rows_after_skipping(n + 1), 0)
105})
106
107test_that("can limit the number of rows to read", {
108  rows_with_limit <- function(n) {
109    nrow(read_spss(test_path("spss/datetime.sav"), n_max = n))
110  }
111
112  n <- rows_with_limit(Inf)
113  expect_equal(rows_with_limit(0), 0)
114  expect_equal(rows_with_limit(1), 1)
115  expect_equal(rows_with_limit(n), n)
116  expect_equal(rows_with_limit(n + 1), n)
117
118  # alternatives for unlimited rows
119  expect_equal(rows_with_limit(NA), n)
120  expect_equal(rows_with_limit(-1), n)
121})
122
123# User-defined missings ---------------------------------------------------
124
125test_that("user-defined missing values read as missing by default", {
126  num <- read_spss(test_path("spss/labelled-num-na.sav"))[[1]]
127  expect_equal(vec_data(num)[[2]], NA_real_)
128})
129
130test_that("user-defined missing values can be preserved", {
131  num <- read_spss(test_path("spss/labelled-num-na.sav"), user_na = TRUE)[[1]]
132
133  expect_s3_class(num, "haven_labelled_spss")
134  expect_equal(vec_data(num)[[2]], 9)
135
136  expect_equal(attr(num, "na_values"), 9)
137  expect_equal(attr(num, "na_range"), NULL)
138
139  num
140})
141
142test_that("system missings read as NA", {
143  df <- tibble::tibble(x = c(1, NA))
144  out <- roundtrip_sav(df)
145
146  expect_identical(df$x, c(1, NA))
147})
148
149
150# write_sav ---------------------------------------------------------------
151
152test_that("can roundtrip basic types", {
153  x <- runif(10)
154  expect_equal(roundtrip_var(x, "sav"), x)
155  expect_equal(roundtrip_var(1:10, "sav"), 1:10)
156  expect_equal(roundtrip_var(c(TRUE, FALSE), "sav"), c(1, 0))
157  expect_equal(roundtrip_var(letters, "sav"), letters)
158})
159
160test_that("can roundtrip missing values (as much as possible)", {
161  expect_equal(roundtrip_var(NA, "sav"), NA_integer_)
162  expect_equal(roundtrip_var(NA_real_, "sav"), NA_real_)
163  expect_equal(roundtrip_var(NA_integer_, "sav"), NA_integer_)
164  expect_equal(roundtrip_var(NA_character_, "sav"), "")
165})
166
167test_that("can roundtrip date times", {
168  x1 <- c(as.Date("2010-01-01"), NA)
169  x2 <- as.POSIXct(x1)
170  attr(x2, "tzone") <- "UTC"
171
172  expect_equal(roundtrip_var(x1, "sav"), x1)
173  expect_equal(roundtrip_var(x2, "sav"), x2)
174})
175
176test_that("can roundtrip times", {
177  x <- hms::hms(c(1, NA, 86400))
178  expect_equal(roundtrip_var(x, "sav"), x)
179})
180
181test_that("infinity gets converted to NA", {
182  expect_equal(roundtrip_var(c(Inf, 0, -Inf), "sav"), c(NA, 0, NA))
183})
184
185test_that("factors become labelleds", {
186  f <- factor(c("a", "b"), levels = letters[1:3])
187  rt <- roundtrip_var(f, "sav")
188
189  expect_s3_class(rt, "haven_labelled")
190  expect_equal(as.vector(rt), 1:2)
191  expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3))
192})
193
194test_that("labels are preserved", {
195  x <- 1:10
196  attr(x, "label") <- "abc"
197
198  expect_equal(attr(roundtrip_var(x, "sav"), "label"), "abc")
199})
200
201test_that("labelleds are round tripped", {
202  int <- labelled(c(1L, 2L), c(a = 1L, b = 3L))
203  num <- labelled(c(1, 2), c(a = 1, b = 3))
204  chr <- labelled(c("a", "b"), c(a = "b", b = "a"))
205
206  expect_equal(roundtrip_var(num, "sav"), num)
207  expect_equal(roundtrip_var(chr, "sav"), chr)
208})
209
210test_that("spss labelleds are round tripped", {
211  df <- tibble(
212    x = labelled_spss(
213      c(1, 2, 1, 9),
214      labels = c(no = 1, yes = 2, unknown = 9),
215      na_values = 9,
216      na_range = c(80, 90)
217    )
218  )
219
220  path <- tempfile()
221  write_sav(df, path)
222
223  df2 <- read_sav(path)
224  expect_s3_class(df2$x, "haven_labelled")
225  expect_equal(as.double(df2$x), c(1, 2, 1, NA))
226
227  df3 <- read_sav(path, user_na = TRUE)
228  expect_s3_class(df3$x, "haven_labelled_spss")
229  expect_equal(attr(df3$x, "na_values"), attr(df$x, "na_values"))
230  expect_equal(attr(df3$x, "na_range"), attr(df$x, "na_range"))
231})
232
233test_that("spss string labelleds are round tripped", {
234  df <- tibble(
235    x = labelled_spss(
236      c("1", "2", "3", "99"),
237      labels = c(one = "1"),
238      na_values = "99",
239      na_range = c("2", "3")
240    )
241  )
242
243  path <- tempfile()
244  write_sav(df, path)
245
246  df2 <- read_sav(path)
247  expect_s3_class(df2$x, "haven_labelled")
248  expect_equal(as.character(df2$x), c("1", NA, NA, NA))
249
250  df3 <- read_sav(path, user_na = TRUE)
251  expect_s3_class(df3$x, "haven_labelled_spss")
252  expect_equal(attr(df3$x, "na_values"), attr(df$x, "na_values"))
253  expect_equal(attr(df3$x, "na_range"), attr(df$x, "na_range"))
254})
255
256
257test_that("factors become labelleds", {
258  f <- factor(c("a", "b"), levels = letters[1:3])
259  rt <- roundtrip_var(f, "sav")
260
261  expect_s3_class(rt, "haven_labelled")
262  expect_equal(as.vector(rt), 1:2)
263  expect_equal(attr(rt, "labels"), c(a = 1, b = 2, c = 3))
264})
265
266test_that("labels are converted to utf-8", {
267  labels_utf8 <- c("\u00e9\u00e8", "\u00e0", "\u00ef")
268  labels_latin1 <- iconv(labels_utf8, "utf-8", "latin1")
269
270  v_utf8 <- labelled(3:1, setNames(1:3, labels_utf8))
271  v_latin1 <- labelled(3:1, setNames(1:3, labels_latin1))
272
273  expect_equal(names(attr(roundtrip_var(v_utf8, "sav"), "labels")), labels_utf8)
274  expect_equal(names(attr(roundtrip_var(v_latin1, "sav"), "labels")), labels_utf8)
275})
276
277
278test_that("complain about long factor labels", {
279  expect_snapshot(error = TRUE, {
280    x <- paste(rep("a", 200), collapse = "")
281    df <- data.frame(x = factor(x))
282    write_sav(df, tempfile())
283  })
284})
285
286# max_level_lengths -------------------------------------------------------
287
288test_that("works with NA levels", {
289  x <- factor(c("a", "abc", NA), exclude = NULL)
290  expect_equal(max_level_length(x), 3)
291})
292
293test_that("works with empty factors", {
294  x <- factor(character(), levels = character())
295  expect_equal(max_level_length(x), 0)
296
297  x <- factor(character(), levels = c(NA_character_))
298  expect_equal(max_level_length(x), 0)
299})
300
301