1# ------------------------------------------------------------------------------
2# vec_normalize_encoding()
3
test_that("can translate a character vector of various encodings (#553)", {
  # Flatten every variant returned by the `encodings()` helper into a single
  # unnamed vector so that one call has to deal with all of them at once.
  mixed <- unlist(encodings(), use.names = FALSE)

  normalized <- vec_normalize_encoding(mixed)

  # Compare against a freshly built UTF-8 reference.
  expect_equal_encoding(normalized, encodings()$utf8)
})
11
test_that("translates all encodings to UTF-8", {
  variants <- encodings()

  # Normalize each variant on its own (rather than as one combined vector)
  # and compare it with the UTF-8 reference from the helper.
  for (i in seq_along(variants)) {
    normalized <- vec_normalize_encoding(variants[[i]])
    expect_equal_encoding(normalized, encodings()$utf8)
  }
})
19
test_that("can translate a list containing character vectors with different encodings", {
  # Hand the helper's list to the function as-is, then flatten the result
  # (names are kept by `unlist()`) before comparing with the UTF-8 reference.
  normalized <- unlist(vec_normalize_encoding(encodings()))

  expect_equal_encoding(normalized, encodings()$utf8)
})
26
test_that("translation fails purposefully with any bytes", {
  # Regular expression the error message has to match.
  bytes_msg <- "translating strings with \"bytes\" encoding"

  # `encoding_bytes()` is a test helper; presumably it yields a string marked
  # with "bytes" encoding, as the expected message suggests.
  expect_error(vec_normalize_encoding(encoding_bytes()), bytes_msg)
})
33
test_that("translation fails purposefully when mixing with bytes with other encodings", {
  # Regular expression the error message has to match.
  bytes_msg <- "translating strings with \"bytes\" encoding"
  variants <- encodings()

  # Put the `encoding_bytes()` value in front of each variant in turn; every
  # combination is expected to raise the same error.
  for (i in seq_along(variants)) {
    mixed <- c(encoding_bytes(), variants[[i]])
    expect_error(vec_normalize_encoding(mixed), bytes_msg)
  }
})
40
test_that("attributes are kept on translation (#599)", {
  variants <- encodings()

  # Vector combining the UTF-8 and latin1 variants, carrying names plus one
  # custom attribute.
  input <- structure(
    c(variants$utf8, variants$latin1),
    names = c("a", "b"),
    extra = 1
  )

  output <- vec_normalize_encoding(input)

  # The full attribute list must be the same before and after.
  expect_equal(attributes(output), attributes(input))
})
49
test_that("translation is robust against scalar types contained in lists (#633)", {
  # Formulas stand in for the "scalar types" named in the test title; a list
  # holding only formulas is expected to come back equal to the input.
  formulas <- list(a = z ~ y, b = z ~ z)

  output <- vec_normalize_encoding(formulas)

  expect_equal(output, formulas)
})
54
test_that("translation can still occur even if a scalar type is in a list", {
  variants <- encodings()

  # A formula sits next to a latin1 string in the same list.
  input <- list(a = z ~ y, b = variants$latin1)

  output <- vec_normalize_encoding(input)

  # The string element must match the UTF-8 reference despite its neighbour.
  expect_equal_encoding(output[["b"]], variants$utf8)
})
63
test_that("translation occurs inside scalars contained in a list", {
  variants <- encodings()

  # A classed list ("scalar_list") wrapping a latin1 string, itself stored as
  # the only element of a plain list.
  wrapped <- list(
    structure(list(x = variants$latin1), class = "scalar_list")
  )

  output <- vec_normalize_encoding(wrapped)
  inner <- output[[1]]

  expect_equal_encoding(inner$x, variants$utf8)
})
74
test_that("translation treats data frames elements of lists as lists (#1233)", {
  variants <- encodings()

  mixed_field <- c(variants$utf8, variants$latin1)

  # Record vector with a mixed-encoding field, used as a data frame column;
  # the data frame is then stored as the only element of a list.
  rcrd <- new_rcrd(list(field = mixed_field))
  input <- list(data.frame(a = rcrd, b = 1:2))

  # Recursive proxy won't proxy list elements,
  # so the rcrd column in the data frame won't get proxied
  proxied <- vec_proxy_equal(input)

  output <- vec_normalize_encoding(proxied)

  expect_identical(output, input)

  # Every entry of the field inside the rcrd column should match the UTF-8
  # reference.
  actual_field <- field(output[[1]]$a, "field")
  wanted_field <- c(variants$utf8, variants$utf8)

  expect_equal_encoding(actual_field, wanted_field)
})
97
test_that("attributes are translated", {
  utf8 <- encodings()$utf8
  latin1 <- encodings()$latin1

  # Three numeric scalars whose attributes carry strings: a UTF-8 one, a
  # latin1 one, and a mix that also nests a latin1 string inside a list.
  input <- list(
    structure(1, enc = utf8),
    structure(1, enc = latin1),
    structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1)
  )

  output <- vec_normalize_encoding(input)

  # Pull out every string-bearing attribute first, then check them all.
  translated <- list(
    attr(output[[1]], "enc"),
    attr(output[[2]], "enc"),
    attr(output[[3]], "enc1"),
    attr(output[[3]], "enc2")[[1]],
    attr(output[[3]], "enc3")
  )

  for (value in translated) {
    expect_equal_encoding(value, utf8)
  }

  # Deduplicating the *original* input is expected to leave two elements:
  # the first two inputs collapse into one.
  unique_wanted <- list(
    structure(1, enc = utf8),
    structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8)
  )

  expect_identical(vec_unique(input), unique_wanted)
})
128
test_that("attributes are translated recursively", {
  utf8_ref <- encodings()$utf8
  latin1_str <- encodings()$latin1

  # A latin1 string is attached to the top-level object and also to an object
  # that is itself stored in one of the top-level attributes.
  inner <- structure(1, latin1 = latin1_str)
  top <- structure(2, nested = inner, foo = 1, latin1 = latin1_str)

  output <- vec_normalize_encoding(top)
  top_attrs <- attributes(output)
  inner_attrs <- attributes(top_attrs[["nested"]])

  # Both levels must match the UTF-8 reference.
  expect_equal_encoding(top_attrs[["latin1"]], utf8_ref)
  expect_equal_encoding(inner_attrs[["latin1"]], utf8_ref)
})
143
test_that("NAs aren't converted to 'NA' (#1291)", {
  # Each input starts with a missing value followed by the helper's string.
  na_then_utf8 <- c(NA, encodings()$utf8)
  na_then_latin1 <- c(NA, encodings()$latin1)

  out_utf8 <- vec_normalize_encoding(na_then_utf8)
  out_latin1 <- vec_normalize_encoding(na_then_latin1)

  # Both results are compared with the UTF-8 input.
  expect_equal_encoding(out_utf8, na_then_utf8)
  expect_equal_encoding(out_latin1, na_then_utf8)

  # The leading element must remain a true missing value, not the string "NA".
  expect_identical(out_utf8[[1]], NA_character_)
  expect_identical(out_latin1[[1]], NA_character_)
})
157