# ------------------------------------------------------------------------------
# vec_normalize_encoding()
#
# NOTE(review): `encodings()`, `encoding_bytes()` and `expect_equal_encoding()`
# are test helpers defined outside this file. From their use below,
# `encodings()` returns a list with (at least) `utf8` and `latin1` elements;
# presumably each element is the same string in a different encoding -- confirm
# against the helper definitions.

# A single character vector mixing every available encoding is normalized.
test_that("can translate a character vector of various encodings (#553)", {
  encs <- encodings()
  x <- unlist(encs, use.names = FALSE)

  results <- vec_normalize_encoding(x)

  expect_equal_encoding(results, encs$utf8)
})

# Each encoding, taken on its own, ends up with the encoding of the UTF-8
# reference string.
test_that("translates all encodings to UTF-8", {
  encs <- encodings()
  # Hoisted out of the loop: the reference value does not change per iteration.
  utf8 <- encs$utf8

  for (enc in encs) {
    expect_equal_encoding(vec_normalize_encoding(enc), utf8)
  }
})

# Normalization reaches into the elements of a list.
test_that("can translate a list containing character vectors with different encodings", {
  encs <- encodings()

  results <- vec_normalize_encoding(encs)
  results <- unlist(results)

  expect_equal_encoding(results, encs$utf8)
})

# "bytes"-encoded strings are rejected with an explicit error.
test_that("translation fails purposefully with any bytes", {
  expect_error(
    vec_normalize_encoding(encoding_bytes()),
    "translating strings with \"bytes\" encoding"
  )
})

# The same error is raised when a "bytes" string is combined with any other
# encoding in one vector.
test_that("translation fails purposefully when mixing with bytes with other encodings", {
  bytes <- encoding_bytes()

  for (enc in encodings()) {
    x <- c(bytes, enc)
    expect_error(
      vec_normalize_encoding(x),
      "translating strings with \"bytes\" encoding"
    )
  }
})

# Names and arbitrary extra attributes survive normalization.
test_that("attributes are kept on translation (#599)", {
  encs <- encodings()

  x <- c(encs$utf8, encs$latin1)
  x <- structure(x, names = c("a", "b"), extra = 1)

  expect_equal(attributes(vec_normalize_encoding(x)), attributes(x))
})

# Formulas stand in for "scalar" (non-vector) objects stored in a list; the
# list must come back unchanged.
test_that("translation is robust against scalar types contained in lists (#633)", {
  x <- list(a = z ~ y, b = z ~ z)
  expect_equal(vec_normalize_encoding(x), x)
})

# A scalar element in the list must not prevent normalization of its siblings.
test_that("translation can still occur even if a scalar type is in a list", {
  encs <- encodings()
  x <- list(a = z ~ y, b = encs$latin1)

  result <- vec_normalize_encoding(x)

  expect_equal_encoding(result$b, encs$utf8)
})

# A classed list (treated as a scalar) is still traversed so that the strings
# it contains are normalized.
test_that("translation occurs inside scalars contained in a list", {
  encs <- encodings()

  scalar <- structure(list(x = encs$latin1), class = "scalar_list")
  lst <- list(scalar)

  result <- vec_normalize_encoding(lst)

  expect_equal_encoding(result[[1]]$x, encs$utf8)
})

test_that("translation treats data frames elements of lists as lists (#1233)", {
  encs <- encodings()

  # Named `values` rather than `field` so the local does not mask the `field()`
  # accessor called further down.
  values <- c(encs$utf8, encs$latin1)

  a <- new_rcrd(list(field = values))
  df <- data.frame(a = a, b = 1:2)
  x <- list(df)

  # Recursive proxy won't proxy list elements,
  # so the rcrd column in the data frame won't get proxied
  proxy <- vec_proxy_equal(x)

  result <- vec_normalize_encoding(proxy)

  expect_identical(result, x)

  result_field <- field(result[[1]]$a, "field")
  expect_field <- c(encs$utf8, encs$utf8)

  expect_equal_encoding(result_field, expect_field)
})

# Character attributes, including ones nested inside a list-valued attribute,
# are normalized too.
test_that("attributes are translated", {
  encs <- encodings()
  utf8 <- encs$utf8
  latin1 <- encs$latin1

  # Descriptive names instead of `a`/`b`/`c`, which would mask `base::c`.
  with_utf8 <- structure(1, enc = utf8)
  with_latin1 <- structure(1, enc = latin1)
  with_mixed <- structure(1, enc1 = utf8, enc2 = list(latin1), enc3 = latin1)
  x <- list(with_utf8, with_latin1, with_mixed)

  result <- vec_normalize_encoding(x)

  a_enc <- attr(result[[1]], "enc")
  b_enc <- attr(result[[2]], "enc")
  c_enc1 <- attr(result[[3]], "enc1")
  c_enc2 <- attr(result[[3]], "enc2")[[1]]
  c_enc3 <- attr(result[[3]], "enc3")

  expect_equal_encoding(a_enc, utf8)
  expect_equal_encoding(b_enc, utf8)
  expect_equal_encoding(c_enc1, utf8)
  expect_equal_encoding(c_enc2, utf8)
  expect_equal_encoding(c_enc3, utf8)

  # The first two elements differ only in the encoding of their attribute, so
  # `vec_unique()` on the *original* input is expected to collapse them.
  expected <- list(
    structure(1, enc = utf8),
    structure(1, enc1 = utf8, enc2 = list(utf8), enc3 = utf8)
  )

  expect_identical(vec_unique(x), expected)
})

# Attributes of attributes are normalized as well.
test_that("attributes are translated recursively", {
  encs <- encodings()
  utf8 <- encs$utf8
  latin1 <- encs$latin1

  nested <- structure(1, latin1 = latin1)
  x <- structure(2, nested = nested, foo = 1, latin1 = latin1)

  result <- vec_normalize_encoding(x)
  attrib <- attributes(result)
  attrib_nested <- attributes(attrib$nested)

  expect_equal_encoding(attrib$latin1, utf8)
  expect_equal_encoding(attrib_nested$latin1, utf8)
})

# A missing value must stay `NA_character_` rather than become the string "NA".
test_that("NAs aren't converted to 'NA' (#1291)", {
  encs <- encodings()
  utf8 <- c(NA, encs$utf8)
  latin1 <- c(NA, encs$latin1)

  result1 <- vec_normalize_encoding(utf8)
  result2 <- vec_normalize_encoding(latin1)

  expect_equal_encoding(result1, utf8)
  expect_equal_encoding(result2, utf8)

  expect_identical(result1[[1]], NA_character_)
  expect_identical(result2[[1]], NA_character_)
})