%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

%%

-module(bs_utf_SUITE).

-export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2,
         init_per_suite/1,end_per_suite/1,
         init_per_testcase/2,end_per_testcase/2,
         utf8_roundtrip/1,unused_utf_char/1,utf16_roundtrip/1,
         utf32_roundtrip/1,guard/1,extreme_tripping/1]).

-include_lib("common_test/include/ct.hrl").
-compile([no_jopt,time]).

%% Suite-wide Common Test settings: the ts_install_cth hook and a
%% one-minute timetrap.
suite() ->
    Hooks = {ct_hooks,[ts_install_cth]},
    Timetrap = {timetrap,{minutes,1}},
    [Hooks, Timetrap].

%% Every test case listed by cases/0 is run; no groups are used.
all() ->
    cases().

groups() ->
    [].

%% Group callbacks pass the configuration through untouched.
init_per_group(_Group, Config) ->
    Config.

end_per_group(_Group, Config) ->
    Config.


%% The test cases of this suite, in execution order.
cases() ->
    [utf8_roundtrip, unused_utf_char, utf16_roundtrip,
     utf32_roundtrip, guard, extreme_tripping].

%% Calls test_lib:interpret/1 on this module before each test case
%% (presumably to make sure the module is interpreted -- confirm
%% against test_lib).
init_per_testcase(_Case, Config) ->
    test_lib:interpret(?MODULE),
    Config.

end_per_testcase(_Case, _Config) ->
    ok.

%% Calls test_lib:interpret/1 on this module and insists that the module
%% then shows up in int:interpreted/0.
init_per_suite(Config) when is_list(Config) ->
    test_lib:interpret(?MODULE),
    Interpreted = int:interpreted(),
    true = lists:member(?MODULE, Interpreted),
    Config.

end_per_suite(Config) when is_list(Config) ->
    ok.

%% Runs the UTF-8 round-trip checks on every entry of utf_data/0.
utf8_roundtrip(Config) when is_list(Config) ->
    lists:foreach(fun(Entry) -> utf8_roundtrip_1(Entry) end, utf_data()),
    ok.

%% Checks one utf_data/0 entry: the two binaries must be identical
%% (enforced by the repeated variable in the head), decoding must give
%% Str, encoding Str must give the binary back, and construction of
%% utf8 segments inside a guard must behave as expected.
utf8_roundtrip_1({Str,Bin,Bin}) ->
    Str = utf8_to_list(Bin),
    Bin = list_to_utf8(Str),
    [ok = utf8_guard(C, <<42,C/utf8>>) || C <- Str],
    [error = utf8_guard(C, <<C/utf8>>) || C <- Str],
    ok.

%% Returns ok when Bin is exactly the byte 42 followed by C encoded as
%% UTF-8; the binary is constructed inside the guard. Otherwise error.
utf8_guard(C, Bin) when <<42,C/utf8>> =:= Bin -> ok;
utf8_guard(_, _) -> error.

%% Decodes a UTF-8 binary into a list of code points.
utf8_to_list(<<C/utf8,T/binary>>) ->
    [C|utf8_to_list(T)];
utf8_to_list(<<>>) -> [].

%% Encodes a list of code points as a UTF-8 binary.
list_to_utf8(L) ->
    list_to_utf8(L, <<>>).

list_to_utf8([H|T], Bin) ->
    list_to_utf8(T, <<Bin/binary,H/utf8>>);
list_to_utf8([], Bin) -> Bin.

%% Matches out UTF-8 characters without using their values and checks
%% that the number of characters equals the length of the code point list.
unused_utf_char(Config) when is_list(Config) ->
    %% utf_data/0 returns 3-tuples. The previous comprehension used the
    %% generator pattern {Str,Utf8}, which never matched, so every
    %% element was silently filtered out and nothing was verified.
    %% A fun head is used so that an unexpected tuple shape crashes
    %% instead of being skipped.
    lists:foreach(
      fun({Str,Utf8,_Literal}) ->
              true = utf8_len(Utf8) =:= length(Str)
      end, utf_data()),
    ok.

%% Counts the characters in a UTF-8 binary.
utf8_len(B) ->
    utf8_len(B, 0).

utf8_len(<<_/utf8,T/binary>>, N) ->
    utf8_len(T, N+1);
utf8_len(<<>>, N) -> N.

%% Round-trips the utf16_data/0 example in both byte orders. The repeated
%% variables in the tuple pattern require each pair of binaries to be equal.
utf16_roundtrip(Config) when is_list(Config) ->
    {Str,Big,Big,Little,Little} = utf16_data(),
    4 = utf16_big_len(Big),
    4 = utf16_little_len(Little),
    Str = big_utf16_to_list(Big),
    Str = little_utf16_to_list(Little),

    Big = list_to_big_utf16(Str),
    Little = list_to_little_utf16(Str),

    ok.

%% Counts the characters in a big-endian UTF-16 binary.
utf16_big_len(B) ->
    utf16_big_len(B, 0).

utf16_big_len(<<_/utf16,T/binary>>, N) ->
    utf16_big_len(T, N+1);
utf16_big_len(<<>>, N) -> N.

%% Counts the characters in a little-endian UTF-16 binary.
utf16_little_len(B) ->
    utf16_little_len(B, 0).

utf16_little_len(<<_/little-utf16,T/binary>>, N) ->
    utf16_little_len(T, N+1);
utf16_little_len(<<>>, N) -> N.

%% Encodes a list of code points as big-endian UTF-16.
list_to_big_utf16(List) ->
    list_to_big_utf16(List, <<>>).

list_to_big_utf16([H|T], Bin) ->
    list_to_big_utf16(T, <<Bin/binary,H/utf16>>);
list_to_big_utf16([], Bin) -> Bin.

%% Encodes a list of code points as little-endian UTF-16.
list_to_little_utf16(List) ->
    list_to_little_utf16(List, <<>>).

list_to_little_utf16([H|T], Bin) ->
    list_to_little_utf16(T, <<Bin/binary,H/little-utf16>>);
list_to_little_utf16([], Bin) -> Bin.

%% Decodes a big-endian UTF-16 binary into a list of code points.
big_utf16_to_list(<<>>) ->
    [];
big_utf16_to_list(<<Char/utf16,Rest/binary>>) ->
    [Char|big_utf16_to_list(Rest)].

%% Decodes a little-endian UTF-16 binary into a list of code points.
little_utf16_to_list(<<>>) ->
    [];
little_utf16_to_list(<<Char/little-utf16,Rest/binary>>) ->
    [Char|little_utf16_to_list(Rest)].

%% Round-trips the utf32_data/0 example in both byte orders. The repeated
%% variables in the tuple pattern require each pair of binaries to be equal.
utf32_roundtrip(Config) when is_list(Config) ->
    {Chars,BigEndian,BigEndian,LittleEndian,LittleEndian} = utf32_data(),
    4 = utf32_big_len(BigEndian),
    4 = utf32_little_len(LittleEndian),
    Chars = big_utf32_to_list(BigEndian),
    Chars = little_utf32_to_list(LittleEndian),

    BigEndian = list_to_big_utf32(Chars),
    LittleEndian = list_to_little_utf32(Chars),

    ok.

%% Counts the characters in a big-endian UTF-32 binary.
utf32_big_len(Bin) ->
    utf32_big_len(Bin, 0).

utf32_big_len(<<>>, Count) ->
    Count;
utf32_big_len(<<_/utf32,Rest/binary>>, Count) ->
    utf32_big_len(Rest, Count+1).

%% Counts the characters in a little-endian UTF-32 binary.
utf32_little_len(Bin) ->
    utf32_little_len(Bin, 0).

utf32_little_len(<<>>, Count) ->
    Count;
utf32_little_len(<<_/little-utf32,Rest/binary>>, Count) ->
    utf32_little_len(Rest, Count+1).

%% Encodes a list of code points as big-endian UTF-32.
list_to_big_utf32(Chars) ->
    list_to_big_utf32(Chars, <<>>).

list_to_big_utf32([], Acc) ->
    Acc;
list_to_big_utf32([Char|Rest], Acc) ->
    list_to_big_utf32(Rest, <<Acc/binary,Char/utf32>>).

%% Encodes a list of code points as little-endian UTF-32.
list_to_little_utf32(Chars) ->
    list_to_little_utf32(Chars, <<>>).

list_to_little_utf32([], Acc) ->
    Acc;
list_to_little_utf32([Char|Rest], Acc) ->
    list_to_little_utf32(Rest, <<Acc/binary,Char/little-utf32>>).

%% Decodes a big-endian UTF-32 binary into a list of code points.
big_utf32_to_list(<<>>) ->
    [];
big_utf32_to_list(<<Char/utf32,Rest/binary>>) ->
    [Char|big_utf32_to_list(Rest)].

%% Decodes a little-endian UTF-32 binary into a list of code points.
little_utf32_to_list(<<>>) ->
    [];
little_utf32_to_list(<<Char/little-utf32,Rest/binary>>) ->
    [Char|little_utf32_to_list(Rest)].


%% 16#D800 must be rejected by all three guard alternatives in do_guard/1,
%% leaving only the catch-all clause.
guard(Config) when is_list(Config) ->
    error = do_guard(16#D800),
    ok.

%% Returns ok if C can be encoded as utf8, utf16 or utf32 inside a guard
%% (the byte size is then never 42); a failing construction makes that
%% alternative fail, and the next one is tried.
do_guard(C) when byte_size(<<C/utf8>>) =/= 42;
                 byte_size(<<C/utf16>>) =/= 42;
                 byte_size(<<C/utf32>>) =/= 42 ->
    ok;
do_guard(_) ->
    error.

%% The purpose of this test is to make sure that
%% the delayed creation of sub-binaries works.

extreme_tripping(Config) when is_list(Config) ->
    CodePoints = lists:seq(0, 1024),
    AsUtf8 = unicode_to_utf8(CodePoints, <<>>),
    AsUtf16 = utf8_to_utf16(AsUtf8, <<>>),
    AsUtf32 = utf8_to_utf32(AsUtf8, <<>>),
    %% Going through UTF-16 must give the same UTF-32 binary.
    AsUtf32 = utf16_to_utf32(AsUtf16, <<>>),
    AsUtf8 = utf32_to_utf8(AsUtf32, <<>>),
    CodePoints = utf32_to_unicode(AsUtf32),
    ok.

%% Appends each code point in the list to Acc as UTF-8.
unicode_to_utf8([], Acc) ->
    Acc;
unicode_to_utf8([Char|Rest], Acc) ->
    unicode_to_utf8(Rest, <<Acc/bytes,Char/utf8>>).

%% Re-encodes a UTF-8 binary as big-endian UTF-16, appending to Acc.
utf8_to_utf16(<<>>, Acc) ->
    Acc;
utf8_to_utf16(<<Char/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf16(Rest, <<Acc/bytes,Char/utf16>>).

%% Re-encodes a big-endian UTF-16 binary as big-endian UTF-32.
utf16_to_utf32(<<>>, Acc) ->
    Acc;
utf16_to_utf32(<<Char/utf16,Rest/binary>>, Acc) ->
    utf16_to_utf32(Rest, <<Acc/bytes,Char/utf32>>).

%% Re-encodes a UTF-8 binary as big-endian UTF-32.
utf8_to_utf32(<<>>, Acc) ->
    Acc;
utf8_to_utf32(<<Char/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf32(Rest, <<Acc/bytes,Char/utf32>>).

%% Re-encodes a big-endian UTF-32 binary as UTF-8.
utf32_to_utf8(<<>>, Acc) ->
    Acc;
utf32_to_utf8(<<Char/utf32,Rest/binary>>, Acc) ->
    utf32_to_utf8(Rest, <<Acc/bytes,Char/utf8>>).

%% Decodes a big-endian UTF-32 binary into a list of code points.
utf32_to_unicode(<<>>) ->
    [];
utf32_to_unicode(<<Char/utf32,Rest/binary>>) ->
    [Char|utf32_to_unicode(Rest)].

%% UTF-8 test vectors. Each entry is {CodePoints, Constructed, Literal},
%% where Constructed is built with utf8 segments and Literal spells out
%% the expected bytes.
utf_data() ->
    %% From RFC-3629.

    %% Give the compiler a chance to do some constant propagation.
    NotIdentical = 16#2262,

    %% "A<NOT IDENTICAL TO><ALPHA>."
    Latin = {[16#0041,NotIdentical,16#0391,16#002E],
             <<16#0041/utf8,NotIdentical/utf8,16#0391/utf8,16#002E/utf8>>,
             <<16#41,16#E2,16#89,16#A2,16#CE,16#91,16#2E>>},

    %% Korean "hangugeo" (meaning "the Korean language")
    Korean = {[16#D55C,16#AD6D,16#C5B4],
              <<16#D55C/utf8,16#AD6D/utf8,16#C5B4/utf8>>,
              <<16#ED,16#95,16#9C,16#EA,16#B5,16#AD,16#EC,16#96,16#B4>>},

    %% Japanese "nihongo" (meaning "the Japanese language").
    Japanese = {[16#65E5,16#672C,16#8A9E],
                <<16#65E5/utf8,16#672C/utf8,16#8A9E/utf8>>,
                <<16#E6,16#97,16#A5,16#E6,16#9C,16#AC,16#E8,16#AA,16#9E>>},

    [Latin, Korean, Japanese].

%% UTF-16 test vector:
%% {CodePoints, BigConstructed, BigLiteral, LittleConstructed, LittleLiteral}.
utf16_data() ->
    %% Example from RFC-2781. "*=Ra", where "*" represents a
    %% hypothetical Ra hieroglyph (code point 16#12345).

    %% Give the compiler a chance to do some constant propagation.
    RaHieroglyph = 16#12345,

    %% First as a list of Unicode characters.
    Chars = [RaHieroglyph,16#3D,16#52,16#61],

    %% Big endian (the two binaries should be equal).
    BigConstructed = <<RaHieroglyph/big-utf16,16#3D/big-utf16,
                       16#52/big-utf16,16#61/big-utf16>>,
    BigLiteral = <<16#D8,16#08,16#DF,16#45,16#00,16#3D,16#00,16#52,16#00,16#61>>,

    %% Little endian (the two binaries should be equal).
    LittleConstructed = <<RaHieroglyph/little-utf16,16#3D/little-utf16,
                          16#52/little-utf16,16#61/little-utf16>>,
    LittleLiteral = <<16#08,16#D8,16#45,16#DF,16#3D,16#00,16#52,16#00,16#61,16#00>>,

    {Chars,BigConstructed,BigLiteral,LittleConstructed,LittleLiteral}.

%% UTF-32 test vector, laid out like the UTF-16 one.
utf32_data() ->
    %% "A<NOT IDENTICAL TO><ALPHA>."
    NotIdentical = 16#2262,
    Chars = [16#0041,NotIdentical,16#0391,16#002E],

    %% Big endian.
    BigConstructed = <<16#0041/utf32,NotIdentical/utf32,
                       16#0391/utf32,16#002E/utf32>>,
    BigLiteral = <<16#41:32,NotIdentical:32,16#0391:32,16#2E:32>>,

    %% Little endian.
    LittleConstructed = <<16#0041/little-utf32,NotIdentical/little-utf32,
                          16#0391/little-utf32,16#002E/little-utf32>>,
    LittleLiteral = <<16#41:32/little,NotIdentical:32/little,
                      16#0391:32/little,16#2E:32/little>>,

    {Chars,BigConstructed,BigLiteral,LittleConstructed,LittleLiteral}.