1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2008-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21%%
22
23-module(bs_utf_SUITE).
24
25-export([all/0, suite/0,groups/0,init_per_group/2,end_per_group/2,
26	 init_per_suite/1,end_per_suite/1,
27	 init_per_testcase/2,end_per_testcase/2,
28	 utf8_roundtrip/1,unused_utf_char/1,utf16_roundtrip/1,
29	 utf32_roundtrip/1,guard/1,extreme_tripping/1]).
30
31-include_lib("common_test/include/ct.hrl").
32-compile([no_jopt,time]).
33
%% Suite-wide Common Test settings: install the ts_install_cth hook
%% and give every test case a one-minute timetrap.
suite() ->
    Hooks = {ct_hooks,[ts_install_cth]},
    TimeTrap = {timetrap,{minutes,1}},
    [Hooks,TimeTrap].
37
%% Run every test case of the suite; the list itself is kept in cases/0.
all() ->
    AllCases = cases(),
    AllCases.
40
%% This suite defines no test case groups.
groups() ->
    NoGroups = [],
    NoGroups.
43
%% No per-group setup is needed; the configuration is returned unchanged.
init_per_group(_Group, Config0) ->
    Config0.
46
%% No per-group teardown is needed; the configuration is returned unchanged.
end_per_group(_Group, Config0) ->
    Config0.
49
50
%% The test cases of this suite, in the order they are listed to
%% Common Test.
cases() ->
    Utf8Cases = [utf8_roundtrip,unused_utf_char],
    WideCases = [utf16_roundtrip,utf32_roundtrip],
    OtherCases = [guard,extreme_tripping],
    Utf8Cases ++ WideCases ++ OtherCases.
54
%% Call test_lib:interpret/1 on this module before every test case
%% (presumably (re)loading it into the debugger's interpreter -- see
%% test_lib), then hand the configuration back unchanged.
init_per_testcase(_TestCase, Config0) ->
    test_lib:interpret(?MODULE),
    Config0.
58
%% Nothing to clean up after a test case.
end_per_testcase(_TestCase, _Config0) ->
    ok.
61
%% Call test_lib:interpret/1 on this module and then insist that the
%% module is listed by int:interpreted/0; the suite setup fails with
%% a badmatch otherwise.
init_per_suite(Config) when is_list(Config) ->
    test_lib:interpret(?MODULE),
    Interpreted = int:interpreted(),
    true = lists:member(?MODULE, Interpreted),
    Config.
66
%% Nothing to tear down after the suite; only the shape of the
%% configuration (a list) is checked by the guard.
end_per_suite(Config0) when is_list(Config0) ->
    ok.
69
%% Run the UTF-8 roundtrip checks of utf8_roundtrip_1/1 on every
%% sample returned by utf_data/0.
utf8_roundtrip(Config) when is_list(Config) ->
    lists:foreach(fun utf8_roundtrip_1/1, utf_data()),
    ok.
73
%% Check one sample: a list of code points together with two
%% binaries. The repeated variable in the head requires the two
%% binaries to be equal. The binary must decode to the list, the
%% list must encode back to the binary, and utf8_guard/2 must accept
%% each character only when the binary carries the leading 42 byte.
utf8_roundtrip_1({Chars,Utf8,Utf8}) ->
    Chars = utf8_to_list(Utf8),
    Utf8 = list_to_utf8(Chars),
    lists:foreach(fun(Ch) -> ok = utf8_guard(Ch, <<42,Ch/utf8>>) end,
                  Chars),
    lists:foreach(fun(Ch) -> error = utf8_guard(Ch, <<Ch/utf8>>) end,
                  Chars),
    ok.
80
%% Return ok when Expected is exactly the byte 42 followed by the
%% UTF-8 encoding of Char; return error otherwise, which includes
%% the case where the binary construction in the guard fails.
utf8_guard(Char, Expected) when Expected =:= <<42,Char/utf8>> ->
    ok;
utf8_guard(_Char, _Expected) ->
    error.
83
%% Decode a UTF-8 binary into a list of code points, one character
%% per recursion step.
utf8_to_list(<<>>) ->
    [];
utf8_to_list(<<CodePoint/utf8,Rest/binary>>) ->
    [CodePoint|utf8_to_list(Rest)].
87
%% Encode a list of code points as a UTF-8 binary.
list_to_utf8(CodePoints) ->
    list_to_utf8(CodePoints, <<>>).

%% Worker: append the UTF-8 encoding of each code point to Acc.
list_to_utf8([], Acc) ->
    Acc;
list_to_utf8([CodePoint|Rest], Acc) ->
    list_to_utf8(Rest, <<Acc/binary,CodePoint/utf8>>).
94
%% Count the characters of every UTF-8 sample with a match that
%% discards each decoded character (see utf8_len/1) and check the
%% count against the length of the corresponding code point list.
unused_utf_char(Config) when is_list(Config) ->
    %% utf_data/0 returns 3-tuples. A list comprehension generator
    %% silently skips elements that do not match its pattern, so a
    %% 2-tuple pattern would make this test check nothing at all.
    %% Matching in a fun head instead makes an unexpected sample
    %% shape fail the test rather than be ignored.
    lists:foreach(fun({Str,Utf8,_Bytes}) ->
                          true = (utf8_len(Utf8) =:= length(Str))
                  end, utf_data()),
    ok.
99
%% Number of characters in a UTF-8 binary.
utf8_len(Utf8) ->
    utf8_len(Utf8, 0).

%% Worker: skip one character per step without binding its value.
utf8_len(<<>>, Count) ->
    Count;
utf8_len(<<_/utf8,Rest/binary>>, Count) ->
    utf8_len(Rest, Count+1).
106
%% Roundtrip the UTF-16 sample from utf16_data/0 in both byte
%% orders. The repeated variables in the tuple pattern require each
%% constructed binary to equal its byte-by-byte counterpart.
utf16_roundtrip(Config) when is_list(Config) ->
    {Chars,BigEndian,BigEndian,LittleEndian,LittleEndian} = utf16_data(),

    %% Big endian: count, decode, re-encode.
    4 = utf16_big_len(BigEndian),
    Chars = big_utf16_to_list(BigEndian),
    BigEndian = list_to_big_utf16(Chars),

    %% Little endian: count, decode, re-encode.
    4 = utf16_little_len(LittleEndian),
    Chars = little_utf16_to_list(LittleEndian),
    LittleEndian = list_to_little_utf16(Chars),

    ok.
118
%% Number of characters in a big-endian UTF-16 binary.
utf16_big_len(Utf16) ->
    utf16_big_len(Utf16, 0).

%% Worker: skip one character per step without binding its value.
utf16_big_len(<<>>, Count) ->
    Count;
utf16_big_len(<<_/utf16,Rest/binary>>, Count) ->
    utf16_big_len(Rest, Count+1).
125
%% Number of characters in a little-endian UTF-16 binary.
utf16_little_len(Utf16) ->
    utf16_little_len(Utf16, 0).

%% Worker: skip one character per step without binding its value.
utf16_little_len(<<>>, Count) ->
    Count;
utf16_little_len(<<_/little-utf16,Rest/binary>>, Count) ->
    utf16_little_len(Rest, Count+1).
132
%% Encode a list of code points as a big-endian UTF-16 binary.
list_to_big_utf16(CodePoints) ->
    list_to_big_utf16(CodePoints, <<>>).

%% Worker: append the encoding of each code point to Acc.
list_to_big_utf16([], Acc) ->
    Acc;
list_to_big_utf16([CodePoint|Rest], Acc) ->
    list_to_big_utf16(Rest, <<Acc/binary,CodePoint/utf16>>).
139
%% Encode a list of code points as a little-endian UTF-16 binary.
list_to_little_utf16(CodePoints) ->
    list_to_little_utf16(CodePoints, <<>>).

%% Worker: append the encoding of each code point to Acc.
list_to_little_utf16([], Acc) ->
    Acc;
list_to_little_utf16([CodePoint|Rest], Acc) ->
    list_to_little_utf16(Rest, <<Acc/binary,CodePoint/little-utf16>>).
146
%% Decode a big-endian UTF-16 binary into a list of code points.
big_utf16_to_list(<<>>) ->
    [];
big_utf16_to_list(<<CodePoint/utf16,Rest/binary>>) ->
    [CodePoint|big_utf16_to_list(Rest)].
150
%% Decode a little-endian UTF-16 binary into a list of code points.
little_utf16_to_list(<<>>) ->
    [];
little_utf16_to_list(<<CodePoint/little-utf16,Rest/binary>>) ->
    [CodePoint|little_utf16_to_list(Rest)].
154
%% Roundtrip the UTF-32 sample from utf32_data/0 in both byte
%% orders. The repeated variables in the tuple pattern require each
%% constructed binary to equal its counterpart built from plain
%% 32-bit integers.
utf32_roundtrip(Config) when is_list(Config) ->
    {Chars,BigEndian,BigEndian,LittleEndian,LittleEndian} = utf32_data(),

    %% Big endian: count, decode, re-encode.
    4 = utf32_big_len(BigEndian),
    Chars = big_utf32_to_list(BigEndian),
    BigEndian = list_to_big_utf32(Chars),

    %% Little endian: count, decode, re-encode.
    4 = utf32_little_len(LittleEndian),
    Chars = little_utf32_to_list(LittleEndian),
    LittleEndian = list_to_little_utf32(Chars),

    ok.
166
%% Number of characters in a big-endian UTF-32 binary.
utf32_big_len(Utf32) ->
    utf32_big_len(Utf32, 0).

%% Worker: skip one character per step without binding its value.
utf32_big_len(<<>>, Count) ->
    Count;
utf32_big_len(<<_/utf32,Rest/binary>>, Count) ->
    utf32_big_len(Rest, Count+1).
173
%% Number of characters in a little-endian UTF-32 binary.
utf32_little_len(Utf32) ->
    utf32_little_len(Utf32, 0).

%% Worker: skip one character per step without binding its value.
utf32_little_len(<<>>, Count) ->
    Count;
utf32_little_len(<<_/little-utf32,Rest/binary>>, Count) ->
    utf32_little_len(Rest, Count+1).
180
%% Encode a list of code points as a big-endian UTF-32 binary.
list_to_big_utf32(CodePoints) ->
    list_to_big_utf32(CodePoints, <<>>).

%% Worker: append the encoding of each code point to Acc.
list_to_big_utf32([], Acc) ->
    Acc;
list_to_big_utf32([CodePoint|Rest], Acc) ->
    list_to_big_utf32(Rest, <<Acc/binary,CodePoint/utf32>>).
187
%% Encode a list of code points as a little-endian UTF-32 binary.
list_to_little_utf32(CodePoints) ->
    list_to_little_utf32(CodePoints, <<>>).

%% Worker: append the encoding of each code point to Acc.
list_to_little_utf32([], Acc) ->
    Acc;
list_to_little_utf32([CodePoint|Rest], Acc) ->
    list_to_little_utf32(Rest, <<Acc/binary,CodePoint/little-utf32>>).
194
%% Decode a big-endian UTF-32 binary into a list of code points.
big_utf32_to_list(<<>>) ->
    [];
big_utf32_to_list(<<CodePoint/utf32,Rest/binary>>) ->
    [CodePoint|big_utf32_to_list(Rest)].
198
%% Decode a little-endian UTF-32 binary into a list of code points.
little_utf32_to_list(<<>>) ->
    [];
little_utf32_to_list(<<CodePoint/little-utf32,Rest/binary>>) ->
    [CodePoint|little_utf32_to_list(Rest)].
202
203
%% Binary construction of 16#D800 must fail inside every guard of
%% do_guard/1, so that only its catch-all clause is selected.
guard(Config) when is_list(Config) ->
    Surrogate = 16#D800,
    error = do_guard(Surrogate),
    ok.
207
%% Return ok if Char can be encoded by at least one of the utf8,
%% utf16 and utf32 segment types (no encoding is 42 bytes long, so
%% the comparison holds whenever construction succeeds). A failing
%% construction only makes its own guard alternative fail; error is
%% returned when all three fail.
do_guard(Char) when byte_size(<<Char/utf8>>) =/= 42;
                    byte_size(<<Char/utf16>>) =/= 42;
                    byte_size(<<Char/utf32>>) =/= 42 ->
    ok;
do_guard(_Char) ->
    error.
212
%% The purpose of this test is to make sure that the delayed
%% creation of sub-binaries works. The code points 0..1024 are
%% converted between UTF-8, UTF-16 and UTF-32 along several paths,
%% and every path must produce the same binaries and, finally, the
%% original list.

extreme_tripping(Config) when is_list(Config) ->
    CodePoints = lists:seq(0, 1024),
    AsUtf8 = unicode_to_utf8(CodePoints, <<>>),
    AsUtf32 = utf8_to_utf32(AsUtf8, <<>>),
    AsUtf16 = utf8_to_utf16(AsUtf8, <<>>),
    %% Going through UTF-16 must give the same UTF-32 binary.
    AsUtf32 = utf16_to_utf32(AsUtf16, <<>>),
    %% And back again.
    AsUtf8 = utf32_to_utf8(AsUtf32, <<>>),
    CodePoints = utf32_to_unicode(AsUtf32),
    ok.
225
%% Append the UTF-8 encoding of each code point in the list to Acc.
unicode_to_utf8([], Acc) ->
    Acc;
unicode_to_utf8([CodePoint|Rest], Acc) ->
    unicode_to_utf8(Rest, <<Acc/bytes,CodePoint/utf8>>).
229
%% Re-encode a UTF-8 binary as big-endian UTF-16, appending to Acc.
utf8_to_utf16(<<>>, Acc) ->
    Acc;
utf8_to_utf16(<<CodePoint/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf16(Rest, <<Acc/bytes,CodePoint/utf16>>).
233
%% Re-encode a big-endian UTF-16 binary as big-endian UTF-32,
%% appending to Acc.
utf16_to_utf32(<<>>, Acc) ->
    Acc;
utf16_to_utf32(<<CodePoint/utf16,Rest/binary>>, Acc) ->
    utf16_to_utf32(Rest, <<Acc/bytes,CodePoint/utf32>>).
237
%% Re-encode a UTF-8 binary as big-endian UTF-32, appending to Acc.
utf8_to_utf32(<<>>, Acc) ->
    Acc;
utf8_to_utf32(<<CodePoint/utf8,Rest/binary>>, Acc) ->
    utf8_to_utf32(Rest, <<Acc/bytes,CodePoint/utf32>>).
241
%% Re-encode a big-endian UTF-32 binary as UTF-8, appending to Acc.
utf32_to_utf8(<<>>, Acc) ->
    Acc;
utf32_to_utf8(<<CodePoint/utf32,Rest/binary>>, Acc) ->
    utf32_to_utf8(Rest, <<Acc/bytes,CodePoint/utf8>>).
245
%% Decode a big-endian UTF-32 binary into a list of code points.
utf32_to_unicode(<<>>) ->
    [];
utf32_to_unicode(<<CodePoint/utf32,Rest/binary>>) ->
    [CodePoint|utf32_to_unicode(Rest)].
249
%% UTF-8 samples from RFC-3629. Each element is a tuple holding the
%% list of code points, the binary built with utf8 segments, and the
%% same binary written out byte by byte.
utf_data() ->
    %% Give the compiler a chance to do some constant propagation.
    NotIdentical = 16#2262,

    %% "A<NOT IDENTICAL TO><ALPHA>."
    Latin = {[16#0041,NotIdentical,16#0391,16#002E],
             <<16#0041/utf8,NotIdentical/utf8,16#0391/utf8,16#002E/utf8>>,
             <<16#41,16#E2,16#89,16#A2,16#CE,16#91,16#2E>>},

    %% Korean "hangugeo" (meaning "the Korean language")
    Korean = {[16#D55C,16#AD6D,16#C5B4],
              <<16#D55C/utf8,16#AD6D/utf8,16#C5B4/utf8>>,
              <<16#ED,16#95,16#9C,16#EA,16#B5,16#AD,16#EC,16#96,16#B4>>},

    %% Japanese "nihongo" (meaning "the Japanese language").
    Japanese = {[16#65E5,16#672C,16#8A9E],
                <<16#65E5/utf8,16#672C/utf8,16#8A9E/utf8>>,
                <<16#E6,16#97,16#A5,16#E6,16#9C,16#AC,16#E8,16#AA,16#9E>>},

    [Latin,Korean,Japanese].
272
%% UTF-16 sample from RFC-2781: "*=Ra", where "*" represents a
%% hypothetical Ra hieroglyph (code point 16#12345). The result is
%% a 5-tuple: the list of code points, then for big endian and for
%% little endian a binary built with utf16 segments followed by the
%% same binary written out byte by byte.
utf16_data() ->
    %% Give the compiler a chance to do some constant propagation.
    RaHieroglyph = 16#12345,

    %% First as a list of Unicode characters.
    Chars = [RaHieroglyph,16#3D,16#52,16#61],

    %% Big endian (the two binaries should be equal).
    BigBuilt = <<RaHieroglyph/big-utf16,16#3D/big-utf16,
                 16#52/big-utf16,16#61/big-utf16>>,
    BigBytes = <<16#D8,16#08,16#DF,16#45,16#00,16#3D,16#00,16#52,16#00,16#61>>,

    %% Little endian (the two binaries should be equal).
    LittleBuilt = <<RaHieroglyph/little-utf16,16#3D/little-utf16,
                    16#52/little-utf16,16#61/little-utf16>>,
    LittleBytes = <<16#08,16#D8,16#45,16#DF,16#3D,16#00,16#52,16#00,16#61,16#00>>,

    {Chars,BigBuilt,BigBytes,LittleBuilt,LittleBytes}.
291
%% UTF-32 sample: "A<NOT IDENTICAL TO><ALPHA>.". The result is a
%% 5-tuple: the list of code points, then for big endian and for
%% little endian a binary built with utf32 segments followed by the
%% same binary built from plain 32-bit integer segments.
utf32_data() ->
    NotIdentical = 16#2262,
    Chars = [16#0041,NotIdentical,16#0391,16#002E],

    %% Big endian.
    BigBuilt = <<16#0041/utf32,NotIdentical/utf32,
                 16#0391/utf32,16#002E/utf32>>,
    BigInts = <<16#41:32,NotIdentical:32,16#0391:32,16#2E:32>>,

    %% Little endian.
    LittleBuilt = <<16#0041/little-utf32,NotIdentical/little-utf32,
                    16#0391/little-utf32,16#002E/little-utf32>>,
    LittleInts = <<16#41:32/little,NotIdentical:32/little,
                   16#0391:32/little,16#2E:32/little>>,

    {Chars,BigBuilt,BigInts,LittleBuilt,LittleInts}.
306