1import Codec.Binary.UTF8.String
2import Test.HUnit (Test (TestCase, TestList, TestLabel), assertEqual, errors, failures, runTestTT)
3import System.Exit (exitFailure)
4import Control.Monad (when)
5
6main :: IO ()
7main = do counts <- runTestTT tests
8          when (errors counts > 0 || failures counts > 0) exitFailure
9
10tests :: Test
11tests = TestList [test_1, test_2, test_3, test_4, test_5, test_6]
12
13test_1 :: Test
14test_1 = TestLabel "1 Some correct UTF-8 text" $
15  TestCase $ assertEqual "kosme, " "\x03ba\x1f79\x03c3\x03bc\x03b5 "
16    (decode [0xce,0xba,0xe1,0xbd,0xb9,0xcf,0x83,0xce,0xbc,0xce,0xb5,0x20])
17
18test_2 :: Test
19test_2 = TestLabel "2 Boundary condition test cases" $
20  TestList [test_2_1, test_2_2, test_2_3]
21
22test_2_1 :: Test
23test_2_1 = TestLabel "2.1 First possible sequence of a certain length" $
24  TestList $ map TestCase $
25  [ assertEqual "2.1.1, " "\0\0" (decode [0, 0])
26  , assertEqual "2.1.2, " "\x80\0" (decode [0xc2, 0x80, 0])
27  , assertEqual "2.1.3, " "\x800\0" (decode [0xe0, 0xa0, 0x80, 0])
28  , assertEqual "2.1.4, " "\x10000\0" (decode [0xf0, 0x90, 0x80, 0x80, 0])
29  , assertEqual "2.1.5, " "\xfffd\0" (decode [0xf8, 0x88, 0x80, 0x80, 0x80, 0])
30  , assertEqual "2.1.6, " "\xfffd\0" (decode [0xfc,0x84,0x80,0x80,0x80,0x80,0])
31  ]
32
33test_2_2 :: Test
34test_2_2 = TestLabel "2.2 Last possible sequence of a certain length" $
35  TestList $ map TestCase $
36  [ assertEqual "2.2.1, " "\x7f\0" (decode [0x7f, 0])
37  , assertEqual "2.2.2, " "\x7ff\0" (decode [0xdf, 0xbf, 0])
38  , assertEqual "2.2.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbf, 0])
39  , assertEqual "2.2.4, " "\xfffd\0" (decode [0xf7, 0xbf, 0xbf, 0xbf, 0])
40  , assertEqual "2.2.5, " "\xfffd\0" (decode [0xfb, 0xbf, 0xbf, 0xbf, 0xbf, 0])
41  , assertEqual "2.2.6, " "\xfffd\0" (decode [0xfd,0xbf,0xbf,0xbf,0xbf,0xbf,0])
42  ]
43
44test_2_3 :: Test
45test_2_3 = TestLabel "2.3 Other boundary conditions" $
46  TestList $ map TestCase $
47  [ assertEqual "2.3.1, " "\xd7ff\0" (decode [0xed, 0x9f, 0xbf, 0])
48  , assertEqual "2.3.2, " "\xe000\0" (decode [0xee, 0x80, 0x80, 0])
49  , assertEqual "2.3.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbd, 0])
50  , assertEqual "2.3.4, " "\x10ffff\0" (decode [0xf4, 0x8f, 0xbf, 0xbf, 0])
51  , assertEqual "2.3.5, " "\xfffd\0" (decode [0xf4, 0x90, 0x80, 0x80, 0])
52  ]
53
54test_3 :: Test
55test_3 = TestLabel "3 Malformed sequences" $
56  TestList [test_3_1, test_3_2, test_3_3, test_3_4, test_3_5]
57
58test_3_1 :: Test
59test_3_1 = TestLabel "3.1 Unexpected continuation bytes" $
60  TestList $ map TestCase $
61  [ assertEqual "3.1.1, " "\xfffd\0" (decode [0x80, 0])
62  , assertEqual "3.1.2, " "\xfffd\0" (decode [0xbf, 0])
63  , assertEqual "3.1.3, " "\xfffd\xfffd\0" (decode [0x80, 0xbf, 0])
64  , assertEqual "3.1.4, " "\xfffd\xfffd\xfffd\0" (decode [0x80, 0xbf, 0x80, 0])
65  , assertEqual "3.1.5, " "\xfffd\xfffd\xfffd\xfffd\0"
66                          (decode [0x80, 0xbf, 0x80, 0xbf, 0])
67  , assertEqual "3.1.6, " "\xfffd\xfffd\xfffd\xfffd\xfffd\0"
68                          (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0])
69  , assertEqual "3.1.7, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0"
70                          (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0])
71  , assertEqual "3.1.8, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0"
72                          (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0x80, 0])
73  , assertEqual "3.1.9, " (replicate 64 '\xfffd') (decode [0x80..0xbf])
74  ]
75
76test_3_2 :: Test
77test_3_2 = TestLabel "3.2 Lonely start characters" $
78  TestList $ map TestCase $
79  [ assertEqual "3.2.1, " (concat (replicate 32 "\xfffd "))
80                          (decode (concat [[x,0x20] | x <- [0xc0..0xdf]]))
81  , assertEqual "3.2.2, " (concat (replicate 16 "\xfffd "))
82                          (decode (concat [[x,0x20] | x <- [0xe0..0xef]]))
83  , assertEqual "3.2.3, " (concat (replicate 8 "\xfffd "))
84                          (decode (concat [[x,0x20] | x <- [0xf0..0xf7]]))
85  , assertEqual "3.2.4, " "\xfffd \xfffd \xfffd \xfffd "
86                          (decode (concat [[x,0x20] | x <- [0xf8..0xfb]]))
87  , assertEqual "3.2.5, " "\xfffd \xfffd " (decode [0xfc, 0x20, 0xfd, 0x20])
88  ]
89
90test_3_3 :: Test
91test_3_3 = TestLabel "3.3 Sequences with last continuation byte missing" $
92  TestList $ map TestCase $
93  [ assertEqual "3.3.1, " "\xfffd " (decode [0xc0, 0x20])
94  , assertEqual "3.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x20])
95  , assertEqual "3.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x20])
96  , assertEqual "3.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80, 0x20])
97  , assertEqual "3.3.5, " "\xfffd " (decode [0xfc, 0x80, 0x80, 0x80,0x80,0x20])
98  , assertEqual "3.3.6, " "\xfffd " (decode [0xdf, 0x20])
99  , assertEqual "3.3.7, " "\xfffd " (decode [0xef, 0xbf, 0x20])
100  , assertEqual "3.3.8, " "\xfffd " (decode [0xf7, 0xbf, 0xbf, 0x20])
101  , assertEqual "3.3.9, " "\xfffd " (decode [0xfb, 0xbf, 0xbf, 0xbf, 0x20])
102  , assertEqual "3.3.10, " "\xfffd " (decode [0xfd, 0xbf, 0xbf, 0xbf,0xbf,0x20])
103  ]
104
105test_3_4 :: Test
106test_3_4 = TestLabel "3.4 Concatenation of incomplete sequences" $
107  TestCase $ assertEqual "3.4, "
108  (replicate 10 '\xfffd')
109  (decode [0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80, 0xf8, 0x80, 0x80, 0x80,
110   0xfc, 0x80, 0x80, 0x80,0x80, 0xdf, 0xef, 0xbf, 0xf7, 0xbf, 0xbf,
111   0xfb, 0xbf, 0xbf, 0xbf, 0xfd, 0xbf, 0xbf, 0xbf,0xbf])
112
113test_3_5 :: Test
114test_3_5 = TestLabel "3.5 Impossible bytes" $
115  TestList $ map TestCase $
116  [ assertEqual "3.5.1, " "\xfffd " (decode [0xfe, 0x20])
117  , assertEqual "3.5.2, " "\xfffd " (decode [0xff, 0x20])
118  , assertEqual "3.5.3, " "\xfffd\xfffd\xfffd\xfffd "
119                          (decode [0xfe, 0xfe, 0xff, 0xff, 0x20])
120  ]
121
122test_4 :: Test
123test_4 = TestLabel "4 Overlong sequences" $
124  TestList [test_4_1, test_4_2, test_4_3]
125
126test_4_1 :: Test
127test_4_1 = TestLabel "4.1" $ TestList $ map TestCase $
128  [ assertEqual "4.1.1, " "\xfffd " (decode [0xc0, 0xaf, 0x20])
129  , assertEqual "4.1.2, " "\xfffd " (decode [0xe0, 0x80, 0xaf, 0x20])
130  , assertEqual "4.1.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0xaf, 0x20])
131  , assertEqual "4.1.4, " "\xfffd " (decode [0xf8, 0x80, 0x80,0x80,0xaf, 0x20])
132  , assertEqual "4.1.5, " "\xfffd " (decode[0xfc,0x80,0x80,0x80,0x80,0xaf,0x20])
133  ]
134
135test_4_2 :: Test
136test_4_2 = TestLabel "4.2 Maximum overlong sequences" $
137  TestList $ map TestCase $
138  [ assertEqual "4.2.1, " "\xfffd " (decode [0xc1, 0xbf, 0x20])
139  , assertEqual "4.2.2, " "\xfffd " (decode [0xe0, 0x9f, 0xbf, 0x20])
140  , assertEqual "4.2.3, " "\xfffd " (decode [0xf0, 0x8f, 0xbf, 0xbf, 0x20])
141  , assertEqual "4.2.4, " "\xfffd " (decode [0xf8, 0x87, 0xbf, 0xbf,0xbf,0x20])
142  , assertEqual "4.2.5, " "\xfffd "(decode[0xfc,0x83,0xbf,0xbf,0xbf,0xbf,0x20])
143  ]
144
145test_4_3 :: Test
146test_4_3 = TestLabel "4.2 Overlong NUL" $
147  TestList $ map TestCase $
148  [ assertEqual "4.3.1, " "\xfffd " (decode [0xc0, 0x80, 0x20])
149  , assertEqual "4.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x80, 0x20])
150  , assertEqual "4.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x80, 0x20])
151  , assertEqual "4.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80,0x80,0x20])
152  , assertEqual "4.3.5, " "\xfffd "(decode[0xfc,0x80,0x80,0x80,0x80,0x80,0x20])
153  ]
154
155test_5 :: Test
156test_5 = TestLabel "Illegal code positions" $
157  TestList [test_5_1, test_5_2, test_5_3]
158
159test_5_1 :: Test
160test_5_1 = TestLabel "5.1 Single UTF-16 surrogates" $
161  TestList $ map TestCase $
162  [ assertEqual "5.1.1, " "\xfffd " (decode [0xed,0xa0,0x80,0x20])
163  , assertEqual "5.1.2, " "\xfffd " (decode [0xed,0xad,0xbf,0x20])
164  , assertEqual "5.1.3, " "\xfffd " (decode [0xed,0xae,0x80,0x20])
165  , assertEqual "5.1.4, " "\xfffd " (decode [0xed,0xaf,0xbf,0x20])
166  , assertEqual "5.1.5, " "\xfffd " (decode [0xed,0xb0,0x80,0x20])
167  , assertEqual "5.1.6, " "\xfffd " (decode [0xed,0xbe,0x80,0x20])
168  , assertEqual "5.1.7, " "\xfffd " (decode [0xed,0xbf,0xbf,0x20])
169  ]
170
171test_5_2 :: Test
172test_5_2 = TestLabel "5.2 Paired UTF-16 surrogates" $
173  TestList $ map TestCase $
174  [ assertEqual "5.2.1, " res (decode [0xed,0xa0,0x80,0xed,0xb0,0x80,0x20])
175  , assertEqual "5.2.2, " res (decode [0xed,0xa0,0x80,0xed,0xbf,0xbf,0x20])
176  , assertEqual "5.2.3, " res (decode [0xed,0xad,0xbf,0xed,0xb0,0x80,0x20])
177  , assertEqual "5.2.4, " res (decode [0xed,0xad,0xbf,0xed,0xbf,0xbf,0x20])
178  , assertEqual "5.2.5, " res (decode [0xed,0xae,0x80,0xed,0xb0,0x80,0x20])
179  , assertEqual "5.2.6, " res (decode [0xed,0xae,0x80,0xed,0xbf,0xbf,0x20])
180  , assertEqual "5.2.7, " res (decode [0xed,0xaf,0xbf,0xed,0xb0,0x80,0x20])
181  , assertEqual "5.2.8, " res (decode [0xed,0xaf,0xbf,0xed,0xbf,0xbf,0x20])
182  ]
183  where res = "\xfffd\xfffd "
184
185test_5_3 :: Test
186test_5_3 = TestLabel "5.3 Other illegal code positions" $
187  TestList $ map TestCase $
188  [ assertEqual "5.3.1, " "\xfffd " (decode [0xef, 0xbf, 0xbe, 0x20])
189  , assertEqual "5.3.2, " "\xfffd " (decode [0xef, 0xbf, 0xbf, 0x20])
190  ]
191
192test_6 :: Test
193test_6 = TestLabel "Encode then decode" $
194  TestList $ map TestCase $
195  [ assertEqual "6.1" encodeDecodeTest []
196  ]
197
198--
199-- test decode . encode == id for the class of chars we know that to be true of
200--
201encodeDecodeTest :: [Char]
202encodeDecodeTest = filter (\x -> [x] /= decode (encode [x])) legal_codepoints ++
203                   filter (\x -> ['\xfffd'] /= decode (encode [x])) illegal_codepoints
204  where
205    legal_codepoints = ['\0'..'\xd7ff'] ++ ['\xe000'..'\xfffd'] ++ ['\x10000'..'\x10ffff']
206    illegal_codepoints = '\xffff' : '\xfffe' : ['\xd800'..'\xdfff']
207
208
209