1import Codec.Binary.UTF8.String 2import Test.HUnit (Test (TestCase, TestList, TestLabel), assertEqual, errors, failures, runTestTT) 3import System.Exit (exitFailure) 4import Control.Monad (when) 5 6main :: IO () 7main = do counts <- runTestTT tests 8 when (errors counts > 0 || failures counts > 0) exitFailure 9 10tests :: Test 11tests = TestList [test_1, test_2, test_3, test_4, test_5, test_6] 12 13test_1 :: Test 14test_1 = TestLabel "1 Some correct UTF-8 text" $ 15 TestCase $ assertEqual "kosme, " "\x03ba\x1f79\x03c3\x03bc\x03b5 " 16 (decode [0xce,0xba,0xe1,0xbd,0xb9,0xcf,0x83,0xce,0xbc,0xce,0xb5,0x20]) 17 18test_2 :: Test 19test_2 = TestLabel "2 Boundary condition test cases" $ 20 TestList [test_2_1, test_2_2, test_2_3] 21 22test_2_1 :: Test 23test_2_1 = TestLabel "2.1 First possible sequence of a certain length" $ 24 TestList $ map TestCase $ 25 [ assertEqual "2.1.1, " "\0\0" (decode [0, 0]) 26 , assertEqual "2.1.2, " "\x80\0" (decode [0xc2, 0x80, 0]) 27 , assertEqual "2.1.3, " "\x800\0" (decode [0xe0, 0xa0, 0x80, 0]) 28 , assertEqual "2.1.4, " "\x10000\0" (decode [0xf0, 0x90, 0x80, 0x80, 0]) 29 , assertEqual "2.1.5, " "\xfffd\0" (decode [0xf8, 0x88, 0x80, 0x80, 0x80, 0]) 30 , assertEqual "2.1.6, " "\xfffd\0" (decode [0xfc,0x84,0x80,0x80,0x80,0x80,0]) 31 ] 32 33test_2_2 :: Test 34test_2_2 = TestLabel "2.2 Last possible sequence of a certain length" $ 35 TestList $ map TestCase $ 36 [ assertEqual "2.2.1, " "\x7f\0" (decode [0x7f, 0]) 37 , assertEqual "2.2.2, " "\x7ff\0" (decode [0xdf, 0xbf, 0]) 38 , assertEqual "2.2.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbf, 0]) 39 , assertEqual "2.2.4, " "\xfffd\0" (decode [0xf7, 0xbf, 0xbf, 0xbf, 0]) 40 , assertEqual "2.2.5, " "\xfffd\0" (decode [0xfb, 0xbf, 0xbf, 0xbf, 0xbf, 0]) 41 , assertEqual "2.2.6, " "\xfffd\0" (decode [0xfd,0xbf,0xbf,0xbf,0xbf,0xbf,0]) 42 ] 43 44test_2_3 :: Test 45test_2_3 = TestLabel "2.3 Other boundary conditions" $ 46 TestList $ map TestCase $ 47 [ assertEqual "2.3.1, " "\xd7ff\0" (decode [0xed, 0x9f, 0xbf, 0]) 48 , assertEqual "2.3.2, " "\xe000\0" (decode [0xee, 0x80, 0x80, 0]) 49 , assertEqual "2.3.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbd, 0]) 50 , assertEqual "2.3.4, " "\x10ffff\0" (decode [0xf4, 0x8f, 0xbf, 0xbf, 0]) 51 , assertEqual "2.3.5, " "\xfffd\0" (decode [0xf4, 0x90, 0x80, 0x80, 0]) 52 ] 53 54test_3 :: Test 55test_3 = TestLabel "3 Malformed sequences" $ 56 TestList [test_3_1, test_3_2, test_3_3, test_3_4, test_3_5] 57 58test_3_1 :: Test 59test_3_1 = TestLabel "3.1 Unexpected continuation bytes" $ 60 TestList $ map TestCase $ 61 [ assertEqual "3.1.1, " "\xfffd\0" (decode [0x80, 0]) 62 , assertEqual "3.1.2, " "\xfffd\0" (decode [0xbf, 0]) 63 , assertEqual "3.1.3, " "\xfffd\xfffd\0" (decode [0x80, 0xbf, 0]) 64 , assertEqual "3.1.4, " "\xfffd\xfffd\xfffd\0" (decode [0x80, 0xbf, 0x80, 0]) 65 , assertEqual "3.1.5, " "\xfffd\xfffd\xfffd\xfffd\0" 66 (decode [0x80, 0xbf, 0x80, 0xbf, 0]) 67 , assertEqual "3.1.6, " "\xfffd\xfffd\xfffd\xfffd\xfffd\0" 68 (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0]) 69 , assertEqual "3.1.7, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0" 70 (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0]) 71 , assertEqual "3.1.8, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0" 72 (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0x80, 0]) 73 , assertEqual "3.1.9, " (replicate 64 '\xfffd') (decode [0x80..0xbf]) 74 ] 75 76test_3_2 :: Test 77test_3_2 = TestLabel "3.2 Lonely start characters" $ 78 TestList $ map TestCase $ 79 [ assertEqual "3.2.1, " (concat (replicate 32 "\xfffd ")) 80 (decode (concat [[x,0x20] | x <- [0xc0..0xdf]])) 81 , assertEqual "3.2.2, " (concat (replicate 16 "\xfffd ")) 82 (decode (concat [[x,0x20] | x <- [0xe0..0xef]])) 83 , assertEqual "3.2.3, " (concat (replicate 8 "\xfffd ")) 84 (decode (concat [[x,0x20] | x <- [0xf0..0xf7]])) 85 , assertEqual "3.2.4, " "\xfffd \xfffd \xfffd \xfffd " 86 (decode (concat [[x,0x20] | x <- [0xf8..0xfb]])) 87 , assertEqual "3.2.5, " "\xfffd \xfffd " (decode [0xfc, 0x20, 0xfd, 0x20]) 88 ] 89 90test_3_3 :: Test 91test_3_3 = TestLabel "3.3 Sequences with last continuation byte missing" $ 92 TestList $ map TestCase $ 93 [ assertEqual "3.3.1, " "\xfffd " (decode [0xc0, 0x20]) 94 , assertEqual "3.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x20]) 95 , assertEqual "3.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x20]) 96 , assertEqual "3.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80, 0x20]) 97 , assertEqual "3.3.5, " "\xfffd " (decode [0xfc, 0x80, 0x80, 0x80,0x80,0x20]) 98 , assertEqual "3.3.6, " "\xfffd " (decode [0xdf, 0x20]) 99 , assertEqual "3.3.7, " "\xfffd " (decode [0xef, 0xbf, 0x20]) 100 , assertEqual "3.3.8, " "\xfffd " (decode [0xf7, 0xbf, 0xbf, 0x20]) 101 , assertEqual "3.3.9, " "\xfffd " (decode [0xfb, 0xbf, 0xbf, 0xbf, 0x20]) 102 , assertEqual "3.3.10, " "\xfffd " (decode [0xfd, 0xbf, 0xbf, 0xbf,0xbf,0x20]) 103 ] 104 105test_3_4 :: Test 106test_3_4 = TestLabel "3.4 Concatenation of incomplete sequences" $ 107 TestCase $ assertEqual "3.4, " 108 (replicate 10 '\xfffd') 109 (decode [0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80, 0xf8, 0x80, 0x80, 0x80, 110 0xfc, 0x80, 0x80, 0x80,0x80, 0xdf, 0xef, 0xbf, 0xf7, 0xbf, 0xbf, 111 0xfb, 0xbf, 0xbf, 0xbf, 0xfd, 0xbf, 0xbf, 0xbf,0xbf]) 112 113test_3_5 :: Test 114test_3_5 = TestLabel "3.5 Impossible bytes" $ 115 TestList $ map TestCase $ 116 [ assertEqual "3.5.1, " "\xfffd " (decode [0xfe, 0x20]) 117 , assertEqual "3.5.2, " "\xfffd " (decode [0xff, 0x20]) 118 , assertEqual "3.5.3, " "\xfffd\xfffd\xfffd\xfffd " 119 (decode [0xfe, 0xfe, 0xff, 0xff, 0x20]) 120 ] 121 122test_4 :: Test 123test_4 = TestLabel "4 Overlong sequences" $ 124 TestList [test_4_1, test_4_2, test_4_3] 125 126test_4_1 :: Test 127test_4_1 = TestLabel "4.1" $ TestList $ map TestCase $ 128 [ assertEqual "4.1.1, " "\xfffd " (decode [0xc0, 0xaf, 0x20]) 129 , assertEqual "4.1.2, " "\xfffd " (decode [0xe0, 0x80, 0xaf, 0x20]) 130 , assertEqual "4.1.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0xaf, 0x20]) 131 , assertEqual "4.1.4, " "\xfffd " (decode [0xf8, 0x80, 0x80,0x80,0xaf, 0x20]) 132 , assertEqual "4.1.5, " "\xfffd " (decode[0xfc,0x80,0x80,0x80,0x80,0xaf,0x20]) 133 ] 134 135test_4_2 :: Test 136test_4_2 = TestLabel "4.2 Maximum overlong sequences" $ 137 TestList $ map TestCase $ 138 [ assertEqual "4.2.1, " "\xfffd " (decode [0xc1, 0xbf, 0x20]) 139 , assertEqual "4.2.2, " "\xfffd " (decode [0xe0, 0x9f, 0xbf, 0x20]) 140 , assertEqual "4.2.3, " "\xfffd " (decode [0xf0, 0x8f, 0xbf, 0xbf, 0x20]) 141 , assertEqual "4.2.4, " "\xfffd " (decode [0xf8, 0x87, 0xbf, 0xbf,0xbf,0x20]) 142 , assertEqual "4.2.5, " "\xfffd "(decode[0xfc,0x83,0xbf,0xbf,0xbf,0xbf,0x20]) 143 ] 144 145test_4_3 :: Test 146test_4_3 = TestLabel "4.2 Overlong NUL" $ 147 TestList $ map TestCase $ 148 [ assertEqual "4.3.1, " "\xfffd " (decode [0xc0, 0x80, 0x20]) 149 , assertEqual "4.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x80, 0x20]) 150 , assertEqual "4.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x80, 0x20]) 151 , assertEqual "4.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80,0x80,0x20]) 152 , assertEqual "4.3.5, " "\xfffd "(decode[0xfc,0x80,0x80,0x80,0x80,0x80,0x20]) 153 ] 154 155test_5 :: Test 156test_5 = TestLabel "Illegal code positions" $ 157 TestList [test_5_1, test_5_2, test_5_3] 158 159test_5_1 :: Test 160test_5_1 = TestLabel "5.1 Single UTF-16 surrogates" $ 161 TestList $ map TestCase $ 162 [ assertEqual "5.1.1, " "\xfffd " (decode [0xed,0xa0,0x80,0x20]) 163 , assertEqual "5.1.2, " "\xfffd " (decode [0xed,0xad,0xbf,0x20]) 164 , assertEqual "5.1.3, " "\xfffd " (decode [0xed,0xae,0x80,0x20]) 165 , assertEqual "5.1.4, " "\xfffd " (decode [0xed,0xaf,0xbf,0x20]) 166 , assertEqual "5.1.5, " "\xfffd " (decode [0xed,0xb0,0x80,0x20]) 167 , assertEqual "5.1.6, " "\xfffd " (decode [0xed,0xbe,0x80,0x20]) 168 , assertEqual "5.1.7, " "\xfffd " (decode [0xed,0xbf,0xbf,0x20]) 169 ] 170 171test_5_2 :: Test 172test_5_2 = TestLabel "5.2 Paired UTF-16 surrogates" $ 173 TestList $ map TestCase $ 174 [ assertEqual "5.2.1, " res (decode [0xed,0xa0,0x80,0xed,0xb0,0x80,0x20]) 175 , assertEqual "5.2.2, " res (decode [0xed,0xa0,0x80,0xed,0xbf,0xbf,0x20]) 176 , assertEqual "5.2.3, " res (decode [0xed,0xad,0xbf,0xed,0xb0,0x80,0x20]) 177 , assertEqual "5.2.4, " res (decode [0xed,0xad,0xbf,0xed,0xbf,0xbf,0x20]) 178 , assertEqual "5.2.5, " res (decode [0xed,0xae,0x80,0xed,0xb0,0x80,0x20]) 179 , assertEqual "5.2.6, " res (decode [0xed,0xae,0x80,0xed,0xbf,0xbf,0x20]) 180 , assertEqual "5.2.7, " res (decode [0xed,0xaf,0xbf,0xed,0xb0,0x80,0x20]) 181 , assertEqual "5.2.8, " res (decode [0xed,0xaf,0xbf,0xed,0xbf,0xbf,0x20]) 182 ] 183 where res = "\xfffd\xfffd " 184 185test_5_3 :: Test 186test_5_3 = TestLabel "5.3 Other illegal code positions" $ 187 TestList $ map TestCase $ 188 [ assertEqual "5.3.1, " "\xfffd " (decode [0xef, 0xbf, 0xbe, 0x20]) 189 , assertEqual "5.3.2, " "\xfffd " (decode [0xef, 0xbf, 0xbf, 0x20]) 190 ] 191 192test_6 :: Test 193test_6 = TestLabel "Encode then decode" $ 194 TestList $ map TestCase $ 195 [ assertEqual "6.1" encodeDecodeTest [] 196 ] 197 198-- 199-- test decode . encode == id for the class of chars we know that to be true of 200-- 201encodeDecodeTest :: [Char] 202encodeDecodeTest = filter (\x -> [x] /= decode (encode [x])) legal_codepoints ++ 203 filter (\x -> ['\xfffd'] /= decode (encode [x])) illegal_codepoints 204 where 205 legal_codepoints = ['\0'..'\xd7ff'] ++ ['\xe000'..'\xfffd'] ++ ['\x10000'..'\x10ffff'] 206 illegal_codepoints = '\xffff' : '\xfffe' : ['\xd800'..'\xdfff'] 207 208 209