{-# LANGUAGE CPP #-}

-- | Word8 library to be used with Data.ByteString.
-- All functions assume that a 'Word8' is encoded in Latin-1 (ISO-8859-1).
-- All utility functions are supposed to behave like their counterparts
-- in 'Data.Char'. Exceptions are described in the documentation of
-- the individual functions.
--
-- Base library 4.7 (GHC 7.8) or earlier is based on Unicode 6.
-- Base library 4.8 (GHC 7.10) or later is based on Unicode 7.
-- 'isLower', 'isSymbol' and 'isPunctuation' behave differently
-- between the two.

module Data.Word8 (
  -- * Re-exporting
    Word8
  -- * Character classification
  , isControl, isSpace, isLower, isUpper
  , isAlpha, isAlphaNum, isPrint, isDigit, isOctDigit, isHexDigit
  , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  -- * Subranges
  , isAscii, isLatin1, isAsciiUpper, isAsciiLower
  -- * Case conversion
  , toUpper, toLower, toTitle
  -- * ASCII characters
  , _nul, _tab, _lf, _vt, _np, _cr
  , _space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand, _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma, _hyphen, _period, _slash
  , _0, _1, _2, _3, _4, _5, _6, _7, _8, _9
  , _colon, _semicolon, _less, _equal, _greater, _question, _at
  , _A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z
  , _bracketleft, _backslash, _bracketright, _circum, _underscore, _grave
  , _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z
  , _braceleft, _bar, _braceright, _tilde, _del
  -- * Some Latin-1 characters
  , _nbsp
  , _ordfeminine, _softhyphen, _mu, _ordmasculine
  , _s2, _s3, _s1, _1'4, _1'2, _3'4
  , _Agrave, _Odieresis, _Oslash, _Thorn
  , _germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis
  ) where

import Data.Word (Word8)

-- When the build system does not provide the version macro, define it so
-- that every MIN_VERSION_base test in this file evaluates to true.
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif

----------------------------------------------------------------

-- | 'True' for the bytes 0x00-0x1f and 0x7f-0x9f.
isControl :: Word8 -> Bool
isControl w = between _nul 0x1f || between _del 0x9f
  where
    between lo hi = lo <= w && w <= hi

-- | 'True' for space, tab, line feed, carriage return, '_np' (0x0c),
-- '_vt' (0x0b) and the no-break space.
isSpace :: Word8 -> Bool
isSpace w = w `elem` [_space, _tab, _lf, _cr, _np, _vt, _nbsp]

-- | This function returns 'True' for 170 and 186 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isLower :: Word8 -> Bool
#if MIN_VERSION_base(4,8,0)
isLower w = isLower' w || w == _mu
#else
isLower = isLowerCommon
#endif

-- | Lower-case test that is independent of the base version: the letters
-- accepted by @isLower'@ plus 0xb5, 0xaa and 0xba.
isLowerCommon :: Word8 -> Bool
isLowerCommon w = isLower' w || w `elem` [_mu, _ordfeminine, _ordmasculine]

-- | ASCII lower-case letters and the Latin-1 ranges 0xdf-0xf6 and 0xf8-0xff.
isLower' :: Word8 -> Bool
isLower' w = isAsciiLower w
          || between _germandbls _odieresis
          || between _oslash _ydieresis
  where
    between lo hi = lo <= w && w <= hi

-- | ASCII upper-case letters and the Latin-1 ranges 0xc0-0xd6 and 0xd8-0xde.
isUpper :: Word8 -> Bool
isUpper w = isAsciiUpper w
         || between _Agrave _Odieresis
         || between _Oslash _Thorn
  where
    between lo hi = lo <= w && w <= hi

-- | 'True' for every byte accepted by 'isUpper' or @isLowerCommon@.
isAlpha :: Word8 -> Bool
isAlpha w = isUpper w || isLowerCommon w

-- | 'True' for every byte accepted by 'isAlpha' or 'isNumber'.
isAlphaNum :: Word8 -> Bool
isAlphaNum w = isNumber w || isAlpha w

-- | 'True' for 0x20-0x7e and 0xa0-0xff, except the soft hyphen (0xad).
isPrint :: Word8 -> Bool
isPrint w = w /= _softhyphen && (asciiPrintable || latin1Printable)
  where
    asciiPrintable  = _space <= w && w <= _tilde
    latin1Printable = _nbsp <= w && w <= _ydieresis

-- | 'True' for the ASCII digits 0-9.
isDigit :: Word8 -> Bool
isDigit w = w >= _0 && w <= _9

-- | 'True' for the ASCII digits 0-7.
isOctDigit :: Word8 -> Bool
isOctDigit w = w >= _0 && w <= _7

-- | 'True' for the ASCII digits and the letters A-F and a-f.
isHexDigit :: Word8 -> Bool
isHexDigit w = isDigit w || between _A _F || between _a _f
  where
    between lo hi = lo <= w && w <= hi

-- | Same predicate as 'isAlpha'.
isLetter :: Word8 -> Bool
isLetter = isAlpha

-- | Always 'False'.
isMark :: Word8 -> Bool
isMark = const False

-- | 'True' for the ASCII digits, the superscripts 0xb9, 0xb2, 0xb3 and
-- the fractions 0xbc, 0xbd, 0xbe.
isNumber :: Word8 -> Bool
isNumber w = isDigit w || w `elem` [_s1, _s2, _s3, _1'4, _1'2, _3'4]

-- | This function returns 'False' for 167 and 182 in Unicode 6.
--   But it returns 'True' in Unicode 7.
isPunctuation :: Word8 -> Bool
#if MIN_VERSION_base(4,8,0)
isPunctuation w = w `elem`
  [ 0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x29,0x2a,0x2c,0x2d,0x2e,0x2f
  , 0x3a,0x3b,0x3f,0x40,0x5b,0x5c,0x5d,0x5f,0x7b,0x7d
  , 0xa1,0xa7,0xab,0xb6,0xb7,0xbb,0xbf
  ]
#else
isPunctuation w = w `elem`
  [ 0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x29,0x2a,0x2c,0x2d,0x2e,0x2f
  , 0x3a,0x3b,0x3f,0x40,0x5b,0x5c,0x5d,0x5f,0x7b,0x7d
  , 0xa1,0xab,0xb7,0xbb,0xbf
  ]
#endif

-- | This function returns 'True' for 167 and 182 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isSymbol :: Word8 -> Bool
#if MIN_VERSION_base(4,8,0)
isSymbol w = w `elem`
  [ 0x24,0x2b,0x3c,0x3d,0x3e,0x5e,0x60,0x7c,0x7e
  , 0xa2,0xa3,0xa4,0xa5,0xa6,0xa8,0xa9,0xac,0xae,0xaf
  , 0xb0,0xb1,0xb4,0xb8,0xd7,0xf7
  ]
#else
isSymbol w = w `elem`
  [ 0x24,0x2b,0x3c,0x3d,0x3e,0x5e,0x60,0x7c,0x7e
  , 0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xac,0xae,0xaf
  , 0xb0,0xb1,0xb4,0xb6,0xb8,0xd7,0xf7
  ]
#endif

-- | 'True' for the space (0x20) and the no-break space (0xa0).
isSeparator :: Word8 -> Bool
isSeparator w = w == _space
             || w == _nbsp

----------------------------------------------------------------

-- | 'True' for the bytes 0x00-0x7f.
isAscii :: Word8 -> Bool
isAscii w = w <= _del -- a 'Word8' is never below 0x00, so no lower bound is needed

-- | Always 'True': every 'Word8' is treated as a Latin-1 code point.
isLatin1 :: Word8 -> Bool
isLatin1 _ = True

-- | 'True' for the ASCII letters A-Z.
isAsciiUpper :: Word8 -> Bool
isAsciiUpper w = _A <= w && w <= _Z

-- | 'True' for the ASCII letters a-z.
isAsciiLower :: Word8 -> Bool
isAsciiLower w = _a <= w && w <= _z

----------------------------------------------------------------

-- | Convert a lower-case letter to upper case by subtracting 0x20.
-- Sharp s (0xdf), micro sign/mu (0xb5) and small letter Y with
-- diaeresis (0xff) remain the same, as does every byte that is not a
-- lower-case letter.
toUpper :: Word8 -> Word8
toUpper w
  | w == _germandbls = w
  -- 0xff lies inside the lower-case range, so without this guard it
  -- would be turned into 0xdf (sharp s) instead of staying unchanged.
  | w == _ydieresis  = w
  | isLower' w       = w - _space
  | otherwise        = w

-- | Convert an upper-case letter (as recognised by 'isUpper') to lower
-- case by adding 0x20. Every other byte is returned unchanged.
toLower :: Word8 -> Word8
toLower w
  | isUpper w = w + _space
  | otherwise = w

-- | Micro sign/mu (0xb5) and small letter Y with diaeresis (0xff) remain the same.
toTitle :: Word8 -> Word8
toTitle w = toUpper w

----------------------------------------------------------------

-- ASCII control characters used by this module.
_nul, _tab, _lf, _vt, _np, _cr :: Word8
_nul = 0x00
_tab = 0x09
_lf  = 0x0a
_vt  = 0x0b
_np  = 0x0c
_cr  = 0x0d

-- ASCII 0x20-0x2f.
_space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand,
  _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma,
  _hyphen, _period, _slash :: Word8
_space       = 0x20
_exclam      = 0x21
_quotedbl    = 0x22
_numbersign  = 0x23
_dollar      = 0x24
_percent     = 0x25
_ampersand   = 0x26
_quotesingle = 0x27
_parenleft   = 0x28
_parenright  = 0x29
_asterisk    = 0x2a
_plus        = 0x2b
_comma       = 0x2c
_hyphen      = 0x2d
_period      = 0x2e
_slash       = 0x2f

-- ASCII digits.
_0, _1, _2, _3, _4, _5, _6, _7, _8, _9 :: Word8
_0 = 0x30
_1 = 0x31
_2 = 0x32
_3 = 0x33
_4 = 0x34
_5 = 0x35
_6 = 0x36
_7 = 0x37
_8 = 0x38
_9 = 0x39

-- ASCII 0x3a-0x40.
_colon, _semicolon, _less, _equal, _greater, _question, _at :: Word8
_colon     = 0x3a
_semicolon = 0x3b
_less      = 0x3c
_equal     = 0x3d
_greater   = 0x3e
_question  = 0x3f
_at        = 0x40

-- ASCII upper-case letters.
_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M,
  _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z :: Word8
_A = 0x41
_B = 0x42
_C = 0x43
_D = 0x44
_E = 0x45
_F = 0x46
_G = 0x47
_H = 0x48
_I = 0x49
_J = 0x4a
_K = 0x4b
_L = 0x4c
_M = 0x4d
_N = 0x4e
_O = 0x4f
_P = 0x50
_Q = 0x51
_R = 0x52
_S = 0x53
_T = 0x54
_U = 0x55
_V = 0x56
_W = 0x57
_X = 0x58
_Y = 0x59
_Z = 0x5a

-- ASCII 0x5b-0x60.
_bracketleft, _backslash, _bracketright, _circum, _underscore, _grave :: Word8
_bracketleft  = 0x5b
_backslash    = 0x5c
_bracketright = 0x5d
_circum       = 0x5e
_underscore   = 0x5f
_grave        = 0x60

-- ASCII lower-case letters.
_a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m,
  _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z :: Word8
_a = 0x61
_b = 0x62
_c = 0x63
_d = 0x64
_e = 0x65
_f = 0x66
_g = 0x67
_h = 0x68
_i = 0x69
_j = 0x6a
_k = 0x6b
_l = 0x6c
_m = 0x6d
_n = 0x6e
_o = 0x6f
_p = 0x70
_q = 0x71
_r = 0x72
_s = 0x73
_t = 0x74
_u = 0x75
_v = 0x76
_w = 0x77
_x = 0x78
_y = 0x79
_z = 0x7a

-- ASCII 0x7b-0x7f.
_braceleft, _bar, _braceright, _tilde, _del :: Word8
_braceleft  = 0x7b
_bar        = 0x7c
_braceright = 0x7d
_tilde      = 0x7e
_del        = 0x7f

-- Latin-1 no-break space.
_nbsp :: Word8
_nbsp = 0xa0

-- Latin-1 bytes that the classification functions treat specially.
_ordfeminine, _softhyphen, _mu, _ordmasculine :: Word8
_ordfeminine  = 0xaa
_softhyphen   = 0xad
_mu           = 0xb5
_ordmasculine = 0xba

-- Latin-1 superscript digits and fractions (see 'isNumber').
_s2, _s3, _s1, _1'4, _1'2, _3'4 :: Word8
_s2  = 0xb2
_s3  = 0xb3
_s1  = 0xb9
_1'4 = 0xbc
_1'2 = 0xbd
_3'4 = 0xbe

-- Boundaries of the Latin-1 upper-case ranges (see 'isUpper').
_Agrave, _Odieresis, _Oslash, _Thorn :: Word8
_Agrave    = 0xc0
_Odieresis = 0xd6
_Oslash    = 0xd8
_Thorn     = 0xde

-- Boundaries of the Latin-1 lower-case ranges, plus 0xe0 and 0xfe.
_germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis :: Word8
_germandbls = 0xdf
_agrave     = 0xe0
_odieresis  = 0xf6
_oslash     = 0xf8
_thorn      = 0xfe
_ydieresis  = 0xff