1{-# LANGUAGE CPP #-}
2
-- | Word8 library to be used with Data.ByteString.
-- All functions assume that 'Word8' is encoded in Latin-1 (ISO-8859-1).
-- All utility functions are supposed to behave like
-- their counterparts in 'Data.Char'. Exceptions are described in
-- the documentation of each function.
8--
9-- Base library 4.7 (GHC 7.8) or earlier is based on Unicode 6.
10-- Base library 4.8 (GHC 7.10) or later is based on Unicode 7.
11-- 'isLower', 'isSymbol' and 'isPunctuation' behave differently.
12
13module Data.Word8 (
14  -- * Re-exporting
15    Word8
16  -- * Character classification
17  , isControl, isSpace, isLower, isUpper
18  , isAlpha, isAlphaNum, isPrint, isDigit, isOctDigit, isHexDigit
19  , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
20  -- * Subranges
21  , isAscii, isLatin1, isAsciiUpper, isAsciiLower
22  -- * Case conversion
23  , toUpper, toLower, toTitle
  -- * ASCII characters
25  , _nul, _tab, _lf, _vt, _np, _cr
26  , _space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand, _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma, _hyphen, _period, _slash
27  , _0, _1, _2, _3, _4, _5, _6, _7, _8, _9
28  , _colon, _semicolon, _less, _equal, _greater, _question, _at
29  , _A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z
30  , _bracketleft, _backslash, _bracketright, _circum, _underscore, _grave
31  , _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z
32  , _braceleft, _bar, _braceright, _tilde, _del
  -- * Some Latin-1 characters
34  , _nbsp
35  , _ordfeminine, _softhyphen, _mu, _ordmasculine
36  , _s2, _s3, _s1, _1'4, _1'2, _3'4
37  , _Agrave, _Odieresis, _Oslash, _Thorn
38  , _germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis
39  ) where
40
41import Data.Word (Word8)
42
43#ifndef MIN_VERSION_base
44#define MIN_VERSION_base(x,y,z) 1
45#endif
46
47----------------------------------------------------------------
48
-- | Selects control characters: 0x00-0x1f and 0x7f-0x9f.
isControl :: Word8 -> Bool
isControl w = within _nul 0x1f || within _del 0x9f
  where
    -- Inclusive range test on the argument.
    within lo hi = lo <= w && w <= hi
52
-- | Selects white-space characters: space, horizontal tab, line feed,
--   carriage return, form feed, vertical tab and no-break space.
isSpace :: Word8 -> Bool
isSpace w = w `elem` [_space, _tab, _lf, _cr, _np, _vt, _nbsp]
61
-- | Selects lower-case letters, including the micro sign.
--
--   The ordinal indicators 170 and 186 are accepted only when built
--   against base older than 4.8 (Unicode 6). With base 4.8 or later
--   (Unicode 7) this function returns 'False' for them.
isLower :: Word8 -> Bool
isLower w = isLower' w || w == _mu || isOrdinal
  where
#if MIN_VERSION_base(4,8,0)
    isOrdinal = False
#else
    isOrdinal = w == _ordfeminine || w == _ordmasculine
#endif
71
-- | Version-independent lower-case test: everything accepted by
--   @isLower'@ plus the micro sign and both ordinal indicators,
--   whatever the base version.
isLowerCommon :: Word8 -> Bool
isLowerCommon w
  | isLower' w = True
  | otherwise  = w `elem` [_mu, _ordfeminine, _ordmasculine]
77
-- | Lower-case letters proper: ASCII lower case plus the two Latin-1
--   ranges sharp s .. o-diaeresis and o-stroke .. y-diaeresis.
isLower' :: Word8 -> Bool
isLower' w
  | isAsciiLower w = True
  | otherwise      = between _germandbls _odieresis
                  || between _oslash _ydieresis
  where
    -- Inclusive range test on the argument.
    between lo hi = lo <= w && w <= hi
82
-- | Selects upper-case letters: ASCII upper case plus the two Latin-1
--   ranges A-grave .. O-diaeresis and O-stroke .. Thorn.
isUpper :: Word8 -> Bool
isUpper w
  | isAsciiUpper w = True
  | otherwise      = between _Agrave _Odieresis
                  || between _Oslash _Thorn
  where
    -- Inclusive range test on the argument.
    between lo hi = lo <= w && w <= hi
87
-- | Selects alphabetic characters: anything upper case or accepted by
--   the version-independent lower-case test.
isAlpha :: Word8 -> Bool
isAlpha w
  | isUpper w = True
  | otherwise = isLowerCommon w
90
-- | Selects characters that are either alphabetic ('isAlpha') or
--   numeric ('isNumber').
isAlphaNum :: Word8 -> Bool
isAlphaNum w
  | isAlpha w = True
  | otherwise = isNumber w
93
-- | Selects printable characters: space .. tilde and no-break space ..
--   y-diaeresis, except the soft hyphen, which is never printable.
isPrint :: Word8 -> Bool
isPrint w
  | w == _softhyphen = False
  | w <= _tilde      = _space <= w
  | otherwise        = _nbsp <= w && w <= _ydieresis
99
-- | Selects the ASCII decimal digits @0@ to @9@.
isDigit :: Word8 -> Bool
isDigit w = w >= _0 && w <= _9
102
-- | Selects the ASCII octal digits @0@ to @7@.
isOctDigit :: Word8 -> Bool
isOctDigit w = w >= _0 && w <= _7
105
-- | Selects the ASCII hexadecimal digits: @0@-@9@, @A@-@F@ and @a@-@f@.
isHexDigit :: Word8 -> Bool
isHexDigit w
  | isDigit w = True
  | otherwise = between _A _F || between _a _f
  where
    -- Inclusive range test on the argument.
    between lo hi = lo <= w && w <= hi
110
-- | Selects letters: anything accepted by the version-independent
--   lower-case test or by 'isUpper'.
isLetter :: Word8 -> Bool
isLetter w = any ($ w) [isLowerCommon, isUpper]
113
-- | Always 'False': no byte value is treated as a mark character.
isMark :: Word8 -> Bool
isMark = const False
116
-- | Selects numeric characters: the ASCII digits, the superscripts
--   one, two and three, and the fractions 1/4, 1/2 and 3/4.
isNumber :: Word8 -> Bool
isNumber w
  | isDigit w = True
  | otherwise = w `elem` [_s1, _s2, _s3, _1'4, _1'2, _3'4]
125
-- | Selects punctuation characters.
--
--   This function returns 'False' for 167 and 182 when built against
--   base older than 4.8 (Unicode 6), and 'True' with base 4.8 or later
--   (Unicode 7).
isPunctuation :: Word8 -> Bool
isPunctuation w = w `elem` stable || w `elem` versioned
  where
    -- Members common to both Unicode versions.
    stable =
      [ 0x21, 0x22, 0x23, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2c
      , 0x2d, 0x2e, 0x2f, 0x3a, 0x3b, 0x3f, 0x40, 0x5b, 0x5c, 0x5d
      , 0x5f, 0x7b, 0x7d, 0xa1, 0xab, 0xb7, 0xbb, 0xbf
      ]
    -- Members that depend on the Unicode version behind base.
#if MIN_VERSION_base(4,8,0)
    versioned = [0xa7, 0xb6]
#else
    versioned = []
#endif
134
-- | Selects symbol characters.
--
--   This function returns 'True' for 167 and 182 when built against
--   base older than 4.8 (Unicode 6), and 'False' with base 4.8 or later
--   (Unicode 7).
isSymbol :: Word8 -> Bool
isSymbol w = w `elem` stable || w `elem` versioned
  where
    -- Members common to both Unicode versions.
    stable =
      [ 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2
      , 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xac, 0xae, 0xaf, 0xb0
      , 0xb1, 0xb4, 0xb8, 0xd7, 0xf7
      ]
    -- Members that depend on the Unicode version behind base.
#if MIN_VERSION_base(4,8,0)
    versioned = []
#else
    versioned = [0xa7, 0xb6]
#endif
143
-- | Selects separator characters: space and no-break space.
isSeparator :: Word8 -> Bool
isSeparator w = w `elem` [_space, _nbsp]
147
148----------------------------------------------------------------
149
-- | Selects the ASCII subrange, NUL through DEL inclusive.
isAscii :: Word8 -> Bool
isAscii w = w >= _nul && w <= _del
152
-- | Always 'True': every byte value is accepted.
isLatin1 :: Word8 -> Bool
isLatin1 = const True
155
-- | Selects the ASCII upper-case letters @A@ to @Z@.
isAsciiUpper :: Word8 -> Bool
isAsciiUpper w = w >= _A && w <= _Z
158
-- | Selects the ASCII lower-case letters @a@ to @z@.
isAsciiLower :: Word8 -> Bool
isAsciiLower w = w >= _a && w <= _z
161
162----------------------------------------------------------------
163
-- | Converts a lower-case letter to its upper-case counterpart by
--   subtracting 0x20; every other byte is returned unchanged.
--
--   Sharp s (0xdf), micro sign/mu (0xb5) and small letter Y with
--   diaeresis (0xff) remain the same, since they have no upper-case
--   counterpart within Latin-1.
toUpper :: Word8 -> Word8
toUpper w
  | w == _germandbls = w
  -- Without this guard 0xff would be mapped to 0xdf (sharp s),
  -- because it falls inside the lower-case range tested below.
  | w == _ydieresis  = w
  | isLower' w       = w - _space
  | otherwise        = w
170
-- | Converts an upper-case letter ('isUpper') to lower case by adding
--   0x20; every other byte is returned unchanged.
toLower :: Word8 -> Word8
toLower w = if isUpper w then w + _space else w
175
-- | Converts a letter to title case. This is exactly 'toUpper', so
--   the same bytes are left unchanged.
toTitle :: Word8 -> Word8
toTitle w = toUpper w
179
180----------------------------------------------------------------
181
-- | ASCII control characters.
_nul, _tab, _lf, _vt, _np, _cr :: Word8
_nul = 0x00 -- null
_tab = 0x09 -- horizontal tab
_lf  = 0x0a -- line feed
_vt  = 0x0b -- vertical tab
_np  = 0x0c -- form feed (new page)
_cr  = 0x0d -- carriage return
189
-- | ASCII space and punctuation, 0x20 through 0x2f.
_space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand, _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma, _hyphen, _period, _slash :: Word8
_space       = 0x20 -- space
_exclam      = 0x21 -- exclamation mark
_quotedbl    = 0x22 -- quotation mark
_numbersign  = 0x23 -- number sign
_dollar      = 0x24 -- dollar sign
_percent     = 0x25 -- percent sign
_ampersand   = 0x26 -- ampersand
_quotesingle = 0x27 -- apostrophe
_parenleft   = 0x28 -- left parenthesis
_parenright  = 0x29 -- right parenthesis
_asterisk    = 0x2a -- asterisk
_plus        = 0x2b -- plus sign
_comma       = 0x2c -- comma
_hyphen      = 0x2d -- hyphen-minus
_period      = 0x2e -- full stop
_slash       = 0x2f -- solidus
207
-- | ASCII decimal digits, 0x30 through 0x39.
_0, _1, _2, _3, _4, _5, _6, _7, _8, _9 :: Word8
_0 = 0x30 -- '0'
_1 = 0x31 -- '1'
_2 = 0x32 -- '2'
_3 = 0x33 -- '3'
_4 = 0x34 -- '4'
_5 = 0x35 -- '5'
_6 = 0x36 -- '6'
_7 = 0x37 -- '7'
_8 = 0x38 -- '8'
_9 = 0x39 -- '9'
219
-- | ASCII punctuation and symbols, 0x3a through 0x40.
_colon, _semicolon, _less, _equal, _greater, _question, _at :: Word8
_colon      = 0x3a -- colon
_semicolon  = 0x3b -- semicolon
_less       = 0x3c -- less-than sign
_equal      = 0x3d -- equals sign
_greater    = 0x3e -- greater-than sign
_question   = 0x3f -- question mark
_at         = 0x40 -- commercial at
228
-- | ASCII upper-case letters, 0x41 through 0x5a.
_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z :: Word8
_A = 0x41 -- 'A'
_B = 0x42 -- 'B'
_C = 0x43 -- 'C'
_D = 0x44 -- 'D'
_E = 0x45 -- 'E'
_F = 0x46 -- 'F'
_G = 0x47 -- 'G'
_H = 0x48 -- 'H'
_I = 0x49 -- 'I'
_J = 0x4a -- 'J'
_K = 0x4b -- 'K'
_L = 0x4c -- 'L'
_M = 0x4d -- 'M'
_N = 0x4e -- 'N'
_O = 0x4f -- 'O'
_P = 0x50 -- 'P'
_Q = 0x51 -- 'Q'
_R = 0x52 -- 'R'
_S = 0x53 -- 'S'
_T = 0x54 -- 'T'
_U = 0x55 -- 'U'
_V = 0x56 -- 'V'
_W = 0x57 -- 'W'
_X = 0x58 -- 'X'
_Y = 0x59 -- 'Y'
_Z = 0x5a -- 'Z'
256
-- | ASCII punctuation and symbols, 0x5b through 0x60.
_bracketleft, _backslash, _bracketright, _circum, _underscore, _grave :: Word8
_bracketleft  = 0x5b -- left square bracket
_backslash    = 0x5c -- reverse solidus
_bracketright = 0x5d -- right square bracket
_circum       = 0x5e -- circumflex accent
_underscore   = 0x5f -- low line
_grave        = 0x60 -- grave accent
264
-- | ASCII lower-case letters, 0x61 through 0x7a.
_a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z :: Word8
_a = 0x61 -- 'a'
_b = 0x62 -- 'b'
_c = 0x63 -- 'c'
_d = 0x64 -- 'd'
_e = 0x65 -- 'e'
_f = 0x66 -- 'f'
_g = 0x67 -- 'g'
_h = 0x68 -- 'h'
_i = 0x69 -- 'i'
_j = 0x6a -- 'j'
_k = 0x6b -- 'k'
_l = 0x6c -- 'l'
_m = 0x6d -- 'm'
_n = 0x6e -- 'n'
_o = 0x6f -- 'o'
_p = 0x70 -- 'p'
_q = 0x71 -- 'q'
_r = 0x72 -- 'r'
_s = 0x73 -- 's'
_t = 0x74 -- 't'
_u = 0x75 -- 'u'
_v = 0x76 -- 'v'
_w = 0x77 -- 'w'
_x = 0x78 -- 'x'
_y = 0x79 -- 'y'
_z = 0x7a -- 'z'
292
-- | ASCII punctuation and symbols 0x7b through 0x7e, plus DEL (0x7f).
_braceleft, _bar, _braceright, _tilde, _del :: Word8
_braceleft  = 0x7b -- left curly bracket
_bar        = 0x7c -- vertical line
_braceright = 0x7d -- right curly bracket
_tilde      = 0x7e -- tilde
_del        = 0x7f -- delete
299
-- | Latin-1 no-break space.
_nbsp :: Word8
_nbsp = 0xa0 -- 160
302
-- | Latin-1 characters that need special treatment by the classifiers.
_ordfeminine, _softhyphen, _mu, _ordmasculine :: Word8
_ordfeminine  = 0xaa -- 170, feminine ordinal indicator
_softhyphen   = 0xad -- 173, soft hyphen
_mu           = 0xb5 -- 181, micro sign
_ordmasculine = 0xba -- 186, masculine ordinal indicator
308
-- | Latin-1 superscript digits and vulgar fractions.
_s2, _s3, _s1, _1'4, _1'2, _3'4 :: Word8
_s2  = 0xb2 -- superscript two
_s3  = 0xb3 -- superscript three
_s1  = 0xb9 -- superscript one
_1'4 = 0xbc -- fraction one quarter
_1'2 = 0xbd -- fraction one half
_3'4 = 0xbe -- fraction three quarters
316
-- | Boundaries of the two Latin-1 upper-case letter ranges.
_Agrave, _Odieresis, _Oslash, _Thorn :: Word8
_Agrave    = 0xc0 -- capital A with grave
_Odieresis = 0xd6 -- capital O with diaeresis
_Oslash    = 0xd8 -- capital O with stroke
_Thorn     = 0xde -- capital thorn
322
-- | Sharp s and the boundaries of the Latin-1 lower-case letter ranges.
_germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis :: Word8
_germandbls = 0xdf -- small sharp s
_agrave     = 0xe0 -- small a with grave
_odieresis  = 0xf6 -- small o with diaeresis
_oslash     = 0xf8 -- small o with stroke
_thorn      = 0xfe -- small thorn
_ydieresis  = 0xff -- small y with diaeresis
330