-- ------------------------------------------------------------

{- |
   Module     : Data.Char.Properties.XMLCharProps
   Copyright  : Copyright (C) 2010 - Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   XML character properties

-}

-- ------------------------------------------------------------

module Data.Char.Properties.XMLCharProps
    ( isXmlChar
    , isXmlCharCR
    , isXml1ByteChar
    , isXmlLatin1Char
    , isXmlSpaceChar
    , isXmlSpaceCharCR
    , isXml11SpaceChar
    , isXmlNameChar
    , isXmlNameStartChar
    , isXmlNCNameChar
    , isXmlNCNameStartChar
    , isXmlPubidChar
    , isXmlLetter
    , isXmlBaseChar
    , isXmlIdeographicChar
    , isXmlCombiningChar
    , isXmlDigit
    , isXmlExtender
    , isXmlControlOrPermanentlyUndefined

    , charPropXmlChar
    , charPropXmlCharCR
    , charPropXml1ByteChar
    , charPropXmlLatin1Char
    , charPropXmlSpaceChar
    , charPropXmlSpaceCharCR
    , charPropXml11SpaceChar
    , charPropXmlNameChar
    , charPropXmlNameStartChar
    , charPropXmlNCNameChar
    , charPropXmlNCNameStartChar
    , charPropXmlPubidChar
    , charPropXmlLetter
    , charPropXmlBaseChar
    , charPropXmlIdeographicChar
    , charPropXmlCombiningChar
    , charPropXmlDigit
    , charPropXmlExtender
    , charPropXmlControlOrPermanentlyUndefined
    )
where

import Data.Set.CharSet

-- ------------------------------------------------------------

-- | Test whether a character is a legal XML character.
--
-- Implemented as a hand-written comparison cascade instead of a
-- 'CharSet' lookup; the accepted characters are exactly the ranges
-- listed in 'charPropXmlChar'.

isXmlChar :: Char -> Bool
isXmlChar c
    | c < ' '       = c == '\n' || c == '\t' || c == '\r'  -- control chars: only LF, TAB, CR
    | c <= '\xD7FF' = True                                 -- U+0020 .. U+D7FF
    | c < '\xE000'  = False                                -- surrogate block
    | otherwise     = c <= '\xFFFD'
                      || ( c >= '\x10000' && c <= '\x10FFFF' )
{-# INLINE isXmlChar #-}

-- | The legal XML characters as a 'CharSet' (list of inclusive ranges).

charPropXmlChar :: CharSet
charPropXmlChar =
    [ ('\x0009',  '\x000A')
    , ('\x000D',  '\x000D')
    , ('\x0020',  '\xD7FF')
    , ('\xE000',  '\xFFFD')
    , ('\x10000', '\x10FFFF')
    ]

-- | Test whether a character is a legal XML character other than
-- carriage return.
--
-- Same comparison cascade as 'isXmlChar', with CR removed from the
-- accepted control characters; the accepted characters are exactly the
-- ranges listed in 'charPropXmlCharCR'.

isXmlCharCR :: Char -> Bool
isXmlCharCR c
    | c < ' '       = c == '\n' || c == '\t'               -- control chars: only LF, TAB
    | c <= '\xD7FF' = True                                 -- U+0020 .. U+D7FF
    | c < '\xE000'  = False                                -- surrogate block
    | otherwise     = c <= '\xFFFD'
                      || ( c >= '\x10000' && c <= '\x10FFFF' )
{-# INLINE isXmlCharCR #-}

-- | The legal XML characters without carriage return, as a 'CharSet'.

charPropXmlCharCR :: CharSet
charPropXmlCharCR =
    [ ('\x0009',  '\x000A')
    , ('\x0020',  '\xD7FF')
    , ('\xE000',  '\xFFFD')
    , ('\x10000', '\x10FFFF')
    ]

-- | Test for a legal XML character that fits into one byte
-- (member of 'charPropXml1ByteChar').

isXml1ByteChar :: Char -> Bool
isXml1ByteChar c = elemCS c charPropXml1ByteChar
{-# INLINE isXml1ByteChar #-}

-- | TAB, LF, CR and the range U+0020 .. U+007F.

charPropXml1ByteChar :: CharSet
charPropXml1ByteChar =
    unionCS (stringCS "\t\n\r") [ (' ', '\x7F') ]

-- | Test for a legal XML character within the Latin-1 range
-- (member of 'charPropXmlLatin1Char').

isXmlLatin1Char :: Char -> Bool
isXmlLatin1Char c = elemCS c charPropXmlLatin1Char
{-# INLINE isXmlLatin1Char #-}

-- | 'charPropXml1ByteChar' extended by the range U+0080 .. U+00FF.

charPropXmlLatin1Char :: CharSet
charPropXmlLatin1Char =
    unionCS charPropXml1ByteChar [ ('\x80', '\xFF') ]

-- | Test for an XML white space character: space, line feed, tab or
-- carriage return.
--
-- Written as direct comparisons; accepts the same characters as
-- 'charPropXmlSpaceChar'.

isXmlSpaceChar :: Char -> Bool
isXmlSpaceChar c =
    case c of
      ' '  -> True
      '\n' -> True
      '\t' -> True
      '\r' -> True
      _    -> False
{-# INLINE isXmlSpaceChar #-}

-- | The XML white space characters as a 'CharSet'.

charPropXmlSpaceChar :: CharSet
charPropXmlSpaceChar =
    stringCS " \t\r\n"

-- | Test for an XML white space character other than carriage return:
-- space, line feed or tab.
--
-- Written as direct comparisons; accepts the same characters as
-- 'charPropXmlSpaceCharCR'.

isXmlSpaceCharCR :: Char -> Bool
isXmlSpaceCharCR c =
    case c of
      ' '  -> True
      '\n' -> True
      '\t' -> True
      _    -> False
{-# INLINE isXmlSpaceCharCR #-}

-- | The XML white space characters without carriage return, as a 'CharSet'.

charPropXmlSpaceCharCR :: CharSet
charPropXmlSpaceCharCR =
    stringCS " \t\n"

-- | Test for an XML 1.1 space character: the XML 1.0 space characters
-- plus U+0085 and U+2028.
--
-- see also : 'isXmlSpaceChar'

isXml11SpaceChar :: Char -> Bool
isXml11SpaceChar c = elemCS c charPropXml11SpaceChar

-- | The XML 1.1 space characters as a 'CharSet'.

charPropXml11SpaceChar :: CharSet
charPropXml11SpaceChar =
    stringCS ['\t', '\n', '\r', ' ', '\x85', '\x2028']

-- | Test for an XML name character.
--
-- Characters up to @z@ are decided by plain comparisons; characters
-- from U+00B7 upwards are looked up in 'charPropXmlNameChar';
-- everything in between is rejected.

isXmlNameChar :: Char -> Bool
isXmlNameChar c
    | c > 'z'   = c >= '\xB7' && elemCS c charPropXmlNameChar
    | otherwise = asciiLower || asciiUpper || asciiDigit || asciiPunct
  where
    asciiLower = c >= 'a'                  -- upper bound 'z' is implied by the guard
    asciiUpper = c >= 'A' && c <= 'Z'
    asciiDigit = c >= '0' && c <= '9'
    asciiPunct = c `elem` "-.:_"
{-# INLINE isXmlNameChar #-}

-- | The XML name characters as a 'CharSet': letters, digits,
-- @-@, @.@, @:@, @_@, combining characters and extenders.

charPropXmlNameChar :: CharSet
charPropXmlNameChar =
    lettersAndDigits
    `unionCS` hyphenOrDot
    `unionCS` colonOrUnderscore
    `unionCS` charPropXmlCombiningChar
    `unionCS` charPropXmlExtender
  where
    lettersAndDigits  = charPropXmlLetter `unionCS` charPropXmlDigit
    hyphenOrDot       = singleCS '-' `unionCS` singleCS '.'
    colonOrUnderscore = singleCS ':' `unionCS` singleCS '_'

-- | Test for an XML name start character.
--
-- Characters up to @z@ are decided by plain comparisons; characters
-- from U+00C0 upwards are looked up in 'charPropXmlNameStartChar';
-- everything in between is rejected.
--
-- see also : 'isXmlNameChar'

isXmlNameStartChar :: Char -> Bool
isXmlNameStartChar c
    | c > 'z'   = c >= '\xC0' && elemCS c charPropXmlNameStartChar
    | otherwise = asciiLower || asciiUpper || asciiPunct
  where
    asciiLower = c >= 'a'                  -- upper bound 'z' is implied by the guard
    asciiUpper = c >= 'A' && c <= 'Z'
    asciiPunct = c `elem` ":_"
{-# INLINE isXmlNameStartChar #-}

-- | The XML name start characters as a 'CharSet': letters, @:@ and @_@.

charPropXmlNameStartChar :: CharSet
charPropXmlNameStartChar =
    charPropXmlLetter
    `unionCS` colon
    `unionCS` underscore
  where
    colon      = singleCS ':'
    underscore = singleCS '_'

-- | Test for an XML NCName character: like 'isXmlNameChar', but @:@ is
-- not allowed.
--
-- The table lookup for characters from U+00B7 upwards goes through
-- 'charPropXmlNameChar'; the colon lies below that bound, so it is
-- never looked up there.
--
-- see also : 'isXmlNameChar'

isXmlNCNameChar :: Char -> Bool
isXmlNCNameChar c
    | c > 'z'   = c >= '\xB7' && elemCS c charPropXmlNameChar
    | otherwise = asciiLower || asciiUpper || asciiDigit || asciiPunct
  where
    asciiLower = c >= 'a'                  -- upper bound 'z' is implied by the guard
    asciiUpper = c >= 'A' && c <= 'Z'
    asciiDigit = c >= '0' && c <= '9'
    asciiPunct = c `elem` "-._"
{-# INLINE isXmlNCNameChar #-}

-- | 'charPropXmlNameChar' without @:@.

charPropXmlNCNameChar :: CharSet
charPropXmlNCNameChar =
    diffCS charPropXmlNameChar (singleCS ':')

-- | Test for an XML NCName start character: like 'isXmlNameStartChar',
-- but @:@ is not allowed.
--
-- The table lookup for characters from U+00C0 upwards goes through
-- 'charPropXmlNameStartChar'; the colon lies below that bound, so it is
-- never looked up there.
--
-- see also : 'isXmlNameChar', 'isXmlNCNameChar'

isXmlNCNameStartChar :: Char -> Bool
isXmlNCNameStartChar c
    | c > 'z'   = c >= '\xC0' && elemCS c charPropXmlNameStartChar
    | otherwise = asciiLower || asciiUpper || c == '_'
  where
    asciiLower = c >= 'a'                  -- upper bound 'z' is implied by the guard
    asciiUpper = c >= 'A' && c <= 'Z'
{-# INLINE isXmlNCNameStartChar #-}

-- | 'charPropXmlNameStartChar' without @:@.

charPropXmlNCNameStartChar :: CharSet
charPropXmlNCNameStartChar =
    diffCS charPropXmlNameStartChar (singleCS ':')

-- |
-- checking for XML public id character

isXmlPubidChar :: Char -> Bool
isXmlPubidChar c = c `elemCS` charPropXmlPubidChar

-- | The XML public id characters as a 'CharSet': ASCII digits, ASCII
-- letters, space, CR, LF and the punctuation characters contained in
-- the string literal below.

charPropXmlPubidChar :: CharSet
charPropXmlPubidChar
    = rangeCS '0' '9'
      `unionCS`
      rangeCS 'A' 'Z'
      `unionCS`
      rangeCS 'a' 'z'
      `unionCS`
      stringCS " \r\n-'()+,./:=?;!*#@$_%"

-- |
-- checking for XML letter: a base character or an ideographic character

isXmlLetter :: Char -> Bool
isXmlLetter c = c `elemCS` charPropXmlLetter
{-# INLINE isXmlLetter #-}

-- | Union of 'charPropXmlBaseChar' and 'charPropXmlIdeographicChar'.

charPropXmlLetter :: CharSet
charPropXmlLetter
    = charPropXmlBaseChar
      `unionCS`
      charPropXmlIdeographicChar

-- |
-- checking for XML base character

isXmlBaseChar :: Char -> Bool
isXmlBaseChar c = c `elemCS` charPropXmlBaseChar

-- | The XML base characters as a list of inclusive code point ranges,
-- in ascending order.

charPropXmlBaseChar :: CharSet
charPropXmlBaseChar
    = [ ('\x0041', '\x005A')
      , ('\x0061', '\x007A')
      , ('\x00C0', '\x00D6')
      , ('\x00D8', '\x00F6')
      , ('\x00F8', '\x0131')
      , ('\x0134', '\x013E')
      , ('\x0141', '\x0148')
      , ('\x014A', '\x017E')
      , ('\x0180', '\x01C3')
      , ('\x01CD', '\x01F0')
      , ('\x01F4', '\x01F5')
      , ('\x01FA', '\x0217')
      , ('\x0250', '\x02A8')
      , ('\x02BB', '\x02C1')
      , ('\x0386', '\x0386')
      , ('\x0388', '\x038A')
      , ('\x038C', '\x038C')
      , ('\x038E', '\x03A1')
      , ('\x03A3', '\x03CE')
      , ('\x03D0', '\x03D6')
      , ('\x03DA', '\x03DA')
      , ('\x03DC', '\x03DC')
      , ('\x03DE', '\x03DE')
      , ('\x03E0', '\x03E0')
      , ('\x03E2', '\x03F3')
      , ('\x0401', '\x040C')
      , ('\x040E', '\x044F')
      , ('\x0451', '\x045C')
      , ('\x045E', '\x0481')
      , ('\x0490', '\x04C4')
      , ('\x04C7', '\x04C8')
      , ('\x04CB', '\x04CC')
      , ('\x04D0', '\x04EB')
      , ('\x04EE', '\x04F5')
      , ('\x04F8', '\x04F9')
      , ('\x0531', '\x0556')
      , ('\x0559', '\x0559')
      , ('\x0561', '\x0586')
      , ('\x05D0', '\x05EA')
      , ('\x05F0', '\x05F2')
      , ('\x0621', '\x063A')
      , ('\x0641', '\x064A')
      , ('\x0671', '\x06B7')
      , ('\x06BA', '\x06BE')
      , ('\x06C0', '\x06CE')
      , ('\x06D0', '\x06D3')
      , ('\x06D5', '\x06D5')
      , ('\x06E5', '\x06E6')
      , ('\x0905', '\x0939')
      , ('\x093D', '\x093D')
      , ('\x0958', '\x0961')
      , ('\x0985', '\x098C')
      , ('\x098F', '\x0990')
      , ('\x0993', '\x09A8')
      , ('\x09AA', '\x09B0')
      , ('\x09B2', '\x09B2')
      , ('\x09B6', '\x09B9')
      , ('\x09DC', '\x09DD')
      , ('\x09DF', '\x09E1')
      , ('\x09F0', '\x09F1')
      , ('\x0A05', '\x0A0A')
      , ('\x0A0F', '\x0A10')
      , ('\x0A13', '\x0A28')
      , ('\x0A2A', '\x0A30')
      , ('\x0A32', '\x0A33')
      , ('\x0A35', '\x0A36')
      , ('\x0A38', '\x0A39')
      , ('\x0A59', '\x0A5C')
      , ('\x0A5E', '\x0A5E')
      , ('\x0A72', '\x0A74')
      , ('\x0A85', '\x0A8B')
      , ('\x0A8D', '\x0A8D')
      , ('\x0A8F', '\x0A91')
      , ('\x0A93', '\x0AA8')
      , ('\x0AAA', '\x0AB0')
      , ('\x0AB2', '\x0AB3')
      , ('\x0AB5', '\x0AB9')
      , ('\x0ABD', '\x0ABD')
      , ('\x0AE0', '\x0AE0')
      , ('\x0B05', '\x0B0C')
      , ('\x0B0F', '\x0B10')
      , ('\x0B13', '\x0B28')
      , ('\x0B2A', '\x0B30')
      , ('\x0B32', '\x0B33')
      , ('\x0B36', '\x0B39')
      , ('\x0B3D', '\x0B3D')
      , ('\x0B5C', '\x0B5D')
      , ('\x0B5F', '\x0B61')
      , ('\x0B85', '\x0B8A')
      , ('\x0B8E', '\x0B90')
      , ('\x0B92', '\x0B95')
      , ('\x0B99', '\x0B9A')
      , ('\x0B9C', '\x0B9C')
      , ('\x0B9E', '\x0B9F')
      , ('\x0BA3', '\x0BA4')
      , ('\x0BA8', '\x0BAA')
      , ('\x0BAE', '\x0BB5')
      , ('\x0BB7', '\x0BB9')
      , ('\x0C05', '\x0C0C')
      , ('\x0C0E', '\x0C10')
      , ('\x0C12', '\x0C28')
      , ('\x0C2A', '\x0C33')
      , ('\x0C35', '\x0C39')
      , ('\x0C60', '\x0C61')
      , ('\x0C85', '\x0C8C')
      , ('\x0C8E', '\x0C90')
      , ('\x0C92', '\x0CA8')
      , ('\x0CAA', '\x0CB3')
      , ('\x0CB5', '\x0CB9')
      , ('\x0CDE', '\x0CDE')
      , ('\x0CE0', '\x0CE1')
      , ('\x0D05', '\x0D0C')
      , ('\x0D0E', '\x0D10')
      , ('\x0D12', '\x0D28')
      , ('\x0D2A', '\x0D39')
      , ('\x0D60', '\x0D61')
      , ('\x0E01', '\x0E2E')
      , ('\x0E30', '\x0E30')
      , ('\x0E32', '\x0E33')
      , ('\x0E40', '\x0E45')
      , ('\x0E81', '\x0E82')
      , ('\x0E84', '\x0E84')
      , ('\x0E87', '\x0E88')
      , ('\x0E8A', '\x0E8A')
      , ('\x0E8D', '\x0E8D')
      , ('\x0E94', '\x0E97')
      , ('\x0E99', '\x0E9F')
      , ('\x0EA1', '\x0EA3')
      , ('\x0EA5', '\x0EA5')
      , ('\x0EA7', '\x0EA7')
      , ('\x0EAA', '\x0EAB')
      , ('\x0EAD', '\x0EAE')
      , ('\x0EB0', '\x0EB0')
      , ('\x0EB2', '\x0EB3')
      , ('\x0EBD', '\x0EBD')
      , ('\x0EC0', '\x0EC4')
      , ('\x0F40', '\x0F47')
      , ('\x0F49', '\x0F69')
      , ('\x10A0', '\x10C5')
      , ('\x10D0', '\x10F6')
      , ('\x1100', '\x1100')
      , ('\x1102', '\x1103')
      , ('\x1105', '\x1107')
      ,
      ('\x1109', '\x1109')
      , ('\x110B', '\x110C')
      , ('\x110E', '\x1112')
      , ('\x113C', '\x113C')
      , ('\x113E', '\x113E')
      , ('\x1140', '\x1140')
      , ('\x114C', '\x114C')
      , ('\x114E', '\x114E')
      , ('\x1150', '\x1150')
      , ('\x1154', '\x1155')
      , ('\x1159', '\x1159')
      , ('\x115F', '\x1161')
      , ('\x1163', '\x1163')
      , ('\x1165', '\x1165')
      , ('\x1167', '\x1167')
      , ('\x1169', '\x1169')
      , ('\x116D', '\x116E')
      , ('\x1172', '\x1173')
      , ('\x1175', '\x1175')
      , ('\x119E', '\x119E')
      , ('\x11A8', '\x11A8')
      , ('\x11AB', '\x11AB')
      , ('\x11AE', '\x11AF')
      , ('\x11B7', '\x11B8')
      , ('\x11BA', '\x11BA')
      , ('\x11BC', '\x11C2')
      , ('\x11EB', '\x11EB')
      , ('\x11F0', '\x11F0')
      , ('\x11F9', '\x11F9')
      , ('\x1E00', '\x1E9B')
      , ('\x1EA0', '\x1EF9')
      , ('\x1F00', '\x1F15')
      , ('\x1F18', '\x1F1D')
      , ('\x1F20', '\x1F45')
      , ('\x1F48', '\x1F4D')
      , ('\x1F50', '\x1F57')
      , ('\x1F59', '\x1F59')
      , ('\x1F5B', '\x1F5B')
      , ('\x1F5D', '\x1F5D')
      , ('\x1F5F', '\x1F7D')
      , ('\x1F80', '\x1FB4')
      , ('\x1FB6', '\x1FBC')
      , ('\x1FBE', '\x1FBE')
      , ('\x1FC2', '\x1FC4')
      , ('\x1FC6', '\x1FCC')
      , ('\x1FD0', '\x1FD3')
      , ('\x1FD6', '\x1FDB')
      , ('\x1FE0', '\x1FEC')
      , ('\x1FF2', '\x1FF4')
      , ('\x1FF6', '\x1FFC')
      , ('\x2126', '\x2126')
      , ('\x212A', '\x212B')
      , ('\x212E', '\x212E')
      , ('\x2180', '\x2182')
      , ('\x3041', '\x3094')
      , ('\x30A1', '\x30FA')
      , ('\x3105', '\x312C')
      , ('\xAC00', '\xD7A3')
      ]

-- |
-- checking for XML ideographic character

isXmlIdeographicChar :: Char -> Bool
isXmlIdeographicChar c = c `elemCS` charPropXmlIdeographicChar
{-# INLINE isXmlIdeographicChar #-}

-- | The XML ideographic characters as a list of inclusive ranges.

charPropXmlIdeographicChar :: CharSet
charPropXmlIdeographicChar
    = [ ('\x3007', '\x3007')
      , ('\x3021', '\x3029')
      , ('\x4E00', '\x9FA5')
      ]

-- |
-- checking for XML combining character

isXmlCombiningChar :: Char -> Bool
isXmlCombiningChar c = c `elemCS` charPropXmlCombiningChar

-- | The XML combining characters as a list of inclusive code point
-- ranges, in ascending order.

charPropXmlCombiningChar :: CharSet
charPropXmlCombiningChar
    = [ ('\x0300', '\x0345')
      , ('\x0360', '\x0361')
      , ('\x0483', '\x0486')
      , ('\x0591', '\x05A1')
      , ('\x05A3', '\x05B9')
      , ('\x05BB', '\x05BD')
      , ('\x05BF', '\x05BF')
      , ('\x05C1', '\x05C2')
      , ('\x05C4', '\x05C4')
      , ('\x064B', '\x0652')
      , ('\x0670', '\x0670')
      , ('\x06D6', '\x06DC')
      , ('\x06DD', '\x06DF')
      , ('\x06E0', '\x06E4')
      , ('\x06E7', '\x06E8')
      , ('\x06EA', '\x06ED')
      , ('\x0901', '\x0903')
      , ('\x093C', '\x093C')
      , ('\x093E', '\x094C')
      , ('\x094D', '\x094D')
      , ('\x0951', '\x0954')
      , ('\x0962', '\x0963')
      , ('\x0981', '\x0983')
      , ('\x09BC', '\x09BC')
      , ('\x09BE', '\x09BE')
      , ('\x09BF', '\x09BF')
      , ('\x09C0', '\x09C4')
      , ('\x09C7', '\x09C8')
      , ('\x09CB', '\x09CD')
      , ('\x09D7', '\x09D7')
      , ('\x09E2', '\x09E3')
      , ('\x0A02', '\x0A02')
      , ('\x0A3C', '\x0A3C')
      , ('\x0A3E', '\x0A3E')
      , ('\x0A3F', '\x0A3F')
      , ('\x0A40', '\x0A42')
      , ('\x0A47', '\x0A48')
      , ('\x0A4B', '\x0A4D')
      , ('\x0A70', '\x0A71')
      , ('\x0A81', '\x0A83')
      , ('\x0ABC', '\x0ABC')
      , ('\x0ABE', '\x0AC5')
      , ('\x0AC7', '\x0AC9')
      , ('\x0ACB', '\x0ACD')
      , ('\x0B01', '\x0B03')
      , ('\x0B3C', '\x0B3C')
      , ('\x0B3E', '\x0B43')
      , ('\x0B47', '\x0B48')
      , ('\x0B4B', '\x0B4D')
      , ('\x0B56', '\x0B57')
      , ('\x0B82', '\x0B83')
      , ('\x0BBE', '\x0BC2')
      , ('\x0BC6', '\x0BC8')
      , ('\x0BCA', '\x0BCD')
      , ('\x0BD7', '\x0BD7')
      , ('\x0C01', '\x0C03')
      , ('\x0C3E', '\x0C44')
      , ('\x0C46', '\x0C48')
      , ('\x0C4A', '\x0C4D')
      , ('\x0C55', '\x0C56')
      , ('\x0C82', '\x0C83')
      , ('\x0CBE', '\x0CC4')
      , ('\x0CC6', '\x0CC8')
      , ('\x0CCA', '\x0CCD')
      , ('\x0CD5', '\x0CD6')
      , ('\x0D02', '\x0D03')
      , ('\x0D3E', '\x0D43')
      , ('\x0D46', '\x0D48')
      , ('\x0D4A', '\x0D4D')
      , ('\x0D57', '\x0D57')
      , ('\x0E31', '\x0E31')
      , ('\x0E34', '\x0E3A')
      , ('\x0E47', '\x0E4E')
      , ('\x0EB1', '\x0EB1')
      , ('\x0EB4', '\x0EB9')
      , ('\x0EBB', '\x0EBC')
      , ('\x0EC8', '\x0ECD')
      , ('\x0F18', '\x0F19')
      , ('\x0F35', '\x0F35')
      , ('\x0F37', '\x0F37')
      , ('\x0F39', '\x0F39')
      , ('\x0F3E', '\x0F3E')
      , ('\x0F3F', '\x0F3F')
      , ('\x0F71', '\x0F84')
      , ('\x0F86', '\x0F8B')
      , ('\x0F90', '\x0F95')
      , ('\x0F97', '\x0F97')
      , ('\x0F99', '\x0FAD')
      , ('\x0FB1', '\x0FB7')
      , ('\x0FB9', '\x0FB9')
      , ('\x20D0', '\x20DC')
      , ('\x20E1', '\x20E1')
      , ('\x302A', '\x302F')
      , ('\x3099', '\x3099')
      , ('\x309A', '\x309A')
      ]

-- |
-- checking for XML digit

isXmlDigit :: Char -> Bool
isXmlDigit c = c `elemCS` charPropXmlDigit

-- | The XML digits as a list of inclusive code point ranges,
-- in ascending order.

charPropXmlDigit :: CharSet
charPropXmlDigit
    = [ ('\x0030', '\x0039')
      , ('\x0660', '\x0669')
      , ('\x06F0', '\x06F9')
      , ('\x0966', '\x096F')
      , ('\x09E6', '\x09EF')
      , ('\x0A66', '\x0A6F')
      , ('\x0AE6', '\x0AEF')
      , ('\x0B66', '\x0B6F')
      , ('\x0BE7', '\x0BEF')
      , ('\x0C66', '\x0C6F')
      , ('\x0CE6', '\x0CEF')
      , ('\x0D66', '\x0D6F')
      , ('\x0E50', '\x0E59')
      , ('\x0ED0', '\x0ED9')
      , ('\x0F20', '\x0F29')
      ]

-- |
-- checking for XML extender

isXmlExtender :: Char -> Bool
isXmlExtender c = c `elemCS` charPropXmlExtender

-- | The XML extenders as a list of inclusive code point ranges,
-- in ascending order.

charPropXmlExtender :: CharSet
charPropXmlExtender
    = [ ('\x00B7', '\x00B7')
      , ('\x02D0', '\x02D0')
      , ('\x02D1', '\x02D1')
      , ('\x0387', '\x0387')
      , ('\x0640', '\x0640')
      , ('\x0E46', '\x0E46')
      , ('\x0EC6', '\x0EC6')
      , ('\x3005', '\x3005')
      , ('\x3031', '\x3035')
      , ('\x309D', '\x309E')
      , ('\x30FC', '\x30FE')
      ]

-- |
-- checking for XML control or permanently discouraged char
--
-- see Errata to XML1.0 (http:\/\/www.w3.org\/XML\/xml-V10-2e-errata) No 46
--
-- Document authors are encouraged to avoid "compatibility characters",
-- as defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of [Unicode3]).
-- The characters defined in the following ranges are also discouraged.
-- They are either control characters or permanently undefined Unicode characters:
-- see 'charPropXmlControlOrPermanentlyUndefined' for the ranges.

isXmlControlOrPermanentlyUndefined :: Char -> Bool
isXmlControlOrPermanentlyUndefined c = c `elemCS` charPropXmlControlOrPermanentlyUndefined

-- | The discouraged control characters and permanently undefined
-- Unicode characters (noncharacters) as a list of inclusive ranges.
--
-- The third range covers the complete block of 32 noncharacters
-- U+FDD0 .. U+FDEF, as defined by Unicode and listed in
-- XML 1.0 (Fifth Edition), section 2.2. Earlier revisions of this
-- table stopped at U+FDDF and therefore missed U+FDE0 .. U+FDEF.

charPropXmlControlOrPermanentlyUndefined :: CharSet
charPropXmlControlOrPermanentlyUndefined
    = [ ('\x7F', '\x84')
      , ('\x86', '\x9F')
      , ('\xFDD0', '\xFDEF')            -- all 32 BMP noncharacters, not just the first 16
      , ('\x1FFFE', '\x1FFFF')
      , ('\x2FFFE', '\x2FFFF')
      , ('\x3FFFE', '\x3FFFF')
      , ('\x4FFFE', '\x4FFFF')
      , ('\x5FFFE', '\x5FFFF')
      , ('\x6FFFE', '\x6FFFF')
      , ('\x7FFFE', '\x7FFFF')
      , ('\x8FFFE', '\x8FFFF')
      , ('\x9FFFE', '\x9FFFF')
      , ('\xAFFFE', '\xAFFFF')
      , ('\xBFFFE', '\xBFFFF')
      , ('\xCFFFE', '\xCFFFF')
      , ('\xDFFFE', '\xDFFFF')
      , ('\xEFFFE', '\xEFFFF')
      , ('\xFFFFE', '\xFFFFF')
      , ('\x10FFFE', '\x10FFFF')
      ]

-- ------------------------------------------------------------