1 /*
2  * Copyright (C) 2008 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "CharacterClass.h"
28 
29 #if ENABLE(WREC)
30 
31 using namespace WTF;
32 
33 namespace JSC { namespace WREC {
34 
newline()35 const CharacterClass& CharacterClass::newline() {
36     static const UChar asciiNewlines[2] = { '\n', '\r' };
37     static const UChar unicodeNewlines[2] = { 0x2028, 0x2029 };
38     static const CharacterClass charClass = {
39         asciiNewlines, 2,
40         0, 0,
41         unicodeNewlines, 2,
42         0, 0,
43     };
44 
45     return charClass;
46 }
47 
digits()48 const CharacterClass& CharacterClass::digits() {
49     static const CharacterRange asciiDigitsRange[1] = { { '0', '9' } };
50     static const CharacterClass charClass = {
51         0, 0,
52         asciiDigitsRange, 1,
53         0, 0,
54         0, 0,
55     };
56 
57     return charClass;
58 }
59 
spaces()60 const CharacterClass& CharacterClass::spaces() {
61     static const UChar asciiSpaces[1] = { ' ' };
62     static const CharacterRange asciiSpacesRange[1] = { { '\t', '\r' } };
63     static const UChar unicodeSpaces[8] = { 0x00a0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000 };
64     static const CharacterRange unicodeSpacesRange[1] = { { 0x2000, 0x200a } };
65     static const CharacterClass charClass = {
66         asciiSpaces, 1,
67         asciiSpacesRange, 1,
68         unicodeSpaces, 8,
69         unicodeSpacesRange, 1,
70     };
71 
72     return charClass;
73 }
74 
wordchar()75 const CharacterClass& CharacterClass::wordchar() {
76     static const UChar asciiWordchar[1] = { '_' };
77     static const CharacterRange asciiWordcharRange[3] = { { '0', '9' }, { 'A', 'Z' }, { 'a', 'z' } };
78     static const CharacterClass charClass = {
79         asciiWordchar, 1,
80         asciiWordcharRange, 3,
81         0, 0,
82         0, 0,
83     };
84 
85     return charClass;
86 }
87 
nondigits()88 const CharacterClass& CharacterClass::nondigits() {
89     static const CharacterRange asciiNondigitsRange[2] = { { 0, '0' - 1 }, { '9' + 1, 0x7f } };
90     static const CharacterRange unicodeNondigitsRange[1] = { { 0x0080, 0xffff } };
91     static const CharacterClass charClass = {
92         0, 0,
93         asciiNondigitsRange, 2,
94         0, 0,
95         unicodeNondigitsRange, 1,
96     };
97 
98     return charClass;
99 }
100 
nonspaces()101 const CharacterClass& CharacterClass::nonspaces() {
102     static const CharacterRange asciiNonspacesRange[3] = { { 0, '\t' - 1 }, { '\r' + 1, ' ' - 1 }, { ' ' + 1, 0x7f } };
103     static const CharacterRange unicodeNonspacesRange[9] = {
104         { 0x0080, 0x009f },
105         { 0x00a1, 0x167f },
106         { 0x1681, 0x180d },
107         { 0x180f, 0x1fff },
108         { 0x200b, 0x2027 },
109         { 0x202a, 0x202e },
110         { 0x2030, 0x205e },
111         { 0x2060, 0x2fff },
112         { 0x3001, 0xffff }
113     };
114     static const CharacterClass charClass = {
115         0, 0,
116         asciiNonspacesRange, 3,
117         0, 0,
118         unicodeNonspacesRange, 9,
119     };
120 
121     return charClass;
122 }
123 
nonwordchar()124 const CharacterClass& CharacterClass::nonwordchar() {
125     static const UChar asciiNonwordchar[1] = { '`' };
126     static const CharacterRange asciiNonwordcharRange[4] = { { 0, '0' - 1 }, { '9' + 1, 'A' - 1 }, { 'Z' + 1, '_' - 1 }, { 'z' + 1, 0x7f } };
127     static const CharacterRange unicodeNonwordcharRange[1] = { { 0x0080, 0xffff } };
128     static const CharacterClass charClass = {
129         asciiNonwordchar, 1,
130         asciiNonwordcharRange, 4,
131         0, 0,
132         unicodeNonwordcharRange, 1,
133     };
134 
135     return charClass;
136 }
137 
138 } } // namespace JSC::WREC
139 
140 #endif // ENABLE(WREC)
141