1 /*
2  * Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifndef WTF_ASCIICType_h
30 #define WTF_ASCIICType_h
31 
32 #include <wtf/Assertions.h>
33 #include <wtf/Platform.h>
34 
// The behavior of many of the functions in the <ctype.h> header depends on the
// current locale. But in the WebKit project, all uses of those functions are in
// code processing something that is not locale-specific. These equivalents for
// some of the <ctype.h> functions are named more explicitly, do not depend on
// the C library locale, and can be optimized as needed.
40 
// The predicates return false, and the case conversions leave the character
// unchanged, if passed a character that is outside the range 0-7F. So they can
// be used on Unicode strings or characters if the intent is to do processing
// only if the character is ASCII. (toASCIIHexValue is the exception: it requires
// a hex digit and asserts that it was given one.)
44 
namespace WTF {

    // Every function below is provided as the same overload set: char,
    // unsigned short, wchar_t and int. The wchar_t overload is compiled out for
    // MSVC builds that do not define _NATIVE_WCHAR_T_DEFINED; MSVC documents that
    // wchar_t is then a typedef for unsigned short, so the overload would
    // duplicate the unsigned short one.

    // Returns true if c is in the range 0-7F, i.e. no bit above the low seven is set.
    inline bool isASCII(char c) { return !(c & ~0x7F); }
    inline bool isASCII(unsigned short c) { return !(c & ~0x7F); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCII(wchar_t c) { return !(c & ~0x7F); }
#endif
    inline bool isASCII(int c) { return !(c & ~0x7F); }

    // Returns true for 'A'-'Z' and 'a'-'z'. OR-ing in 0x20 folds the upper case
    // letters onto the lower case ones, so a single range check covers both.
    inline bool isASCIIAlpha(char c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
    inline bool isASCIIAlpha(unsigned short c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIAlpha(wchar_t c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
#endif
    inline bool isASCIIAlpha(int c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }

    // Returns true for '0'-'9'. The two comparisons are combined with bitwise &,
    // so both are always evaluated (presumably to avoid a branch); the result is
    // the same as with &&.
    inline bool isASCIIDigit(char c) { return (c >= '0') & (c <= '9'); }
    inline bool isASCIIDigit(unsigned short c) { return (c >= '0') & (c <= '9'); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIDigit(wchar_t c) { return (c >= '0') & (c <= '9'); }
#endif
    inline bool isASCIIDigit(int c) { return (c >= '0') & (c <= '9'); }

    // Returns true for '0'-'9', 'A'-'Z' and 'a'-'z'.
    inline bool isASCIIAlphanumeric(char c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
    inline bool isASCIIAlphanumeric(unsigned short c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIAlphanumeric(wchar_t c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
#endif
    inline bool isASCIIAlphanumeric(int c) { return isASCIIDigit(c) || isASCIIAlpha(c); }

    // Returns true for '0'-'9', 'A'-'F' and 'a'-'f'. The letter test uses the
    // same 0x20 case fold as isASCIIAlpha.
    inline bool isASCIIHexDigit(char c) { return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
    inline bool isASCIIHexDigit(unsigned short c) { return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIHexDigit(wchar_t c) { return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
#endif
    inline bool isASCIIHexDigit(int c) { return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }

    // Returns true for '0'-'7'. Uses bitwise & like isASCIIDigit.
    inline bool isASCIIOctalDigit(char c) { return (c >= '0') & (c <= '7'); }
    inline bool isASCIIOctalDigit(unsigned short c) { return (c >= '0') & (c <= '7'); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIOctalDigit(wchar_t c) { return (c >= '0') & (c <= '7'); }
#endif
    inline bool isASCIIOctalDigit(int c) { return (c >= '0') & (c <= '7'); }

    // Returns true for 'a'-'z'.
    inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }
    inline bool isASCIILower(unsigned short c) { return c >= 'a' && c <= 'z'; }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIILower(wchar_t c) { return c >= 'a' && c <= 'z'; }
#endif
    inline bool isASCIILower(int c) { return c >= 'a' && c <= 'z'; }

    // Returns true for 'A'-'Z'.
    inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; }
    inline bool isASCIIUpper(unsigned short c) { return c >= 'A' && c <= 'Z'; }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIUpper(wchar_t c) { return c >= 'A' && c <= 'Z'; }
#endif
    inline bool isASCIIUpper(int c) { return c >= 'A' && c <= 'Z'; }

    /*
        Statistics from a run of Apple's page load test for callers of isASCIISpace:

            character          count
            ---------          -----
            non-spaces         689383
        20  space              294720
        0A  \n                 89059
        09  \t                 28320
        0D  \r                 0
        0C  \f                 0
        0B  \v                 0
    */
    // Returns true for space and for 0x9-0xD (\t, \n, \v, \f, \r). The tests run
    // in the order of the statistics above: "c <= ' '" rejects every character
    // above space first, then space itself is checked, then the 0x9-0xD range.
    inline bool isASCIISpace(char c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
    inline bool isASCIISpace(unsigned short c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIISpace(wchar_t c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
#endif
    inline bool isASCIISpace(int c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }

    // Maps 'A'-'Z' to 'a'-'z' and returns every other value unchanged.
    // isASCIIUpper(c) << 5 is 0x20 for an upper case letter and 0 otherwise, so
    // the OR sets the case bit only when needed. The expression is computed as
    // int; the cast back to the parameter type is explicit, as in toASCIIUpper.
    inline char toASCIILower(char c) { return static_cast<char>(c | (isASCIIUpper(c) << 5)); }
    inline unsigned short toASCIILower(unsigned short c) { return static_cast<unsigned short>(c | (isASCIIUpper(c) << 5)); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline wchar_t toASCIILower(wchar_t c) { return static_cast<wchar_t>(c | (isASCIIUpper(c) << 5)); }
#endif
    inline int toASCIILower(int c) { return static_cast<int>(c | (isASCIIUpper(c) << 5)); }

    // Maps 'a'-'z' to 'A'-'Z' and returns every other value unchanged.
    // ~(isASCIILower(c) << 5) clears bit 0x20 for a lower case letter and is an
    // all-ones mask (no change) otherwise.
    inline char toASCIIUpper(char c) { return static_cast<char>(c & ~(isASCIILower(c) << 5)); }
    inline unsigned short toASCIIUpper(unsigned short c) { return static_cast<unsigned short>(c & ~(isASCIILower(c) << 5)); }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline wchar_t toASCIIUpper(wchar_t c) { return static_cast<wchar_t>(c & ~(isASCIILower(c) << 5)); }
#endif
    inline int toASCIIUpper(int c) { return static_cast<int>(c & ~(isASCIILower(c) << 5)); }

    // Returns the numeric value (0-15) of a hex digit. The argument must satisfy
    // isASCIIHexDigit; this is checked with ASSERT. For letters, c - 'A' + 10 is
    // 10-15 for 'A'-'F' and 42-47 for 'a'-'f'; masking with 0xF maps both ranges
    // to 10-15.
    inline int toASCIIHexValue(char c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    inline int toASCIIHexValue(unsigned short c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline int toASCIIHexValue(wchar_t c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
#endif
    inline int toASCIIHexValue(int c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }

    // Returns true for the characters from space (0x20) through '~' (0x7E).
    inline bool isASCIIPrintable(char c) { return c >= ' ' && c <= '~'; }
    inline bool isASCIIPrintable(unsigned short c) { return c >= ' ' && c <= '~'; }
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIPrintable(wchar_t c) { return c >= ' ' && c <= '~'; }
#endif
    inline bool isASCIIPrintable(int c) { return c >= ' ' && c <= '~'; }

} // namespace WTF
151 
152 using WTF::isASCII;
153 using WTF::isASCIIAlpha;
154 using WTF::isASCIIAlphanumeric;
155 using WTF::isASCIIDigit;
156 using WTF::isASCIIHexDigit;
157 using WTF::isASCIILower;
158 using WTF::isASCIIOctalDigit;
159 using WTF::isASCIIPrintable;
160 using WTF::isASCIISpace;
161 using WTF::isASCIIUpper;
162 using WTF::toASCIIHexValue;
163 using WTF::toASCIILower;
164 using WTF::toASCIIUpper;
165 
166 #endif
167