1 // $Id: code.h,v 1.22 2004/02/17 13:29:11 ericb Exp $ -*- c++ -*-
2 // DO NOT MODIFY THIS FILE - it is generated using gencode.java.
3 //
4 // This software is subject to the terms of the IBM Jikes Compiler
5 // License Agreement available at the following URL:
6 // http://www.ibm.com/research/jikes.
7 // Copyright (C) 1999, 2004 IBM Corporation and others.  All Rights Reserved.
8 // You must accept the terms of that agreement to use this software.
9 //
10 
11 #include "platform.h"
12 
13 #ifdef HAVE_JIKES_NAMESPACE
14 namespace Jikes { // Open namespace Jikes block
15 #endif
16 
17 #ifndef code_INCLUDED
18 #define code_INCLUDED
19 
20 class Code
21 {
22     //
23     // To facilitate the scanning, the character set is partitioned into
24     // categories using the array CODE. These are described below together
25     // with some self-explanatory functions defined on CODE.
26     //
27     enum {
28         SHIFT = 9,
29         SPACE_CODE = 0,
30         BAD_CODE = 1,
31         DIGIT_CODE = 2,
32         ID_PART_CODE = 3,
33         LOWER_CODE = 4,
34         UPPER_CODE = 5,
35         ID_START_CODE = 6
36     };
37 
38     static char codes[13558];
39     static u2 blocks[2176];
40 
41 
42 public:
43 #ifdef JIKES_DEBUG
CodeCheck(u4 c)44     static inline void CodeCheck(u4 c)
45     {
46         assert((u2) (blocks[c >> SHIFT] + c) < 13558);
47     }
48 
CodeCheck(void)49     static inline bool CodeCheck(void)
50     {
51         for (u4 c = 0; c <= 1114111; c++)
52             CodeCheck(c);
53         return true;
54     }
55 #endif // JIKES_DEBUG
56 
57 //
58 // These methods test for Unicode surrogate pairs.
59 //
IsHighSurrogate(wchar_t c)60     static inline bool IsHighSurrogate(wchar_t c)
61     {
62         return c >= 0xd800 && c <= 0xdbff;
63     }
IsLowSurrogate(wchar_t c)64     static inline bool IsLowSurrogate(wchar_t c)
65     {
66         return c >= 0xdc00 && c <= 0xdfff;
67     }
68 
Codepoint(wchar_t hi,wchar_t lo)69     static inline u4 Codepoint(wchar_t hi, wchar_t lo)
70     {
71         assert(IsHighSurrogate(hi) && IsLowSurrogate(lo));
72         return (hi << 10) + lo + (0x10000 - (0xd800 << 10) - 0xdc00);
73     }
Codepoint(const wchar_t * p)74     static inline u4 Codepoint(const wchar_t* p)
75     {
76         u4 result = (u4) *p;
77         if (IsHighSurrogate(result) && IsLowSurrogate(p[1]))
78             result = Codepoint(result, p[1]);
79         return result;
80     }
Codelength(const wchar_t * p)81     static inline int Codelength(const wchar_t* p)
82     {
83         return (IsHighSurrogate(*p) && IsLowSurrogate(p[1])) ? 2 : 1;
84     }
85 
86 //
87 // These methods test for ASCII characteristics. Since it is strictly ASCII,
88 // there is no need to check for Unicode surrogate pairs.
89 //
IsNewline(wchar_t c)90     static inline bool IsNewline(wchar_t c)
91     {
92         return c == U_LF || c == U_CR;
93     }
IsSpaceButNotNewline(wchar_t c)94     static inline bool IsSpaceButNotNewline(wchar_t c)
95     {
96         return c == U_SP || c == U_FF || c == U_HT;
97     }
IsSpace(wchar_t c)98     static inline bool IsSpace(wchar_t c)
99     {
100         return c == U_SP || c == U_CR || c == U_LF ||
101             c == U_HT || c == U_FF;
102     }
103 
IsDecimalDigit(wchar_t c)104     static inline bool IsDecimalDigit(wchar_t c)
105     {
106         return c <= U_9 && c >= U_0;
107     }
IsOctalDigit(wchar_t c)108     static inline bool IsOctalDigit(wchar_t c)
109     {
110         return c <= U_7 && c >= U_0;
111     }
IsHexDigit(wchar_t c)112     static inline bool IsHexDigit(wchar_t c)
113     {
114         return c <= U_f && (c >= U_a ||
115                             (c >= U_A && c <= U_F) ||
116                             (c >= U_0 && c <= U_9));
117     }
Value(wchar_t c)118     static inline int Value(wchar_t c)
119     {
120         assert(IsHexDigit(c));
121         return c - (c <= U_9 ? U_0 : c < U_a ? U_A - 10 : U_a - 10);
122     }
IsSign(wchar_t c)123     static inline bool IsSign(wchar_t c)
124     {
125         return c == U_MINUS || c == U_PLUS;
126     }
127 
IsAsciiUpper(wchar_t c)128     static inline bool IsAsciiUpper(wchar_t c)
129     {
130         return c <= U_Z && c >= U_A;
131     }
IsAsciiLower(wchar_t c)132     static inline bool IsAsciiLower(wchar_t c)
133     {
134         return c <= U_z && c >= U_a;
135     }
136 
137 //
138 // The following methods recognize Unicode surrogate pairs, hence the need to
139 // pass a pointer. Use Codelength() to determine if one or two characters
140 // were used in the formation of a character.
141 //
IsWhitespace(const wchar_t * p)142     static inline bool IsWhitespace(const wchar_t* p)
143     {
144         u4 c = Codepoint(p);
145         return codes[(u2) (blocks[c >> SHIFT] + c)] == SPACE_CODE;
146     }
IsDigit(const wchar_t * p)147     static inline bool IsDigit(const wchar_t* p)
148     {
149         u4 c = Codepoint(p);
150         return codes[(u2) (blocks[c >> SHIFT] + c)] == DIGIT_CODE;
151     }
IsUpper(const wchar_t * p)152     static inline bool IsUpper(const wchar_t* p)
153     {
154         u4 c = Codepoint(p);
155         return codes[(u2) (blocks[c >> SHIFT] + c)] == UPPER_CODE;
156     }
IsLower(const wchar_t * p)157     static inline bool IsLower(const wchar_t* p)
158     {
159         u4 c = Codepoint(p);
160         return codes[(u2) (blocks[c >> SHIFT] + c)] == LOWER_CODE;
161     }
IsAlpha(const wchar_t * p)162     static inline bool IsAlpha(const wchar_t* p)
163     {
164         u4 c = Codepoint(p);
165         return codes[(u2) (blocks[c >> SHIFT] + c)] >= LOWER_CODE;
166     }
IsAlnum(const wchar_t * p)167     static inline bool IsAlnum(const wchar_t* p)
168     {
169         u4 c = Codepoint(p);
170         return codes[(u2) (blocks[c >> SHIFT] + c)] >= DIGIT_CODE;
171     }
172 };
173 
174 #endif // code_INCLUDED
175 
176 #ifdef HAVE_JIKES_NAMESPACE
177 } // Close namespace Jikes block
178 #endif
179 
180