1 // $Id: code.h,v 1.22 2004/02/17 13:29:11 ericb Exp $ -*- c++ -*- 2 // DO NOT MODIFY THIS FILE - it is generated using gencode.java. 3 // 4 // This software is subject to the terms of the IBM Jikes Compiler 5 // License Agreement available at the following URL: 6 // http://www.ibm.com/research/jikes. 7 // Copyright (C) 1999, 2004 IBM Corporation and others. All Rights Reserved. 8 // You must accept the terms of that agreement to use this software. 9 // 10 11 #include "platform.h" 12 13 #ifdef HAVE_JIKES_NAMESPACE 14 namespace Jikes { // Open namespace Jikes block 15 #endif 16 17 #ifndef code_INCLUDED 18 #define code_INCLUDED 19 20 class Code 21 { 22 // 23 // To facilitate the scanning, the character set is partitioned into 24 // categories using the array CODE. These are described below together 25 // with some self-explanatory functions defined on CODE. 26 // 27 enum { 28 SHIFT = 9, 29 SPACE_CODE = 0, 30 BAD_CODE = 1, 31 DIGIT_CODE = 2, 32 ID_PART_CODE = 3, 33 LOWER_CODE = 4, 34 UPPER_CODE = 5, 35 ID_START_CODE = 6 36 }; 37 38 static char codes[13558]; 39 static u2 blocks[2176]; 40 41 42 public: 43 #ifdef JIKES_DEBUG CodeCheck(u4 c)44 static inline void CodeCheck(u4 c) 45 { 46 assert((u2) (blocks[c >> SHIFT] + c) < 13558); 47 } 48 CodeCheck(void)49 static inline bool CodeCheck(void) 50 { 51 for (u4 c = 0; c <= 1114111; c++) 52 CodeCheck(c); 53 return true; 54 } 55 #endif // JIKES_DEBUG 56 57 // 58 // These methods test for Unicode surrogate pairs. 59 // IsHighSurrogate(wchar_t c)60 static inline bool IsHighSurrogate(wchar_t c) 61 { 62 return c >= 0xd800 && c <= 0xdbff; 63 } IsLowSurrogate(wchar_t c)64 static inline bool IsLowSurrogate(wchar_t c) 65 { 66 return c >= 0xdc00 && c <= 0xdfff; 67 } 68 Codepoint(wchar_t hi,wchar_t lo)69 static inline u4 Codepoint(wchar_t hi, wchar_t lo) 70 { 71 assert(IsHighSurrogate(hi) && IsLowSurrogate(lo)); 72 return (hi << 10) + lo + (0x10000 - (0xd800 << 10) - 0xdc00); 73 } Codepoint(const wchar_t * p)74 static inline u4 Codepoint(const wchar_t* p) 75 { 76 u4 result = (u4) *p; 77 if (IsHighSurrogate(result) && IsLowSurrogate(p[1])) 78 result = Codepoint(result, p[1]); 79 return result; 80 } Codelength(const wchar_t * p)81 static inline int Codelength(const wchar_t* p) 82 { 83 return (IsHighSurrogate(*p) && IsLowSurrogate(p[1])) ? 2 : 1; 84 } 85 86 // 87 // These methods test for ASCII characteristics. Since it is strictly ASCII, 88 // there is no need to check for Unicode surrogate pairs. 89 // IsNewline(wchar_t c)90 static inline bool IsNewline(wchar_t c) 91 { 92 return c == U_LF || c == U_CR; 93 } IsSpaceButNotNewline(wchar_t c)94 static inline bool IsSpaceButNotNewline(wchar_t c) 95 { 96 return c == U_SP || c == U_FF || c == U_HT; 97 } IsSpace(wchar_t c)98 static inline bool IsSpace(wchar_t c) 99 { 100 return c == U_SP || c == U_CR || c == U_LF || 101 c == U_HT || c == U_FF; 102 } 103 IsDecimalDigit(wchar_t c)104 static inline bool IsDecimalDigit(wchar_t c) 105 { 106 return c <= U_9 && c >= U_0; 107 } IsOctalDigit(wchar_t c)108 static inline bool IsOctalDigit(wchar_t c) 109 { 110 return c <= U_7 && c >= U_0; 111 } IsHexDigit(wchar_t c)112 static inline bool IsHexDigit(wchar_t c) 113 { 114 return c <= U_f && (c >= U_a || 115 (c >= U_A && c <= U_F) || 116 (c >= U_0 && c <= U_9)); 117 } Value(wchar_t c)118 static inline int Value(wchar_t c) 119 { 120 assert(IsHexDigit(c)); 121 return c - (c <= U_9 ? U_0 : c < U_a ? U_A - 10 : U_a - 10); 122 } IsSign(wchar_t c)123 static inline bool IsSign(wchar_t c) 124 { 125 return c == U_MINUS || c == U_PLUS; 126 } 127 IsAsciiUpper(wchar_t c)128 static inline bool IsAsciiUpper(wchar_t c) 129 { 130 return c <= U_Z && c >= U_A; 131 } IsAsciiLower(wchar_t c)132 static inline bool IsAsciiLower(wchar_t c) 133 { 134 return c <= U_z && c >= U_a; 135 } 136 137 // 138 // The following methods recognize Unicode surrogate pairs, hence the need to 139 // pass a pointer. Use Codelength() to determine if one or two characters 140 // were used in the formation of a character. 141 // IsWhitespace(const wchar_t * p)142 static inline bool IsWhitespace(const wchar_t* p) 143 { 144 u4 c = Codepoint(p); 145 return codes[(u2) (blocks[c >> SHIFT] + c)] == SPACE_CODE; 146 } IsDigit(const wchar_t * p)147 static inline bool IsDigit(const wchar_t* p) 148 { 149 u4 c = Codepoint(p); 150 return codes[(u2) (blocks[c >> SHIFT] + c)] == DIGIT_CODE; 151 } IsUpper(const wchar_t * p)152 static inline bool IsUpper(const wchar_t* p) 153 { 154 u4 c = Codepoint(p); 155 return codes[(u2) (blocks[c >> SHIFT] + c)] == UPPER_CODE; 156 } IsLower(const wchar_t * p)157 static inline bool IsLower(const wchar_t* p) 158 { 159 u4 c = Codepoint(p); 160 return codes[(u2) (blocks[c >> SHIFT] + c)] == LOWER_CODE; 161 } IsAlpha(const wchar_t * p)162 static inline bool IsAlpha(const wchar_t* p) 163 { 164 u4 c = Codepoint(p); 165 return codes[(u2) (blocks[c >> SHIFT] + c)] >= LOWER_CODE; 166 } IsAlnum(const wchar_t * p)167 static inline bool IsAlnum(const wchar_t* p) 168 { 169 u4 c = Codepoint(p); 170 return codes[(u2) (blocks[c >> SHIFT] + c)] >= DIGIT_CODE; 171 } 172 }; 173 174 #endif // code_INCLUDED 175 176 #ifdef HAVE_JIKES_NAMESPACE 177 } // Close namespace Jikes block 178 #endif 179 180