1*a9fa9459Szrj /* <ctype.h> replacement macros.
2*a9fa9459Szrj 
3*a9fa9459Szrj    Copyright (C) 2000, 2001, 2002, 2003, 2004,
4*a9fa9459Szrj    2005 Free Software Foundation, Inc.
5*a9fa9459Szrj    Contributed by Zack Weinberg <zackw@stanford.edu>.
6*a9fa9459Szrj 
7*a9fa9459Szrj This file is part of the libiberty library.
8*a9fa9459Szrj Libiberty is free software; you can redistribute it and/or
9*a9fa9459Szrj modify it under the terms of the GNU Library General Public
10*a9fa9459Szrj License as published by the Free Software Foundation; either
11*a9fa9459Szrj version 2 of the License, or (at your option) any later version.
12*a9fa9459Szrj 
13*a9fa9459Szrj Libiberty is distributed in the hope that it will be useful,
14*a9fa9459Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
15*a9fa9459Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16*a9fa9459Szrj Library General Public License for more details.
17*a9fa9459Szrj 
18*a9fa9459Szrj You should have received a copy of the GNU Library General Public
19*a9fa9459Szrj License along with libiberty; see the file COPYING.LIB.  If
20*a9fa9459Szrj not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
21*a9fa9459Szrj Boston, MA 02110-1301, USA.  */
22*a9fa9459Szrj 
23*a9fa9459Szrj /*
24*a9fa9459Szrj 
25*a9fa9459Szrj @defvr Extension HOST_CHARSET
26*a9fa9459Szrj This macro indicates the basic character set and encoding used by the
27*a9fa9459Szrj host: more precisely, the encoding used for character constants in
28*a9fa9459Szrj preprocessor @samp{#if} statements (the C "execution character set").
29*a9fa9459Szrj It is defined by @file{safe-ctype.h}, and will be an integer constant
30*a9fa9459Szrj with one of the following values:
31*a9fa9459Szrj 
32*a9fa9459Szrj @ftable @code
33*a9fa9459Szrj @item HOST_CHARSET_UNKNOWN
34*a9fa9459Szrj The host character set is unknown - that is, not one of the next two
35*a9fa9459Szrj possibilities.
36*a9fa9459Szrj 
37*a9fa9459Szrj @item HOST_CHARSET_ASCII
38*a9fa9459Szrj The host character set is ASCII.
39*a9fa9459Szrj 
40*a9fa9459Szrj @item HOST_CHARSET_EBCDIC
41*a9fa9459Szrj The host character set is some variant of EBCDIC.  (Only one of the
42*a9fa9459Szrj nineteen EBCDIC varying characters is tested; exercise caution.)
43*a9fa9459Szrj @end ftable
44*a9fa9459Szrj @end defvr
45*a9fa9459Szrj 
46*a9fa9459Szrj @deffn  Extension ISALPHA  (@var{c})
47*a9fa9459Szrj @deffnx Extension ISALNUM  (@var{c})
48*a9fa9459Szrj @deffnx Extension ISBLANK  (@var{c})
49*a9fa9459Szrj @deffnx Extension ISCNTRL  (@var{c})
50*a9fa9459Szrj @deffnx Extension ISDIGIT  (@var{c})
51*a9fa9459Szrj @deffnx Extension ISGRAPH  (@var{c})
52*a9fa9459Szrj @deffnx Extension ISLOWER  (@var{c})
53*a9fa9459Szrj @deffnx Extension ISPRINT  (@var{c})
54*a9fa9459Szrj @deffnx Extension ISPUNCT  (@var{c})
55*a9fa9459Szrj @deffnx Extension ISSPACE  (@var{c})
56*a9fa9459Szrj @deffnx Extension ISUPPER  (@var{c})
57*a9fa9459Szrj @deffnx Extension ISXDIGIT (@var{c})
58*a9fa9459Szrj 
59*a9fa9459Szrj These twelve macros are defined by @file{safe-ctype.h}.  Each has the
60*a9fa9459Szrj same meaning as the corresponding macro (with name in lowercase)
61*a9fa9459Szrj defined by the standard header @file{ctype.h}.  For example,
62*a9fa9459Szrj @code{ISALPHA} returns true for alphabetic characters and false for
63*a9fa9459Szrj others.  However, there are two differences between these macros and
64*a9fa9459Szrj those provided by @file{ctype.h}:
65*a9fa9459Szrj 
66*a9fa9459Szrj @itemize @bullet
67*a9fa9459Szrj @item These macros are guaranteed to have well-defined behavior for all
68*a9fa9459Szrj values representable by @code{signed char} and @code{unsigned char}, and
69*a9fa9459Szrj for @code{EOF}.
70*a9fa9459Szrj 
71*a9fa9459Szrj @item These macros ignore the current locale; they are true for these
72*a9fa9459Szrj fixed sets of characters:
73*a9fa9459Szrj @multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada}
74*a9fa9459Szrj @item @code{ALPHA}  @tab @kbd{A-Za-z}
75*a9fa9459Szrj @item @code{ALNUM}  @tab @kbd{A-Za-z0-9}
76*a9fa9459Szrj @item @code{BLANK}  @tab @kbd{space tab}
77*a9fa9459Szrj @item @code{CNTRL}  @tab @code{!PRINT}
78*a9fa9459Szrj @item @code{DIGIT}  @tab @kbd{0-9}
79*a9fa9459Szrj @item @code{GRAPH}  @tab @code{ALNUM || PUNCT}
80*a9fa9459Szrj @item @code{LOWER}  @tab @kbd{a-z}
81*a9fa9459Szrj @item @code{PRINT}  @tab @code{GRAPH ||} @kbd{space}
82*a9fa9459Szrj @item @code{PUNCT}  @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?}
83*a9fa9459Szrj @item @code{SPACE}  @tab @kbd{space tab \n \r \f \v}
84*a9fa9459Szrj @item @code{UPPER}  @tab @kbd{A-Z}
85*a9fa9459Szrj @item @code{XDIGIT} @tab @kbd{0-9A-Fa-f}
86*a9fa9459Szrj @end multitable
87*a9fa9459Szrj 
Note that, if the host character set is ASCII or a superset thereof,
all these macros will return false for all values of @code{char} outside
the range of 7-bit ASCII.  In particular, both @code{ISPRINT} and
@code{ISCNTRL} return false for characters with numeric values from 128
to 255, so the @code{CNTRL} = @code{!PRINT} entry in the table above
holds only within the 7-bit ASCII range.
92*a9fa9459Szrj @end itemize
93*a9fa9459Szrj @end deffn
94*a9fa9459Szrj 
95*a9fa9459Szrj @deffn  Extension ISIDNUM         (@var{c})
96*a9fa9459Szrj @deffnx Extension ISIDST          (@var{c})
97*a9fa9459Szrj @deffnx Extension IS_VSPACE       (@var{c})
98*a9fa9459Szrj @deffnx Extension IS_NVSPACE      (@var{c})
99*a9fa9459Szrj @deffnx Extension IS_SPACE_OR_NUL (@var{c})
100*a9fa9459Szrj @deffnx Extension IS_ISOBASIC     (@var{c})
101*a9fa9459Szrj These six macros are defined by @file{safe-ctype.h} and provide
102*a9fa9459Szrj additional character classes which are useful when doing lexical
103*a9fa9459Szrj analysis of C or similar languages.  They are true for the following
104*a9fa9459Szrj sets of characters:
105*a9fa9459Szrj 
106*a9fa9459Szrj @multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada}
107*a9fa9459Szrj @item @code{IDNUM}        @tab @kbd{A-Za-z0-9_}
108*a9fa9459Szrj @item @code{IDST}         @tab @kbd{A-Za-z_}
109*a9fa9459Szrj @item @code{VSPACE}       @tab @kbd{\r \n}
110*a9fa9459Szrj @item @code{NVSPACE}      @tab @kbd{space tab \f \v \0}
111*a9fa9459Szrj @item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE}
112*a9fa9459Szrj @item @code{ISOBASIC}     @tab @code{VSPACE || NVSPACE || PRINT}
113*a9fa9459Szrj @end multitable
114*a9fa9459Szrj @end deffn
115*a9fa9459Szrj 
116*a9fa9459Szrj */
117*a9fa9459Szrj 
118*a9fa9459Szrj #include "ansidecl.h"
119*a9fa9459Szrj #include <safe-ctype.h>
120*a9fa9459Szrj #include <stdio.h>  /* for EOF */
121*a9fa9459Szrj 
/* Refuse to build unless EOF is -1, which the diagnostic below states
   is a requirement of <safe-ctype.h>.  */
#if EOF != -1
 #error "<safe-ctype.h> requires EOF == -1"
#endif
125*a9fa9459Szrj 
/* Shorthand: two-letter aliases for the _sch_is* classification bits,
   so that the mask macros built from them stay short.  */
#define bl _sch_isblank
#define cn _sch_iscntrl
#define di _sch_isdigit
#define is _sch_isidst
#define lo _sch_islower
#define nv _sch_isnvsp
#define pn _sch_ispunct
#define pr _sch_isprint
#define sp _sch_isspace
#define up _sch_isupper
#define vs _sch_isvsp
#define xd _sch_isxdigit
139*a9fa9459Szrj 
/* Masks.  Each macro is the full set of classification bits for one
   kind of character, built from the two-letter shorthand names.  The
   whole expansion is parenthesized so a mask is a single primary
   expression wherever it is used; the cast carries no `const' because
   a qualifier on a scalar cast has no effect.  */
#define L  ((unsigned short) (lo|is   |pr))	/* lower case letter */
#define XL ((unsigned short) (lo|is|xd|pr))	/* lowercase hex digit */
#define U  ((unsigned short) (up|is   |pr))	/* upper case letter */
#define XU ((unsigned short) (up|is|xd|pr))	/* uppercase hex digit */
#define D  ((unsigned short) (di   |xd|pr))	/* decimal digit */
#define P  ((unsigned short) (pn      |pr))	/* punctuation */
#define _  ((unsigned short) (pn|is   |pr))	/* underscore */

#define C  ((unsigned short) (         cn))	/* control character */
#define Z  ((unsigned short) (nv      |cn))	/* NUL */
#define M  ((unsigned short) (nv|sp   |cn))	/* cursor movement: \f \v */
#define V  ((unsigned short) (vs|sp   |cn))	/* vertical space: \r \n */
#define T  ((unsigned short) (nv|sp|bl|cn))	/* tab */
#define S  ((unsigned short) (nv|sp|bl|pr))	/* space */
155*a9fa9459Szrj 
156*a9fa9459Szrj /* Are we ASCII? */
157*a9fa9459Szrj #if HOST_CHARSET == HOST_CHARSET_ASCII
158*a9fa9459Szrj 
159*a9fa9459Szrj const unsigned short _sch_istable[256] =
160*a9fa9459Szrj {
161*a9fa9459Szrj   Z,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
162*a9fa9459Szrj   C,  T,  V,  M,   M,  V,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
163*a9fa9459Szrj   C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
164*a9fa9459Szrj   C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
165*a9fa9459Szrj   S,  P,  P,  P,   P,  P,  P,  P,   /* SP  !   "   #    $   %   &   '   */
166*a9fa9459Szrj   P,  P,  P,  P,   P,  P,  P,  P,   /* (   )   *   +    ,   -   .   /   */
167*a9fa9459Szrj   D,  D,  D,  D,   D,  D,  D,  D,   /* 0   1   2   3    4   5   6   7   */
168*a9fa9459Szrj   D,  D,  P,  P,   P,  P,  P,  P,   /* 8   9   :   ;    <   =   >   ?   */
169*a9fa9459Szrj   P, XU, XU, XU,  XU, XU, XU,  U,   /* @   A   B   C    D   E   F   G   */
170*a9fa9459Szrj   U,  U,  U,  U,   U,  U,  U,  U,   /* H   I   J   K    L   M   N   O   */
171*a9fa9459Szrj   U,  U,  U,  U,   U,  U,  U,  U,   /* P   Q   R   S    T   U   V   W   */
172*a9fa9459Szrj   U,  U,  U,  P,   P,  P,  P,  _,   /* X   Y   Z   [    \   ]   ^   _   */
173*a9fa9459Szrj   P, XL, XL, XL,  XL, XL, XL,  L,   /* `   a   b   c    d   e   f   g   */
174*a9fa9459Szrj   L,  L,  L,  L,   L,  L,  L,  L,   /* h   i   j   k    l   m   n   o   */
175*a9fa9459Szrj   L,  L,  L,  L,   L,  L,  L,  L,   /* p   q   r   s    t   u   v   w   */
176*a9fa9459Szrj   L,  L,  L,  P,   P,  P,  P,  C,   /* x   y   z   {    |   }   ~   DEL */
177*a9fa9459Szrj 
178*a9fa9459Szrj   /* high half of unsigned char is locale-specific, so all tests are
179*a9fa9459Szrj      false in "C" locale */
180*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
181*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
182*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
183*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
184*a9fa9459Szrj 
185*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
186*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
187*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
188*a9fa9459Szrj   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
189*a9fa9459Szrj };
190*a9fa9459Szrj 
/* Lower-case mapping for the ASCII character set, indexed by character
   code: 'A'-'Z' (65-90) map to 'a'-'z'; every other index, including
   the whole 128-255 range, maps to itself.  */
const unsigned char _sch_tolower[256] =
{
   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  64,

  /* 65-90: upper-case letters fold to lower case.  */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',

  91, 92, 93, 94, 95, 96,

  /* 97-122: lower-case letters map to themselves.  */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',

 123,124,125,126,127,

  /* 128-255: outside 7-bit ASCII, identity mapping.  */
 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,

 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};
219*a9fa9459Szrj 
/* Upper-case mapping for the ASCII character set, indexed by character
   code: 'a'-'z' (97-122) map to 'A'-'Z'; every other index, including
   the whole 128-255 range, maps to itself.  */
const unsigned char _sch_toupper[256] =
{
   0,  1,  2,  3,   4,  5,  6,  7,   8,  9, 10, 11,  12, 13, 14, 15,
  16, 17, 18, 19,  20, 21, 22, 23,  24, 25, 26, 27,  28, 29, 30, 31,
  32, 33, 34, 35,  36, 37, 38, 39,  40, 41, 42, 43,  44, 45, 46, 47,
  48, 49, 50, 51,  52, 53, 54, 55,  56, 57, 58, 59,  60, 61, 62, 63,
  64,

  /* 65-90: upper-case letters map to themselves.  */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

  91, 92, 93, 94, 95, 96,

  /* 97-122: lower-case letters fold to upper case.  */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

 123,124,125,126,127,

  /* 128-255: outside 7-bit ASCII, identity mapping.  */
 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,

 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};
248*a9fa9459Szrj 
249*a9fa9459Szrj #else
250*a9fa9459Szrj # if HOST_CHARSET == HOST_CHARSET_EBCDIC
251*a9fa9459Szrj   #error "FIXME: write tables for EBCDIC"
252*a9fa9459Szrj # else
253*a9fa9459Szrj   #error "Unrecognized host character set"
254*a9fa9459Szrj # endif
255*a9fa9459Szrj #endif
256