1*38fd1498Szrj /* <ctype.h> replacement macros. 2*38fd1498Szrj 3*38fd1498Szrj Copyright (C) 2000-2018 Free Software Foundation, Inc. 4*38fd1498Szrj Contributed by Zack Weinberg <zackw@stanford.edu>. 5*38fd1498Szrj 6*38fd1498Szrj This file is part of the libiberty library. 7*38fd1498Szrj Libiberty is free software; you can redistribute it and/or 8*38fd1498Szrj modify it under the terms of the GNU Library General Public 9*38fd1498Szrj License as published by the Free Software Foundation; either 10*38fd1498Szrj version 2 of the License, or (at your option) any later version. 11*38fd1498Szrj 12*38fd1498Szrj Libiberty is distributed in the hope that it will be useful, 13*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of 14*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15*38fd1498Szrj Library General Public License for more details. 16*38fd1498Szrj 17*38fd1498Szrj You should have received a copy of the GNU Library General Public 18*38fd1498Szrj License along with libiberty; see the file COPYING.LIB. If 19*38fd1498Szrj not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, 20*38fd1498Szrj Boston, MA 02110-1301, USA. */ 21*38fd1498Szrj 22*38fd1498Szrj /* 23*38fd1498Szrj 24*38fd1498Szrj @defvr Extension HOST_CHARSET 25*38fd1498Szrj This macro indicates the basic character set and encoding used by the 26*38fd1498Szrj host: more precisely, the encoding used for character constants in 27*38fd1498Szrj preprocessor @samp{#if} statements (the C "execution character set"). 28*38fd1498Szrj It is defined by @file{safe-ctype.h}, and will be an integer constant 29*38fd1498Szrj with one of the following values: 30*38fd1498Szrj 31*38fd1498Szrj @ftable @code 32*38fd1498Szrj @item HOST_CHARSET_UNKNOWN 33*38fd1498Szrj The host character set is unknown - that is, not one of the next two 34*38fd1498Szrj possibilities. 35*38fd1498Szrj 36*38fd1498Szrj @item HOST_CHARSET_ASCII 37*38fd1498Szrj The host character set is ASCII. 38*38fd1498Szrj 39*38fd1498Szrj @item HOST_CHARSET_EBCDIC 40*38fd1498Szrj The host character set is some variant of EBCDIC. (Only one of the 41*38fd1498Szrj nineteen EBCDIC varying characters is tested; exercise caution.) 42*38fd1498Szrj @end ftable 43*38fd1498Szrj @end defvr 44*38fd1498Szrj 45*38fd1498Szrj @deffn Extension ISALPHA (@var{c}) 46*38fd1498Szrj @deffnx Extension ISALNUM (@var{c}) 47*38fd1498Szrj @deffnx Extension ISBLANK (@var{c}) 48*38fd1498Szrj @deffnx Extension ISCNTRL (@var{c}) 49*38fd1498Szrj @deffnx Extension ISDIGIT (@var{c}) 50*38fd1498Szrj @deffnx Extension ISGRAPH (@var{c}) 51*38fd1498Szrj @deffnx Extension ISLOWER (@var{c}) 52*38fd1498Szrj @deffnx Extension ISPRINT (@var{c}) 53*38fd1498Szrj @deffnx Extension ISPUNCT (@var{c}) 54*38fd1498Szrj @deffnx Extension ISSPACE (@var{c}) 55*38fd1498Szrj @deffnx Extension ISUPPER (@var{c}) 56*38fd1498Szrj @deffnx Extension ISXDIGIT (@var{c}) 57*38fd1498Szrj 58*38fd1498Szrj These twelve macros are defined by @file{safe-ctype.h}. Each has the 59*38fd1498Szrj same meaning as the corresponding macro (with name in lowercase) 60*38fd1498Szrj defined by the standard header @file{ctype.h}. For example, 61*38fd1498Szrj @code{ISALPHA} returns true for alphabetic characters and false for 62*38fd1498Szrj others. However, there are two differences between these macros and 63*38fd1498Szrj those provided by @file{ctype.h}: 64*38fd1498Szrj 65*38fd1498Szrj @itemize @bullet 66*38fd1498Szrj @item These macros are guaranteed to have well-defined behavior for all 67*38fd1498Szrj values representable by @code{signed char} and @code{unsigned char}, and 68*38fd1498Szrj for @code{EOF}. 69*38fd1498Szrj 70*38fd1498Szrj @item These macros ignore the current locale; they are true for these 71*38fd1498Szrj fixed sets of characters: 72*38fd1498Szrj @multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada} 73*38fd1498Szrj @item @code{ALPHA} @tab @kbd{A-Za-z} 74*38fd1498Szrj @item @code{ALNUM} @tab @kbd{A-Za-z0-9} 75*38fd1498Szrj @item @code{BLANK} @tab @kbd{space tab} 76*38fd1498Szrj @item @code{CNTRL} @tab @code{!PRINT} 77*38fd1498Szrj @item @code{DIGIT} @tab @kbd{0-9} 78*38fd1498Szrj @item @code{GRAPH} @tab @code{ALNUM || PUNCT} 79*38fd1498Szrj @item @code{LOWER} @tab @kbd{a-z} 80*38fd1498Szrj @item @code{PRINT} @tab @code{GRAPH ||} @kbd{space} 81*38fd1498Szrj @item @code{PUNCT} @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?} 82*38fd1498Szrj @item @code{SPACE} @tab @kbd{space tab \n \r \f \v} 83*38fd1498Szrj @item @code{UPPER} @tab @kbd{A-Z} 84*38fd1498Szrj @item @code{XDIGIT} @tab @kbd{0-9A-Fa-f} 85*38fd1498Szrj @end multitable 86*38fd1498Szrj 87*38fd1498Szrj Note that, if the host character set is ASCII or a superset thereof, 88*38fd1498Szrj all these macros will return false for all values of @code{char} outside 89*38fd1498Szrj the range of 7-bit ASCII. In particular, both ISPRINT and ISCNTRL return 90*38fd1498Szrj false for characters with numeric values from 128 to 255. 91*38fd1498Szrj @end itemize 92*38fd1498Szrj @end deffn 93*38fd1498Szrj 94*38fd1498Szrj @deffn Extension ISIDNUM (@var{c}) 95*38fd1498Szrj @deffnx Extension ISIDST (@var{c}) 96*38fd1498Szrj @deffnx Extension IS_VSPACE (@var{c}) 97*38fd1498Szrj @deffnx Extension IS_NVSPACE (@var{c}) 98*38fd1498Szrj @deffnx Extension IS_SPACE_OR_NUL (@var{c}) 99*38fd1498Szrj @deffnx Extension IS_ISOBASIC (@var{c}) 100*38fd1498Szrj These six macros are defined by @file{safe-ctype.h} and provide 101*38fd1498Szrj additional character classes which are useful when doing lexical 102*38fd1498Szrj analysis of C or similar languages. They are true for the following 103*38fd1498Szrj sets of characters: 104*38fd1498Szrj 105*38fd1498Szrj @multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada} 106*38fd1498Szrj @item @code{IDNUM} @tab @kbd{A-Za-z0-9_} 107*38fd1498Szrj @item @code{IDST} @tab @kbd{A-Za-z_} 108*38fd1498Szrj @item @code{VSPACE} @tab @kbd{\r \n} 109*38fd1498Szrj @item @code{NVSPACE} @tab @kbd{space tab \f \v \0} 110*38fd1498Szrj @item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE} 111*38fd1498Szrj @item @code{ISOBASIC} @tab @code{VSPACE || NVSPACE || PRINT} 112*38fd1498Szrj @end multitable 113*38fd1498Szrj @end deffn 114*38fd1498Szrj 115*38fd1498Szrj */ 116*38fd1498Szrj 117*38fd1498Szrj #include "ansidecl.h" 118*38fd1498Szrj #include <safe-ctype.h> 119*38fd1498Szrj #include <stdio.h> /* for EOF */ 120*38fd1498Szrj 121*38fd1498Szrj #if EOF != -1 122*38fd1498Szrj #error "<safe-ctype.h> requires EOF == -1" 123*38fd1498Szrj #endif 124*38fd1498Szrj 125*38fd1498Szrj /* Shorthand */ 126*38fd1498Szrj #define bl _sch_isblank 127*38fd1498Szrj #define cn _sch_iscntrl 128*38fd1498Szrj #define di _sch_isdigit 129*38fd1498Szrj #define is _sch_isidst 130*38fd1498Szrj #define lo _sch_islower 131*38fd1498Szrj #define nv _sch_isnvsp 132*38fd1498Szrj #define pn _sch_ispunct 133*38fd1498Szrj #define pr _sch_isprint 134*38fd1498Szrj #define sp _sch_isspace 135*38fd1498Szrj #define up _sch_isupper 136*38fd1498Szrj #define vs _sch_isvsp 137*38fd1498Szrj #define xd _sch_isxdigit 138*38fd1498Szrj 139*38fd1498Szrj /* Masks. */ 140*38fd1498Szrj #define L (const unsigned short) (lo|is |pr) /* lower case letter */ 141*38fd1498Szrj #define XL (const unsigned short) (lo|is|xd|pr) /* lowercase hex digit */ 142*38fd1498Szrj #define U (const unsigned short) (up|is |pr) /* upper case letter */ 143*38fd1498Szrj #define XU (const unsigned short) (up|is|xd|pr) /* uppercase hex digit */ 144*38fd1498Szrj #define D (const unsigned short) (di |xd|pr) /* decimal digit */ 145*38fd1498Szrj #define P (const unsigned short) (pn |pr) /* punctuation */ 146*38fd1498Szrj #define _ (const unsigned short) (pn|is |pr) /* underscore */ 147*38fd1498Szrj 148*38fd1498Szrj #define C (const unsigned short) ( cn) /* control character */ 149*38fd1498Szrj #define Z (const unsigned short) (nv |cn) /* NUL */ 150*38fd1498Szrj #define M (const unsigned short) (nv|sp |cn) /* cursor movement: \f \v */ 151*38fd1498Szrj #define V (const unsigned short) (vs|sp |cn) /* vertical space: \r \n */ 152*38fd1498Szrj #define T (const unsigned short) (nv|sp|bl|cn) /* tab */ 153*38fd1498Szrj #define S (const unsigned short) (nv|sp|bl|pr) /* space */ 154*38fd1498Szrj 155*38fd1498Szrj /* Are we ASCII? */ 156*38fd1498Szrj #if HOST_CHARSET == HOST_CHARSET_ASCII 157*38fd1498Szrj 158*38fd1498Szrj const unsigned short _sch_istable[256] = 159*38fd1498Szrj { 160*38fd1498Szrj Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ 161*38fd1498Szrj C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ 162*38fd1498Szrj C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ 163*38fd1498Szrj C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ 164*38fd1498Szrj S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ 165*38fd1498Szrj P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ 166*38fd1498Szrj D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ 167*38fd1498Szrj D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? */ 168*38fd1498Szrj P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ 169*38fd1498Szrj U, U, U, U, U, U, U, U, /* H I J K L M N O */ 170*38fd1498Szrj U, U, U, U, U, U, U, U, /* P Q R S T U V W */ 171*38fd1498Szrj U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ 172*38fd1498Szrj P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ 173*38fd1498Szrj L, L, L, L, L, L, L, L, /* h i j k l m n o */ 174*38fd1498Szrj L, L, L, L, L, L, L, L, /* p q r s t u v w */ 175*38fd1498Szrj L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ 176*38fd1498Szrj 177*38fd1498Szrj /* high half of unsigned char is locale-specific, so all tests are 178*38fd1498Szrj false in "C" locale */ 179*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 181*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 183*38fd1498Szrj 184*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 185*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 186*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 187*38fd1498Szrj 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 188*38fd1498Szrj }; 189*38fd1498Szrj 190*38fd1498Szrj const unsigned char _sch_tolower[256] = 191*38fd1498Szrj { 192*38fd1498Szrj 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 193*38fd1498Szrj 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 194*38fd1498Szrj 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 195*38fd1498Szrj 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 196*38fd1498Szrj 64, 197*38fd1498Szrj 198*38fd1498Szrj 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 199*38fd1498Szrj 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 200*38fd1498Szrj 201*38fd1498Szrj 91, 92, 93, 94, 95, 96, 202*38fd1498Szrj 203*38fd1498Szrj 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 204*38fd1498Szrj 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 205*38fd1498Szrj 206*38fd1498Szrj 123,124,125,126,127, 207*38fd1498Szrj 208*38fd1498Szrj 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 209*38fd1498Szrj 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 210*38fd1498Szrj 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 211*38fd1498Szrj 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 212*38fd1498Szrj 213*38fd1498Szrj 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 214*38fd1498Szrj 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 215*38fd1498Szrj 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 216*38fd1498Szrj 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 217*38fd1498Szrj }; 218*38fd1498Szrj 219*38fd1498Szrj const unsigned char _sch_toupper[256] = 220*38fd1498Szrj { 221*38fd1498Szrj 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 222*38fd1498Szrj 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 223*38fd1498Szrj 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 224*38fd1498Szrj 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 225*38fd1498Szrj 64, 226*38fd1498Szrj 227*38fd1498Szrj 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 228*38fd1498Szrj 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 229*38fd1498Szrj 230*38fd1498Szrj 91, 92, 93, 94, 95, 96, 231*38fd1498Szrj 232*38fd1498Szrj 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 233*38fd1498Szrj 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 234*38fd1498Szrj 235*38fd1498Szrj 123,124,125,126,127, 236*38fd1498Szrj 237*38fd1498Szrj 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, 238*38fd1498Szrj 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, 239*38fd1498Szrj 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, 240*38fd1498Szrj 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, 241*38fd1498Szrj 242*38fd1498Szrj 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, 243*38fd1498Szrj 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, 244*38fd1498Szrj 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, 245*38fd1498Szrj 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, 246*38fd1498Szrj }; 247*38fd1498Szrj 248*38fd1498Szrj #else 249*38fd1498Szrj # if HOST_CHARSET == HOST_CHARSET_EBCDIC 250*38fd1498Szrj #error "FIXME: write tables for EBCDIC" 251*38fd1498Szrj # else 252*38fd1498Szrj #error "Unrecognized host character set" 253*38fd1498Szrj # endif 254*38fd1498Szrj #endif 255