1 /* 2 * charP.h - Character-related API private header 3 * 4 * Copyright (c) 2018-2020 Shiro Kawai <shiro@acm.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the authors nor the names of its contributors 18 * may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef GAUCHE_PRIV_CHARP_H 35 #define GAUCHE_PRIV_CHARP_H 36 37 /* In char.c; called by gauche.charconv */ 38 SCM_EXTERN void Scm__InstallCharconvHooks(ScmChar (*u2c)(int), 39 int (*c2u)(ScmChar)); 40 41 /* Predefined charsets 42 The enum value may change across versions, so we keep them private. 43 44 The charset object can be obtained by Scm_GetStandardCharSet(id) 45 where ID is either the enum defined below, or its negation for 46 complement charset. 47 */ 48 49 enum { 50 /* Unicode General Categories */ 51 SCM_CHAR_SET_L = 1, /* Letters */ 52 SCM_CHAR_SET_LC, /* Cased Letters (Lu + Ll + Lt) */ 53 SCM_CHAR_SET_Lu, /* Letter, uppercase */ 54 SCM_CHAR_SET_Ll, /* Letter, lowercase */ 55 SCM_CHAR_SET_Lt, /* Letter, titlecase */ 56 SCM_CHAR_SET_Lm, /* Letter, modifier */ 57 SCM_CHAR_SET_Lo, /* Letter, other */ 58 SCM_CHAR_SET_M, /* Marks */ 59 SCM_CHAR_SET_Mn, /* Mark, nonspacing */ 60 SCM_CHAR_SET_Mc, /* Mark, combining */ 61 SCM_CHAR_SET_Me, /* Mark, enclosing */ 62 SCM_CHAR_SET_N, /* Numbers */ 63 SCM_CHAR_SET_Nd, /* Number, decimal digit */ 64 SCM_CHAR_SET_Nl, /* Number, letter */ 65 SCM_CHAR_SET_No, /* Number, other */ 66 SCM_CHAR_SET_P, /* Punctuations */ 67 SCM_CHAR_SET_Pc, /* Punctuation, connector */ 68 SCM_CHAR_SET_Pd, /* Punctuation, dash */ 69 SCM_CHAR_SET_Ps, /* Punctuation, open */ 70 SCM_CHAR_SET_Pe, /* Punctuation, close */ 71 SCM_CHAR_SET_Pi, /* Punctuation, initial quote */ 72 SCM_CHAR_SET_Pf, /* Punctuation, final quote */ 73 SCM_CHAR_SET_Po, /* Punctuation, other */ 74 SCM_CHAR_SET_S, /* Symbols */ 75 SCM_CHAR_SET_Sm, /* Symbol, math */ 76 SCM_CHAR_SET_Sc, /* Symbol, currency */ 77 SCM_CHAR_SET_Sk, /* Symbol, modifier */ 78 SCM_CHAR_SET_So, /* Symbol, other */ 79 SCM_CHAR_SET_Z, /* Separators */ 80 SCM_CHAR_SET_Zs, /* Separator, space */ 81 SCM_CHAR_SET_Zl, /* Separator, line */ 82 SCM_CHAR_SET_Zp, /* Separator, paragraph */ 83 SCM_CHAR_SET_C, /* Others */ 84 SCM_CHAR_SET_Cc, /* Other, control */ 85 SCM_CHAR_SET_Cf, /* Other, format */ 86 SCM_CHAR_SET_Cs, /* Other, surrogate */ 87 SCM_CHAR_SET_Co, /* Other, private use */ 88 SCM_CHAR_SET_Cn, /* Other, not assigned */ 89 /* SRFI-14 sets */ 90 SCM_CHAR_SET_LOWER, /* Ll */ 91 SCM_CHAR_SET_ASCII_LOWER, 92 SCM_CHAR_SET_UPPER, /* Lu */ 93 SCM_CHAR_SET_ASCII_UPPER, 94 SCM_CHAR_SET_TITLE, /* Lt */ 95 SCM_CHAR_SET_LETTER, /* Lu|Ll|Lt|Lm|Lo */ 96 SCM_CHAR_SET_ASCII_LETTER, /* intersection(LETTER, ASCII) == A-Za-z */ 97 SCM_CHAR_SET_DIGIT, /* Nd */ 98 SCM_CHAR_SET_ASCII_DIGIT, /* [0-9] */ 99 SCM_CHAR_SET_LETTER_DIGIT, /* L*|Nd */ 100 SCM_CHAR_SET_ASCII_LETTER_DIGIT, 101 SCM_CHAR_SET_GRAPHIC, /* L*|N*|P*|S* */ 102 SCM_CHAR_SET_ASCII_GRAPHIC, 103 SCM_CHAR_SET_PRINTING, /* L*|N*|P*|S*|Z* */ 104 SCM_CHAR_SET_ASCII_PRINTING, 105 SCM_CHAR_SET_WHITESPACE, /* Z*|\u0009-\u000d */ 106 SCM_CHAR_SET_ASCII_WHITESPACE, /* \u0020, \u0009-\u000d */ 107 SCM_CHAR_SET_ISO_CONTROL, /* Cc */ 108 SCM_CHAR_SET_ASCII_CONTROL, 109 SCM_CHAR_SET_PUNCTUATION, /* P* */ 110 SCM_CHAR_SET_ASCII_PUNCTUATION, 111 SCM_CHAR_SET_SYMBOL, /* S* */ 112 SCM_CHAR_SET_ASCII_SYMBOL, 113 SCM_CHAR_SET_HEX_DIGIT, /* 0-9A-Fa-f */ 114 SCM_CHAR_SET_BLANK, /* Zs|\u0009 */ 115 SCM_CHAR_SET_ASCII_BLANK, /* \u0020,\u0009 */ 116 SCM_CHAR_SET_ASCII, /* \u0000-\u007f */ 117 SCM_CHAR_SET_EMPTY, 118 SCM_CHAR_SET_FULL, 119 /* internal use: word constituent chars */ 120 SCM_CHAR_SET_WORD, 121 SCM_CHAR_SET_ASCII_WORD, 122 SCM_CHAR_SET_NUM_PREDEFINED_SETS 123 }; 124 SCM_EXTERN ScmObj Scm_GetStandardCharSet(int id); 125 126 #endif /*GAUCHE_PRIV_CHARP_H*/ 127