1 /*
2  * charP.h - Character-related API private header
3  *
4  *   Copyright (c) 2018-2020  Shiro Kawai  <shiro@acm.org>
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *   1. Redistributions of source code must retain the above copyright
11  *      notice, this list of conditions and the following disclaimer.
12  *
13  *   2. Redistributions in binary form must reproduce the above copyright
14  *      notice, this list of conditions and the following disclaimer in the
15  *      documentation and/or other materials provided with the distribution.
16  *
17  *   3. Neither the name of the authors nor the names of its contributors
18  *      may be used to endorse or promote products derived from this
19  *      software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27  *   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28  *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29  *   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30  *   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  *   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef GAUCHE_PRIV_CHARP_H
35 #define GAUCHE_PRIV_CHARP_H
36 
37 /* In char.c; called by gauche.charconv */
38 SCM_EXTERN void Scm__InstallCharconvHooks(ScmChar (*u2c)(int),
39                                           int (*c2u)(ScmChar));
40 
/* Predefined charsets
   The enum values may change across versions, so we keep them private.

   A charset object can be obtained by Scm_GetStandardCharSet(id),
   where ID is either one of the enum values defined below, or the
   negation of one to get the complement charset.
 */
48 
enum {
    /*
     * Unicode General Categories.
     * A one-letter name covers a whole major class; a two-letter name
     * is a single subcategory (LC groups the cased letters).
     * Numbering starts at 1, so an id never equals its own negation.
     */
    SCM_CHAR_SET_L = 1,                 /* Letter: any */
    SCM_CHAR_SET_LC,                    /* Letter: cased, i.e. Lu | Ll | Lt */
    SCM_CHAR_SET_Lu,                    /* Letter: uppercase */
    SCM_CHAR_SET_Ll,                    /* Letter: lowercase */
    SCM_CHAR_SET_Lt,                    /* Letter: titlecase */
    SCM_CHAR_SET_Lm,                    /* Letter: modifier */
    SCM_CHAR_SET_Lo,                    /* Letter: other */
    SCM_CHAR_SET_M,                     /* Mark: any */
    SCM_CHAR_SET_Mn,                    /* Mark: nonspacing */
    SCM_CHAR_SET_Mc,                    /* Mark: spacing combining */
    SCM_CHAR_SET_Me,                    /* Mark: enclosing */
    SCM_CHAR_SET_N,                     /* Number: any */
    SCM_CHAR_SET_Nd,                    /* Number: decimal digit */
    SCM_CHAR_SET_Nl,                    /* Number: letter */
    SCM_CHAR_SET_No,                    /* Number: other */
    SCM_CHAR_SET_P,                     /* Punctuation: any */
    SCM_CHAR_SET_Pc,                    /* Punctuation: connector */
    SCM_CHAR_SET_Pd,                    /* Punctuation: dash */
    SCM_CHAR_SET_Ps,                    /* Punctuation: open */
    SCM_CHAR_SET_Pe,                    /* Punctuation: close */
    SCM_CHAR_SET_Pi,                    /* Punctuation: initial quote */
    SCM_CHAR_SET_Pf,                    /* Punctuation: final quote */
    SCM_CHAR_SET_Po,                    /* Punctuation: other */
    SCM_CHAR_SET_S,                     /* Symbol: any */
    SCM_CHAR_SET_Sm,                    /* Symbol: math */
    SCM_CHAR_SET_Sc,                    /* Symbol: currency */
    SCM_CHAR_SET_Sk,                    /* Symbol: modifier */
    SCM_CHAR_SET_So,                    /* Symbol: other */
    SCM_CHAR_SET_Z,                     /* Separator: any */
    SCM_CHAR_SET_Zs,                    /* Separator: space */
    SCM_CHAR_SET_Zl,                    /* Separator: line */
    SCM_CHAR_SET_Zp,                    /* Separator: paragraph */
    SCM_CHAR_SET_C,                     /* Other: any */
    SCM_CHAR_SET_Cc,                    /* Other: control */
    SCM_CHAR_SET_Cf,                    /* Other: format */
    SCM_CHAR_SET_Cs,                    /* Other: surrogate */
    SCM_CHAR_SET_Co,                    /* Other: private use */
    SCM_CHAR_SET_Cn,                    /* Other: not assigned */

    /*
     * SRFI-14 sets.
     * "X*" below stands for every category whose name starts with X.
     */
    SCM_CHAR_SET_LOWER,                 /* same as Ll */
    SCM_CHAR_SET_ASCII_LOWER,
    SCM_CHAR_SET_UPPER,                 /* same as Lu */
    SCM_CHAR_SET_ASCII_UPPER,
    SCM_CHAR_SET_TITLE,                 /* same as Lt */
    SCM_CHAR_SET_LETTER,                /* Lu | Ll | Lt | Lm | Lo */
    SCM_CHAR_SET_ASCII_LETTER,          /* LETTER intersected with ASCII: A-Za-z */
    SCM_CHAR_SET_DIGIT,                 /* same as Nd */
    SCM_CHAR_SET_ASCII_DIGIT,           /* 0-9 */
    SCM_CHAR_SET_LETTER_DIGIT,          /* L* | Nd */
    SCM_CHAR_SET_ASCII_LETTER_DIGIT,
    SCM_CHAR_SET_GRAPHIC,               /* L* | N* | P* | S* */
    SCM_CHAR_SET_ASCII_GRAPHIC,
    SCM_CHAR_SET_PRINTING,              /* L* | N* | P* | S* | Z* */
    SCM_CHAR_SET_ASCII_PRINTING,
    SCM_CHAR_SET_WHITESPACE,            /* Z* | U+0009..U+000D */
    SCM_CHAR_SET_ASCII_WHITESPACE,      /* U+0020 and U+0009..U+000D */
    SCM_CHAR_SET_ISO_CONTROL,           /* same as Cc */
    SCM_CHAR_SET_ASCII_CONTROL,
    SCM_CHAR_SET_PUNCTUATION,           /* P* */
    SCM_CHAR_SET_ASCII_PUNCTUATION,
    SCM_CHAR_SET_SYMBOL,                /* S* */
    SCM_CHAR_SET_ASCII_SYMBOL,
    SCM_CHAR_SET_HEX_DIGIT,             /* 0-9, A-F, a-f */
    SCM_CHAR_SET_BLANK,                 /* Zs | U+0009 */
    SCM_CHAR_SET_ASCII_BLANK,           /* U+0020 and U+0009 */
    SCM_CHAR_SET_ASCII,                 /* U+0000..U+007F */
    SCM_CHAR_SET_EMPTY,
    SCM_CHAR_SET_FULL,

    /* For internal use: word-constituent characters. */
    SCM_CHAR_SET_WORD,
    SCM_CHAR_SET_ASCII_WORD,

    /* Keep this last: it evaluates to one past the highest id above. */
    SCM_CHAR_SET_NUM_PREDEFINED_SETS
};
124 SCM_EXTERN ScmObj Scm_GetStandardCharSet(int id);
125 
126 #endif /*GAUCHE_PRIV_CHARP_H*/
127