1 /* 2 * char_attr.h - Various character attributes 3 * 4 * Copyright (c) 2011-2020 Shiro Kawai <shiro@acm.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the authors nor the names of its contributors 18 * may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* This file is not a part of public Gauche C API. Should be included 35 only for Gauche internal functions. */ 36 #ifndef GAUCHE_CHAR_ATTR_H 37 #define GAUCHE_CHAR_ATTR_H 38 39 /* Unicode general categories */ 40 enum { 41 SCM_CHAR_CATEGORY_Lu, /* Uppercase_Letter */ 42 SCM_CHAR_CATEGORY_Ll, /* Lowercase_Letter */ 43 SCM_CHAR_CATEGORY_Lt, /* Titlecase_Letter */ 44 SCM_CHAR_CATEGORY_Lm, /* Modifier_Letter */ 45 SCM_CHAR_CATEGORY_Lo, /* Other_Letter */ 46 SCM_CHAR_CATEGORY_Mn, /* Nonspacing_Mark */ 47 SCM_CHAR_CATEGORY_Mc, /* Spacing_Mark */ 48 SCM_CHAR_CATEGORY_Me, /* Enclosing_Mark */ 49 SCM_CHAR_CATEGORY_Nd, /* Decimal_Number */ 50 SCM_CHAR_CATEGORY_Nl, /* Letter_Number */ 51 SCM_CHAR_CATEGORY_No, /* Other_Number */ 52 SCM_CHAR_CATEGORY_Pc, /* Connector_Punctuation */ 53 SCM_CHAR_CATEGORY_Pd, /* Dash_Punctuation */ 54 SCM_CHAR_CATEGORY_Ps, /* Open_Punctuation */ 55 SCM_CHAR_CATEGORY_Pe, /* Close_Punctuation */ 56 SCM_CHAR_CATEGORY_Pi, /* Initial_Punctuation */ 57 SCM_CHAR_CATEGORY_Pf, /* Final_Punctuation */ 58 SCM_CHAR_CATEGORY_Po, /* Other_Punctuation */ 59 SCM_CHAR_CATEGORY_Sm, /* Math_Symbol */ 60 SCM_CHAR_CATEGORY_Sc, /* Currency_Symbol */ 61 SCM_CHAR_CATEGORY_Sk, /* Modifier_Symbol */ 62 SCM_CHAR_CATEGORY_So, /* Other_Symbol */ 63 SCM_CHAR_CATEGORY_Zs, /* Space_Separator */ 64 SCM_CHAR_CATEGORY_Zl, /* Line_Separator */ 65 SCM_CHAR_CATEGORY_Zp, /* Paragraph_Separator */ 66 SCM_CHAR_CATEGORY_Cc, /* Control */ 67 SCM_CHAR_CATEGORY_Cf, /* Format */ 68 SCM_CHAR_CATEGORY_Cs, /* Surrogate */ 69 SCM_CHAR_CATEGORY_Co, /* Private_Use */ 70 SCM_CHAR_CATEGORY_Cn /* Unassigned */ 71 }; 72 73 #define SCM_CHAR_CATEGORY_MASK (0x1f) 74 75 /* Higher two bits of a category byte are used for these flags 76 00xxxxxx - non-alphabetic char 77 01xxxxxx - lowercase alphabetic char 78 10xxxxxx - uppercase alphabetic char 79 11xxxxxx - caseless or titlecase alphabetic char 80 */ 81 #define SCM_CHAR_ALPHA_MASK (0xc0u) 82 #define SCM_CHAR_ALPHABETIC_BITS (0xc0u) 83 #define SCM_CHAR_UPPERCASE_BITS (0x80u) 84 #define SCM_CHAR_LOWERCASE_BITS (0x40u) 85 86 /* Case mappings */ 87 88 /* In Unicode 6.0, the max length of full case mapping is 3, but we reserve 89 one more just in case for future ABI. */ 90 #define SCM_CHAR_FULL_CASE_MAPPING_SIZE 4 91 92 typedef struct { 93 int to_upper_simple; /* offset to add to produce uppercase */ 94 int to_lower_simple; /* offset to add to produce lowercase */ 95 int to_title_simple; /* offset to add to produce titlecase */ 96 ScmChar to_upper_full[SCM_CHAR_FULL_CASE_MAPPING_SIZE]; 97 ScmChar to_lower_full[SCM_CHAR_FULL_CASE_MAPPING_SIZE]; 98 ScmChar to_title_full[SCM_CHAR_FULL_CASE_MAPPING_SIZE]; 99 } ScmCharCaseMap; 100 101 /* Internal function to access case map info */ 102 SCM_EXTERN const ScmCharCaseMap *Scm__CharCaseMap(ScmChar ch, 103 ScmCharCaseMap *buf, 104 int full); 105 106 /* Casemap entry value */ 107 #define SCM_CHAR_NO_CASE_MAPPING 0xffff 108 #define SCM_CHAR_CASEMAP_TOLOWER(off) (((unsigned int)(off))&0x3fff) 109 #define SCM_CHAR_CASEMAP_TOUPPER(off) ((((unsigned int)(off))&0x3fff)|0x4000) 110 #define SCM_CHAR_CASEMAP_EXTENDED(off) ((((unsigned int)(off))&0x3fff)|0x8000) 111 112 113 #endif /*GAUCHE_CHAR_ATTR_H*/ 114