1 /** 2 * Yudit Unicode Editor Source File 3 * 4 * GNU Copyright (C) 1997-2006 Gaspar Sinai <gaspar@yudit.org> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License, version 2, 8 * dated June 1991. See file COPYYING for details. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 */ 19 #ifndef SCharClass_h 20 #define SCharClass_h 21 22 #include "stoolkit/STypes.h" 23 24 typedef enum 25 { 26 SD_CC_Xx=0, 27 SD_CC_Lu, // 01 Lu Letter, Uppercase 28 SD_CC_Ll, // 02 Ll Letter, Lowercase 29 SD_CC_Lt, // 03 Lt Letter, Titlecase 30 SD_CC_Mn, // 04 Mn Mark, Non-Spacing 31 SD_CC_Mc, // 05 Mc Mark, Spacing Combining 32 SD_CC_Me, // 06 Me Mark, Enclosing 33 SD_CC_Nd, // 07 Nd Number, Decimal Digit 34 SD_CC_Nl, // 08 Nl Number, Letter 35 SD_CC_No, // 09 No Number, Other 36 SD_CC_Zs, // 0A Zs Separator, Space 37 SD_CC_Zl, // 0B Zl Separator, Line 38 SD_CC_Zp, // 0C Zp Separator, Paragraph 39 SD_CC_Cc, // 0D Cc Other, Control 40 SD_CC_Cf, // 0E Cf Other, Format 41 SD_CC_Cs, // 0F Cs Other, Surrogate 42 SD_CC_Co, // 10 Co Other, Private Use 43 SD_CC_Cn, // 11 Cn Other, Not Assigned 44 SD_CC_Lm, // 12 Lm Letter, Modifier 45 SD_CC_Lo, // 13 Lo Letter, Other 46 SD_CC_Pc, // 14 Pc Punctuation, Connector 47 SD_CC_Pd, // 15 Pd Punctuation, Dash 48 SD_CC_Ps, // 16 Ps Punctuation, Open 49 SD_CC_Pe, // 17 Pe Punctuation, Close 50 SD_CC_Pi, // 18 Pi Punctuation, Initial quote 51 // (may behave like Ps or Pe depending on usage) 52 SD_CC_Pf, // 19 Pf Punctuation, Final quote 53 // (may behave like Ps or Pe depending on usage) 54 SD_CC_Po, // 1A Po Punctuation, Other 55 SD_CC_Sm, // 1B Sm Symbol, Math 56 SD_CC_Sc, // 1C Sc Symbol, Currency 57 SD_CC_Sk, // 1D Sk Symbol, Modifier 58 SD_CC_So, // 1E So Symbol, Other 59 SD_CC_MAX // No more 60 } SD_CharClass; 61 62 /* BiDi class */ 63 typedef enum 64 { 65 /* strong */ 66 SD_BC_XX=0, 67 SD_BC_L, // Left-to-Right 68 SD_BC_LRE, // Left-to-Right Embedding 69 SD_BC_LRO, // Left-to-Right Override 70 SD_BC_R, // Right-to-Left 71 SD_BC_AL, // Right-to-Left Arabic 72 SD_BC_RLE, // Right-to-Left Embedding 73 SD_BC_RLO, // Right-to-Left Override 74 75 /* weak */ 76 SD_BC_PDF, // Pop Directional Format 77 SD_BC_EN, // European Number 78 SD_BC_ES, // European Number Separator 79 SD_BC_ET, // European Number Terminator 80 SD_BC_AN, // Arabic Number 81 SD_BC_CS, // Common Number Separator 82 SD_BC_NSM, // Non-Spacing Mark 83 SD_BC_BN, // Boundary Neutral 84 85 /* neutral */ 86 SD_BC_B, // Paragraph Separator 87 SD_BC_S, // Segment Separator 88 SD_BC_WS, // Whitespace 89 SD_BC_ON, // Other Neutrals 90 SD_BC_MAX 91 92 } SD_BiDiClass; 93 94 #define SD_CD_ZWSP 0x200B /* Zero width space */ 95 #define SD_CD_ZWNJ 0x200C /* Zs */ 96 #define SD_CD_ZWJ 0x200D /* Cf */ 97 #define SD_CD_ARABIC_TATWEEL 0x0640 98 #define SD_CD_SYRIAC_LETTER_DALATH 0x0715 99 #define SD_CD_SYRIAC_LETTER_DOTLESS_DALATH 0x0716 100 #define SD_CD_SYRIAC_LETTER_RISH 0x072A 101 102 #define SD_CD_CTRL 0 103 #define SD_CD_LF ((SS_UCS4)'\n') 104 #define SD_CD_FF ((SS_UCS4)'\f') 105 #define SD_CD_CR ((SS_UCS4)'\r') 106 #define SD_CD_TAB ((SS_UCS4)'\t') 107 #define SD_CD_LS 0x2028 /* line separator */ 108 #define SD_CD_PS 0x2029 /* paragraph separator */ 109 110 #define SD_CD_LRO 0x202D /* left- to-right override */ 111 #define SD_CD_RLO 0x202E /* right-to-left override */ 112 #define SD_CD_LRE 0x202A /* left-to-right embedding */ 113 #define SD_CD_RLE 0x202B /* right-to-left embedding */ 114 #define SD_CD_PDF 0x202C /* pop directional format */ 115 116 #define SD_CD_LRM 0x200E /* LEFT-TO-RIGHT MARK */ 117 #define SD_CD_RLM 0x200F /* RIGHT-TO-LEFT MARK */ 118 119 /** 120 * Line breaking characters in utf-8 121 * NLF = one of SS_LB_DOS SS_LB_MAC SS_LB_UNIX SS_LB_NEL. 122 */ 123 #define SS_LB_DOS "\r\n" 124 #define SS_LB_MAC "\r" 125 #define SS_LB_UNIX "\n" 126 #define SS_LB_LS "\342\200\250" 127 #define SS_LB_PS "\342\200\251" /* PARAGRAPH BREAKING */ 128 #define SS_LB_FF "\f" 129 130 #define SS_LB_LRO "\342\200\255" 131 #define SS_LB_RLO "\342\200\256" 132 #define SS_LB_LRE "\342\200\252" 133 #define SS_LB_RLE "\342\200\253" 134 #define SS_LB_PDF "\342\200\254" 135 136 /** 137 * These line breaking chars are not supported here now. 138 */ 139 #define SS_LB_NEL "\702\102" 140 #define SS_LB_P_VT "\013" /* PARAGRAPH BREAKING */ 141 #define SS_LB_P_FF "\014" /* PARAGRAPH BREAKING */ 142 143 144 extern const char* ssCharClass[SD_CC_MAX]; 145 extern const char* ssBiDiClass[SD_BC_MAX]; 146 147 SD_CharClass getCharClass(SS_UCS4 in); 148 SD_BiDiClass getBiDiClass(SS_UCS4 in); 149 SS_UCS4 getMirroredCharacter (SS_UCS4 in); 150 151 typedef enum { 152 SS_PS_None=0, 153 SS_PS_LF, 154 SS_PS_CR, 155 SS_PS_CRLF, 156 SS_PS_PS 157 } SS_ParaSep; 158 159 160 typedef enum { 161 SS_EmbedNone=0, SS_EmbedLeft, SS_EmbedRight 162 } SS_Embedding; 163 164 typedef enum { 165 SS_DR_L, /* L-R character */ 166 SS_DR_R, /* R-L character */ 167 SS_DR_LE, /* inside L embedded */ 168 SS_DR_RE, /* inside R embedded */ 169 SS_DR_LO, /* inside L override */ 170 SS_DR_RO /* inside R override */ 171 } SS_DR_Dir; 172 173 #endif /*SCharClass_h*/ 174