1 /**
2  *  Yudit Unicode Editor Source File
3  *
4  *  GNU Copyright (C) 1997-2006  Gaspar Sinai <gaspar@yudit.org>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License, version 2,
 *  dated June 1991. See file COPYING for details.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 #ifndef SCharClass_h
20 #define SCharClass_h
21 
22 #include "stoolkit/STypes.h"
23 
/**
 * Character class, named after the two-letter Unicode general
 * category abbreviations.
 *
 * The values are contiguous starting at zero; SD_CC_MAX is one past
 * the last class and therefore equals the number of enumerators.
 */
typedef enum
{
  SD_CC_Xx  = 0x00, /* Xx  zero value; not a Unicode category name */
  SD_CC_Lu  = 0x01, /* Lu  Letter, Uppercase */
  SD_CC_Ll  = 0x02, /* Ll  Letter, Lowercase */
  SD_CC_Lt  = 0x03, /* Lt  Letter, Titlecase */
  SD_CC_Mn  = 0x04, /* Mn  Mark, Non-Spacing */
  SD_CC_Mc  = 0x05, /* Mc  Mark, Spacing Combining */
  SD_CC_Me  = 0x06, /* Me  Mark, Enclosing */
  SD_CC_Nd  = 0x07, /* Nd  Number, Decimal Digit */
  SD_CC_Nl  = 0x08, /* Nl  Number, Letter */
  SD_CC_No  = 0x09, /* No  Number, Other */
  SD_CC_Zs  = 0x0A, /* Zs  Separator, Space */
  SD_CC_Zl  = 0x0B, /* Zl  Separator, Line */
  SD_CC_Zp  = 0x0C, /* Zp  Separator, Paragraph */
  SD_CC_Cc  = 0x0D, /* Cc  Other, Control */
  SD_CC_Cf  = 0x0E, /* Cf  Other, Format */
  SD_CC_Cs  = 0x0F, /* Cs  Other, Surrogate */
  SD_CC_Co  = 0x10, /* Co  Other, Private Use */
  SD_CC_Cn  = 0x11, /* Cn  Other, Not Assigned */
  SD_CC_Lm  = 0x12, /* Lm  Letter, Modifier */
  SD_CC_Lo  = 0x13, /* Lo  Letter, Other */
  SD_CC_Pc  = 0x14, /* Pc  Punctuation, Connector */
  SD_CC_Pd  = 0x15, /* Pd  Punctuation, Dash */
  SD_CC_Ps  = 0x16, /* Ps  Punctuation, Open */
  SD_CC_Pe  = 0x17, /* Pe  Punctuation, Close */
  SD_CC_Pi  = 0x18, /* Pi  Punctuation, Initial quote
                     *     (may behave like Ps or Pe depending on usage) */
  SD_CC_Pf  = 0x19, /* Pf  Punctuation, Final quote
                     *     (may behave like Ps or Pe depending on usage) */
  SD_CC_Po  = 0x1A, /* Po  Punctuation, Other */
  SD_CC_Sm  = 0x1B, /* Sm  Symbol, Math */
  SD_CC_Sc  = 0x1C, /* Sc  Symbol, Currency */
  SD_CC_Sk  = 0x1D, /* Sk  Symbol, Modifier */
  SD_CC_So  = 0x1E, /* So  Symbol, Other */
  SD_CC_MAX = 0x1F  /* number of classes; not a class itself */
} SD_CharClass;
61 
/**
 * BiDi (bidirectional) class of a character.
 *
 * The enumerators are grouped as strong, weak and neutral below.
 * Values are contiguous starting at zero; SD_BC_MAX is one past the
 * last class and therefore equals the number of enumerators.
 */
typedef enum
{
  /* strong */
  SD_BC_XX  = 0,  /* zero value; carries no class description */
  SD_BC_L   = 1,  /* Left-to-Right */
  SD_BC_LRE = 2,  /* Left-to-Right Embedding */
  SD_BC_LRO = 3,  /* Left-to-Right Override */
  SD_BC_R   = 4,  /* Right-to-Left */
  SD_BC_AL  = 5,  /* Right-to-Left Arabic */
  SD_BC_RLE = 6,  /* Right-to-Left Embedding */
  SD_BC_RLO = 7,  /* Right-to-Left Override */

  /* weak */
  SD_BC_PDF = 8,  /* Pop Directional Format */
  SD_BC_EN  = 9,  /* European Number */
  SD_BC_ES  = 10, /* European Number Separator */
  SD_BC_ET  = 11, /* European Number Terminator */
  SD_BC_AN  = 12, /* Arabic Number */
  SD_BC_CS  = 13, /* Common Number Separator */
  SD_BC_NSM = 14, /* Non-Spacing Mark */
  SD_BC_BN  = 15, /* Boundary Neutral */

  /* neutral */
  SD_BC_B   = 16, /* Paragraph Separator */
  SD_BC_S   = 17, /* Segment Separator */
  SD_BC_WS  = 18, /* Whitespace */
  SD_BC_ON  = 19, /* Other Neutrals */

  SD_BC_MAX = 20  /* number of classes; not a class itself */
} SD_BiDiClass;
93 
/*
 * Code points of individual characters (SD_CD_*).
 */

/* Zero-width characters and letters singled out by name. */
#define SD_CD_ZWSP                         0x200B /* ZERO WIDTH SPACE */
#define SD_CD_ZWNJ                         0x200C /* ZERO WIDTH NON-JOINER (Cf) */
#define SD_CD_ZWJ                          0x200D /* ZERO WIDTH JOINER (Cf) */
#define SD_CD_ARABIC_TATWEEL               0x0640
#define SD_CD_SYRIAC_LETTER_DALATH         0x0715
#define SD_CD_SYRIAC_LETTER_DOTLESS_DALATH 0x0716
#define SD_CD_SYRIAC_LETTER_RISH           0x072A

/* Control characters and line/paragraph separators. */
#define SD_CD_CTRL                         0
#define SD_CD_LF                           ((SS_UCS4)'\n')
#define SD_CD_FF                           ((SS_UCS4)'\f')
#define SD_CD_CR                           ((SS_UCS4)'\r')
#define SD_CD_TAB                          ((SS_UCS4)'\t')
#define SD_CD_LS                           0x2028 /* LINE SEPARATOR */
#define SD_CD_PS                           0x2029 /* PARAGRAPH SEPARATOR */

/* Explicit directional embedding and override controls. */
#define SD_CD_LRO                          0x202D /* LEFT-TO-RIGHT OVERRIDE */
#define SD_CD_RLO                          0x202E /* RIGHT-TO-LEFT OVERRIDE */
#define SD_CD_LRE                          0x202A /* LEFT-TO-RIGHT EMBEDDING */
#define SD_CD_RLE                          0x202B /* RIGHT-TO-LEFT EMBEDDING */
#define SD_CD_PDF                          0x202C /* POP DIRECTIONAL FORMATTING */

/* Implicit directional marks. */
#define SD_CD_LRM                          0x200E /* LEFT-TO-RIGHT MARK */
#define SD_CD_RLM                          0x200F /* RIGHT-TO-LEFT MARK */
118 
/**
 * Line breaking characters as UTF-8 encoded strings.
 * NLF = one of SS_LB_DOS SS_LB_MAC SS_LB_UNIX SS_LB_NEL.
 */
#define SS_LB_DOS  "\r\n"
#define SS_LB_MAC  "\r"
#define SS_LB_UNIX "\n"
#define SS_LB_LS   "\342\200\250" /* U+2028 line separator */
#define SS_LB_PS   "\342\200\251" /* U+2029, PARAGRAPH BREAKING */
#define SS_LB_FF   "\f"

/* Directional override / embedding controls, UTF-8 encoded. */
#define SS_LB_LRO  "\342\200\255" /* U+202D left-to-right override */
#define SS_LB_RLO  "\342\200\256" /* U+202E right-to-left override */
#define SS_LB_LRE  "\342\200\252" /* U+202A left-to-right embedding */
#define SS_LB_RLE  "\342\200\253" /* U+202B right-to-left embedding */
#define SS_LB_PDF  "\342\200\254" /* U+202C pop directional format */
135 
/**
 * These line breaking chars are not supported here now.
 *
 * SS_LB_NEL is NEXT LINE (U+0085); its UTF-8 encoding is the two
 * bytes 0xC2 0x85, written in octal below. The previous value
 * "\702\102" used an out-of-range octal escape followed by the
 * letter 'B', so it never encoded NEL.
 */
#define SS_LB_NEL "\302\205"
#define SS_LB_P_VT "\013" /* vertical tab, PARAGRAPH BREAKING */
#define SS_LB_P_FF "\014" /* form feed, PARAGRAPH BREAKING */
142 
143 
144 extern const char* ssCharClass[SD_CC_MAX];
145 extern const char* ssBiDiClass[SD_BC_MAX];
146 
147 SD_CharClass getCharClass(SS_UCS4 in);
148 SD_BiDiClass getBiDiClass(SS_UCS4 in);
149 SS_UCS4 getMirroredCharacter (SS_UCS4 in);
150 
/*
 * Paragraph separator kinds. The enumerators are named after LF, CR,
 * CR LF and PS; SS_PS_None is the zero value.
 */
typedef enum
{
  SS_PS_None = 0,
  SS_PS_LF   = 1,
  SS_PS_CR   = 2,
  SS_PS_CRLF = 3,
  SS_PS_PS   = 4
} SS_ParaSep;
158 
159 
/* Embedding selector: none (the zero value), left or right. */
typedef enum
{
  SS_EmbedNone  = 0,
  SS_EmbedLeft  = 1,
  SS_EmbedRight = 2
} SS_Embedding;
163 
/*
 * Direction of a character, including whether it sits inside an
 * embedding or an override.
 */
typedef enum
{
  SS_DR_L  = 0, /* L-R character */
  SS_DR_R  = 1, /* R-L character */
  SS_DR_LE = 2, /* inside L embedded */
  SS_DR_RE = 3, /* inside R embedded */
  SS_DR_LO = 4, /* inside L override */
  SS_DR_RO = 5  /* inside R override */
} SS_DR_Dir;
172 
173 #endif /*SCharClass_h*/
174